woods 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +406 -0
  7. data/exe/woods-console +59 -0
  8. data/exe/woods-console-mcp +22 -0
  9. data/exe/woods-mcp +34 -0
  10. data/exe/woods-mcp-http +37 -0
  11. data/exe/woods-mcp-start +58 -0
  12. data/lib/generators/woods/install_generator.rb +32 -0
  13. data/lib/generators/woods/pgvector_generator.rb +37 -0
  14. data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
  15. data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
  16. data/lib/tasks/woods.rake +621 -0
  17. data/lib/tasks/woods_evaluation.rake +115 -0
  18. data/lib/woods/ast/call_site_extractor.rb +106 -0
  19. data/lib/woods/ast/method_extractor.rb +71 -0
  20. data/lib/woods/ast/node.rb +116 -0
  21. data/lib/woods/ast/parser.rb +614 -0
  22. data/lib/woods/ast.rb +6 -0
  23. data/lib/woods/builder.rb +200 -0
  24. data/lib/woods/cache/cache_middleware.rb +199 -0
  25. data/lib/woods/cache/cache_store.rb +264 -0
  26. data/lib/woods/cache/redis_cache_store.rb +116 -0
  27. data/lib/woods/cache/solid_cache_store.rb +111 -0
  28. data/lib/woods/chunking/chunk.rb +84 -0
  29. data/lib/woods/chunking/semantic_chunker.rb +295 -0
  30. data/lib/woods/console/adapters/cache_adapter.rb +58 -0
  31. data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
  32. data/lib/woods/console/adapters/job_adapter.rb +68 -0
  33. data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
  34. data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
  35. data/lib/woods/console/audit_logger.rb +75 -0
  36. data/lib/woods/console/bridge.rb +177 -0
  37. data/lib/woods/console/confirmation.rb +90 -0
  38. data/lib/woods/console/connection_manager.rb +173 -0
  39. data/lib/woods/console/console_response_renderer.rb +74 -0
  40. data/lib/woods/console/embedded_executor.rb +373 -0
  41. data/lib/woods/console/model_validator.rb +81 -0
  42. data/lib/woods/console/rack_middleware.rb +87 -0
  43. data/lib/woods/console/safe_context.rb +82 -0
  44. data/lib/woods/console/server.rb +612 -0
  45. data/lib/woods/console/sql_validator.rb +172 -0
  46. data/lib/woods/console/tools/tier1.rb +118 -0
  47. data/lib/woods/console/tools/tier2.rb +117 -0
  48. data/lib/woods/console/tools/tier3.rb +110 -0
  49. data/lib/woods/console/tools/tier4.rb +79 -0
  50. data/lib/woods/coordination/pipeline_lock.rb +109 -0
  51. data/lib/woods/cost_model/embedding_cost.rb +88 -0
  52. data/lib/woods/cost_model/estimator.rb +128 -0
  53. data/lib/woods/cost_model/provider_pricing.rb +67 -0
  54. data/lib/woods/cost_model/storage_cost.rb +52 -0
  55. data/lib/woods/cost_model.rb +22 -0
  56. data/lib/woods/db/migrations/001_create_units.rb +38 -0
  57. data/lib/woods/db/migrations/002_create_edges.rb +35 -0
  58. data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
  59. data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
  60. data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
  61. data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
  62. data/lib/woods/db/migrator.rb +73 -0
  63. data/lib/woods/db/schema_version.rb +73 -0
  64. data/lib/woods/dependency_graph.rb +236 -0
  65. data/lib/woods/embedding/indexer.rb +140 -0
  66. data/lib/woods/embedding/openai.rb +126 -0
  67. data/lib/woods/embedding/provider.rb +162 -0
  68. data/lib/woods/embedding/text_preparer.rb +112 -0
  69. data/lib/woods/evaluation/baseline_runner.rb +115 -0
  70. data/lib/woods/evaluation/evaluator.rb +139 -0
  71. data/lib/woods/evaluation/metrics.rb +79 -0
  72. data/lib/woods/evaluation/query_set.rb +148 -0
  73. data/lib/woods/evaluation/report_generator.rb +90 -0
  74. data/lib/woods/extracted_unit.rb +145 -0
  75. data/lib/woods/extractor.rb +1028 -0
  76. data/lib/woods/extractors/action_cable_extractor.rb +201 -0
  77. data/lib/woods/extractors/ast_source_extraction.rb +46 -0
  78. data/lib/woods/extractors/behavioral_profile.rb +309 -0
  79. data/lib/woods/extractors/caching_extractor.rb +261 -0
  80. data/lib/woods/extractors/callback_analyzer.rb +246 -0
  81. data/lib/woods/extractors/concern_extractor.rb +292 -0
  82. data/lib/woods/extractors/configuration_extractor.rb +219 -0
  83. data/lib/woods/extractors/controller_extractor.rb +404 -0
  84. data/lib/woods/extractors/database_view_extractor.rb +278 -0
  85. data/lib/woods/extractors/decorator_extractor.rb +253 -0
  86. data/lib/woods/extractors/engine_extractor.rb +223 -0
  87. data/lib/woods/extractors/event_extractor.rb +211 -0
  88. data/lib/woods/extractors/factory_extractor.rb +289 -0
  89. data/lib/woods/extractors/graphql_extractor.rb +892 -0
  90. data/lib/woods/extractors/i18n_extractor.rb +117 -0
  91. data/lib/woods/extractors/job_extractor.rb +374 -0
  92. data/lib/woods/extractors/lib_extractor.rb +218 -0
  93. data/lib/woods/extractors/mailer_extractor.rb +269 -0
  94. data/lib/woods/extractors/manager_extractor.rb +188 -0
  95. data/lib/woods/extractors/middleware_extractor.rb +133 -0
  96. data/lib/woods/extractors/migration_extractor.rb +469 -0
  97. data/lib/woods/extractors/model_extractor.rb +988 -0
  98. data/lib/woods/extractors/phlex_extractor.rb +252 -0
  99. data/lib/woods/extractors/policy_extractor.rb +191 -0
  100. data/lib/woods/extractors/poro_extractor.rb +229 -0
  101. data/lib/woods/extractors/pundit_extractor.rb +223 -0
  102. data/lib/woods/extractors/rails_source_extractor.rb +473 -0
  103. data/lib/woods/extractors/rake_task_extractor.rb +343 -0
  104. data/lib/woods/extractors/route_extractor.rb +181 -0
  105. data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
  106. data/lib/woods/extractors/serializer_extractor.rb +339 -0
  107. data/lib/woods/extractors/service_extractor.rb +217 -0
  108. data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
  109. data/lib/woods/extractors/shared_utility_methods.rb +281 -0
  110. data/lib/woods/extractors/state_machine_extractor.rb +398 -0
  111. data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
  112. data/lib/woods/extractors/validator_extractor.rb +211 -0
  113. data/lib/woods/extractors/view_component_extractor.rb +311 -0
  114. data/lib/woods/extractors/view_template_extractor.rb +261 -0
  115. data/lib/woods/feedback/gap_detector.rb +89 -0
  116. data/lib/woods/feedback/store.rb +119 -0
  117. data/lib/woods/filename_utils.rb +32 -0
  118. data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
  119. data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
  120. data/lib/woods/flow_assembler.rb +290 -0
  121. data/lib/woods/flow_document.rb +191 -0
  122. data/lib/woods/flow_precomputer.rb +102 -0
  123. data/lib/woods/formatting/base.rb +30 -0
  124. data/lib/woods/formatting/claude_adapter.rb +98 -0
  125. data/lib/woods/formatting/generic_adapter.rb +56 -0
  126. data/lib/woods/formatting/gpt_adapter.rb +64 -0
  127. data/lib/woods/formatting/human_adapter.rb +78 -0
  128. data/lib/woods/graph_analyzer.rb +374 -0
  129. data/lib/woods/mcp/bootstrapper.rb +96 -0
  130. data/lib/woods/mcp/index_reader.rb +394 -0
  131. data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
  132. data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
  133. data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
  134. data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
  135. data/lib/woods/mcp/server.rb +962 -0
  136. data/lib/woods/mcp/tool_response_renderer.rb +85 -0
  137. data/lib/woods/model_name_cache.rb +51 -0
  138. data/lib/woods/notion/client.rb +217 -0
  139. data/lib/woods/notion/exporter.rb +219 -0
  140. data/lib/woods/notion/mapper.rb +40 -0
  141. data/lib/woods/notion/mappers/column_mapper.rb +57 -0
  142. data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
  143. data/lib/woods/notion/mappers/model_mapper.rb +161 -0
  144. data/lib/woods/notion/mappers/shared.rb +22 -0
  145. data/lib/woods/notion/rate_limiter.rb +68 -0
  146. data/lib/woods/observability/health_check.rb +79 -0
  147. data/lib/woods/observability/instrumentation.rb +34 -0
  148. data/lib/woods/observability/structured_logger.rb +57 -0
  149. data/lib/woods/operator/error_escalator.rb +81 -0
  150. data/lib/woods/operator/pipeline_guard.rb +92 -0
  151. data/lib/woods/operator/status_reporter.rb +80 -0
  152. data/lib/woods/railtie.rb +38 -0
  153. data/lib/woods/resilience/circuit_breaker.rb +99 -0
  154. data/lib/woods/resilience/index_validator.rb +167 -0
  155. data/lib/woods/resilience/retryable_provider.rb +108 -0
  156. data/lib/woods/retrieval/context_assembler.rb +261 -0
  157. data/lib/woods/retrieval/query_classifier.rb +133 -0
  158. data/lib/woods/retrieval/ranker.rb +277 -0
  159. data/lib/woods/retrieval/search_executor.rb +316 -0
  160. data/lib/woods/retriever.rb +152 -0
  161. data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
  162. data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
  163. data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
  164. data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
  165. data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
  166. data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
  167. data/lib/woods/ruby_analyzer.rb +87 -0
  168. data/lib/woods/session_tracer/file_store.rb +104 -0
  169. data/lib/woods/session_tracer/middleware.rb +143 -0
  170. data/lib/woods/session_tracer/redis_store.rb +106 -0
  171. data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
  172. data/lib/woods/session_tracer/session_flow_document.rb +223 -0
  173. data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
  174. data/lib/woods/session_tracer/store.rb +81 -0
  175. data/lib/woods/storage/graph_store.rb +120 -0
  176. data/lib/woods/storage/metadata_store.rb +196 -0
  177. data/lib/woods/storage/pgvector.rb +195 -0
  178. data/lib/woods/storage/qdrant.rb +205 -0
  179. data/lib/woods/storage/vector_store.rb +167 -0
  180. data/lib/woods/temporal/json_snapshot_store.rb +245 -0
  181. data/lib/woods/temporal/snapshot_store.rb +345 -0
  182. data/lib/woods/token_utils.rb +19 -0
  183. data/lib/woods/version.rb +5 -0
  184. data/lib/woods.rb +246 -0
  185. metadata +270 -0
@@ -0,0 +1,77 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast/parser'
4
+ require_relative '../ast/call_site_extractor'
5
+
6
+ module Woods
7
+ module RubyAnalyzer
8
+ # Annotates existing ExtractedUnit objects with data transformation metadata.
9
+ #
10
+ # Conservative v1: detects common data transformation patterns by scanning
11
+ # for specific method calls that indicate construction, serialization, or
12
+ # deserialization.
13
+ #
14
+ # @example
15
+ # analyzer = RubyAnalyzer::DataFlowAnalyzer.new
16
+ # analyzer.annotate(units)
17
+ # units.first.metadata[:data_transformations]
18
+ # #=> [{ method: "to_json", category: :serialization, receiver: "payload", line: 5 }]
19
+ #
20
class DataFlowAnalyzer
  CONSTRUCTION_METHODS = %w[new].freeze
  SERIALIZATION_METHODS = %w[to_h to_json to_a serialize as_json].freeze
  DESERIALIZATION_METHODS = %w[from_json parse].freeze

  # Lookup table from method name to transformation category, built from
  # the three method lists above (construction first, deserialization last).
  CATEGORY_BY_METHOD = {
    construction: CONSTRUCTION_METHODS,
    serialization: SERIALIZATION_METHODS,
    deserialization: DESERIALIZATION_METHODS
  }.flat_map { |category, names| names.map { |name| [name, category] } }.to_h.freeze

  # @param parser [Ast::Parser, nil] Parser to use; a default one is built when nil
  def initialize(parser: nil)
    @parser = parser || Ast::Parser.new
    @call_site_extractor = Ast::CallSiteExtractor.new
  end

  # Attach data transformation metadata to every unit that has source code.
  #
  # Each such unit gets a :data_transformations entry written into its
  # metadata hash; units without source code are left untouched.
  #
  # @param units [Array<ExtractedUnit>] Units to annotate (mutated in place)
  # @return [Array<ExtractedUnit>] The same collection that was passed in
  def annotate(units)
    units.each do |unit|
      code = unit.source_code
      unit.metadata[:data_transformations] = detect_transformations(code) if code
    end
  end

  private

  # Parse the source, collect its call sites, and keep the ones whose method
  # name belongs to a known category.
  #
  # @param source [String] Ruby source code of a single unit
  # @return [Array<Hash>] One hash per match with :method, :category,
  #   :receiver and :line keys; empty when extraction fails
  def detect_transformations(source)
    call_sites = @call_site_extractor.extract(@parser.parse(source))

    call_sites.each_with_object([]) do |site, found|
      kind = categorize(site[:method_name])
      next unless kind

      found << {
        method: site[:method_name],
        category: kind,
        receiver: site[:receiver],
        line: site[:line]
      }
    end
  rescue Woods::ExtractionError
    # Extraction failures are treated as "no transformations found".
    []
  end

  # @param method_name [String] Name of the called method
  # @return [Symbol, nil] The category, or nil for uncategorized methods
  def categorize(method_name)
    CATEGORY_BY_METHOD[method_name]
  end
end
76
+ end
77
+ end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Woods
4
+ module RubyAnalyzer
5
+ # Shared helper for building fully qualified names from a name and namespace stack.
6
module FqnBuilder
  private

  # Build a fully qualified name by prefixing the enclosing namespaces.
  #
  # @param name [String] Unqualified constant name
  # @param namespace_stack [Array<String>] Enclosing namespaces, outermost first
  # @return [String] The name itself when there is no enclosing namespace,
  #   otherwise the namespaces and the name joined with "::"
  def build_fqn(name, namespace_stack)
    return name if namespace_stack.empty?

    "#{namespace_stack.join('::')}::#{name}"
  end
end
17
+ end
18
+ end
@@ -0,0 +1,280 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
+ module Woods
6
+ module RubyAnalyzer
7
+ # Renders Mermaid-format diagrams from extracted units, dependency graphs,
8
+ # and graph analysis data.
9
+ #
10
+ # Produces valid Mermaid markdown strings for call graphs, dependency maps,
11
+ # dataflow charts, and combined architecture documents.
12
+ #
13
+ # @example Rendering a call graph
14
+ # renderer = MermaidRenderer.new
15
+ # units = RubyAnalyzer.analyze(paths: ["lib/"])
16
+ # puts renderer.render_call_graph(units)
17
+ #
18
class MermaidRenderer
  # Render a call graph from extracted units showing method call relationships.
  #
  # Every unit becomes a node, and every dependency that names a target
  # becomes a target node plus an edge. The edge is labelled with the
  # dependency's :via value when one is present. Nodes and edges are
  # de-duplicated by their sanitized IDs.
  #
  # @param units [Array<ExtractedUnit>, nil] Units to render
  # @return [String] Mermaid graph TD markdown
  def render_call_graph(units)
    lines = ['graph TD']
    return lines.join("\n") if units.nil? || units.empty?

    seen_nodes = Set.new
    seen_edges = Set.new

    units.each do |unit|
      node_id = sanitize_id(unit.identifier)
      lines << " #{node_id}[\"#{escape_label(unit.identifier)}\"]" if seen_nodes.add?(node_id)

      (unit.dependencies || []).each do |dep|
        target = dep[:target] || dep['target']
        next unless target

        target_id = sanitize_id(target)
        lines << " #{target_id}[\"#{escape_label(target)}\"]" if seen_nodes.add?(target_id)

        via = dep[:via] || dep['via']
        edge_key = "#{node_id}->#{target_id}"
        next unless seen_edges.add?(edge_key)

        lines << if via
                   " #{node_id} -->|#{via}| #{target_id}"
                 else
                   " #{node_id} --> #{target_id}"
                 end
      end
    end

    lines.join("\n")
  end

  # Render a dependency map from graph data (as returned by DependencyGraph#to_h).
  #
  # Nodes are grouped into one subgraph per type; edges are drawn only to
  # targets that are present in the node table.
  #
  # @param graph_data [Hash, nil] Serialized graph data with :nodes and :edges keys
  #   (symbol or string keys are both accepted)
  # @return [String] Mermaid graph TD markdown
  def render_dependency_map(graph_data)
    lines = ['graph TD']
    return lines.join("\n") unless graph_data

    nodes = graph_data[:nodes] || graph_data['nodes'] || {}
    edges = graph_data[:edges] || graph_data['edges'] || {}

    return lines.join("\n") if nodes.empty?

    # Group nodes by type for subgraph rendering
    by_type = {}
    nodes.each do |identifier, meta|
      type = (meta[:type] || meta['type'])&.to_sym || :unknown
      by_type[type] ||= []
      by_type[type] << identifier
    end

    # Render subgraphs per type
    by_type.each do |type, identifiers|
      lines << " subgraph #{type}"
      identifiers.each do |id|
        node_id = sanitize_id(id)
        lines << " #{node_id}[\"#{escape_label(id)}\"]"
      end
      lines << ' end'
    end

    # Render edges, skipping targets that have no node and repeated pairs
    seen_edges = Set.new
    edges.each do |source, targets|
      Array(targets).each do |target|
        next unless nodes.key?(target)

        edge_key = "#{sanitize_id(source)}->#{sanitize_id(target)}"
        next unless seen_edges.add?(edge_key)

        lines << " #{sanitize_id(source)} --> #{sanitize_id(target)}"
      end
    end

    lines.join("\n")
  end

  # Render a dataflow diagram from units that have data_transformations metadata.
  #
  # Each unit with at least one transformation becomes a shaped node (see
  # #dataflow_shape). Every transformation that names a receiver adds a
  # receiver node and an edge labelled "category: method".
  #
  # @param units [Array<ExtractedUnit>, nil] Units with :data_transformations metadata
  # @return [String] Mermaid flowchart TD markdown
  def render_dataflow(units)
    lines = ['flowchart TD']
    return lines.join("\n") if units.nil? || units.empty?

    seen_nodes = Set.new

    units.each do |unit|
      # Tolerate units without metadata, mirroring the nil guard on dependencies.
      metadata = unit.metadata || {}
      transformations = metadata[:data_transformations] || metadata['data_transformations']
      next unless transformations.is_a?(Array) && transformations.any?

      node_id = sanitize_id(unit.identifier)
      if seen_nodes.add?(node_id)
        shape = dataflow_shape(transformations)
        lines << " #{node_id}#{shape}"
      end

      transformations.each do |t|
        receiver = t[:receiver] || t['receiver']
        next unless receiver

        receiver_id = sanitize_id(receiver)
        category = (t[:category] || t['category'])&.to_s
        method_name = t[:method] || t['method']

        lines << " #{receiver_id}[\"#{escape_label(receiver)}\"]" if seen_nodes.add?(receiver_id)

        label = [category, method_name].compact.join(': ')
        lines << " #{node_id} -->|#{label}| #{receiver_id}"
      end
    end

    lines.join("\n")
  end

  # Render a combined architecture document with all three diagram types.
  #
  # Returns a markdown document with headers and fenced Mermaid code blocks
  # for call graph, dependency map, and dataflow diagrams, plus a summary
  # of graph analysis findings.
  #
  # @param units [Array<ExtractedUnit>] Extracted units
  # @param graph_data [Hash] Serialized dependency graph data
  # @param analysis [Hash, nil] Graph analysis report from GraphAnalyzer#analyze
  # @return [String] Combined markdown document
  def render_architecture(units, graph_data, analysis)
    sections = []

    sections << '# Architecture Overview'
    sections << ''

    # Call graph
    sections << '## Call Graph'
    sections << ''
    sections << '```mermaid'
    sections << render_call_graph(units)
    sections << '```'
    sections << ''

    # Dependency map
    sections << '## Dependency Map'
    sections << ''
    sections << '```mermaid'
    sections << render_dependency_map(graph_data)
    sections << '```'
    sections << ''

    # Dataflow
    sections << '## Data Flow'
    sections << ''
    sections << '```mermaid'
    sections << render_dataflow(units)
    sections << '```'
    sections << ''

    # Analysis summary
    sections << '## Analysis Summary'
    sections << ''
    sections.concat(render_stats_section(analysis))

    sections.join("\n")
  end

  private

  # Render the Analysis Summary section lines for a given analysis hash.
  #
  # @param analysis [Hash, nil] Graph analysis report from GraphAnalyzer#analyze
  # @return [Array<String>] Lines to append to the architecture document
  #   (empty when no analysis is given)
  def render_stats_section(analysis)
    lines = []
    return lines unless analysis

    stats = analysis[:stats] || analysis['stats'] || {}
    lines << "- **Orphans:** #{stats[:orphan_count] || stats['orphan_count'] || 0}"
    lines << "- **Dead ends:** #{stats[:dead_end_count] || stats['dead_end_count'] || 0}"
    lines << "- **Hubs:** #{stats[:hub_count] || stats['hub_count'] || 0}"
    lines << "- **Cycles:** #{stats[:cycle_count] || stats['cycle_count'] || 0}"

    hubs = analysis[:hubs] || analysis['hubs'] || []
    lines.concat(render_hubs_section(hubs))

    cycles = analysis[:cycles] || analysis['cycles'] || []
    if cycles.any?
      lines << ''
      lines << '### Cycles'
      lines << ''
      cycles.each { |cycle| lines << "- #{cycle.join(' -> ')}" }
    end

    lines
  end

  # Render the Top Hubs subsection lines (at most the first five hubs).
  #
  # @param hubs [Array<Hash>] Hub entries with :identifier and :dependent_count keys
  # @return [Array<String>] Lines to append, or empty array if no hubs
  def render_hubs_section(hubs)
    return [] unless hubs.any?

    lines = ['', '### Top Hubs', '']
    hubs.first(5).each do |hub|
      id = hub[:identifier] || hub['identifier']
      count = hub[:dependent_count] || hub['dependent_count']
      lines << "- #{id} (#{count} dependents)"
    end
    lines
  end

  # Sanitize an identifier for use as a Mermaid node ID.
  #
  # Replaces every character other than ASCII letters, digits and underscore
  # with an underscore.
  #
  # @param identifier [#to_s] Raw identifier
  # @return [String] Safe Mermaid node ID
  def sanitize_id(identifier)
    identifier.to_s.gsub(/[^a-zA-Z0-9_]/, '_')
  end

  # Escape a label string for use inside Mermaid quoted labels.
  #
  # @param label [#to_s] Raw label text
  # @return [String] Label with double quotes replaced by &quot;
  def escape_label(label)
    label.to_s.gsub('"', '&quot;')
  end

  # Determine Mermaid node shape based on dominant transformation category.
  #
  # Construction wins over serialization, which wins over deserialization.
  # A construction node is labelled with the method of the first
  # construction transformation (falling back to "new"); the other shapes
  # carry a fixed label.
  #
  # @param transformations [Array<Hash>] Transformation metadata
  #   (symbol or string keys)
  # @return [String] Mermaid shape syntax
  def dataflow_shape(transformations)
    categories = transformations.map { |t| transformation_category(t) }

    if categories.include?(:construction)
      # Label from the construction entry itself, not from whichever
      # transformation happens to be listed first.
      constructor = transformations.find { |t| transformation_category(t) == :construction }
      method_name = constructor[:method] || constructor['method'] || 'new'
      "([\"#{escape_label(method_name)}\"])"
    elsif categories.include?(:serialization)
      '[/"serialization"/]'
    elsif categories.include?(:deserialization)
      '[\"deserialization"\\]'
    else
      '["data"]'
    end
  end

  # Read a transformation's category, accepting symbol or string keys.
  #
  # @param transformation [Hash] Single transformation entry
  # @return [Symbol, nil] Category as a symbol, or nil when absent
  def transformation_category(transformation)
    (transformation[:category] || transformation['category'])&.to_sym
  end
end
279
+ end
280
+ end
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../ast/parser'
4
+ require_relative '../ast/method_extractor'
5
+ require_relative '../ast/call_site_extractor'
6
+ require_relative '../extracted_unit'
7
+ require_relative 'fqn_builder'
8
+
9
+ module Woods
10
+ module RubyAnalyzer
11
+ # Extracts method-level units from Ruby source code.
12
+ #
13
+ # For each class/module, extracts methods as ExtractedUnit objects with type
14
+ # :ruby_method. Includes visibility, parameters, call graph, and dependencies.
15
+ #
16
+ # @example
17
+ # analyzer = RubyAnalyzer::MethodAnalyzer.new
18
+ # units = analyzer.analyze(source: File.read(path), file_path: path)
19
+ # units.first.identifier #=> "MyClass#my_method"
20
+ #
21
class MethodAnalyzer
  include FqnBuilder

  # @param parser [Ast::Parser, nil] Parser to use; a default one is built when nil
  def initialize(parser: nil)
    @parser = parser || Ast::Parser.new
    @call_site_extractor = Ast::CallSiteExtractor.new
  end

  # Parse the given source and return one unit per method found inside a
  # class or module.
  #
  # @param source [String] Ruby source code
  # @param file_path [String] Path recorded on every produced unit
  # @return [Array<ExtractedUnit>] Extracted method units
  def analyze(source:, file_path:)
    [].tap do |units|
      extract_methods_from_tree(@parser.parse(source), source, file_path, [], units)
    end
  end

  private

  # Walk the tree until a class or module is found, then hand that
  # container over to #process_container_methods. Anything that is not an
  # Ast::Node is ignored.
  def extract_methods_from_tree(node, source, file_path, namespace_stack, units)
    return unless node.is_a?(Ast::Node)

    if %i[class module].include?(node.type)
      process_container_methods(node, node.type, source, file_path, namespace_stack, units)
    else
      (node.children || []).each do |descendant|
        extract_methods_from_tree(descendant, source, file_path, namespace_stack, units)
      end
    end
  end

  # Collect the methods defined directly in a class/module body, tracking
  # the visibility section in effect, and recurse into nested containers.
  def process_container_methods(node, type, source, file_path, namespace_stack, units)
    container_name = node.method_name
    container_fqn = build_fqn(container_name, namespace_stack)
    # The leading children are skipped: two for a class, one for a module.
    leading = type == :class ? 2 : 1
    members = (node.children || []).drop(leading)

    tracker = VisibilityTracker.new
    nested_namespace = namespace_stack + [container_name]

    members.grep(Ast::Node).each do |member|
      case member.type
      when :send then tracker.process_send(member)
      when :def then units << build_method_unit(member, container_fqn, '#', tracker.current, file_path)
      when :defs then units << build_method_unit(member, container_fqn, '.', :public, file_path)
      when :class, :module
        extract_methods_from_tree(member, source, file_path, nested_namespace, units)
      end
    end
  end

  # Assemble the ExtractedUnit for one method definition.
  #
  # @param separator [String] '#' for instance methods, '.' for singleton methods
  # @return [ExtractedUnit] Unit of type :ruby_method
  def build_method_unit(method_node, class_fqn, separator, visibility, file_path)
    method_identifier = "#{class_fqn}#{separator}#{method_node.method_name}"
    calls = extract_call_graph(method_node)
    deps = build_dependencies(calls)

    ExtractedUnit.new(type: :ruby_method, identifier: method_identifier, file_path: file_path).tap do |unit|
      unit.namespace = class_fqn
      unit.source_code = method_node.source
      unit.metadata = { visibility: visibility, call_graph: calls }
      unit.dependencies = deps
    end
  end

  # List the call sites of a method whose receiver starts with a capital
  # letter (likely a class or constant).
  #
  # @return [Array<Hash>] Entries with :target, :method and :line keys
  def extract_call_graph(method_node)
    @call_site_extractor.extract(method_node).filter_map do |site|
      receiver = site[:receiver]
      next unless receiver && receiver.match?(/\A[A-Z]/)

      { target: receiver, method: site[:method_name], line: site[:line] }
    end
  end

  # Reduce a call graph to one dependency entry per distinct target.
  def build_dependencies(call_graph)
    call_graph.uniq { |entry| entry[:target] }.map do |entry|
      { type: :ruby_class, target: entry[:target], via: :method_call }
    end
  end

  # Tracks visibility state as we walk through class body statements.
  class VisibilityTracker
    VISIBILITY_METHODS = %w[private protected public].freeze

    attr_reader :current

    def initialize
      @current = :public
    end

    # Update the current visibility when the node is a bare
    # private/protected/public call (no receiver, no arguments).
    def process_send(send_node)
      modifier = send_node.method_name
      return unless modifier && VISIBILITY_METHODS.include?(modifier)
      return if send_node.receiver

      args = send_node.arguments
      return if args && !args.empty?

      @current = modifier.to_sym
    end
  end
end
142
+ end
143
+ end
@@ -0,0 +1,143 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../extracted_unit'
4
+
5
+ module Woods
6
+ module RubyAnalyzer
7
+ # Enriches ExtractedUnit objects with runtime trace data.
8
+ #
9
+ # Two modes:
10
+ # - Recording: wraps a block with TracePoint to capture method calls
11
+ # - Merging: enriches existing units with previously collected trace data
12
+ #
13
+ # @example Recording
14
+ # trace_data = TraceEnricher.record { MyApp.run }
15
+ #
16
+ # @example Merging
17
+ # TraceEnricher.merge(units: units, trace_data: trace_data)
18
+ #
19
class TraceEnricher
  # Record method calls during block execution using TracePoint.
  #
  # Captures one hash per :call and :return event with :class_name,
  # :method_name, :event, :path, :line, :caller_class, :caller_method and
  # :return_class (nil for :call events).
  #
  # @yield Block to trace
  # @return [Array<Hash>] Collected trace events (empty when no block is given)
  def self.record(&block)
    traces = []
    # Without a block, TracePoint#enable would switch tracing on and leave it
    # on indefinitely, so there is nothing to record and nothing to enable.
    return traces unless block

    trace = TracePoint.new(:call, :return) do |tp|
      traces << {
        # Singleton classes have no name; their #to_s form ("#<Class:Foo>") is kept.
        class_name: tp.defined_class&.name || tp.defined_class.to_s,
        method_name: tp.method_id.to_s,
        event: tp.event.to_s,
        path: tp.path,
        line: tp.lineno,
        caller_class: extract_caller_class(tp),
        caller_method: extract_caller_method(tp),
        return_class: tp.event == :return ? safe_return_class(tp) : nil
      }
    end

    trace.enable(&block)
    traces
  end

  # Merge trace data into existing units.
  #
  # Mutates each matching unit's metadata by adding a :trace key with
  # call count, callers, and return types. Units identified as
  # "Class#method" match traces whose class_name is "Class"; units
  # identified as "Class.method" additionally match traces recorded against
  # the singleton class ("#<Class:Class>"), which is how .record reports
  # class methods.
  #
  # @param units [Array<ExtractedUnit>] Units to enrich
  # @param trace_data [Array<Hash>, nil] Trace events (from recording or JSON
  #   fixture; symbol or string keys)
  # @return [Array<ExtractedUnit>] The same units, now enriched
  def self.merge(units:, trace_data:)
    return units if trace_data.nil? || trace_data.empty?

    # Index traces by class_name + method_name
    grouped = group_traces(trace_data)

    units.each do |unit|
      # fetch (not []) so the grouping hash's default block never adds keys
      traces = lookup_keys(unit.identifier).flat_map { |key| grouped.fetch(key, []) }
      next if traces.empty?

      calls = traces.select { |t| fetch_key(t, :event) == 'call' }
      returns = traces.select { |t| fetch_key(t, :event) == 'return' }

      callers = calls.filter_map do |t|
        caller_class = fetch_key(t, :caller_class)
        caller_method = fetch_key(t, :caller_method)
        next unless caller_class

        { 'caller_class' => caller_class, 'caller_method' => caller_method }
      end

      return_types = returns.filter_map do |t|
        fetch_key(t, :return_class)
      end.uniq

      unit.metadata[:trace] = {
        call_count: calls.size,
        callers: callers,
        return_types: return_types
      }
    end
  end

  class << self
    private

    # Read a value stored under either the string or the symbol form of key.
    def fetch_key(hash, key)
      hash[key.to_s] || hash[key.to_sym]
    end

    # Group trace events under "class_name#method_name"; events missing
    # either part are skipped.
    def group_traces(trace_data)
      grouped = Hash.new { |h, k| h[k] = [] }
      trace_data.each do |trace|
        class_name = fetch_key(trace, :class_name)
        method_name = fetch_key(trace, :method_name)
        next unless class_name && method_name

        key = "#{class_name}##{method_name}"
        grouped[key] << trace
      end
      grouped
    end

    # Grouping keys under which traces for a unit identifier may be stored.
    #
    # @param identifier [String] "Class#method" or "Class.method"
    # @return [Array<String>] Candidate keys; empty when the identifier has
    #   neither separator
    def lookup_keys(identifier)
      class_name, method_name = parse_identifier(identifier)
      return [] unless class_name && method_name

      keys = ["#{class_name}##{method_name}"]
      # Class methods are recorded against the singleton class, whose
      # class_name is its #to_s form rather than the plain class name.
      keys << "#<Class:#{class_name}>##{method_name}" unless identifier.include?('#')
      keys
    end

    # Split an identifier into class and method parts.
    #
    # @return [Array<String>, nil] [class_name, method_name], or nil when the
    #   identifier contains neither '#' nor '.'
    def parse_identifier(identifier)
      # Handle both "Class#method" and "Class.method" formats
      if identifier.include?('#')
        identifier.split('#', 2)
      elsif identifier.include?('.')
        identifier.split('.', 2)
      end
    end

    # NOTE(review): tp.binding.receiver is the traced frame's own self, so
    # this reports the class of the object receiving the call rather than
    # the class of the calling frame -- confirm that is what consumers of
    # :caller_class expect.
    def extract_caller_class(tp)
      binding_obj = tp.binding
      receiver = binding_obj.receiver
      receiver.is_a?(Class) || receiver.is_a?(Module) ? receiver.name : receiver.class.name
    rescue StandardError
      nil
    end

    def extract_caller_method(_tp)
      # TracePoint doesn't directly expose caller method,
      # but we can get it from the call stack
      caller_locations(3, 1)&.first&.label
    rescue StandardError
      nil
    end

    # Class name of the returned value; nil if it cannot be determined.
    def safe_return_class(tp)
      tp.return_value.class.name
    rescue StandardError
      nil
    end
  end
end
142
+ end
143
+ end