prism-merge 1.0.3 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,120 +1,86 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "prism"
4
-
5
3
  module Prism
6
4
  module Merge
7
- # Comprehensive metadata capture for a Ruby file being merged.
8
- # Tracks Prism parse result, line-to-node mapping, comment associations,
9
- # structural signatures, and sequential anchor lines for merge alignment.
5
+ # Simplified file analysis using Prism's native comment attachment.
6
+ # This version leverages parse_result.attach_comments! to automatically
7
+ # attach comments to nodes, eliminating the need for manual comment tracking
8
+ # and the CommentNode class.
9
+ #
10
+ # Key improvements over V1:
11
+ # - Uses Prism's native node.location.leading_comments and trailing_comments
12
+ # - No manual comment tracking or CommentNode class
13
+ # - Simpler freeze block extraction via comment scanning
14
+ # - Better performance (one attach_comments! call vs multiple iterations)
15
+ # - Enhanced freeze block validation (detects partial nodes and non-class/module contexts)
10
16
  class FileAnalysis
11
- # Regex pattern for freeze block start marker.
12
- # Matches comments like: # kettle-dev:freeze
13
- # Case-insensitive to allow variations like FREEZE or Freeze
14
- FREEZE_START = /#\s*kettle-dev:freeze/i
15
-
16
- # Regex pattern for freeze block end marker.
17
- # Matches comments like: # kettle-dev:unfreeze
18
- # Case-insensitive to allow variations like UNFREEZE or Unfreeze
19
- FREEZE_END = /#\s*kettle-dev:unfreeze/i
20
-
21
- # Combined regex pattern for matching complete freeze blocks.
22
- # Captures content between freeze/unfreeze markers (inclusive).
23
- # Used to identify sections that should always be preserved from destination.
17
+ # Default freeze token for identifying freeze blocks
18
+ DEFAULT_FREEZE_TOKEN = "prism-merge"
19
+
20
+ # @return [Prism::ParseResult] The parse result from Prism
21
+ attr_reader :parse_result
22
+
23
+ # @return [String] Source code content
24
+ attr_reader :source
25
+
26
+ # @return [Array<String>] Lines of source code
27
+ attr_reader :lines
28
+
29
+ # @return [String] Token used to mark freeze blocks
30
+ attr_reader :freeze_token
31
+
32
+ # @return [Proc, nil] Custom signature generator
33
+ attr_reader :signature_generator
34
+
35
+ # Initialize file analysis with Prism's native comment handling
24
36
  #
25
- # @example Freeze block in Ruby code
26
- # # kettle-dev:freeze
27
- # CUSTOM_CONFIG = { key: "secret" }
28
- # # kettle-dev:unfreeze
29
- FREEZE_BLOCK = Regexp.new("(#{FREEZE_START.source}).*?(#{FREEZE_END.source})", Regexp::IGNORECASE | Regexp::MULTILINE)
30
-
31
- attr_reader :content, :parse_result, :lines, :statements, :freeze_blocks
32
-
33
- # @param content [String] Ruby source code to analyze
34
- # @param signature_generator [Proc, nil] Optional proc to generate node signatures
35
- def initialize(content, signature_generator: nil)
36
- @content = content
37
- @lines = content.lines
38
- @parse_result = Prism.parse(content)
39
- @statements = extract_statements
40
- @freeze_blocks = extract_freeze_blocks
37
+ # @param source [String] Ruby source code to analyze
38
+ # @param freeze_token [String] Token for freeze block markers (default: "prism-merge")
39
+ # @param signature_generator [Proc, nil] Custom signature generator
40
+ def initialize(source, freeze_token: DEFAULT_FREEZE_TOKEN, signature_generator: nil)
41
+ @source = source
42
+ @lines = source.lines
43
+ @freeze_token = freeze_token
41
44
  @signature_generator = signature_generator
42
- @line_to_node_map = nil
43
- @node_to_line_map = nil
44
- @comment_map = nil
45
- end
45
+ @parse_result = Prism.parse(source)
46
46
 
47
- # Check if parsing was successful
48
- # @return [Boolean]
49
- def valid?
50
- @parse_result.success?
51
- end
47
+ # Use Prism's native comment attachment
48
+ # On JRuby, the Comments class may not be loaded yet, so we need to require it
49
+ attach_comments_safely!
52
50
 
53
- # Get all top-level statement nodes
54
- # @return [Array<Prism::Node>]
55
- def extract_statements
56
- return [] unless valid?
57
- body = @parse_result.value.statements
58
- return [] unless body
51
+ # Extract and validate structure
52
+ @statements = extract_and_integrate_all_nodes
59
53
 
60
- if body.is_a?(Prism::StatementsNode)
61
- body.body.compact
62
- else
63
- [body].compact
64
- end
54
+ DebugLogger.debug("FileAnalysis initialized", {
55
+ signature_generator: signature_generator ? "custom" : "default",
56
+ statements_count: @statements.size,
57
+ freeze_blocks: freeze_blocks.size,
58
+ }) if defined?(DebugLogger)
65
59
  end
66
60
 
67
- # Extract freeze block information
68
- # @return [Array<Hash>] Array of freeze block metadata
69
- def extract_freeze_blocks
70
- return [] unless content.match?(FREEZE_START)
71
-
72
- blocks = []
73
- content.to_enum(:scan, FREEZE_BLOCK).each do
74
- match = Regexp.last_match
75
- next unless match
76
-
77
- start_idx = match.begin(0)
78
- end_idx = match.end(0)
79
- segment = match[0]
80
- start_line = content[0...start_idx].count("\n") + 1
81
- end_line = content[0...end_idx].count("\n") + 1
82
-
83
- blocks << {
84
- range: start_idx...end_idx,
85
- line_range: start_line..end_line,
86
- text: segment,
87
- start_marker: segment&.lines&.first&.strip,
88
- }
89
- end
90
-
91
- blocks
61
+ # Check if parse was successful
62
+ # @return [Boolean]
63
+ def valid?
64
+ @parse_result.success?
92
65
  end
93
66
 
94
- # Build mapping from line numbers to AST nodes
95
- # @return [Hash<Integer, Array<Prism::Node>>] Line number => nodes on that line
96
- def line_to_node_map
97
- @line_to_node_map ||= build_line_to_node_map
98
- end
67
+ # Get all statements (code nodes outside freeze blocks + FreezeNodes)
68
+ # @return [Array<Prism::Node, FreezeNode>]
69
+ attr_reader :statements
99
70
 
100
- # Build mapping from nodes to line ranges
101
- # @return [Hash<Prism::Node, Range>] Node => line range
102
- def node_to_line_map
103
- @node_to_line_map ||= build_node_to_line_map
71
+ # Get freeze blocks
72
+ # @return [Array<FreezeNode>]
73
+ def freeze_blocks
74
+ @statements.select { |node| node.is_a?(FreezeNode) }
104
75
  end
105
76
 
106
77
  # Get nodes with their associated comments and metadata
78
+ # Comments are now accessed via Prism's native node.location API
107
79
  # @return [Array<Hash>] Array of node info hashes
108
80
  def nodes_with_comments
109
81
  @nodes_with_comments ||= extract_nodes_with_comments
110
82
  end
111
83
 
112
- # Get comment map by line number
113
- # @return [Hash<Integer, Array<Prism::Comment>>] Line number => comments
114
- def comment_map
115
- @comment_map ||= build_comment_map
116
- end
117
-
118
84
  # Get structural signature for a statement at given index
119
85
  # @param index [Integer] Statement index
120
86
  # @return [Array, nil] Signature array
@@ -127,25 +93,33 @@ module Prism
127
93
  # @param node [Prism::Node] Node to generate signature for
128
94
  # @return [Array, nil] Signature array
129
95
  def generate_signature(node)
130
- if @signature_generator
96
+ result = if @signature_generator
131
97
  @signature_generator.call(node)
132
98
  else
133
- default_signature(node)
99
+ compute_node_signature(node)
134
100
  end
101
+
102
+ DebugLogger.debug("Generated signature", {
103
+ node_type: node.class.name.split("::").last,
104
+ signature: result,
105
+ generator: @signature_generator ? "custom" : "default",
106
+ }) if defined?(DebugLogger) && result
107
+
108
+ result
135
109
  end
136
110
 
137
111
  # Check if a line is within a freeze block
138
112
  # @param line_num [Integer] 1-based line number
139
113
  # @return [Boolean]
140
114
  def in_freeze_block?(line_num)
141
- freeze_blocks.any? { |block| block[:line_range].cover?(line_num) }
115
+ freeze_blocks.any? { |freeze_node| freeze_node.location.cover?(line_num) }
142
116
  end
143
117
 
144
118
  # Get the freeze block containing the given line, if any
145
119
  # @param line_num [Integer] 1-based line number
146
- # @return [Hash, nil] Freeze block metadata or nil
120
+ # @return [FreezeNode, nil] Freeze block node or nil
147
121
  def freeze_block_at(line_num)
148
- freeze_blocks.find { |block| block[:line_range].cover?(line_num) }
122
+ freeze_blocks.find { |freeze_node| freeze_node.location.cover?(line_num) }
149
123
  end
150
124
 
151
125
  # Get normalized line content (stripped)
@@ -166,131 +140,228 @@ module Prism
166
140
 
167
141
  private
168
142
 
169
- def build_line_to_node_map
170
- map = Hash.new { |h, k| h[k] = [] }
171
- return map unless valid?
172
-
173
- statements.each do |node|
174
- start_line = node.location.start_line
175
- end_line = node.location.end_line
176
- (start_line..end_line).each do |line_num|
177
- map[line_num] << node
178
- end
143
+ # Safely attach comments to nodes, handling JRuby compatibility issues
144
+ # On JRuby, the Prism::ParseResult::Comments class may not be autoloaded,
145
+ # so we need to explicitly require it
146
+ def attach_comments_safely!
147
+ @parse_result.attach_comments!
148
+ rescue NameError => e
149
+ if e.message.include?("Comments")
150
+ # On JRuby, the Comments class needs to be explicitly required
151
+ require "prism/parse_result/comments"
152
+ @parse_result.attach_comments!
153
+ else
154
+ raise
179
155
  end
180
-
181
- map
182
156
  end
183
157
 
184
- def build_node_to_line_map
185
- map = {}
186
- return map unless valid?
158
+ # Extract all nodes: freeze blocks + statements outside freeze blocks
159
+ # @return [Array<Prism::Node, FreezeNode>]
160
+ def extract_and_integrate_all_nodes
161
+ return [] unless valid?
187
162
 
188
- statements.each do |node|
189
- map[node] = node.location.start_line..node.location.end_line
163
+ body = @parse_result.value.statements
164
+ base_statements = if body.nil?
165
+ []
166
+ elsif body.is_a?(Prism::StatementsNode)
167
+ body.body.compact
168
+ else
169
+ [body].compact
190
170
  end
191
171
 
192
- map
193
- end
172
+ # Extract freeze blocks by scanning comments for markers
173
+ freeze_nodes = extract_freeze_nodes(base_statements)
194
174
 
195
- def extract_nodes_with_comments
196
- return [] unless valid?
175
+ # Filter out statements inside freeze blocks
176
+ statements_outside_freeze = filter_statements_outside_freeze(base_statements, freeze_nodes)
197
177
 
198
- statements.map.with_index do |stmt, idx|
199
- prev_stmt = (idx > 0) ? statements[idx - 1] : nil
200
- body_node = @parse_result.value.statements
201
-
202
- {
203
- node: stmt,
204
- index: idx,
205
- leading_comments: find_leading_comments(stmt, prev_stmt, body_node),
206
- inline_comments: inline_comments_for_node(stmt),
207
- signature: generate_signature(stmt),
208
- line_range: stmt.location.start_line..stmt.location.end_line,
209
- }
178
+ # Combine and sort by line number
179
+ all_nodes = (statements_outside_freeze + freeze_nodes).sort_by do |node|
180
+ node.location.start_line
210
181
  end
182
+
183
+ all_nodes
211
184
  end
212
185
 
213
- def find_leading_comments(current_stmt, prev_stmt, body_node)
214
- start_line = prev_stmt ? prev_stmt.location.end_line : 0
215
- end_line = current_stmt.location.start_line
186
+ # Extract freeze blocks by scanning for freeze/unfreeze markers in comments
187
+ # @param statements [Array<Prism::Node>] Base AST statements
188
+ # @return [Array<FreezeNode>] Freeze block nodes
189
+ def extract_freeze_nodes(statements)
190
+ # Skip freeze node extraction if no freeze token is configured
191
+ return [] unless @freeze_token
192
+
193
+ freeze_blocks = []
194
+ freeze_start_line = nil
195
+ freeze_start_pattern = /#\s*#{Regexp.escape(@freeze_token)}:freeze/i
196
+ freeze_end_pattern = /#\s*#{Regexp.escape(@freeze_token)}:unfreeze/i
216
197
 
217
- # Find all comments in the range
218
- candidates = @parse_result.comments.select do |comment|
219
- comment.location.start_line > start_line &&
220
- comment.location.start_line < end_line
198
+ # Scan all comments for freeze markers
199
+ @parse_result.comments.each do |comment|
200
+ line = comment.slice
201
+ line_num = comment.location.start_line
202
+
203
+ if line.match?(freeze_start_pattern)
204
+ if freeze_start_line
205
+ # Nested freeze blocks not allowed
206
+ raise FreezeNode::InvalidStructureError,
207
+ "Nested freeze block at line #{line_num} (previous freeze at line #{freeze_start_line})"
208
+ end
209
+ freeze_start_line = line_num
210
+ elsif line.match?(freeze_end_pattern)
211
+ unless freeze_start_line
212
+ raise FreezeNode::InvalidStructureError,
213
+ "Unfreeze marker at line #{line_num} without matching freeze marker"
214
+ end
215
+
216
+ # Find statements enclosed by this freeze block
217
+ enclosed_statements = statements.select do |stmt|
218
+ stmt.location.start_line > freeze_start_line &&
219
+ stmt.location.end_line < line_num
220
+ end
221
+
222
+ # Find all statements that overlap with this freeze block (for validation)
223
+ overlapping_statements = statements.select do |stmt|
224
+ stmt_start = stmt.location.start_line
225
+ stmt_end = stmt.location.end_line
226
+ # Overlaps if: starts before end AND ends after start
227
+ stmt_start <= line_num && stmt_end >= freeze_start_line
228
+ end
229
+
230
+ # Create freeze node (validation happens in initialize)
231
+ freeze_node = FreezeNode.new(
232
+ start_line: freeze_start_line,
233
+ end_line: line_num,
234
+ analysis: self,
235
+ nodes: enclosed_statements,
236
+ overlapping_nodes: overlapping_statements,
237
+ )
238
+
239
+ freeze_blocks << freeze_node
240
+ freeze_start_line = nil
241
+ end
221
242
  end
222
243
 
223
- # Only include comments that are immediately adjacent to the statement
224
- # (no blank lines between the comment and the statement)
225
- adjacent_comments = []
226
- expected_line = end_line - 1
244
+ # Handle unclosed freeze blocks
245
+ # If freeze block is unclosed AND at root level, it extends to end of file
246
+ # If freeze block is unclosed AND inside a nested node, it's an error
247
+ if freeze_start_line
248
+ # Check if any statement starts before freeze_start_line and ends after it
249
+ # This means the freeze is inside a nested structure (class, module, method, etc.)
250
+ nested_context = statements.any? do |stmt|
251
+ stmt.location.start_line < freeze_start_line &&
252
+ stmt.location.end_line > freeze_start_line
253
+ end
227
254
 
228
- candidates.reverse_each do |comment|
229
- comment_line = comment.location.start_line
255
+ if nested_context
256
+ raise FreezeNode::InvalidStructureError,
257
+ "Unclosed freeze block starting at line #{freeze_start_line} inside a nested structure. " \
258
+ "Freeze blocks inside classes/methods/modules must have matching unfreeze markers."
259
+ end
230
260
 
231
- # Only include if this comment is immediately adjacent (no gaps)
232
- if comment_line == expected_line
233
- adjacent_comments.unshift(comment)
234
- expected_line = comment_line - 1
235
- else
236
- # Gap found (blank line or code), stop looking
237
- break
261
+ # Root-level unclosed freeze: extends to end of file
262
+ last_line = @lines.length
263
+ enclosed_statements = statements.select do |stmt|
264
+ stmt.location.start_line > freeze_start_line &&
265
+ stmt.location.end_line <= last_line
238
266
  end
267
+
268
+ freeze_node = FreezeNode.new(
269
+ start_line: freeze_start_line,
270
+ end_line: last_line,
271
+ analysis: self,
272
+ nodes: enclosed_statements,
273
+ )
274
+
275
+ freeze_blocks << freeze_node
239
276
  end
240
277
 
241
- adjacent_comments
278
+ freeze_blocks
242
279
  end
243
280
 
244
- def inline_comments_for_node(stmt)
245
- @parse_result.comments.select do |comment|
246
- # Check if comment is on the same line as the start of the statement
247
- # and appears after the statement text begins
248
- comment.location.start_line == stmt.location.start_line &&
249
- comment.location.start_offset > stmt.location.start_offset
281
+ # Filter out statements that are inside freeze blocks
282
+ # @param statements [Array<Prism::Node>] Base statements
283
+ # @param freeze_nodes [Array<FreezeNode>] Freeze block nodes
284
+ # @return [Array<Prism::Node>] Statements outside freeze blocks
285
+ def filter_statements_outside_freeze(statements, freeze_nodes)
286
+ statements.reject do |stmt|
287
+ freeze_nodes.any? do |freeze_node|
288
+ stmt.location.start_line >= freeze_node.start_line &&
289
+ stmt.location.end_line <= freeze_node.end_line
290
+ end
250
291
  end
251
292
  end
252
293
 
253
- def build_comment_map
254
- map = Hash.new { |h, k| h[k] = [] }
255
- return map unless valid?
294
+ # Extract nodes with their comments and metadata
295
+ # Uses Prism's native comment attachment via node.location
296
+ # @return [Array<Hash>]
297
+ def extract_nodes_with_comments
298
+ return [] unless valid?
256
299
 
257
- @parse_result.comments.each do |comment|
258
- line = comment.location.start_line
259
- map[line] << comment
300
+ statements.map.with_index do |stmt, idx|
301
+ # FreezeNode doesn't have Prism location with comments
302
+ # It's a wrapper with custom Location struct
303
+ if stmt.is_a?(FreezeNode)
304
+ {
305
+ node: stmt,
306
+ index: idx,
307
+ leading_comments: [],
308
+ inline_comments: [],
309
+ signature: generate_signature(stmt),
310
+ line_range: stmt.location.start_line..stmt.location.end_line,
311
+ }
312
+ else
313
+ {
314
+ node: stmt,
315
+ index: idx,
316
+ leading_comments: stmt.location.leading_comments, # Prism native!
317
+ inline_comments: stmt.location.trailing_comments, # Prism native!
318
+ signature: generate_signature(stmt),
319
+ line_range: stmt.location.start_line..stmt.location.end_line,
320
+ }
321
+ end
260
322
  end
261
-
262
- map
263
323
  end
264
324
 
265
- # Default signature generation
266
- def default_signature(node)
267
- return [:nil] unless node
325
+ # Generate default signature for a node
326
+ # @param node [Prism::Node] Node to generate signature for
327
+ # @return [Array] Signature array [type, name, params, ...]
328
+ def compute_node_signature(node)
329
+ # IMPORTANT: Do NOT call node.signature - Prism nodes have their own signature method
330
+ # that returns [node_type_symbol, source_text] which is not what we want for matching.
331
+ # We need our own signature format: [:type_symbol, identifier, params]
268
332
 
269
- # For conditional nodes, signature should be based on the condition only,
270
- # not the body, so conditionals with same condition but different bodies
271
- # are recognized as matching
272
333
  case node
334
+ when Prism::DefNode
335
+ # Extract parameter names from ParametersNode
336
+ params = if node.parameters
337
+ param_names = []
338
+ param_names.concat(node.parameters.requireds.map(&:name)) if node.parameters.requireds
339
+ param_names.concat(node.parameters.optionals.map(&:name)) if node.parameters.optionals
340
+ param_names << node.parameters.rest.name if node.parameters.rest
341
+ param_names.concat(node.parameters.posts.map(&:name)) if node.parameters.posts
342
+ param_names.concat(node.parameters.keywords.map(&:name)) if node.parameters.keywords
343
+ param_names << node.parameters.keyword_rest.name if node.parameters.keyword_rest
344
+ param_names << node.parameters.block.name if node.parameters.block
345
+ param_names
346
+ else
347
+ []
348
+ end
349
+ [:def, node.name, params]
350
+ when Prism::ClassNode
351
+ [:class, node.constant_path.slice]
352
+ when Prism::ModuleNode
353
+ [:module, node.constant_path.slice]
354
+ when Prism::ConstantWriteNode, Prism::ConstantPathWriteNode
355
+ [:const, node.name || node.target.slice]
273
356
  when Prism::IfNode, Prism::UnlessNode
274
- condition_slice = node.predicate&.slice || ""
275
- [node.class.name.split("::").last.to_sym, condition_slice]
276
- when Prism::ConstantWriteNode, Prism::GlobalVariableWriteNode,
277
- Prism::InstanceVariableWriteNode, Prism::ClassVariableWriteNode,
278
- Prism::LocalVariableWriteNode
279
- # For variable/constant assignments, signature based on name only,
280
- # not the value, so assignments with same name but different values
281
- # are recognized as matching
282
- name = node.respond_to?(:name) ? node.name.to_s : node.slice.split("=").first.strip
283
- [node.class.name.split("::").last.to_sym, name]
284
- when Prism::CallNode
285
- # For method calls with blocks, signature based on method name and arguments only,
286
- # not the block body, so calls with same name/args but different blocks
287
- # are recognized as matching
288
- method_name = node.name.to_s
289
- # Extract just the arguments (not the block)
290
- arg_signature = node.arguments&.arguments&.map { |arg| arg.slice }&.join(", ") || ""
291
- [node.class.name.split("::").last.to_sym, method_name, arg_signature]
357
+ # Conditionals match by their condition expression
358
+ condition_source = node.predicate.slice
359
+ [node.is_a?(Prism::IfNode) ? :if : :unless, condition_source]
360
+ when FreezeNode
361
+ # FreezeNode has its own signature method with normalized content
362
+ node.signature
292
363
  else
293
- [node.class.name.split("::").last.to_sym, node.slice]
364
+ [:other, node.class.name, node.location.start_line]
294
365
  end
295
366
  end
296
367
  end