canon 0.1.21 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +43 -43
- data/README.adoc +8 -3
- data/docs/advanced/diff-pipeline.adoc +36 -9
- data/docs/features/diff-formatting/colors-and-symbols.adoc +82 -0
- data/docs/features/diff-formatting/index.adoc +12 -0
- data/docs/features/diff-formatting/themes.adoc +353 -0
- data/docs/features/environment-configuration/index.adoc +23 -0
- data/docs/internals/diff-char-range-pipeline.adoc +249 -0
- data/docs/internals/diffnode-enrichment.adoc +1 -0
- data/docs/internals/index.adoc +52 -4
- data/docs/reference/environment-variables.adoc +6 -0
- data/docs/understanding/architecture.adoc +5 -0
- data/examples/show_themes.rb +217 -0
- data/lib/canon/comparison/comparison_result.rb +9 -4
- data/lib/canon/config/env_schema.rb +3 -1
- data/lib/canon/config.rb +11 -0
- data/lib/canon/diff/diff_block.rb +7 -0
- data/lib/canon/diff/diff_block_builder.rb +2 -2
- data/lib/canon/diff/diff_char_range.rb +140 -0
- data/lib/canon/diff/diff_line.rb +42 -4
- data/lib/canon/diff/diff_line_builder.rb +907 -0
- data/lib/canon/diff/diff_node.rb +5 -1
- data/lib/canon/diff/diff_node_enricher.rb +1418 -0
- data/lib/canon/diff/diff_node_mapper.rb +54 -0
- data/lib/canon/diff/source_locator.rb +105 -0
- data/lib/canon/diff/text_decomposer.rb +103 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +264 -24
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +35 -20
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +36 -19
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +33 -19
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +583 -98
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +36 -19
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +62 -13
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +59 -24
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +74 -34
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +4 -5
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +1 -1
- data/lib/canon/diff_formatter/legend.rb +4 -2
- data/lib/canon/diff_formatter/theme.rb +857 -0
- data/lib/canon/diff_formatter.rb +11 -6
- data/lib/canon/tree_diff/matchers/hash_matcher.rb +15 -15
- data/lib/canon/tree_diff/matchers/similarity_matcher.rb +10 -0
- data/lib/canon/tree_diff/operations/operation_detector.rb +5 -1
- data/lib/canon/tree_diff/tree_diff_integrator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- metadata +11 -2
|
@@ -0,0 +1,1418 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "diff_char_range"
|
|
4
|
+
require_relative "text_decomposer"
|
|
5
|
+
require_relative "source_locator"
|
|
6
|
+
|
|
7
|
+
module Canon
|
|
8
|
+
module Diff
|
|
9
|
+
# Enriches DiffNodes with character position data (DiffCharRanges).
|
|
10
|
+
#
|
|
11
|
+
# This is Phase 1 of the two-phase diff pipeline. It runs after comparison
|
|
12
|
+
# and before rendering. It CAN use string operations (including LCS) on
|
|
13
|
+
# serialized content to determine character-level change positions.
|
|
14
|
+
#
|
|
15
|
+
# The output is DiffNodes enriched with:
|
|
16
|
+
# - char_ranges: Array<DiffCharRange> mapping changes to specific line/columns
|
|
17
|
+
# - line_range_before: [start_line, end_line] in text1
|
|
18
|
+
# - line_range_after: [start_line, end_line] in text2
|
|
19
|
+
#
|
|
20
|
+
# Phase 2 (DiffLineBuilder) then assembles DiffLines from these enriched
|
|
21
|
+
# DiffNodes without any further computation.
|
|
22
|
+
class DiffNodeEnricher
|
|
23
|
+
# Enrich DiffNodes with character position data.
|
|
24
|
+
#
|
|
25
|
+
# @param diff_nodes [Array<DiffNode>] The semantic differences
|
|
26
|
+
# @param text1 [String] The first document (preprocessed)
|
|
27
|
+
# @param text2 [String] The second document (preprocessed)
|
|
28
|
+
# @return [Array<DiffNode>] The same DiffNodes, enriched in place
|
|
29
|
+
def self.build(diff_nodes, text1, text2)
  # Hand the input back untouched when there is nothing to enrich or when
  # either serialized document is missing.
  no_nodes = diff_nodes.nil? || diff_nodes.empty?
  no_text = text1.nil? || text2.nil?
  return diff_nodes if no_nodes || no_text

  new(diff_nodes, text1, text2).enrich
end
|
|
35
|
+
|
|
36
|
+
# Stores the nodes and both documents, and precomputes per-document
# line maps and line arrays used by the locating helpers.
def initialize(diff_nodes, text1, text2)
  @diff_nodes = diff_nodes
  @text1, @text2 = text1, text2
  @line_map1, @line_map2 = [text1, text2].map do |text|
    SourceLocator.build_line_map(text)
  end
  @lines1, @lines2 = [text1, text2].map { |text| text.split("\n") }
  # Track occurrences for text_content dimension to find correct element instance
  @text_occurrence1 = Hash.new(0)
  @text_occurrence2 = Hash.new(0)
end
|
|
48
|
+
|
|
49
|
+
# Enriches every stored DiffNode in order and returns the same collection.
def enrich
  @diff_nodes.each { |node| enrich_node(node) }
  @diff_nodes
end
|
|
55
|
+
|
|
56
|
+
private
|
|
57
|
+
|
|
58
|
+
# Enrich a single DiffNode with DiffCharRanges based on its dimension.
|
|
59
|
+
def enrich_node(diff_node)
  # Pick the handler for the node's dimension; anything unrecognised
  # goes to the generic handler.
  handler =
    case diff_node.dimension
    when :text_content then :enrich_text_content
    when :attribute_values then :enrich_attribute_values
    when :attribute_presence then :enrich_attribute_presence
    when :attribute_order then :enrich_attribute_order
    when :comments then :enrich_comments
    when :structural_whitespace then :enrich_structural_whitespace
    when :element_structure then :enrich_element_structure
    else :enrich_generic
    end

  send(handler, diff_node)
end
|
|
79
|
+
|
|
80
|
+
# Text content change: decompose serialized_before/after into
|
|
81
|
+
# before-text, changed-text, after-text and map to DiffCharRanges.
|
|
82
|
+
# @param diff_node [DiffNode] node whose serialized_before/serialized_after
#   texts are located in @text1/@text2; char_ranges and line ranges are
#   written onto the node. Nothing is written when the text cannot be located.
def enrich_text_content(diff_node)
  before = diff_node.serialized_before
  after = diff_node.serialized_after
  return if before.nil? && after.nil?

  # One side is nil = pure insertion/deletion
  if before.nil?
    text_content_insertion(diff_node, after)
    return
  end

  if after.nil?
    text_content_deletion(diff_node, before)
    return
  end

  # Both sides exist: locate each text first (both lookups always run),
  # then decompose into prefix / changed / suffix ranges.
  loc1 = locate_at_element_index(before, @text1, @line_map1, diff_node.path)
  loc2 = locate_at_element_index(after, @text2, @line_map2, diff_node.path)
  return unless loc1 && loc2

  diff_node.char_ranges =
    text_content_decomposed_ranges(diff_node, before, after, loc1, loc2)

  # Compute actual line span for multi-line text content.
  # Content like "abc\ndef" spans 2 lines.
  diff_node.line_range_before =
    [loc1[:line_number], loc1[:line_number] + before.count("\n")]
  diff_node.line_range_after =
    [loc2[:line_number], loc2[:line_number] + after.count("\n")]
end

# Pure insertion: the text exists only on the new side. Marks it as a
# single :added range on the line where it was located.
def text_content_insertion(diff_node, after)
  loc = locate_at_element_index(after, @text2, @line_map2, diff_node.path)
  loc ||= locate_via_parent_element(diff_node.path, @text2, @line_map2)
  loc ||= locate_via_node_tree(diff_node.node2, after, @text2,
                               @line_map2, :new)
  # Final fallback: when tree-based location in text2 fails because the
  # leaf element is self-closing (text moved OUTSIDE the element in text2),
  # search in text1 (original) using path-based location. The original
  # has the correct element structure with content intact.
  loc ||= locate_via_parent_element(diff_node.path, @text1, @line_map1)
  return unless loc

  diff_node.char_ranges = [
    text_content_char_range(diff_node, loc, 0, after.length,
                            side: :new, status: :added, role: :changed),
  ]
  diff_node.line_range_before = nil
  diff_node.line_range_after = [loc[:line_number], loc[:line_number]]
end

# Pure deletion: the text exists only on the old side. Marks it as a
# single :removed range on the line where it was located.
def text_content_deletion(diff_node, before)
  loc = locate_at_element_index(before, @text1, @line_map1, diff_node.path)
  loc ||= locate_via_parent_element(diff_node.path, @text1, @line_map1)
  loc ||= locate_via_node_tree(diff_node.node1, before, @text1,
                               @line_map1, :old)
  return unless loc

  diff_node.char_ranges = [
    text_content_char_range(diff_node, loc, 0, before.length,
                            side: :old, status: :removed, role: :changed),
  ]
  diff_node.line_range_before = [loc[:line_number], loc[:line_number]]
  diff_node.line_range_after = nil
end

# Builds the prefix / changed / suffix ranges for a text change present on
# both sides. Order is: prefix (old, new), changed (old, new), suffix
# (old, new); empty parts produce no range.
def text_content_decomposed_ranges(diff_node, before, after, loc1, loc2)
  parts = TextDecomposer.decompose(before, after)
  prefix_len = parts[:common_prefix].length
  suffix_len = parts[:common_suffix].length
  ranges = []

  # Before-text (unchanged prefix)
  unless parts[:common_prefix].empty?
    ranges << text_content_char_range(diff_node, loc1, 0, prefix_len,
                                      side: :old, status: :unchanged, role: :before)
    ranges << text_content_char_range(diff_node, loc2, 0, prefix_len,
                                      side: :new, status: :unchanged, role: :before)
  end

  # Changed-text (the actual difference) starts right after the prefix
  unless parts[:changed_old].empty?
    ranges << text_content_char_range(diff_node, loc1, prefix_len,
                                      parts[:changed_old].length,
                                      side: :old, status: :changed_old, role: :changed)
  end
  unless parts[:changed_new].empty?
    ranges << text_content_char_range(diff_node, loc2, prefix_len,
                                      parts[:changed_new].length,
                                      side: :new, status: :changed_new, role: :changed)
  end

  # After-text (unchanged suffix) is measured back from the end of each text
  unless parts[:common_suffix].empty?
    ranges << text_content_char_range(diff_node, loc1,
                                      before.length - suffix_len, suffix_len,
                                      side: :old, status: :unchanged, role: :after)
    ranges << text_content_char_range(diff_node, loc2,
                                      after.length - suffix_len, suffix_len,
                                      side: :new, status: :unchanged, role: :after)
  end

  ranges
end

# Builds one DiffCharRange on line loc[:line_number] that starts `offset`
# columns after loc[:col] and covers `length` columns.
def text_content_char_range(diff_node, loc, offset, length, side:, status:, role:)
  start_col = loc[:col] + offset
  DiffCharRange.new(
    line_number: loc[:line_number],
    start_col: start_col,
    end_col: start_col + length,
    side: side,
    status: status,
    role: role,
    diff_node: diff_node,
  )
end
|
|
248
|
+
|
|
249
|
+
# Attribute value change: locate the specific attribute values in the text.
|
|
250
|
+
# @param diff_node [DiffNode] node carrying attributes_before/attributes_after
#   hashes; for every key whose value differs, the old value is searched in
#   @text1 and the new value in @text2, and each hit becomes one range
#   covering only the value between the quotes.
def enrich_attribute_values(diff_node)
  attrs_before = diff_node.attributes_before
  attrs_after = diff_node.attributes_after
  return unless attrs_before && attrs_after

  # Find which attributes changed
  changed_keys = (attrs_before.keys + attrs_after.keys).uniq.reject do |key|
    attrs_before[key] == attrs_after[key]
  end
  return if changed_keys.empty?

  # Per-side search context. The element names do not depend on the key,
  # so they are read once here instead of once per changed key.
  old_side = { text: @text1, line_map: @line_map1,
               element_name: diff_node.node1&.name,
               side: :old, status: :changed_old }
  new_side = { text: @text2, line_map: @line_map2,
               element_name: diff_node.node2&.name,
               side: :new, status: :changed_new }

  ranges = []
  first_line = {} # side => line number of the first located value

  changed_keys.each do |key|
    # Old value first, then new value, so ranges stay ordered per key.
    [[attrs_before[key], old_side], [attrs_after[key], new_side]].each do |value, search|
      next unless value

      pattern = build_attr_pattern(key, value)
      loc = locate_attribute_value_pattern(pattern, search)
      next unless loc

      first_line[search[:side]] ||= loc[:line_number]
      ranges << DiffCharRange.new(
        line_number: loc[:line_number],
        start_col: loc[:col] + key.length + 2, # skip key="
        end_col: loc[:col] + pattern.length - 1, # skip closing "
        side: search[:side],
        status: search[:status],
        role: :changed,
        diff_node: diff_node,
      )
    end
  end

  old_line = first_line[:old]
  new_line = first_line[:new]
  diff_node.char_ranges = ranges
  diff_node.line_range_before = old_line ? [old_line, old_line] : nil
  diff_node.line_range_after = new_line ? [new_line, new_line] : nil
end

# Locates an attribute pattern on one side. The search starts after the XML
# declaration to avoid matching its attributes (e.g., version="1.0" in
# <?xml version="1.0"?> vs <element version="1.0">). When that finds nothing
# and an element name is known, retries with "<element_name> <pattern>" and
# shifts the column so it points at the attribute rather than the name.
def locate_attribute_value_pattern(pattern, search)
  # The declaration offset depends only on the side's text: compute it on
  # first use and reuse it for the remaining keys.
  search[:start_from] ||= xml_declaration_end_offset(search[:text])
  loc = SourceLocator.locate(pattern, search[:text], search[:line_map],
                             start_from: search[:start_from])
  element_name = search[:element_name]
  return loc unless loc.nil? && element_name

  scoped = SourceLocator.locate("#{element_name} #{pattern}",
                                search[:text], search[:line_map])
  scoped ? scoped.merge(col: scoped[:col] + element_name.length + 1) : scoped
end
|
|
333
|
+
|
|
334
|
+
# Attribute presence change: find added/removed attributes.
|
|
335
|
+
# @param diff_node [DiffNode] node carrying attributes_before/attributes_after
#   (nil is treated as an empty hash). Keys only in attributes_before are
#   searched in @text1 as :removed; keys only in attributes_after are
#   searched in @text2 as :added. Removed ranges come before added ranges.
def enrich_attribute_presence(diff_node)
  attrs_before = diff_node.attributes_before || {}
  attrs_after = diff_node.attributes_after || {}

  added_keys = attrs_after.keys - attrs_before.keys
  removed_keys = attrs_before.keys - attrs_after.keys
  return if added_keys.empty? && removed_keys.empty?

  # Removed attributes (only in text1)
  removed, line1_num = attribute_presence_ranges(
    diff_node, removed_keys, attrs_before,
    text: @text1, line_map: @line_map1, side: :old, status: :removed
  )
  # Added attributes (only in text2)
  added, line2_num = attribute_presence_ranges(
    diff_node, added_keys, attrs_after,
    text: @text2, line_map: @line_map2, side: :new, status: :added
  )

  diff_node.char_ranges = removed + added
  diff_node.line_range_before = line1_num ? [line1_num, line1_num] : nil
  diff_node.line_range_after = line2_num ? [line2_num, line2_num] : nil
end

# Locates each key's attribute pattern in `text` and builds a range covering
# the whole pattern. Keys that cannot be located are skipped.
#
# @return [Array(Array<DiffCharRange>, Integer|nil)] the ranges and the line
#   number of the first located attribute (nil when none was located)
def attribute_presence_ranges(diff_node, keys, attrs, text:, line_map:, side:, status:)
  first_line = nil
  start_from = nil
  ranges = []

  keys.each do |key|
    pattern = build_attr_pattern(key, attrs[key])
    # The declaration offset depends only on `text`; compute it on first
    # use instead of once per key.
    start_from ||= xml_declaration_end_offset(text)
    loc = SourceLocator.locate(pattern, text, line_map, start_from: start_from)
    next unless loc

    first_line ||= loc[:line_number]
    ranges << DiffCharRange.new(
      line_number: loc[:line_number],
      start_col: loc[:col],
      end_col: loc[:col] + pattern.length,
      side: side,
      status: status,
      role: :changed,
      diff_node: diff_node,
    )
  end

  [ranges, first_line]
end
|
|
394
|
+
|
|
395
|
+
# Attribute order change: highlight entire attribute sections as formatting.
|
|
396
|
+
def enrich_attribute_order(diff_node)
  old_text = diff_node.serialized_before
  new_text = diff_node.serialized_after

  # A side is only searched when it has serialized content.
  old_loc = old_text ? SourceLocator.locate(old_text, @text1, @line_map1) : nil
  new_loc = new_text ? SourceLocator.locate(new_text, @text2, @line_map2) : nil

  located = [[:old, old_loc, old_text], [:new, new_loc, new_text]]
            .select { |_side, loc, _text| loc }

  # Each located side gets one range over its whole serialized text,
  # marked :unchanged.
  ranges = located.map do |side, loc, text|
    DiffCharRange.new(
      line_number: loc[:line_number],
      start_col: loc[:col],
      end_col: loc[:col] + text.length,
      side: side,
      status: :unchanged,
      role: :changed,
      diff_node: diff_node,
    )
  end

  diff_node.char_ranges = ranges
  diff_node.line_range_before =
    old_loc ? [old_loc[:line_number], old_loc[:line_number]] : nil
  diff_node.line_range_after =
    new_loc ? [new_loc[:line_number], new_loc[:line_number]] : nil
end
|
|
439
|
+
|
|
440
|
+
# Comment change: locate and decompose comment content.
|
|
441
|
+
# @param diff_node [DiffNode] comment difference; serialized_before is
#   searched in @text1 and serialized_after in @text2. When both sides exist
#   but either cannot be located, the node is handed to enrich_generic.
def enrich_comments(diff_node)
  before = diff_node.serialized_before
  after = diff_node.serialized_after
  return if before.nil? && after.nil?

  # Pure addition or removal
  if before.nil?
    comment_one_sided(diff_node, after, @text2, @line_map2,
                      side: :new, status: :added)
    return
  end

  if after.nil?
    comment_one_sided(diff_node, before, @text1, @line_map1,
                      side: :old, status: :removed)
    return
  end

  # Both exist: locate and decompose
  loc1 = SourceLocator.locate(before, @text1, @line_map1)
  loc2 = SourceLocator.locate(after, @text2, @line_map2)

  unless loc1 && loc2
    enrich_generic(diff_node)
    return
  end

  diff_node.char_ranges =
    comment_decomposed_ranges(diff_node, before, after, loc1, loc2)
  diff_node.line_range_before = [loc1[:line_number], loc1[:line_number]]
  diff_node.line_range_after = [loc2[:line_number], loc2[:line_number]]
end

# Comment present on one side only: one range over the whole comment, plus
# the line range for that side. The opposite side's line range is left
# untouched. Does nothing when the comment cannot be located.
def comment_one_sided(diff_node, content, text, line_map, side:, status:)
  loc = SourceLocator.locate(content, text, line_map)
  return unless loc

  diff_node.char_ranges = [
    comment_char_range(diff_node, loc, 0, content.length,
                       side: side, status: status, role: :changed),
  ]
  line_range = [loc[:line_number], loc[:line_number]]
  if side == :new
    diff_node.line_range_after = line_range
  else
    diff_node.line_range_before = line_range
  end
end

# Builds the prefix / changed / suffix ranges for a comment present on both
# sides. Order is: prefix (old, new), changed (old, new), suffix (old, new);
# empty parts produce no range.
def comment_decomposed_ranges(diff_node, before, after, loc1, loc2)
  parts = TextDecomposer.decompose(before, after)
  prefix_len = parts[:common_prefix].length
  suffix_len = parts[:common_suffix].length
  ranges = []

  # Prefix (unchanged)
  unless parts[:common_prefix].empty?
    ranges << comment_char_range(diff_node, loc1, 0, prefix_len,
                                 side: :old, status: :unchanged, role: :before)
    ranges << comment_char_range(diff_node, loc2, 0, prefix_len,
                                 side: :new, status: :unchanged, role: :before)
  end

  # Changed portion starts right after the prefix
  unless parts[:changed_old].empty?
    ranges << comment_char_range(diff_node, loc1, prefix_len,
                                 parts[:changed_old].length,
                                 side: :old, status: :changed_old, role: :changed)
  end
  unless parts[:changed_new].empty?
    ranges << comment_char_range(diff_node, loc2, prefix_len,
                                 parts[:changed_new].length,
                                 side: :new, status: :changed_new, role: :changed)
  end

  # Suffix (unchanged) is measured back from the end of each comment
  unless parts[:common_suffix].empty?
    ranges << comment_char_range(diff_node, loc1,
                                 before.length - suffix_len, suffix_len,
                                 side: :old, status: :unchanged, role: :after)
    ranges << comment_char_range(diff_node, loc2,
                                 after.length - suffix_len, suffix_len,
                                 side: :new, status: :unchanged, role: :after)
  end

  ranges
end

# Builds one DiffCharRange on line loc[:line_number] that starts `offset`
# columns after loc[:col] and covers `length` columns.
def comment_char_range(diff_node, loc, offset, length, side:, status:, role:)
  start_col = loc[:col] + offset
  DiffCharRange.new(
    line_number: loc[:line_number],
    start_col: start_col,
    end_col: start_col + length,
    side: side,
    status: status,
    role: role,
    diff_node: diff_node,
  )
end
|
|
559
|
+
|
|
560
|
+
# Structural whitespace: mark affected lines as formatting-only.
|
|
561
|
+
def enrich_structural_whitespace(diff_node)
  before = diff_node.serialized_before
  after = diff_node.serialized_after

  # A side without serialized content is never searched.
  loc1 = before && SourceLocator.locate(before, @text1, @line_map1)
  loc2 = after && SourceLocator.locate(after, @text2, @line_map2)

  # One range over the whole serialized text of a located side,
  # marked :unchanged.
  whole_span = lambda do |loc, content, side|
    DiffCharRange.new(
      line_number: loc[:line_number],
      start_col: loc[:col],
      end_col: loc[:col] + content.length,
      side: side,
      status: :unchanged,
      role: :changed,
      diff_node: diff_node,
    )
  end

  ranges = []
  ranges << whole_span.call(loc1, before, :old) if loc1
  ranges << whole_span.call(loc2, after, :new) if loc2

  diff_node.char_ranges = ranges
  diff_node.line_range_before = loc1 ? [loc1[:line_number], loc1[:line_number]] : nil
  diff_node.line_range_after = loc2 ? [loc2[:line_number], loc2[:line_number]] : nil
end
|
|
604
|
+
|
|
605
|
+
# Element structure change: full element deletion/insertion.
|
|
606
|
+
# Locate the entire element (opening tag through closing tag).
|
|
607
|
+
# @param diff_node [DiffNode] structural difference. serialized_before is
#   searched in @text1, serialized_after in @text2; a side that cannot be
#   located falls back to fallback_element_structure_ranges when the element
#   exists on one side only.
def enrich_element_structure(diff_node)
  before = diff_node.serialized_before
  after = diff_node.serialized_after
  path = diff_node.path
  return if before.nil? && after.nil?

  # Element added (only in text2)
  if before.nil?
    loc = locate_element_structure_content(after, path, @text2, @line_map2)
    if loc
      end_line = find_end_line(loc[:line_number], @line_map2, after)
      diff_node.char_ranges = [
        element_structure_char_range(diff_node, loc, after, :new, :added),
      ]
      diff_node.line_range_before = nil
      diff_node.line_range_after = [loc[:line_number], end_line]
    else
      # Fallback: can't locate exact content, match lines by element name
      fallback_element_structure_ranges(diff_node, nil, after, :new)
    end
    return
  end

  # Element removed (only in text1)
  if after.nil?
    loc = locate_element_structure_content(before, path, @text1, @line_map1)
    # Try using node1's parent element as anchor for text nodes
    loc ||= locate_textnode_parent(diff_node.node1, before, @text1, @line_map1)
    if loc
      end_line = find_end_line(loc[:line_number], @line_map1, before)
      # NOTE(review): a pure removal is tagged :changed_old here, while the
      # addition branch uses :added and the name-based fallback uses
      # :removed. Preserved as-is; confirm whether :removed was intended.
      diff_node.char_ranges = [
        element_structure_char_range(diff_node, loc, before, :old, :changed_old),
      ]
      diff_node.line_range_before = [loc[:line_number], end_line]
      diff_node.line_range_after = nil
    else
      # Fallback: can't locate exact content, match lines by element name
      fallback_element_structure_ranges(diff_node, before, nil, :old)
    end
    return
  end

  # Both exist: structural change (e.g., element renamed)
  loc1 = locate_element_structure_content(before, path, @text1, @line_map1)
  loc2 = locate_element_structure_content(after, path, @text2, @line_map2)

  ranges = []
  ranges << element_structure_char_range(diff_node, loc1, before, :old, :changed_old) if loc1
  ranges << element_structure_char_range(diff_node, loc2, after, :new, :changed_new) if loc2

  diff_node.char_ranges = ranges
  # NOTE(review): unlike the one-sided branches, these line ranges cover
  # only the starting line (find_end_line is not used) — confirm intended.
  diff_node.line_range_before = loc1 ? [loc1[:line_number], loc1[:line_number]] : nil
  diff_node.line_range_after = loc2 ? [loc2[:line_number], loc2[:line_number]] : nil
end

# Locates serialized element content in `text`. Content shorter than the
# minimum reliable length is located through its parent element (when a path
# is available) because short strings match too many places in the document.
def locate_element_structure_content(content, path, text, line_map)
  min_locate_length = 3
  if content.length < min_locate_length && path
    locate_via_parent_element(path, text, line_map)
  else
    SourceLocator.locate(content, text, line_map)
  end
end

# Builds one DiffCharRange starting at loc[:col] on line loc[:line_number]
# and covering content.length columns.
def element_structure_char_range(diff_node, loc, content, side, status)
  DiffCharRange.new(
    line_number: loc[:line_number],
    start_col: loc[:col],
    end_col: loc[:col] + content.length,
    side: side,
    status: status,
    role: :changed,
    diff_node: diff_node,
  )
end
|
|
748
|
+
|
|
749
|
+
# Coarse fallback for element_structure diffs whose serialized content
# could not be pinpointed in the source text.
#
# A tag name is extracted from +before+ (or from +after+ when +before+ is
# nil); every line that mentions that tag is then flagged as a whole-line
# range. When no tag name can be extracted the diff node is left untouched.
#
# @param diff_node [Object] node that receives char_ranges and line ranges
# @param before [String, nil] serialized old content
# @param after [String, nil] serialized new content
# @param side [Symbol] :old, :new or :both; selects which text(s) to scan
def fallback_element_structure_ranges(diff_node, before, after, side)
  tag = extract_element_name(before || after)
  return unless tag

  collected = []

  if side == :old || side == :both
    # Old side: whole-line :removed ranges over text1
    hits = find_lines_with_element(tag, @lines1, @text1)
    removed = hits.map do |idx|
      DiffCharRange.new(
        line_number: idx,
        start_col: 0,
        end_col: @lines1[idx].length,
        side: :old,
        status: :removed,
        role: :changed,
        diff_node: diff_node,
      )
    end
    collected.concat(removed)
    diff_node.line_range_before = (hits.minmax unless hits.empty?)
  end

  if side == :new || side == :both
    # New side: whole-line :added ranges over text2
    hits = find_lines_with_element(tag, @lines2, @text2)
    added = hits.map do |idx|
      DiffCharRange.new(
        line_number: idx,
        start_col: 0,
        end_col: @lines2[idx].length,
        side: :new,
        status: :added,
        role: :changed,
        diff_node: diff_node,
      )
    end
    collected.concat(added)
    diff_node.line_range_after = (hits.minmax unless hits.empty?)
  end

  diff_node.char_ranges = collected
end
|
|
793
|
+
|
|
794
|
+
# Return the indices of every line that mentions the given element.
#
# A line counts when it contains an opening tag (`<name` followed by a
# character that cannot continue the name, or by end of line) or the exact
# closing tag `</name>`. The name-boundary check keeps `item` from also
# matching `<items>` or `<item-list>`.
#
# @param element_name [String] tag name to look for
# @param lines [Array<String>] source lines
# @param _text [String, nil] unused; kept so existing call sites still work
# @return [Array<Integer>] 0-based indices of matching lines, ascending
def find_lines_with_element(element_name, lines, _text)
  name = Regexp.escape(element_name)
  # Negative lookahead instead of a required delimiter so that a tag split
  # across lines ("<item" at end of line) is still recognised.
  tag_pattern = %r{<#{name}(?![\w:.-])|</#{name}>}

  lines.each_index.select { |idx| lines[idx].match?(tag_pattern) }
end
|
|
804
|
+
|
|
805
|
+
# Pull the tag name out of the first opening or closing tag in +content+.
#
# @param content [String, nil] serialized markup
# @return [String, nil] the tag name, or nil when +content+ is nil or
#   contains no tag
def extract_element_name(content)
  return if content.nil?

  # Capture group 1 is the name following "<" or "</"
  content[/<\/?([a-zA-Z0-9_:-]+)/, 1]
end
|
|
812
|
+
|
|
813
|
+
# Generic fallback: locate the serialized before/after content verbatim in
# the source texts and record one range per side that was found.
# Does NOT call enrich_text_content to avoid infinite recursion.
#
# - both sides present: :changed_old / :changed_new ranges; a side that
#   cannot be located contributes no range and a nil line range.
# - only before present: a single :removed range, or nothing when the
#   content cannot be located.
# - only after present: a single :added range, or nothing when the
#   content cannot be located.
#
# @param diff_node [Object] node exposing serialized_before/serialized_after
def enrich_generic(diff_node)
  before = diff_node.serialized_before
  after = diff_node.serialized_after

  # Build the range covering +content+ starting at +loc+.
  span_for = lambda do |loc, content, side, status|
    DiffCharRange.new(
      line_number: loc[:line_number],
      start_col: loc[:col],
      end_col: loc[:col] + content.length,
      side: side,
      status: status,
      role: :changed,
      diff_node: diff_node,
    )
  end
  # Line range is recorded as the start line only.
  one_line = ->(loc) { [loc[:line_number], loc[:line_number]] }

  if before && after
    old_loc = SourceLocator.locate(before, @text1, @line_map1)
    new_loc = SourceLocator.locate(after, @text2, @line_map2)

    spans = []
    spans << span_for.call(old_loc, before, :old, :changed_old) if old_loc
    spans << span_for.call(new_loc, after, :new, :changed_new) if new_loc

    diff_node.char_ranges = spans
    diff_node.line_range_before = old_loc ? one_line.call(old_loc) : nil
    diff_node.line_range_after = new_loc ? one_line.call(new_loc) : nil
  elsif before
    old_loc = SourceLocator.locate(before, @text1, @line_map1)
    return unless old_loc

    diff_node.char_ranges = [span_for.call(old_loc, before, :old, :removed)]
    diff_node.line_range_before = one_line.call(old_loc)
  elsif after
    new_loc = SourceLocator.locate(after, @text2, @line_map2)
    return unless new_loc

    diff_node.char_ranges = [span_for.call(new_loc, after, :new, :added)]
    diff_node.line_range_after = one_line.call(new_loc)
  end
end
|
|
890
|
+
|
|
891
|
+
# Render an attribute as it is expected to appear in serialized markup:
# the key, an equals sign, and the value wrapped in double quotes.
#
# @param key [#to_s] attribute name
# @param value [#to_s] attribute value (inserted verbatim, not escaped)
# @return [String]
def build_attr_pattern(key, value)
  %(#{key}="#{value}")
end
|
|
895
|
+
|
|
896
|
+
# Offset of the first character after the XML declaration's closing `?>`.
#
# The declaration carries attributes such as version and encoding that may
# collide with element attributes; callers skip past it so attribute
# searches do not match inside the declaration.
#
# @param text [String] the source text
# @return [Integer] offset just past `?>`; 0 when +text+ does not start
#   with `<?xml` or no `?>` is present
def xml_declaration_end_offset(text)
  return 0 unless text.start_with?("<?xml")

  terminator = text.index("?>")
  terminator.nil? ? 0 : terminator + 2
end
|
|
913
|
+
|
|
914
|
+
# Last line touched by content that begins on +start_line+.
#
# Adds one line per newline character in +content+ and clamps the result
# to the final index of +line_map+.
#
# @param start_line [Integer] 0-based line where the content starts
# @param line_map [Array<Hash>] line offset map (only its length is used)
# @param content [String] the serialized content
# @return [Integer] 0-based index of the last line
def find_end_line(start_line, line_map, content)
  last_available = line_map.length - 1
  candidate = start_line + content.count("\n")

  candidate < last_available ? candidate : last_available
end
|
|
925
|
+
|
|
926
|
+
# Locate +value+ inside the N-th element named by the diff path, for the
# case where the same text occurs in several elements (e.g. "original" in
# more than one item element).
#
# The last path segment is treated as the text node and skipped. Walking
# backwards from the segment before it (never reaching the first segment),
# the nearest segment carrying a bracket index names the owning element.
# Each occurrence of +value+ is then attributed to an element index via
# count_elements_before_position.
#
# @param value [String] the text to find
# @param text [String] the source text
# @param line_map [Array<Hash>] pre-built line offset map
# @param path [String] the diff node path (e.g., "/root[0]/item[1]/unknown[0]")
# @return [Hash, nil] location hash; nil when not found, or when +value+
#   is shorter than 3 characters and an indexed segment was parsed (the
#   caller is expected to fall back to locate_via_parent_element)
def locate_at_element_index(value, text, line_map, path)
  first_hit = -> { SourceLocator.locate(value, text, line_map) }

  parts = path.split("/").reject(&:empty?)
  return first_hit.call if parts.length < 2

  # Candidates are parts[1..-2], scanned from the innermost outwards.
  indexed = parts[1..-2].reverse.find { |part| part.include?("[") }
  return first_hit.call unless indexed

  parsed = indexed.match(/([a-zA-Z0-9_:-]+)\[(\d+)\]/)
  return first_hit.call unless parsed

  # Very short values are not enumerated here.
  return nil if value.length < 3

  wanted_name = parsed[1]
  wanted_index = parsed[2].to_i

  hit = SourceLocator.locate_all(value, text, line_map).find do |occ|
    count_elements_before_position(text, occ[:char_offset],
                                   wanted_name) == wanted_index
  end

  # No occurrence matched the index: settle for the first occurrence.
  hit || first_hit.call
end
|
|
988
|
+
|
|
989
|
+
# Fallback for text_content when locate_at_element_index gives up (e.g.
# the value is too short to search for reliably).
#
# Descends through every bracket-indexed element named in the path,
# outermost first, narrowing a search window to the inside of each one,
# and returns the position immediately after the innermost opening tag.
#
# @param path [String] the diff node path (e.g., "/root[0]/item[1]/unknown[0]")
# @param text [String] the source text
# @param line_map [Array<Hash>] pre-built line offset map
# @return [Hash, nil] location hash or nil if any step cannot be resolved
def locate_via_parent_element(path, text, line_map)
  parts = path.split("/").reject(&:empty?)
  return nil if parts.length < 2

  # Drop the final segment (the text node), skip non-element segments and
  # keep only segments that carry an index, in top-down order.
  # E.g. ".../def-item[1]/term[0]/named-content[0]/unknown[0]" yields
  # def-item[1] -> term[0] -> named-content[0].
  chain = parts[0..-2].select do |part|
    !part.start_with?("text()", "comment()", "unknown") && part.include?("[")
  end
  return nil if chain.empty?

  window_start = 0
  window_end = text.length

  chain.each do |part|
    parsed = part.match(/([a-zA-Z0-9_:-]+)\[(\d+)\]/)
    return nil unless parsed

    name = parsed[1]
    open_at = find_nth_element_in_range(text, name, parsed[2].to_i,
                                        window_start, window_end)
    return nil unless open_at

    tag_close = text.index(">", open_at)
    return nil unless tag_close

    # Children are searched between the end of the opening tag and the
    # next closing tag of the same name; without a closing tag the window
    # collapses to empty.
    window_start = tag_close + 1
    window_end = text.index("</#{name}>", window_start) || window_start
  end

  line_idx = SourceLocator.send(:find_line_for_offset, window_start,
                                line_map)
  return nil unless line_idx

  { char_offset: window_start, line_number: line_idx,
    col: window_start - line_map[line_idx][:start_offset] }
end
|
|
1057
|
+
|
|
1058
|
+
# Find the Nth sibling occurrence of an element within a text range,
# counting only elements that are not nested inside another element of
# the same name.
#
# Path indices (e.g., sec[3]) count siblings, so a <sec> nested inside
# another <sec> must not be counted. The scan keeps a depth counter that
# is driven by opening and closing tags of +element_name+ only; an opening
# tag is counted when that depth is zero.
#
# Self-closing tags are counted as siblings, including the attribute-less
# form `<name/>`, and never increase the depth.
#
# @param text [String] the source text
# @param element_name [String] tag name to look for
# @param target_index [Integer] 0-based sibling index wanted
# @param range_start [Integer] offset where scanning begins (inclusive)
# @param range_end [Integer] offset where scanning stops (exclusive)
# @return [Integer, nil] offset of the `<` of the matching opening tag,
#   or nil when the range holds fewer than target_index + 1 siblings
def find_nth_element_in_range(text, element_name, target_index,
                              range_start, range_end)
  name = Regexp.escape(element_name)
  # The delimiter after the name keeps "sec" from matching "<section>";
  # "/" is included so "<sec/>" is recognised as an opening tag.
  open_pattern = %r{<#{name}[\s>/]}
  close_pattern = %r{</#{name}\s*>}

  offset = range_start
  sibling_index = 0
  depth = 0

  loop do
    open_pos = text.index(open_pattern, offset)
    open_pos = nil if open_pos && open_pos >= range_end

    close_pos = text.index(close_pattern, offset)
    close_pos = nil if close_pos && close_pos >= range_end

    # Nothing left inside the range
    break if open_pos.nil? && close_pos.nil?

    if open_pos && (close_pos.nil? || open_pos <= close_pos)
      tag_end = text.index(">", open_pos)
      break unless tag_end

      if depth.zero?
        return open_pos if sibling_index == target_index

        sibling_index += 1
      end

      # tag_end is the first ">" after open_pos, so a self-closing tag is
      # exactly one whose text ends in "/>".
      depth += 1 unless text[open_pos..tag_end].end_with?("/>")
      offset = tag_end + 1
    else
      depth -= 1 if depth.positive?
      # close_pattern always ends in ">", so this lookup normally succeeds;
      # the fallback only guards against a nil.
      closing_gt = text.index(">", close_pos)
      offset = closing_gt ? closing_gt + 1 : close_pos + 2
    end
  end

  nil
end
|
|
1116
|
+
|
|
1117
|
+
# Locate text through the parsed node tree when path-based lookup fails.
#
# Walks from +node+ up its parent chain to the nearest named node that has
# an "id" attribute, finds that element's opening tag in +text+ (matching
# `id="..."` with double quotes), and searches for +value+ inside it:
# first inside the innermost named node (matched by tag name and attribute
# signature), then anywhere in the anchor's content.
#
# @param node [Canon::Xml::Node] the parsed node (TextNode or ElementNode)
# @param value [String] the text value to locate (e.g., "a")
# @param text [String] the full source text
# @param line_map [Array<Hash>] pre-built line offset map
# @param _side [Symbol] :old or :new; currently unused
# @return [Hash, nil] location hash {char_offset, line_number, col} or nil
def locate_via_node_tree(node, value, text, line_map, _side)
  return nil unless node

  # Convert an absolute offset into a location hash; nil when the offset
  # cannot be mapped to a line.
  to_location = lambda do |char_offset|
    line_idx = SourceLocator.send(:find_line_for_offset, char_offset,
                                  line_map)
    if line_idx
      { char_offset: char_offset, line_number: line_idx,
        col: char_offset - line_map[line_idx][:start_offset] }
    end
  end

  # Named nodes from +node+ upwards, nearest first.
  lineage = []
  cursor = node
  while cursor.respond_to?(:parent)
    lineage << cursor if cursor.respond_to?(:name)
    cursor = cursor.parent
  end

  # Nearest node in the lineage that carries an "id" attribute.
  anchor = nil
  id_attr = nil
  lineage.each do |candidate|
    next unless candidate.respond_to?(:attribute_nodes) && candidate.attribute_nodes

    id_attr = candidate.attribute_nodes.find do |attr|
      attr.respond_to?(:name) && attr.name == "id"
    end
    next unless id_attr

    anchor = candidate
    break
  end
  return nil unless anchor

  anchor_name = anchor.name
  anchor_id = id_attr.respond_to?(:value) ? id_attr.value : nil
  return nil unless anchor_id

  # Opening tag of the anchor: <anchor_name ... id="anchor_id" ...>
  anchor_open = text.index(
    /<#{Regexp.escape(anchor_name)}\b[^>]*\bid="#{Regexp.escape(anchor_id)}"/,
  )
  return nil unless anchor_open

  anchor_tag_end = text.index(">", anchor_open)
  return nil unless anchor_tag_end

  content_start = anchor_tag_end + 1
  anchor_close = text.index("</#{anchor_name}>", content_start)
  return nil unless anchor_close

  # Innermost named node in the lineage; only searched separately when it
  # is not the anchor itself.
  leaf = lineage.first
  if leaf && leaf != anchor
    leaf_name = leaf.name
    signature = element_attribute_signature(leaf)
    leaf_opener = /<#{Regexp.escape(leaf_name)}\b/

    leaf_open = nil
    scan_from = content_start
    while (candidate_pos = text.index(leaf_opener, scan_from))
      break if candidate_pos >= anchor_close

      candidate_end = text.index(">", candidate_pos)
      break unless candidate_end && candidate_end < anchor_close

      # Accept the first tag that contains every key="value" pair of the
      # signature (an empty signature accepts any tag).
      tag_text = text[candidate_pos..candidate_end]
      if signature.all? { |k, v| tag_text.include?("#{k}=\"#{v}\"") }
        leaf_open = candidate_pos
        break
      end

      scan_from = candidate_pos + 1
    end

    if leaf_open
      leaf_tag_end = text.index(">", leaf_open)
      leaf_close = text.index("</#{leaf_name}>", leaf_tag_end + 1)

      # A self-closing leaf cannot contain the value in this document;
      # give up so the caller can try the other document.
      return nil if text[leaf_open..leaf_tag_end].include?("/>")

      if leaf_close && leaf_close < anchor_close
        inside_leaf = text.index(value, leaf_tag_end + 1)
        if inside_leaf && inside_leaf < leaf_close
          return to_location.call(inside_leaf)
        end
      end
    end
  end

  # Direct search: the value may sit anywhere in the anchor's content.
  inside_anchor = text.index(value, content_start)
  if inside_anchor && inside_anchor < anchor_close
    return to_location.call(inside_anchor)
  end

  nil
end
|
|
1247
|
+
|
|
1248
|
+
# Locate +value+ by anchoring on the parent element of a TextNode.
#
# Scans +text+ for opening tags with the parent's name whose tag text
# contains every key="value" pair of the parent's attribute signature, and
# returns the first position of +value+ that lies before that element's
# next closing tag. Tags that match but do not contain the value are
# skipped and scanning continues.
#
# @param textnode [Canon::Xml::Nodes::TextNode] the TextNode whose parent to use
# @param value [String] the text value to find
# @param text [String] the source text to search in
# @param line_map [Array<Hash>] pre-built line offset map
# @return [Hash, nil] location hash with :char_offset, :line_number, :col or nil
def locate_textnode_parent(textnode, value, text, line_map)
  return nil unless textnode.respond_to?(:parent) && textnode.parent

  parent = textnode.parent
  return nil unless parent.respond_to?(:name) && parent.name

  parent_name = parent.name
  signature = element_attribute_signature(parent)
  opener = /<#{Regexp.escape(parent_name)}\b/
  cursor = 0

  loop do
    open_at = text.index(opener, cursor)
    break if open_at.nil?

    open_end = text.index(">", open_at)
    break unless open_end

    tag_text = text[open_at..open_end]
    # An empty signature matches every tag of this name.
    if signature.all? { |k, v| tag_text.include?("#{k}=\"#{v}\"") }
      close_at = text.index("</#{parent_name}>", open_end + 1)
      return nil unless close_at

      found = text.index(value, open_end + 1)
      if found && found < close_at
        line_idx = SourceLocator.send(:find_line_for_offset, found,
                                      line_map)
        return nil unless line_idx

        return { char_offset: found, line_number: line_idx,
                 col: found - line_map[line_idx][:start_offset] }
      end
    end

    cursor = open_at + 1
  end

  nil
end
|
|
1303
|
+
|
|
1304
|
+
# Locate the parent element of a TextNode in +text+, even when that
# element is empty there.
#
# The first opening tag with the parent's name whose tag text contains
# every key="value" pair of the parent's attribute signature is used. The
# returned position is meant for zero-length ranges:
# - self-closing element: the offset of its `<`
# - regular element: the offset of the `>` that ends its opening tag
#
# @param textnode [Canon::Xml::Nodes::TextNode] the TextNode whose parent to find
# @param text [String] the source text (should be text2)
# @param line_map [Array<Hash>] pre-built line offset map
# @return [Hash, nil] location hash with :char_offset, :line_number, :col or nil
def locate_element_in_text2(textnode, text, line_map)
  return nil unless textnode.respond_to?(:parent) && textnode.parent

  parent = textnode.parent
  return nil unless parent.respond_to?(:name) && parent.name

  parent_name = parent.name
  parent_attrs = element_attribute_signature(parent)
  anchor_pattern = /<#{Regexp.escape(parent_name)}\b/
  offset = 0

  while (anchor_pos = text.index(anchor_pattern, offset))
    tag_end = text.index(">", anchor_pos)
    break unless tag_end

    tag_text = text[anchor_pos..tag_end]
    # An empty signature matches every tag of this name.
    attrs_match = parent_attrs.all? do |k, v|
      tag_text.include?("#{k}=\"#{v}\"")
    end

    if attrs_match
      # The regular-element case previously read an unassigned name here
      # and raised NameError; both cases now share one code path.
      target = tag_text.include?("/>") ? anchor_pos : tag_end

      line_idx = SourceLocator.send(:find_line_for_offset, target,
                                    line_map)
      return nil unless line_idx

      return { char_offset: target, line_number: line_idx,
               col: target - line_map[line_idx][:start_offset] }
    end

    offset = anchor_pos + 1
  end

  nil
end
|
|
1367
|
+
|
|
1368
|
+
# Collect an element's attributes into a name => value hash used for
# matching opening tags in source text.
#
# @param element [Object] any object; only objects responding to
#   attribute_nodes with a non-nil result contribute entries
# @return [Hash] attribute name => value; empty when the element exposes
#   no attribute nodes. Attributes lacking #name or #value are skipped.
def element_attribute_signature(element)
  return {} unless element.respond_to?(:attribute_nodes) && element.attribute_nodes

  element.attribute_nodes.each_with_object({}) do |attr, signature|
    next unless attr.respond_to?(:name) && attr.respond_to?(:value)

    signature[attr.name] = attr.value
  end
end
|
|
1380
|
+
|
|
1381
|
+
# Last-resort lookup for short text when tree-based methods fail.
#
# Returns the first occurrence of +value+ in +text+ as reported by
# SourceLocator.locate. This is best effort: the first occurrence is not
# necessarily the one the diff refers to. Intended for `before.nil?`
# cases where the content exists in text1 but not at the tree-indicated
# position in text2.
#
# @param value [String] the text to find
# @param _path [String] the diff node path; currently unused
# @param text [String] the source text (should be text1/original)
# @param line_map [Array<Hash>] pre-built line offset map
# @return [Hash, nil] location hash, or nil when +value+ is nil/empty or
#   not found
def locate_short_text_in_original(value, _path, text, line_map)
  return nil if !value || value.empty?

  SourceLocator.locate(value, text, line_map) || nil
end
|
|
1401
|
+
|
|
1402
|
+
# Count how many opening tags of a given name appear before a character
# position, minus one (the count includes the element the position is
# inside). Used to decide which element instance an occurrence belongs to.
#
# Every opening tag in the prefix is counted regardless of nesting depth,
# including self-closing tags such as `<name/>`. The element name is
# matched literally, so names containing regex metacharacters (e.g. a dot)
# only match themselves.
#
# @param text [String] the source text
# @param char_offset [Integer] character offset to check before
# @param element_name [String] name of element to count
# @return [Integer] 0-based index of the element containing the position;
#   0 when no opening tag precedes the position
def count_elements_before_position(text, char_offset, element_name)
  prefix = text[0...char_offset]
  # The delimiter after the name keeps "item" from matching "<items>".
  opening_tag = %r{<#{Regexp.escape(element_name)}[>\s/]}
  count = prefix.scan(opening_tag).length

  # Subtract 1 because the count includes the element we are inside
  [count - 1, 0].max
end
|
|
1416
|
+
end
|
|
1417
|
+
end
|
|
1418
|
+
end
|