canon 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/.rubocop_todo.yml +276 -7
- data/README.adoc +203 -138
- data/_config.yml +116 -0
- data/docs/ADVANCED_TOPICS.adoc +20 -0
- data/docs/BASIC_USAGE.adoc +16 -0
- data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/docs/CLI.adoc +493 -0
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/docs/DIFF_FORMATTING.adoc +540 -0
- data/docs/FORMATS.adoc +447 -0
- data/docs/INDEX.adoc +222 -0
- data/docs/INPUT_VALIDATION.adoc +477 -0
- data/docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/docs/MATCH_OPTIONS.adoc +719 -0
- data/docs/MODES.adoc +432 -0
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/docs/OPTIONS.adoc +1387 -0
- data/docs/PREPROCESSING.adoc +491 -0
- data/docs/RSPEC.adoc +605 -0
- data/docs/RUBY_API.adoc +478 -0
- data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
- data/docs/UNDERSTANDING_CANON.adoc +17 -0
- data/docs/VERBOSE.adoc +482 -0
- data/exe/canon +7 -0
- data/lib/canon/cli.rb +179 -0
- data/lib/canon/commands/diff_command.rb +195 -0
- data/lib/canon/commands/format_command.rb +113 -0
- data/lib/canon/comparison/base_comparator.rb +39 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +410 -0
- data/lib/canon/comparison/json_comparator.rb +212 -0
- data/lib/canon/comparison/match_options.rb +616 -0
- data/lib/canon/comparison/xml_comparator.rb +566 -0
- data/lib/canon/comparison/yaml_comparator.rb +93 -0
- data/lib/canon/comparison.rb +239 -0
- data/lib/canon/config.rb +172 -0
- data/lib/canon/diff/diff_block.rb +71 -0
- data/lib/canon/diff/diff_block_builder.rb +105 -0
- data/lib/canon/diff/diff_classifier.rb +46 -0
- data/lib/canon/diff/diff_context.rb +85 -0
- data/lib/canon/diff/diff_context_builder.rb +107 -0
- data/lib/canon/diff/diff_line.rb +77 -0
- data/lib/canon/diff/diff_node.rb +56 -0
- data/lib/canon/diff/diff_node_mapper.rb +148 -0
- data/lib/canon/diff/diff_report.rb +133 -0
- data/lib/canon/diff/diff_report_builder.rb +62 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
- data/lib/canon/diff_formatter/character_map.yml +197 -0
- data/lib/canon/diff_formatter/debug_output.rb +431 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
- data/lib/canon/diff_formatter/legend.rb +141 -0
- data/lib/canon/diff_formatter.rb +520 -0
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html4_formatter.rb +17 -0
- data/lib/canon/formatters/html5_formatter.rb +17 -0
- data/lib/canon/formatters/html_formatter.rb +37 -0
- data/lib/canon/formatters/html_formatter_base.rb +163 -0
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/xml_formatter.rb +20 -55
- data/lib/canon/formatters/yaml_formatter.rb +4 -1
- data/lib/canon/pretty_printer/html.rb +57 -0
- data/lib/canon/pretty_printer/json.rb +25 -0
- data/lib/canon/pretty_printer/xml.rb +29 -0
- data/lib/canon/rspec_matchers.rb +222 -80
- data/lib/canon/validators/base_validator.rb +49 -0
- data/lib/canon/validators/html_validator.rb +138 -0
- data/lib/canon/validators/json_validator.rb +89 -0
- data/lib/canon/validators/xml_validator.rb +53 -0
- data/lib/canon/validators/yaml_validator.rb +73 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/attribute_handler.rb +80 -0
- data/lib/canon/xml/c14n.rb +36 -0
- data/lib/canon/xml/character_encoder.rb +38 -0
- data/lib/canon/xml/data_model.rb +225 -0
- data/lib/canon/xml/element_matcher.rb +196 -0
- data/lib/canon/xml/line_range_mapper.rb +158 -0
- data/lib/canon/xml/namespace_handler.rb +86 -0
- data/lib/canon/xml/node.rb +32 -0
- data/lib/canon/xml/nodes/attribute_node.rb +54 -0
- data/lib/canon/xml/nodes/comment_node.rb +23 -0
- data/lib/canon/xml/nodes/element_node.rb +56 -0
- data/lib/canon/xml/nodes/namespace_node.rb +38 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
- data/lib/canon/xml/nodes/root_node.rb +16 -0
- data/lib/canon/xml/nodes/text_node.rb +23 -0
- data/lib/canon/xml/processor.rb +151 -0
- data/lib/canon/xml/whitespace_normalizer.rb +72 -0
- data/lib/canon/xml/xml_base_handler.rb +188 -0
- data/lib/canon.rb +14 -3
- metadata +116 -21
|
@@ -0,0 +1,860 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base_formatter"
|
|
4
|
+
require_relative "../legend"
|
|
5
|
+
require "set"
|
|
6
|
+
require "strscan"
|
|
7
|
+
|
|
8
|
+
module Canon
|
|
9
|
+
class DiffFormatter
|
|
10
|
+
module ByLine
|
|
11
|
+
# XML formatter with DOM-guided diffing
|
|
12
|
+
# Uses DOM parsing and element matching for intelligent XML diffs
|
|
13
|
+
class XmlFormatter < BaseFormatter
|
|
14
|
+
# Render the diff between two XML documents.
#
# When the comparison supplied at least one Canon::Diff::DiffNode the
# report pipeline is used, unless the show_diffs filter leaves nothing
# to display, in which case an empty string is returned. Without
# DiffNodes (for example when @differences is nil) the legacy
# DOM-guided rendering is used for backward compatibility.
#
# @param doc1 [String] First XML document
# @param doc2 [String] Second XML document
# @return [String] Formatted diff
def format(doc1, doc2)
  has_diff_nodes = @differences&.any?(Canon::Diff::DiffNode)

  # LEGACY: no comparison result with DiffNodes was provided.
  return format_legacy(doc1, doc2) unless has_diff_nodes

  # Nothing to show for the selected show_diffs setting.
  return "" if should_skip_diff_display?

  format_with_pipeline(doc1, doc2)
end
|
|
36
|
+
|
|
37
|
+
# Render the diff through the DiffNodeMapper / DiffReportBuilder pipeline.
#
# @param doc1 [String] First XML document
# @param doc2 [String] Second XML document
# @return [String] Formatted diff, or "" when the show_diffs filter
#   leaves nothing to display
def format_with_pipeline(doc1, doc2)
  return "" if should_skip_diff_display?

  # Pipeline classes are required here, when the method first runs.
  require_relative "../../diff/diff_node_mapper"
  require_relative "../../diff/diff_report_builder"

  # Layer 2: turn DiffNodes into DiffLines.
  mapped_lines = Canon::Diff::DiffNodeMapper.map(@differences, doc1, doc2)

  # Layers 3-5: blocks, contexts and the final report.
  builder_options = {
    show_diffs: @show_diffs,
    context_lines: @context_lines,
    grouping_lines: @diff_grouping_lines,
  }
  report = Canon::Diff::DiffReportBuilder.build(mapped_lines,
                                                **builder_options)

  # Layer 6: render the report.
  format_report(report, doc1, doc2)
end
|
|
61
|
+
|
|
62
|
+
# Render a DiffReport as text.
#
# The output starts with a Unicode legend (followed by a blank line)
# when Legend detects non-ASCII characters in either document, then
# lists every context of the report separated by blank lines.
#
# @param report [#contexts] Report whose contexts are rendered
# @param doc1 [String] First XML document
# @param doc2 [String] Second XML document
# @return [String] Rendered report, "" when the report has no contexts
def format_report(report, doc1, doc2)
  return "" if report.contexts.empty?

  lines1 = doc1.split("\n")
  lines2 = doc2.split("\n")

  rendered = []

  # Legend is only emitted when there is something to explain.
  special_chars = Legend.detect_non_ascii((lines1 + lines2).join,
                                          @visualization_map)
  unless special_chars.empty?
    rendered.push(Legend.build_legend(special_chars, use_color: @use_color),
                  "")
  end

  report.contexts.each_with_index do |context, position|
    rendered << "" unless position.zero?
    rendered << format_context_from_lines(context, lines1, lines2)
  end

  rendered.join("\n")
end
|
|
89
|
+
|
|
90
|
+
# Render every DiffLine of a context as unified-diff lines.
#
# Line numbers stored on the DiffLines are zero-based and are shown
# one-based. Informative lines are drawn in cyan instead of red/green.
# A :changed line is shown as a removal (text taken from lines1)
# followed by an addition (the DiffLine's own content). Lines of any
# other type are ignored.
#
# @param context [#lines] Context holding the DiffLines to render
# @param lines1 [Array<String>] Lines of the first document
# @param _lines2 [Array<String>] Lines of the second document (unused)
# @return [String] Rendered lines joined with newlines
def format_context_from_lines(context, lines1, _lines2)
  rendered = context.lines.flat_map do |entry|
    case entry.type
    when :unchanged
      number = entry.line_number + 1
      [format_unified_line(number, number, " ", entry.content)]
    when :removed
      number = entry.line_number + 1
      note = entry.informative?
      [format_unified_line(number, nil, "-", entry.content,
                           note ? :cyan : :red, informative: note)]
    when :added
      number = entry.line_number + 1
      note = entry.informative?
      [format_unified_line(nil, number, "+", entry.content,
                           note ? :cyan : :green, informative: note)]
    when :changed
      number = entry.line_number + 1
      note = entry.informative?
      # Old text comes from the first document, new text from the DiffLine.
      before = lines1[entry.line_number]
      after = entry.content
      [
        format_unified_line(number, nil, "-", before,
                            note ? :cyan : :red, informative: note),
        format_unified_line(nil, number, "+", after,
                            note ? :cyan : :green, informative: note),
      ]
    else
      []
    end
  end

  rendered.join("\n")
end
|
|
134
|
+
|
|
135
|
+
# Legacy rendering path (kept for backward compatibility).
#
# Parses both documents into Canon::Xml data models, matches their
# elements, maps elements to line ranges of the ORIGINAL documents and
# renders the per-element differences. If any of that raises a
# StandardError, a warning, the error and up to three backtrace frames
# mentioning "canon" are emitted, followed by a SimpleFormatter diff.
#
# @param doc1 [String] First XML document
# @param doc2 [String] Second XML document
# @return [String] Formatted diff, or "" when the show_diffs filter
#   leaves nothing to display
def format_legacy(doc1, doc2)
  return "" if should_skip_diff_display?

  require_relative "../../xml/data_model"
  require_relative "../../xml/element_matcher"
  require_relative "../../xml/line_range_mapper"

  parts = []

  begin
    # Parse to DOM
    tree1 = Canon::Xml::DataModel.from_xml(doc1)
    tree2 = Canon::Xml::DataModel.from_xml(doc2)

    # Match elements semantically
    pairs = Canon::Xml::ElementMatcher.new.match_trees(tree1, tree2)

    # Line ranges and displayed lines both come from the original text
    mapper1 = Canon::Xml::LineRangeMapper.new(indent: 2)
    mapper2 = Canon::Xml::LineRangeMapper.new(indent: 2)
    ranges1 = mapper1.build_map(tree1, doc1)
    ranges2 = mapper2.build_map(tree2, doc2)

    parts << format_element_matches(pairs, ranges1, ranges2,
                                    doc1.split("\n"), doc2.split("\n"))
  rescue StandardError => e
    # Fall back to simple diff on error
    parts << colorize("Warning: DOM parsing failed, using simple diff",
                      :yellow)
    parts << colorize("Error: #{e.class}: #{e.message}", :red)

    # Only frames that mention canon, and at most three of them
    canon_frames = e.backtrace.select { |frame| frame.include?("canon") }
                    .take(3)
    if canon_frames.any?
      parts << colorize("Backtrace:", :yellow)
      parts.concat(canon_frames.map { |frame| colorize(" #{frame}", :yellow) })
    end

    parts << ""
    require_relative "simple_formatter"
    fallback = SimpleFormatter.new(
      use_color: @use_color,
      context_lines: @context_lines,
      diff_grouping_lines: @diff_grouping_lines,
      visualization_map: @visualization_map,
    )
    parts << fallback.format(doc1, doc2)
  end

  parts.join("\n")
end
|
|
200
|
+
|
|
201
|
+
private
|
|
202
|
+
|
|
203
|
+
# Decide whether the diff output should be suppressed entirely.
#
# Returns true when:
# 1. show_diffs is :normative AND no DiffNode is normative
# 2. show_diffs is :informative AND no DiffNode is informative
# Returns false for any other show_diffs value, and whenever
# @differences is nil or empty.
#
# @return [Boolean]
def should_skip_diff_display?
  return false if @differences.nil? || @differences.empty?

  # Predicate a DiffNode must satisfy to count for the current filter;
  # nil means "no filtering" (:all or anything else).
  wanted = case @show_diffs
           when :normative then :normative?
           when :informative then :informative?
           end
  return false unless wanted

  @differences.none? do |diff|
    diff.is_a?(Canon::Diff::DiffNode) && diff.public_send(wanted)
  end
end
|
|
226
|
+
|
|
227
|
+
# Render the differences described by a list of element matches.
#
# Emits a Unicode legend first when non-ASCII characters are detected,
# then the diff sections ordered by their starting line. Sections are
# grouped by proximity when @diff_grouping_lines is set; otherwise they
# are separated by one blank line.
#
# @param matches [Array] Element matches (status, elem1, elem2, path)
# @param map1 [Hash] Line ranges of first-document elements
# @param map2 [Hash] Line ranges of second-document elements
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @return [String] Rendered output
def format_element_matches(matches, map1, map2, lines1, lines2)
  rendered = []

  # Legend for non-ASCII characters, if any are present in either input
  special_chars = Legend.detect_non_ascii((lines1 + lines2).join,
                                          @visualization_map)
  unless special_chars.empty?
    rendered.push(Legend.build_legend(special_chars, use_color: @use_color),
                  "")
  end

  # Children of parents that already show a diff are not repeated
  skipped = build_skip_set(matches, map1, map2, lines1, lines2)
  nested = build_children_set(matches)

  sections = collect_diff_sections(matches, map1, map2, lines1, lines2,
                                   skipped, nested)
  sections = sections.sort_by do |section|
    section[:start_line1] || section[:start_line2] || 0
  end

  rendered << if @diff_grouping_lines
                format_diff_groups(
                  group_diff_sections(sections, @diff_grouping_lines),
                  lines1, lines2
                )
              else
                sections.map { |section| section[:formatted] }
                  .compact.join("\n\n")
              end

  rendered.join("\n")
end
|
|
272
|
+
|
|
273
|
+
# Build the set of first-document elements whose diff must not be shown
# on its own because an ancestor already shows a diff.
#
# Pass 1 collects every matched element whose line range differs
# textually between the two documents (semantic filtering happens later
# in collect_diff_sections). Pass 2 marks each such element for skipping
# when one of its ancestors is in that collection too.
#
# @param matches [Array] Element matches (status, elem1, elem2)
# @param map1 [Hash] Line ranges of first-document elements
# @param map2 [Hash] Line ranges of second-document elements
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @return [Set] Elements (from the first document) to skip
def build_skip_set(matches, map1, map2, lines1, lines2)
  changed = Set.new

  # First pass: matched elements whose text differs line by line.
  matches.each do |match|
    next unless match.status == :matched

    range1 = map1[match.elem1]
    range2 = map2[match.elem2]
    next unless range1 && range2

    old_slice = lines1[range1.start_line..range1.end_line]
    new_slice = lines2[range2.start_line..range2.end_line]
    changed.add(match.elem1) if old_slice != new_slice
  end

  # Second pass: walk up from each changed element; the first changed,
  # named ancestor found means the element is already covered.
  changed.each_with_object(Set.new) do |elem, skipped|
    next unless elem.respond_to?(:parent)

    ancestor = elem.parent
    while ancestor
      if ancestor.respond_to?(:name) && changed.include?(ancestor)
        skipped.add(elem)
        break
      end
      ancestor = ancestor.respond_to?(:parent) ? ancestor.parent : nil
    end
  end
end
|
|
316
|
+
|
|
317
|
+
# Tell whether an element, or any of its descendants, is in the given
# set of elements with semantic diffs.
#
# @param element [Object] Node to inspect; descendants are visited only
#   when it responds to #children
# @param elements_with_semantic_diffs [Set] Elements known to differ
# @return [Boolean]
def has_semantic_diff_in_subtree?(element, elements_with_semantic_diffs)
  return true if elements_with_semantic_diffs.include?(element)
  return false unless element.respond_to?(:children)

  element.children.any? do |descendant|
    has_semantic_diff_in_subtree?(descendant, elements_with_semantic_diffs)
  end
end
|
|
331
|
+
|
|
332
|
+
# Collect every individual node (not node pairs) referenced by the
# DiffNodes in @differences.
#
# Entries of @differences that are not Canon::Diff::DiffNode are
# ignored, as are missing node1/node2 values.
#
# @return [Set] Nodes with semantic diffs; empty when @differences is
#   nil or empty
def build_elements_with_semantic_diffs_set
  return Set.new if @differences.nil? || @differences.empty?

  @differences.each_with_object(Set.new) do |diff, collected|
    next unless diff.is_a?(Canon::Diff::DiffNode)

    # Either side may be absent; keep whichever exists.
    [diff.node1, diff.node2].each do |node|
      collected.add(node) if node
    end
  end
end
|
|
348
|
+
|
|
349
|
+
# Collect the direct children of both elements of every :matched pair.
#
# Only children that respond to #name are kept.
#
# @param matches [Array] Element matches (status, elem1, elem2)
# @return [Set] Named direct children of matched elements
def build_children_set(matches)
  matches.each_with_object(Set.new) do |match, collected|
    next unless match.status == :matched

    parents = [match.elem1, match.elem2].compact
    parents.select { |parent| parent.respond_to?(:children) }.each do |parent|
      parent.children.each do |child|
        collected.add(child) if child.respond_to?(:name)
      end
    end
  end
end
|
|
367
|
+
|
|
368
|
+
# Build the list of diff sections (formatted text plus line metadata)
# for all element matches.
#
# * :matched pairs are skipped when listed in elements_to_skip. When the
#   comparison supplied DiffNodes, they are also skipped unless the
#   element or one of its descendants has a semantic diff.
# * :deleted / :inserted elements are skipped when they are direct
#   children of a matched parent.
#
# @param matches [Array] Element matches (status, elem1, elem2, path)
# @param map1 [Hash] Line ranges of first-document elements
# @param map2 [Hash] Line ranges of second-document elements
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @param elements_to_skip [Set] Matched elements covered by an ancestor
# @param children_of_matched_parents [Set] Children of matched elements
# @return [Array<Hash>] Sections in the order of +matches+
def collect_diff_sections(matches, map1, map2, lines1, lines2,
                          elements_to_skip, children_of_matched_parents)
  elements_with_semantic_diffs = build_elements_with_semantic_diffs_set

  # Semantic filtering applies only when DiffNode objects are available
  # (when called standalone or without DiffNodes, all diffs are shown).
  # The answer does not depend on the match, so it is computed once
  # instead of re-scanning @differences for every matched element.
  semantic_filtering = @differences&.any?(Canon::Diff::DiffNode) || false

  diff_sections = []

  matches.each do |match|
    section =
      case match.status
      when :matched
        next if elements_to_skip.include?(match.elem1)

        if semantic_filtering
          # No semantic diffs at all: every difference was normalized.
          next if elements_with_semantic_diffs.empty?

          # Nothing semantic in this element's subtree.
          next unless has_semantic_diff_in_subtree?(
            match.elem1, elements_with_semantic_diffs
          )
        end

        format_matched_element_with_metadata(match, map1, map2, lines1,
                                             lines2)
      when :deleted
        next if children_of_matched_parents.include?(match.elem1)

        format_deleted_element_with_metadata(match, map1, lines1)
      when :inserted
        next if children_of_matched_parents.include?(match.elem2)

        format_inserted_element_with_metadata(match, map2, lines2)
      end

    diff_sections << section if section
  end

  diff_sections
end
|
|
413
|
+
|
|
414
|
+
# Wrap the rendered diff of a matched element pair together with the
# line ranges of both elements and the element path.
#
# @param match [Object] Match with elem1, elem2 and path
# @param map1 [Hash] Line ranges of first-document elements
# @param map2 [Hash] Line ranges of second-document elements
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @return [Hash, nil] Section hash, or nil when a range is missing or
#   there is nothing to render
def format_matched_element_with_metadata(match, map1, map2, lines1,
                                         lines2)
  old_range = map1[match.elem1]
  new_range = map2[match.elem2]
  return nil if !old_range || !new_range

  rendered = format_matched_element(match, map1, map2, lines1, lines2)
  return nil unless rendered

  {
    formatted: rendered,
    start_line1: old_range.start_line,
    end_line1: old_range.end_line,
    start_line2: new_range.start_line,
    end_line2: new_range.end_line,
    path: match.path.join("/"),
  }
end
|
|
434
|
+
|
|
435
|
+
# Wrap the rendered text of a deleted element together with its line
# range in the first document and the element path. The second-document
# line fields are nil.
#
# @param match [Object] Match with elem1 and path
# @param map1 [Hash] Line ranges of first-document elements
# @param lines1 [Array<String>] Lines of the first document
# @return [Hash, nil] Section hash, or nil when the range is missing or
#   there is nothing to render
def format_deleted_element_with_metadata(match, map1, lines1)
  old_range = map1[match.elem1]
  return nil unless old_range

  rendered = format_deleted_element(match, map1, lines1)
  return nil unless rendered

  {
    formatted: rendered,
    start_line1: old_range.start_line,
    end_line1: old_range.end_line,
    start_line2: nil,
    end_line2: nil,
    path: match.path.join("/"),
  }
end
|
|
452
|
+
|
|
453
|
+
# Wrap the rendered text of an inserted element together with its line
# range in the second document and the element path. The first-document
# line fields are nil.
#
# @param match [Object] Match with elem2 and path
# @param map2 [Hash] Line ranges of second-document elements
# @param lines2 [Array<String>] Lines of the second document
# @return [Hash, nil] Section hash, or nil when the range is missing or
#   there is nothing to render
def format_inserted_element_with_metadata(match, map2, lines2)
  new_range = map2[match.elem2]
  return nil unless new_range

  rendered = format_inserted_element(match, map2, lines2)
  return nil unless rendered

  {
    formatted: rendered,
    start_line1: nil,
    end_line1: nil,
    start_line2: new_range.start_line,
    end_line2: new_range.end_line,
    path: match.path.join("/"),
  }
end
|
|
470
|
+
|
|
471
|
+
# Render the line-level differences between the two elements of a
# matched pair.
#
# The lines covered by each element are compared with Diff::LCS.sdiff;
# changed runs are grouped into contexts (using @diff_grouping_lines,
# 0 when unset), widened by @context_lines and rendered, with a blank
# line between contexts.
#
# @param match [Object] Match with elem1 and elem2
# @param map1 [Hash] Line ranges of first-document elements
# @param map2 [Hash] Line ranges of second-document elements
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @return [String, nil] Rendered diff; nil when a range is missing or
#   the two line ranges are identical
def format_matched_element(match, map1, map2, lines1, lines2)
  old_range = map1[match.elem1]
  new_range = map2[match.elem2]
  return nil if !old_range || !new_range

  old_slice = lines1[old_range.start_line..old_range.end_line]
  new_slice = lines2[new_range.start_line..new_range.end_line]

  # Identical text: nothing to show.
  return nil if old_slice == new_slice

  changes = ::Diff::LCS.sdiff(old_slice, new_slice)

  blocks = identify_diff_blocks(changes)
  return nil if blocks.empty?

  grouped = group_diff_blocks_into_contexts(blocks,
                                            @diff_grouping_lines || 0)
  windows = expand_contexts_with_context_lines(grouped, @context_lines,
                                               changes.length)

  pieces = windows.each_with_index.flat_map do |window, position|
    body = format_context(window, changes, old_range.start_line,
                          new_range.start_line)
    position.zero? ? [body] : ["", body]
  end

  pieces.join("\n")
end
|
|
510
|
+
|
|
511
|
+
# Render an element that exists only in the first document: a header
# naming the element path, then every line of the element as removed.
#
# @param match [Object] Match with elem1 and path
# @param map1 [Hash] Line ranges of first-document elements
# @param lines1 [Array<String>] Lines of the first document
# @return [String, nil] Rendered text, nil when the element has no range
def format_deleted_element(match, map1, lines1)
  range1 = map1[match.elem1]
  return nil unless range1

  header = colorize("Element: #{match.path.join('/')} [DELETED]", :red,
                    :bold)

  # Stored line indexes are zero-based; displayed numbers are one-based.
  removed = (range1.start_line..range1.end_line).map do |index|
    format_unified_line(index + 1, nil, "-", lines1[index], :red)
  end

  [header, *removed].join("\n")
end
|
|
527
|
+
|
|
528
|
+
# Render an element that exists only in the second document: a header
# naming the element path, then every line of the element as added.
#
# @param match [Object] Match with elem2 and path
# @param map2 [Hash] Line ranges of second-document elements
# @param lines2 [Array<String>] Lines of the second document
# @return [String, nil] Rendered text, nil when the element has no range
def format_inserted_element(match, map2, lines2)
  range2 = map2[match.elem2]
  return nil unless range2

  header = colorize("Element: #{match.path.join('/')} [INSERTED]", :green,
                    :bold)

  # Stored line indexes are zero-based; displayed numbers are one-based.
  added = (range2.start_line..range2.end_line).map do |index|
    format_unified_line(nil, index + 1, "+", lines2[index], :green)
  end

  [header, *added].join("\n")
end
|
|
544
|
+
|
|
545
|
+
# Find the contiguous runs of changed entries in an sdiff result.
#
# Every maximal run of entries whose action is not "=" becomes one
# Canon::Diff::DiffBlock carrying the run's first and last index and
# the distinct actions seen in it (in order of first appearance).
#
# @param diffs [Array] Change entries responding to #action
# @return [Array<Canon::Diff::DiffBlock>] Blocks in document order
def identify_diff_blocks(diffs)
  require_relative "../../diff/diff_block"

  blocks = []
  run_start = nil
  run_actions = []

  # Close the run that is currently open, ending it at +last_idx+.
  close_run = lambda do |last_idx|
    blocks << Canon::Diff::DiffBlock.new(
      start_idx: run_start,
      end_idx: last_idx,
      types: run_actions,
    )
    run_start = nil
    run_actions = []
  end

  diffs.each_with_index do |change, idx|
    if change.action != "="
      if run_start
        run_actions << change.action unless run_actions.include?(change.action)
      else
        run_start = idx
        run_actions = [change.action]
      end
    elsif run_start
      # An unchanged entry terminates the open run just before it.
      close_run.call(idx - 1)
    end
  end

  # A run that reaches the end of the input is still open here.
  close_run.call(diffs.length - 1) if run_start

  blocks
end
|
|
583
|
+
|
|
584
|
+
# Partition diff blocks into contexts of neighbouring blocks.
#
# Consecutive blocks stay in the same context while the number of
# entries between them (next start - previous end - 1) does not exceed
# +grouping_lines+.
#
# @param blocks [Array] Blocks responding to #start_idx and #end_idx,
#   in ascending order
# @param grouping_lines [Integer] Largest gap that still groups blocks
# @return [Array<Array>] Contexts, each a non-empty array of blocks
def group_diff_blocks_into_contexts(blocks, grouping_lines)
  return [] if blocks.empty?

  # Start a new context wherever the gap to the previous block is too big.
  blocks.slice_when do |previous, following|
    gap = following.start_idx - previous.end_idx - 1
    !(gap <= grouping_lines)
  end.to_a
end
|
|
606
|
+
|
|
607
|
+
# Turn grouped blocks into Canon::Diff::DiffContext objects whose index
# range is widened by +context_lines+ on both sides, clipped to
# 0..total_lines - 1.
#
# @param contexts [Array<Array>] Groups of blocks (non-empty arrays)
# @param context_lines [Integer] Entries of context to add on each side
# @param total_lines [Integer] Number of entries in the diff
# @return [Array<Canon::Diff::DiffContext>]
def expand_contexts_with_context_lines(contexts, context_lines,
                                       total_lines)
  require_relative "../../diff/diff_context"

  last_valid_idx = total_lines - 1

  contexts.map do |group|
    lower = [0, group.first.start_idx - context_lines].max
    upper = [last_valid_idx, group.last.end_idx + context_lines].min

    Canon::Diff::DiffContext.new(start_idx: lower, end_idx: upper,
                                 blocks: group)
  end
end
|
|
626
|
+
|
|
627
|
+
# Render the sdiff entries covered by a context.
#
# Displayed line numbers are the element's base line plus the entry's
# position within the element, plus one. Entries with action "=", "-"
# and "+" become single unified lines; "!" entries are rendered with
# token-level highlighting of the old and new text. Other actions
# produce no output.
#
# @param context [Object] Context with #start_idx and #end_idx
# @param diffs [Array] sdiff entries of the element
# @param base_line1 [Integer] Start line of the element in document 1
# @param base_line2 [Integer] Start line of the element in document 2
# @return [String] Rendered lines joined with newlines
def format_context(context, diffs, base_line1, base_line2)
  rendered = (context.start_idx..context.end_idx).flat_map do |idx|
    change = diffs[idx]

    old_no = change.old_position ? base_line1 + change.old_position + 1 : nil
    new_no = change.new_position ? base_line2 + change.new_position + 1 : nil

    case change.action
    when "="
      [format_unified_line(old_no, new_no, " ", change.old_element)]
    when "-"
      [format_unified_line(old_no, nil, "-", change.old_element, :red)]
    when "+"
      [format_unified_line(nil, new_no, "+", change.new_element, :green)]
    when "!"
      # Token-level highlighting
      token_changes = ::Diff::LCS.sdiff(tokenize_xml(change.old_element),
                                        tokenize_xml(change.new_element))
      old_text = build_token_highlighted_text(token_changes, :old)
      new_text = build_token_highlighted_text(token_changes, :new)
      [format_token_diff_line(old_no, new_no, old_text, new_text)]
    else
      []
    end
  end

  rendered.join("\n")
end
|
|
663
|
+
|
|
664
|
+
# Partition diff sections into groups of neighbouring sections.
#
# The gap between two consecutive sections is measured separately in
# each document (next start - previous end - 1) and counts as infinite
# when either line number is missing. Sections stay in one group while
# the larger of the two gaps does not exceed +grouping_lines+.
#
# @param sections [Array<Hash>] Sections with :start_line1, :end_line1,
#   :start_line2 and :end_line2
# @param grouping_lines [Numeric] Largest gap that still groups sections
# @return [Array<Array<Hash>>] Groups, each a non-empty array
def group_diff_sections(sections, grouping_lines)
  return [] if sections.empty?

  # Distance between two sections on one side (1 or 2) of the diff.
  gap_between = lambda do |previous, following, side|
    finish = previous[:"end_line#{side}"]
    start = following[:"start_line#{side}"]
    finish && start ? start - finish - 1 : Float::INFINITY
  end

  sections.slice_when do |previous, following|
    widest = [gap_between.call(previous, following, 1),
              gap_between.call(previous, following, 2)].max
    !(widest <= grouping_lines)
  end.to_a
end
|
|
700
|
+
|
|
701
|
+
# Render groups of diff sections.
#
# Groups are separated by a blank line. A group with more than one
# section is introduced by a "Context block has N diffs" header and a
# blank line. Sections without formatted text are left out.
#
# @param groups [Array<Array<Hash>>] Groups of sections (:formatted)
# @param _lines1 [Array<String>] Unused
# @param _lines2 [Array<String>] Unused
# @return [String] Rendered groups joined with newlines
def format_diff_groups(groups, _lines1, _lines2)
  rendered = []

  groups.each_with_index do |group, position|
    rendered << "" unless position.zero?

    if group.length > 1
      rendered << colorize("Context block has #{group.length} diffs",
                           :yellow, :bold)
      rendered << ""
      bodies = group.map { |section| section[:formatted] }
      rendered.concat(bodies.select { |body| body })
    elsif group[0][:formatted]
      rendered << group[0][:formatted]
    end
  end

  rendered.join("\n")
end
|
|
722
|
+
|
|
723
|
+
# Format a unified diff line
|
|
724
|
+
# Format one row of a unified diff: old line number, new line number,
# change marker and content, laid out as "OLD|NEWm | content".
#
# Line numbers are rendered with "%4d". A missing number (nil/false) is
# rendered as a blank of the same width so the "|" gutters line up on every
# row, whether or not both numbers are present.
#
# @param old_num [Integer, nil] line number in the old text, nil when absent
# @param new_num [Integer, nil] line number in the new text, nil when absent
# @param marker [String] change marker placed right after the new number
# @param content [Object] line content; passed through +apply_visualization+
#   only when +color+ is given, otherwise interpolated as is
# @param color [Symbol, nil] colour for the marker and the content
# @param informative [Boolean] accepted for interface compatibility; this
#   method does not read it
# @return [String] the formatted row
def format_unified_line(old_num, new_num, marker, content, color = nil,
                        informative: false)
  # Same width as the "%4d" used for real numbers.
  # NOTE: "%4d" % n is kept rather than Kernel#format in case the enclosing
  # class defines its own #format.
  blank_number = " " * 4
  old_str = old_num ? "%4d" % old_num : blank_number
  new_str = new_num ? "%4d" % new_num : blank_number

  visualized_content = color ? apply_visualization(content, color) : content

  return "#{old_str}|#{new_str}#{marker} | #{visualized_content}" unless @use_color

  gutter = colorize("|", :yellow)
  numbers = "#{colorize(old_str, :yellow)}#{gutter}#{colorize(new_str, :yellow)}"
  # The marker is only coloured when a colour was requested for the row.
  shown_marker = color ? colorize(marker, color) : marker

  "#{numbers}#{shown_marker} #{gutter} #{visualized_content}"
end
|
|
753
|
+
|
|
754
|
+
# Format token diff lines
|
|
755
|
+
# Format a changed line as two rows: the old text marked "-" and the new
# text marked "+", each with its own line number.
#
# Numbers are rendered with "%4d"; the number column that does not apply to
# a row is filled with a blank of the same width so the "|" gutters of both
# rows line up.
#
# @param old_line [Integer] line number in the old text (fed to "%4d")
# @param new_line [Integer] line number in the new text (fed to "%4d")
# @param old_highlighted [String] already-highlighted old content
# @param new_highlighted [String] already-highlighted new content
# @return [String] the two rows joined with "\n"
def format_token_diff_line(old_line, new_line, old_highlighted,
                           new_highlighted)
  blank_number = " " * 4 # same width as "%4d"
  old_number = "%4d" % old_line
  new_number = "%4d" % new_line

  rows =
    if @use_color
      gutter = colorize("|", :yellow)
      removed = colorize("-", :red)
      added = colorize("+", :green)

      [
        "#{colorize(old_number, :yellow)}#{gutter}#{blank_number}#{removed} #{gutter} #{old_highlighted}",
        "#{blank_number}#{gutter}#{colorize(new_number, :yellow)}#{added} #{gutter} #{new_highlighted}"
      ]
    else
      [
        "#{old_number}|#{blank_number}- | #{old_highlighted}",
        "#{blank_number}|#{new_number}+ | #{new_highlighted}"
      ]
    end

  rows.join("\n")
end
|
|
776
|
+
|
|
777
|
+
# Tokenize XML line
|
|
778
|
+
# Split one line of XML-like text into tokens, keeping every character.
#
# At each position the first matching alternative below wins; if none
# matches, a single character is consumed instead, so scanning always
# advances.
#
# @param line [String] text to tokenize
# @return [Array<String>] tokens in source order
def tokenize_xml(line)
  token_pattern = %r{
    \s+              | # run of whitespace
    </?[\w:-]+       | # "<name" or "</name"
    [\w:-]+="[^"]*"  | # attribute with a double-quoted value
    [\w:-]+='[^']*'  | # attribute with a single-quoted value
    [\w:-]+=         | # attribute name followed by a bare "="
    /?>              | # ">" or "/>"
    [^<>\s]+           # any other run without brackets or whitespace
  }x

  scanner = StringScanner.new(line)
  tokens = []
  tokens << (scanner.scan(token_pattern) || scanner.getch) until scanner.eos?
  tokens
end
|
|
804
|
+
|
|
805
|
+
# Build highlighted text from token diff
|
|
806
|
+
# Assemble the highlighted text for one side of a token-level diff.
#
# Each change is dispatched on +change.action+:
#   "=" unchanged token: visualized, and wrapped with colorize(:default)
#       when @use_color is set
#   "-" removed token: shown in red, only on the :old side
#   "+" added token: shown in green, only on the :new side
#   "!" replaced token: old element in red on the :old side, otherwise the
#       new element in green
# Any other action contributes nothing.
#
# @param token_diffs [Enumerable] changes responding to #action,
#   #old_element and #new_element
# @param side [Symbol] :old selects the old side; any other value is
#   treated as the new side for "!" changes
# @return [String] the concatenated, highlighted tokens
def build_token_highlighted_text(token_diffs, side)
  old_side = side == :old

  token_diffs.each_with_object([]) do |change, parts|
    case change.action
    when "="
      # Reuse the shared character visualization; no colour means the
      # helper returns the visualized text without styling.
      visual = apply_visualization(change.old_element || "")
      parts << (@use_color ? colorize(visual, :default) : visual)
    when "-"
      parts << apply_visualization(change.old_element, :red) if old_side
    when "+"
      parts << apply_visualization(change.new_element, :green) if side == :new
    when "!"
      parts << if old_side
                 apply_visualization(change.old_element, :red)
               else
                 apply_visualization(change.new_element, :green)
               end
    end
  end.join
end
|
|
841
|
+
|
|
842
|
+
# Apply character visualization
|
|
843
|
+
# Replace each character of +token+ with its entry in @visualization_map
# (characters without an entry are kept), optionally styling the result.
#
# Styling is applied only when a colour is given AND @use_color is set; in
# that case the "paint" library is loaded on demand and the text is
# rendered in the given colour and bold.
#
# @param token [#to_s, nil] text to visualize
# @param color [Symbol, nil] colour handed to Paint
# @return [String] "" for a nil token, otherwise the visualized text
def apply_visualization(token, color = nil)
  return "" if token.nil?

  visual = token.to_s.each_char.map do |character|
    @visualization_map.fetch(character, character)
  end.join
  return visual unless color && @use_color

  require "paint"
  Paint[visual, color, :bold]
end
|
|
857
|
+
end
|
|
858
|
+
end
|
|
859
|
+
end
|
|
860
|
+
end
|