canon 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/.rubocop_todo.yml +276 -7
- data/README.adoc +203 -138
- data/_config.yml +116 -0
- data/docs/ADVANCED_TOPICS.adoc +20 -0
- data/docs/BASIC_USAGE.adoc +16 -0
- data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/docs/CLI.adoc +493 -0
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/docs/DIFF_FORMATTING.adoc +540 -0
- data/docs/FORMATS.adoc +447 -0
- data/docs/INDEX.adoc +222 -0
- data/docs/INPUT_VALIDATION.adoc +477 -0
- data/docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/docs/MATCH_OPTIONS.adoc +719 -0
- data/docs/MODES.adoc +432 -0
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/docs/OPTIONS.adoc +1387 -0
- data/docs/PREPROCESSING.adoc +491 -0
- data/docs/RSPEC.adoc +605 -0
- data/docs/RUBY_API.adoc +478 -0
- data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
- data/docs/UNDERSTANDING_CANON.adoc +17 -0
- data/docs/VERBOSE.adoc +482 -0
- data/exe/canon +7 -0
- data/lib/canon/cli.rb +179 -0
- data/lib/canon/commands/diff_command.rb +195 -0
- data/lib/canon/commands/format_command.rb +113 -0
- data/lib/canon/comparison/base_comparator.rb +39 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +410 -0
- data/lib/canon/comparison/json_comparator.rb +212 -0
- data/lib/canon/comparison/match_options.rb +616 -0
- data/lib/canon/comparison/xml_comparator.rb +566 -0
- data/lib/canon/comparison/yaml_comparator.rb +93 -0
- data/lib/canon/comparison.rb +239 -0
- data/lib/canon/config.rb +172 -0
- data/lib/canon/diff/diff_block.rb +71 -0
- data/lib/canon/diff/diff_block_builder.rb +105 -0
- data/lib/canon/diff/diff_classifier.rb +46 -0
- data/lib/canon/diff/diff_context.rb +85 -0
- data/lib/canon/diff/diff_context_builder.rb +107 -0
- data/lib/canon/diff/diff_line.rb +77 -0
- data/lib/canon/diff/diff_node.rb +56 -0
- data/lib/canon/diff/diff_node_mapper.rb +148 -0
- data/lib/canon/diff/diff_report.rb +133 -0
- data/lib/canon/diff/diff_report_builder.rb +62 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
- data/lib/canon/diff_formatter/character_map.yml +197 -0
- data/lib/canon/diff_formatter/debug_output.rb +431 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
- data/lib/canon/diff_formatter/legend.rb +141 -0
- data/lib/canon/diff_formatter.rb +520 -0
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html4_formatter.rb +17 -0
- data/lib/canon/formatters/html5_formatter.rb +17 -0
- data/lib/canon/formatters/html_formatter.rb +37 -0
- data/lib/canon/formatters/html_formatter_base.rb +163 -0
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/xml_formatter.rb +20 -55
- data/lib/canon/formatters/yaml_formatter.rb +4 -1
- data/lib/canon/pretty_printer/html.rb +57 -0
- data/lib/canon/pretty_printer/json.rb +25 -0
- data/lib/canon/pretty_printer/xml.rb +29 -0
- data/lib/canon/rspec_matchers.rb +222 -80
- data/lib/canon/validators/base_validator.rb +49 -0
- data/lib/canon/validators/html_validator.rb +138 -0
- data/lib/canon/validators/json_validator.rb +89 -0
- data/lib/canon/validators/xml_validator.rb +53 -0
- data/lib/canon/validators/yaml_validator.rb +73 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/attribute_handler.rb +80 -0
- data/lib/canon/xml/c14n.rb +36 -0
- data/lib/canon/xml/character_encoder.rb +38 -0
- data/lib/canon/xml/data_model.rb +225 -0
- data/lib/canon/xml/element_matcher.rb +196 -0
- data/lib/canon/xml/line_range_mapper.rb +158 -0
- data/lib/canon/xml/namespace_handler.rb +86 -0
- data/lib/canon/xml/node.rb +32 -0
- data/lib/canon/xml/nodes/attribute_node.rb +54 -0
- data/lib/canon/xml/nodes/comment_node.rb +23 -0
- data/lib/canon/xml/nodes/element_node.rb +56 -0
- data/lib/canon/xml/nodes/namespace_node.rb +38 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
- data/lib/canon/xml/nodes/root_node.rb +16 -0
- data/lib/canon/xml/nodes/text_node.rb +23 -0
- data/lib/canon/xml/processor.rb +151 -0
- data/lib/canon/xml/whitespace_normalizer.rb +72 -0
- data/lib/canon/xml/xml_base_handler.rb +188 -0
- data/lib/canon.rb +14 -3
- metadata +116 -21
|
@@ -0,0 +1,672 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "base_formatter"
|
|
4
|
+
require_relative "../legend"
|
|
5
|
+
require "set"
|
|
6
|
+
|
|
7
|
+
module Canon
|
|
8
|
+
class DiffFormatter
|
|
9
|
+
module ByLine
|
|
10
|
+
# HTML formatter with DOM-guided diffing
|
|
11
|
+
# Uses DOM parsing and element matching for intelligent HTML diffs
|
|
12
|
+
class HtmlFormatter < BaseFormatter
|
|
13
|
+
attr_reader :html_version
|
|
14
|
+
|
|
15
|
+
# Build an HTML by-line formatter.
#
# Every option except +html_version+ is forwarded unchanged to the
# superclass initializer; +html_version+ is kept on this instance and
# exposed through the +html_version+ reader.
#
# @param html_version [Symbol] value later passed as +version:+ to the
#   HTML parser in #format (defaults to :html4)
def initialize(use_color: true, context_lines: 3,
               diff_grouping_lines: nil, visualization_map: nil,
               html_version: :html4, show_diffs: :all, differences: [])
  inherited_options = {
    use_color: use_color,
    context_lines: context_lines,
    diff_grouping_lines: diff_grouping_lines,
    visualization_map: visualization_map,
    show_diffs: show_diffs,
    differences: differences,
  }
  super(**inherited_options)
  @html_version = html_version
end
|
|
24
|
+
|
|
25
|
+
# Format a DOM-guided HTML diff.
#
# Returns "" when should_skip_diff_display? says the current show_diffs
# filter leaves nothing to show. When @differences contains at least one
# Canon::Diff::DiffNode the DiffNode pipeline (format_with_pipeline) is
# used; otherwise the legacy DOM-matching path runs. If the legacy path
# raises a StandardError, a warning, the error and up to three
# canon-related backtrace lines are emitted, followed by the output of
# SimpleFormatter.
#
# The legacy path writes nothing to stderr: the diagnostic +warn+ calls
# that used to run here were leftover debugging output, and an error
# raised while building those messages would have forced a working DOM
# diff into the fallback branch.
#
# @param doc1 [String] First HTML document
# @param doc2 [String] Second HTML document
# @return [String] Formatted diff
def format(doc1, doc2)
  # Both the pipeline and the legacy path honour the show_diffs filter.
  return "" if should_skip_diff_display?

  # Use the new pipeline when DiffNodes are available.
  return format_with_pipeline(doc1, doc2) if @differences&.any?(Canon::Diff::DiffNode)

  # LEGACY: fall back to the old DOM-based behavior.
  # These files are loaded lazily, only when the legacy path is taken.
  require_relative "../../xml/data_model"
  require_relative "../../xml/element_matcher"
  require_relative "../../xml/line_range_mapper"
  require_relative "../../pretty_printer/html"

  output = []

  begin
    # Parse to DOM using the HTML parser.
    root1 = Canon::Xml::DataModel.from_html(doc1, version: @html_version)
    root2 = Canon::Xml::DataModel.from_html(doc2, version: @html_version)

    # Match elements semantically.
    matches = Canon::Xml::ElementMatcher.new.match_trees(root1, root2)

    # Pretty-print HTML so elements can be mapped to line ranges.
    pretty_printer = Canon::PrettyPrinter::Html.new(indent: 2)
    pretty1 = pretty_printer.format(doc1)
    pretty2 = pretty_printer.format(doc2)

    # Build line range maps from the pretty-printed documents.
    map1 = Canon::Xml::LineRangeMapper.new(indent: 2).build_map(root1, pretty1)
    map2 = Canon::Xml::LineRangeMapper.new(indent: 2).build_map(root2, pretty2)

    # The pretty-printed lines are what gets displayed.
    lines1 = pretty1.split("\n")
    lines2 = pretty2.split("\n")

    output << format_element_matches(matches, map1, map2, lines1, lines2)
  rescue StandardError => e
    # Fall back to a simple diff on error, telling the reader why.
    output << colorize("Warning: DOM parsing failed, using simple diff",
                       :yellow)
    output << colorize("Error: #{e.class}: #{e.message}", :red)

    # Exception#backtrace may be nil, so coerce before filtering.
    relevant_trace = Array(e.backtrace).select do |line|
      line.include?("canon")
    end.take(3)
    unless relevant_trace.empty?
      output << colorize("Backtrace:", :yellow)
      relevant_trace.each do |line|
        output << colorize("  #{line}", :yellow)
      end
    end

    output << ""
    require_relative "simple_formatter"
    simple = SimpleFormatter.new(
      use_color: @use_color,
      context_lines: @context_lines,
      diff_grouping_lines: @diff_grouping_lines,
      visualization_map: @visualization_map,
    )
    output << simple.format(doc1, doc2)
  end

  output.join("\n")
end
|
|
128
|
+
|
|
129
|
+
# Format the diff through the DiffNode pipeline.
#
# @differences is mapped to diff lines by Canon::Diff::DiffNodeMapper,
# those lines are turned into a report by Canon::Diff::DiffReportBuilder,
# and the report is rendered by #format_report.
#
# @param doc1 [String] First HTML document
# @param doc2 [String] Second HTML document
# @return [String] Formatted diff
def format_with_pipeline(doc1, doc2)
  require_relative "../../diff/diff_node_mapper"
  require_relative "../../diff/diff_report_builder"

  # Layer 2: DiffNodes -> DiffLines
  mapped_lines = Canon::Diff::DiffNodeMapper.map(@differences, doc1, doc2)

  # Layers 3-5: DiffLines -> report
  builder_options = {
    show_diffs: @show_diffs,
    context_lines: @context_lines,
    grouping_lines: @diff_grouping_lines,
  }
  built_report = Canon::Diff::DiffReportBuilder.build(mapped_lines,
                                                      **builder_options)

  # Layer 6: render the report
  format_report(built_report, doc1, doc2)
end
|
|
148
|
+
|
|
149
|
+
# Render a diff report as text.
#
# Returns "" when the report has no contexts. Otherwise the output is an
# optional legend (only when Legend.detect_non_ascii reports characters in
# either document) followed by each context rendered with
# #format_context_from_lines, with one blank line between contexts.
#
# @param report [#contexts] report whose contexts are rendered in order
# @param doc1 [String] First document
# @param doc2 [String] Second document
# @return [String] Formatted report
def format_report(report, doc1, doc2)
  return "" if report.contexts.empty?

  old_lines = doc1.split("\n")
  new_lines = doc2.split("\n")

  # The legend is driven by the text of both documents taken together.
  special_chars = Legend.detect_non_ascii((old_lines + new_lines).join,
                                          @visualization_map)
  preamble = []
  unless special_chars.empty?
    preamble.push(Legend.build_legend(special_chars, use_color: @use_color), "")
  end

  # Every context after the first is preceded by an empty entry, which
  # becomes a blank separator line once everything is joined.
  body = report.contexts.each_with_index.flat_map do |context, position|
    rendered = format_context_from_lines(context, old_lines, new_lines)
    position.zero? ? [rendered] : ["", rendered]
  end

  (preamble + body).join("\n")
end
|
|
176
|
+
|
|
177
|
+
# Render the diff lines of one context.
#
# Each entry of context.lines is rendered through format_unified_line
# according to its type:
# * :unchanged - one " " line carrying the same number on both sides
# * :removed   - one "-" line
# * :added     - one "+" line
# * :changed   - a "-" line whose text is read from lines1, then a "+"
#                line with the entry's own content
# Entries of any other type are ignored. Displayed numbers are the entry's
# line_number plus one. Informative entries are drawn in :cyan instead of
# :red / :green.
#
# @param context [#lines] context whose lines are rendered
# @param lines1 [Array<String>] lines of the first document
# @param _lines2 [Array<String>] unused
# @return [String] rendered lines joined with newlines
def format_context_from_lines(context, lines1, _lines2)
  rendered = []

  context.lines.each do |entry|
    kind = entry.type
    next unless %i[unchanged removed added changed].include?(kind)

    display_no = entry.line_number + 1

    if kind == :unchanged
      rendered << format_unified_line(display_no, display_no, " ",
                                      entry.content)
      next
    end

    note_only = entry.informative?

    # :removed and :changed both produce an old-side line.
    unless kind == :added
      old_text = kind == :changed ? lines1[entry.line_number] : entry.content
      rendered << format_unified_line(display_no, nil, "-", old_text,
                                      note_only ? :cyan : :red,
                                      informative: note_only)
    end

    # :added and :changed both produce a new-side line.
    unless kind == :removed
      rendered << format_unified_line(nil, display_no, "+", entry.content,
                                      note_only ? :cyan : :green,
                                      informative: note_only)
    end
  end

  rendered.join("\n")
end
|
|
219
|
+
|
|
220
|
+
private
|
|
221
|
+
|
|
222
|
+
# Decide whether the diff display should be suppressed.
#
# Never skips when @differences is nil or empty, or when @show_diffs is
# anything other than :normative or :informative. Otherwise skips when no
# Canon::Diff::DiffNode in @differences answers true to the matching
# predicate (normative? for :normative, informative? for :informative).
#
# @return [Boolean] true when nothing should be displayed
def should_skip_diff_display?
  return false if @differences.nil? || @differences.empty?

  predicate = { normative: :normative?, informative: :informative? }[@show_diffs]
  # :all or any other setting - never skip
  return false if predicate.nil?

  @differences.none? do |diff|
    diff.is_a?(Canon::Diff::DiffNode) && diff.public_send(predicate)
  end
end
|
|
245
|
+
|
|
246
|
+
# Format element matches for display.
#
# Output is an optional legend (only when Legend.detect_non_ascii reports
# characters in either document) followed by the formatted diff sections,
# ordered by their first-document start line (falling back to the
# second-document start line, then 0). When @diff_grouping_lines is set
# the sections are grouped by proximity and rendered by
# format_diff_groups; otherwise their formatted texts are joined with a
# blank line between them.
#
# This method writes nothing to stderr; the unconditional "DEBUG:" +warn+
# calls that used to run here were leftover debugging output.
#
# @param matches [Enumerable] element matches from the element matcher
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] display lines of the first document
# @param lines2 [Array<String>] display lines of the second document
# @return [String] formatted output
def format_element_matches(matches, map1, map2, lines1, lines2)
  output = []

  # Add a legend if any characters needing one are detected.
  non_ascii = Legend.detect_non_ascii((lines1 + lines2).join,
                                      @visualization_map)
  unless non_ascii.empty?
    output << Legend.build_legend(non_ascii, use_color: @use_color)
    output << ""
  end

  # Elements to skip (children of parents already showing diffs).
  elements_to_skip = build_skip_set(matches, map1, map2, lines1, lines2)

  # Children of matched parents; still passed on to keep the
  # collect_diff_sections call signature unchanged.
  children_of_matched_parents = build_children_set(matches)

  diff_sections = collect_diff_sections(matches, map1, map2, lines1,
                                        lines2, elements_to_skip,
                                        children_of_matched_parents)

  # Sort by line number.
  diff_sections.sort_by! do |section|
    section[:start_line1] || section[:start_line2] || 0
  end

  # Group diffs by proximity if diff_grouping_lines is set.
  output << if @diff_grouping_lines
              groups = group_diff_sections(diff_sections,
                                           @diff_grouping_lines)
              format_diff_groups(groups, lines1, lines2)
            else
              diff_sections.map { |s| s[:formatted] }.compact.join("\n\n")
            end

  output.join("\n")
end
|
|
297
|
+
|
|
298
|
+
# Build the set of elements to skip because an ancestor already shows
# the same difference.
#
# First pass: for every :matched match whose elements both have a line
# range, the first-document element is recorded when its slice of lines1
# differs from the matching slice of lines2. Second pass: a recorded
# element is added to the result when one of its ancestors (reached via
# #parent, and responding to #name) was also recorded.
#
# Semantic filtering is not done here; it happens in
# collect_diff_sections. The earlier version of this method also built the
# semantic-diff element set and threw the result away; that call is gone.
#
# @param matches [Enumerable] element matches (status, elem1, elem2)
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] display lines of the first document
# @param lines2 [Array<String>] display lines of the second document
# @return [Set] first-document elements whose diff is covered by an ancestor
def build_skip_set(matches, map1, map2, lines1, lines2)
  elements_with_diffs = Set.new

  # First pass: identify elements with line differences.
  matches.each do |match|
    next unless match.status == :matched

    range1 = map1[match.elem1]
    range2 = map2[match.elem2]
    next unless range1 && range2

    elem_lines1 = lines1[range1.start_line..range1.end_line]
    elem_lines2 = lines2[range2.start_line..range2.end_line]
    elements_with_diffs.add(match.elem1) if elem_lines1 != elem_lines2
  end

  # Second pass: skip elements that have an ancestor with diffs.
  elements_with_diffs.each_with_object(Set.new) do |elem, elements_to_skip|
    next unless elem.respond_to?(:parent)

    current = elem.parent
    while current
      if current.respond_to?(:name) && elements_with_diffs.include?(current)
        elements_to_skip.add(elem)
        break
      end
      current = current.respond_to?(:parent) ? current.parent : nil
    end
  end
end
|
|
341
|
+
|
|
342
|
+
# Tell whether an element, or anything below it, is in the given set.
#
# The element itself is checked first, then every descendant reachable
# through #children. Nodes that do not respond to #children are treated
# as leaves.
#
# NOTE(review): a method with this name and the same behaviour is defined
# again near the end of this class; Ruby keeps the later definition.
#
# @param element [Object] node to start from
# @param elements_with_semantic_diffs [Set] nodes known to carry diffs
# @return [Boolean]
def has_semantic_diff_in_subtree?(element, elements_with_semantic_diffs)
  pending = [element]

  until pending.empty?
    node = pending.pop
    return true if elements_with_semantic_diffs.include?(node)
    next unless node.respond_to?(:children)

    node.children.each { |child| pending << child }
  end

  false
end
|
|
356
|
+
|
|
357
|
+
# Collect the individual nodes (not pairs) referenced by the DiffNodes in
# @differences.
#
# Entries of @differences that are not Canon::Diff::DiffNode instances are
# ignored, and nil node1 / node2 values are left out. Returns an empty set
# when @differences is nil or empty.
#
# NOTE(review): a method with this name and the same behaviour is defined
# again near the end of this class; Ruby keeps the later definition.
#
# @return [Set] nodes that appear as node1 or node2 of some DiffNode
def build_elements_with_semantic_diffs_set
  return Set.new if @differences.nil? || @differences.empty?

  @differences.each_with_object(Set.new) do |diff, touched|
    next unless diff.is_a?(Canon::Diff::DiffNode)

    [diff.node1, diff.node2].each do |node|
      touched.add(node) if node
    end
  end
end
|
|
373
|
+
|
|
374
|
+
# Collect the direct children of every matched element.
#
# Only matches with status :matched are considered. For each of their two
# elements that responds to #children, every child that responds to #name
# is added to the result.
#
# @param matches [Enumerable] element matches (status, elem1, elem2)
# @return [Set] named children of matched elements from both documents
def build_children_set(matches)
  matches.each_with_object(Set.new) do |match, collected|
    next unless match.status == :matched

    [match.elem1, match.elem2].each do |owner|
      next if owner.nil? || !owner.respond_to?(:children)

      owner.children.each do |kid|
        collected << kid if kid.respond_to?(:name)
      end
    end
  end
end
|
|
392
|
+
|
|
393
|
+
# Collect formatted diff sections, with line metadata, for all matches.
#
# * :matched  - skipped when the first-document element is in
#   elements_to_skip. When @differences contains at least one
#   Canon::Diff::DiffNode, the match is also skipped unless the element or
#   one of its descendants carries a semantic diff. Without DiffNodes no
#   semantic filtering is applied.
# * :deleted / :inserted - always considered.
# Sections for which the corresponding format_*_with_metadata helper
# returns nil are left out. Matches with any other status are ignored.
#
# This method writes nothing to stderr; the "DEBUG:" +warn+ calls and their
# counters that used to live here were leftover debugging output, and they
# called match.elem1.name, which could raise for nodes without #name.
#
# @param matches [Enumerable] element matches
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] display lines of the first document
# @param lines2 [Array<String>] display lines of the second document
# @param elements_to_skip [Set] first-document elements to leave out
# @param _children_of_matched_parents [Set] unused
# @return [Array<Hash>] sections in match order
def collect_diff_sections(matches, map1, map2, lines1, lines2,
                          elements_to_skip, _children_of_matched_parents)
  elements_with_semantic_diffs = build_elements_with_semantic_diffs_set

  # Semantic filtering only applies when DiffNode objects are available;
  # the answer does not change per match, so compute it once.
  semantic_filtering = @differences&.any?(Canon::Diff::DiffNode)

  matches.each_with_object([]) do |match, diff_sections|
    section =
      case match.status
      when :matched
        next if elements_to_skip.include?(match.elem1)

        if semantic_filtering
          # No semantic diffs at all: every text diff was normalized away.
          next if elements_with_semantic_diffs.empty?

          # This element has no semantic diffs anywhere in its subtree.
          next unless has_semantic_diff_in_subtree?(match.elem1,
                                                    elements_with_semantic_diffs)
        end

        format_matched_element_with_metadata(match, map1, map2, lines1,
                                             lines2)
      when :deleted
        # Deleted elements are never skipped.
        format_deleted_element_with_metadata(match, map1, lines1)
      when :inserted
        # Inserted elements are never skipped.
        format_inserted_element_with_metadata(match, map2, lines2)
      end

    diff_sections << section if section
  end
end
|
|
451
|
+
|
|
452
|
+
# Format a matched element pair and attach its line metadata.
#
# Returns nil when either element has no line range, or when
# format_matched_element produces nothing for the pair.
#
# @param match [Object] match exposing elem1, elem2 and path
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] display lines of the first document
# @param lines2 [Array<String>] display lines of the second document
# @return [Hash, nil] :formatted text, start/end lines on both sides, :path
def format_matched_element_with_metadata(match, map1, map2, lines1,
                                         lines2)
  old_span = map1[match.elem1]
  new_span = map2[match.elem2]
  text = old_span && new_span &&
    format_matched_element(match, map1, map2, lines1, lines2)
  return nil unless text

  {
    formatted: text,
    start_line1: old_span.start_line,
    end_line1: old_span.end_line,
    start_line2: new_span.start_line,
    end_line2: new_span.end_line,
    path: match.path.join("/"),
  }
end
|
|
472
|
+
|
|
473
|
+
# Format a deleted element and attach its line metadata.
#
# Returns nil when the element has no line range in the first document or
# when format_deleted_element produces nothing. The second-document line
# fields are always nil.
#
# @param match [Object] match exposing elem1 and path
# @param map1 [Hash] element => line range for the first document
# @param lines1 [Array<String>] display lines of the first document
# @return [Hash, nil] :formatted text, start/end lines, :path
def format_deleted_element_with_metadata(match, map1, lines1)
  old_span = map1[match.elem1]
  text = old_span && format_deleted_element(match, map1, lines1)
  return nil unless text

  {
    formatted: text,
    start_line1: old_span.start_line,
    end_line1: old_span.end_line,
    start_line2: nil,
    end_line2: nil,
    path: match.path.join("/"),
  }
end
|
|
490
|
+
|
|
491
|
+
# Format an inserted element and attach its line metadata.
#
# Returns nil when the element has no line range in the second document or
# when format_inserted_element produces nothing. The first-document line
# fields are always nil.
#
# @param match [Object] match exposing elem2 and path
# @param map2 [Hash] element => line range for the second document
# @param lines2 [Array<String>] display lines of the second document
# @return [Hash, nil] :formatted text, start/end lines, :path
def format_inserted_element_with_metadata(match, map2, lines2)
  new_span = map2[match.elem2]
  text = new_span && format_inserted_element(match, map2, lines2)
  return nil unless text

  {
    formatted: text,
    start_line1: nil,
    end_line1: nil,
    start_line2: new_span.start_line,
    end_line2: new_span.end_line,
    path: match.path.join("/"),
  }
end
|
|
508
|
+
|
|
509
|
+
# Format the line-level differences inside a matched element pair.
#
# Returns nil when either element has no line range, when the two line
# slices are identical, or when no diff blocks are identified. Otherwise
# the slices are compared with ::Diff::LCS.sdiff, the resulting blocks are
# grouped (using @diff_grouping_lines, or 0 when unset), expanded with
# @context_lines, and each context is rendered by format_context with one
# blank line between contexts.
#
# @param match [Object] match exposing elem1 and elem2
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] display lines of the first document
# @param lines2 [Array<String>] display lines of the second document
# @return [String, nil] formatted diff, or nil when there is nothing to show
def format_matched_element(match, map1, map2, lines1, lines2)
  old_span = map1[match.elem1]
  new_span = map2[match.elem2]
  return nil unless old_span && new_span

  before = lines1[old_span.start_line..old_span.end_line]
  after = lines2[new_span.start_line..new_span.end_line]
  return nil if before == after

  line_diffs = ::Diff::LCS.sdiff(before, after)

  blocks = identify_diff_blocks(line_diffs)
  return nil if blocks.empty?

  grouped = group_diff_blocks_into_contexts(blocks,
                                            @diff_grouping_lines || 0)
  expanded = expand_contexts_with_context_lines(grouped, @context_lines,
                                                line_diffs.length)

  # Every context after the first is preceded by an empty entry, which
  # becomes a blank separator line once everything is joined.
  expanded.each_with_index.flat_map do |context, position|
    rendered = format_context(context, line_diffs, old_span.start_line,
                              new_span.start_line)
    position.zero? ? [rendered] : ["", rendered]
  end.join("\n")
end
|
|
548
|
+
|
|
549
|
+
# Format a deleted element: a header naming its path, followed by every
# line of its range rendered as a "-" line in red.
#
# Displayed line numbers are the range indexes plus one.
#
# @param match [Object] match exposing elem1 and path
# @param map1 [Hash] element => line range for the first document
# @param lines1 [Array<String>] display lines of the first document
# @return [String, nil] formatted text, or nil when the element has no range
def format_deleted_element(match, map1, lines1)
  span = map1[match.elem1]
  return nil unless span

  header = colorize("Element: #{match.path.join('/')} [DELETED]", :red, :bold)
  removed = (span.start_line..span.end_line).map do |index|
    format_unified_line(index + 1, nil, "-", lines1[index], :red)
  end

  [header, *removed].join("\n")
end
|
|
565
|
+
|
|
566
|
+
# Format an inserted element: a header naming its path, followed by every
# line of its range rendered as a "+" line in green.
#
# Displayed line numbers are the range indexes plus one.
#
# @param match [Object] match exposing elem2 and path
# @param map2 [Hash] element => line range for the second document
# @param lines2 [Array<String>] display lines of the second document
# @return [String, nil] formatted text, or nil when the element has no range
def format_inserted_element(match, map2, lines2)
  span = map2[match.elem2]
  return nil unless span

  header = colorize("Element: #{match.path.join('/')} [INSERTED]", :green, :bold)
  added = (span.start_line..span.end_line).map do |index|
    format_unified_line(nil, index + 1, "+", lines2[index], :green)
  end

  [header, *added].join("\n")
end
|
|
582
|
+
|
|
583
|
+
# Group consecutive diff sections that lie close to each other.
#
# The gap between two neighbouring sections is measured separately in each
# document as (start of the later section - end of the earlier one - 1).
# When either line number is missing on a side, that side's gap counts as
# infinite. A new group starts whenever the larger of the two gaps exceeds
# grouping_lines.
#
# @param sections [Array<Hash>] sections with :start_line1, :end_line1,
#   :start_line2 and :end_line2 entries, already in display order
# @param grouping_lines [Numeric] largest gap still kept within one group
# @return [Array<Array<Hash>>] sections split into groups, order preserved
def group_diff_sections(sections, grouping_lines)
  gap_between = lambda do |earlier_end, later_start|
    if earlier_end && later_start
      later_start - earlier_end - 1
    else
      Float::INFINITY
    end
  end

  sections.slice_when do |previous, following|
    widest_gap = [
      gap_between.call(previous[:end_line1], following[:start_line1]),
      gap_between.call(previous[:end_line2], following[:start_line2]),
    ].max

    widest_gap > grouping_lines
  end.to_a
end
|
|
619
|
+
|
|
620
|
+
# Render groups of diff sections.
#
# Groups are separated by a blank line. A group with more than one section
# gets a "Context block has N diffs" header followed by a blank line and
# then each section's formatted text; a single-section group is rendered
# as just that section's text. Sections without formatted text are left
# out.
#
# @param groups [Array<Array<Hash>>] grouped sections
# @param _lines1 [Array<String>] unused
# @param _lines2 [Array<String>] unused
# @return [String] rendered groups joined with newlines
def format_diff_groups(groups, _lines1, _lines2)
  rendered = []

  groups.each_with_index do |group, position|
    rendered << "" unless position.zero?

    if group.length > 1
      rendered << colorize("Context block has #{group.length} diffs",
                           :yellow, :bold)
      rendered << ""
      group.each do |section|
        text = section[:formatted]
        rendered << text if text
      end
    else
      only_text = group[0][:formatted]
      rendered << only_text if only_text
    end
  end

  rendered.join("\n")
end
|
|
641
|
+
|
|
642
|
+
# Tell whether an element, or anything below it, is in the given set.
#
# The element itself is checked first; if it responds to #children, each
# child subtree is then searched recursively.
#
# NOTE(review): a method with this name and the same behaviour is also
# defined earlier in this class; this later definition is the one Ruby
# keeps.
#
# @param element [Object] node to start from
# @param elements_with_semantic_diffs [Set] nodes known to carry diffs
# @return [Boolean]
def has_semantic_diff_in_subtree?(element, elements_with_semantic_diffs)
  return true if elements_with_semantic_diffs.include?(element)
  return false unless element.respond_to?(:children)

  element.children.any? do |child|
    has_semantic_diff_in_subtree?(child, elements_with_semantic_diffs)
  end
end
|
|
654
|
+
|
|
655
|
+
# Collect the individual nodes referenced by the DiffNodes in @differences.
#
# Entries of @differences that are not Canon::Diff::DiffNode instances are
# ignored, and nil node1 / node2 values are left out. Returns an empty set
# when @differences is nil or empty.
#
# NOTE(review): a method with this name and the same behaviour is also
# defined earlier in this class; this later definition is the one Ruby
# keeps.
#
# @return [Set] nodes that appear as node1 or node2 of some DiffNode
def build_elements_with_semantic_diffs_set
  found = Set.new
  return found if @differences.nil? || @differences.empty?

  diff_nodes = @differences.select { |diff| diff.is_a?(Canon::Diff::DiffNode) }
  diff_nodes.each do |diff|
    first = diff.node1
    second = diff.node2
    found << first if first
    found << second if second
  end

  found
end
|
|
669
|
+
end
|
|
670
|
+
end
|
|
671
|
+
end
|
|
672
|
+
end
|