canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,672 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_formatter"
4
+ require_relative "../legend"
5
+ require "set"
6
+
7
+ module Canon
8
+ class DiffFormatter
9
+ module ByLine
10
+ # HTML formatter with DOM-guided diffing
11
+ # Uses DOM parsing and element matching for intelligent HTML diffs
12
+ class HtmlFormatter < BaseFormatter
13
+ attr_reader :html_version
14
+
15
# Create an HTML by-line diff formatter.
#
# Every option except +html_version+ is handed unchanged to the superclass
# initializer. +html_version+ is kept on the instance (readable through
# +html_version+) and later passed to the HTML parser by #format.
#
# @param use_color passed through to the superclass (default true)
# @param context_lines passed through to the superclass (default 3)
# @param diff_grouping_lines passed through to the superclass (default nil)
# @param visualization_map passed through to the superclass (default nil)
# @param html_version HTML version used when parsing (default :html4)
# @param show_diffs passed through to the superclass (default :all)
# @param differences passed through to the superclass (default [])
def initialize(use_color: true, context_lines: 3,
               diff_grouping_lines: nil, visualization_map: nil,
               html_version: :html4, show_diffs: :all, differences: [])
  inherited_options = {
    use_color: use_color,
    context_lines: context_lines,
    diff_grouping_lines: diff_grouping_lines,
    visualization_map: visualization_map,
    show_diffs: show_diffs,
    differences: differences,
  }
  super(**inherited_options)
  @html_version = html_version
end
24
+
25
# Format a DOM-guided HTML diff.
#
# Returns "" when #should_skip_diff_display? reports that the current
# show_diffs setting hides every difference. When @differences holds
# Canon::Diff::DiffNode objects the diff is rendered by
# #format_with_pipeline. Otherwise the legacy path runs: both documents are
# parsed, their elements matched, and the pretty-printed lines of the
# matches are rendered by #format_element_matches.
#
# If the legacy path raises a StandardError the result instead contains a
# warning, the error class and message, up to three backtrace lines that
# mention "canon", a blank line and a SimpleFormatter diff.
#
# This method writes nothing to stderr; the diff is only returned.
#
# @param doc1 [String] First HTML document
# @param doc2 [String] Second HTML document
# @return [String] Formatted diff
def format(doc1, doc2)
  # The show_diffs filter applies to the pipeline and the legacy path alike.
  return "" if should_skip_diff_display?

  # Use the new pipeline when the comparison supplied DiffNodes.
  if @differences&.any?(Canon::Diff::DiffNode)
    return format_with_pipeline(doc1, doc2)
  end

  # LEGACY: fall back to the old DOM-based behaviour.
  # These files are required here (on first use) rather than at load time.
  require_relative "../../xml/data_model"
  require_relative "../../xml/element_matcher"
  require_relative "../../xml/line_range_mapper"
  require_relative "../../pretty_printer/html"

  output = []

  begin
    # Parse to DOM using the HTML parser
    root1 = Canon::Xml::DataModel.from_html(doc1, version: @html_version)
    root2 = Canon::Xml::DataModel.from_html(doc2, version: @html_version)

    # Match elements semantically
    matches = Canon::Xml::ElementMatcher.new.match_trees(root1, root2)

    # Pretty-print HTML so elements can be mapped to line ranges
    pretty_printer = Canon::PrettyPrinter::Html.new(indent: 2)
    pretty1 = pretty_printer.format(doc1)
    pretty2 = pretty_printer.format(doc2)

    # Build line range maps from the pretty-printed documents
    map1 = Canon::Xml::LineRangeMapper.new(indent: 2).build_map(root1, pretty1)
    map2 = Canon::Xml::LineRangeMapper.new(indent: 2).build_map(root2, pretty2)

    # The pretty-printed lines are what gets displayed
    output << format_element_matches(matches, map1, map2,
                                     pretty1.split("\n"),
                                     pretty2.split("\n"))
  rescue StandardError => e
    # Fall back to a simple diff, telling the reader why
    output << colorize("Warning: DOM parsing failed, using simple diff",
                       :yellow)
    output << colorize("Error: #{e.class}: #{e.message}", :red)

    # Include only backtrace lines that mention "canon", at most three
    relevant_trace = Array(e.backtrace).select do |line|
      line.include?("canon")
    end.take(3)
    unless relevant_trace.empty?
      output << colorize("Backtrace:", :yellow)
      relevant_trace.each do |line|
        output << colorize(" #{line}", :yellow)
      end
    end

    output << ""
    require_relative "simple_formatter"
    simple = SimpleFormatter.new(
      use_color: @use_color,
      context_lines: @context_lines,
      diff_grouping_lines: @diff_grouping_lines,
      visualization_map: @visualization_map,
    )
    output << simple.format(doc1, doc2)
  end

  output.join("\n")
end
128
+
129
# Render the diff through the DiffNode-based report pipeline.
#
# @differences is mapped to per-line records, those records are assembled
# into a report using the instance's show_diffs / context / grouping
# settings, and the report is rendered by #format_report.
#
# @param doc1 [String] first document
# @param doc2 [String] second document
# @return [String] whatever #format_report returns for the built report
def format_with_pipeline(doc1, doc2)
  require_relative "../../diff/diff_node_mapper"
  require_relative "../../diff/diff_report_builder"

  # Layer 2: DiffNodes -> DiffLines
  mapped_lines = Canon::Diff::DiffNodeMapper.map(@differences, doc1, doc2)

  # Layers 3-5: DiffLines -> report
  builder_options = {
    show_diffs: @show_diffs,
    context_lines: @context_lines,
    grouping_lines: @diff_grouping_lines,
  }
  diff_report = Canon::Diff::DiffReportBuilder.build(mapped_lines,
                                                     **builder_options)

  # Layer 6: report -> text
  format_report(diff_report, doc1, doc2)
end
148
+
149
# Turn a DiffReport into display text.
#
# When the two documents contain characters that Legend.detect_non_ascii
# reports, a legend and a blank line come first. The contexts of the report
# follow in order, separated from each other by one blank line.
#
# @param report [#contexts] report to render
# @param doc1 [String] first document (split on "\n")
# @param doc2 [String] second document (split on "\n")
# @return [String] "" when the report has no contexts, else the rendering
def format_report(report, doc1, doc2)
  contexts = report.contexts
  return "" if contexts.empty?

  lines1 = doc1.split("\n")
  lines2 = doc2.split("\n")
  pieces = []

  # Legend for special characters, only when any were found
  special_chars = Legend.detect_non_ascii((lines1 + lines2).join,
                                          @visualization_map)
  unless special_chars.empty?
    pieces.push(Legend.build_legend(special_chars, use_color: @use_color), "")
  end

  # One rendered chunk per context, blank line between chunks
  chunks = contexts.map do |context|
    format_context_from_lines(context, lines1, lines2)
  end
  pieces << chunks.join("\n\n")

  pieces.join("\n")
end
176
+
177
# Render the DiffLines of one context as unified-diff rows.
#
# The number shown for a row is the DiffLine's line_number plus one.
# Removed, added and changed rows are cyan when the DiffLine is informative,
# otherwise red for the old side and green for the new side. A :changed
# DiffLine produces two rows: the old text taken from lines1 and the new
# text taken from the DiffLine. DiffLines of any other type add no row.
#
# @param context [#lines] context whose DiffLines are rendered in order
# @param lines1 [Array<String>] lines of the first document
# @param _lines2 [Array<String>] accepted but not used
# @return [String] the rows joined with "\n"
def format_context_from_lines(context, lines1, _lines2)
  shade = ->(informative, regular) { informative ? :cyan : regular }

  rows = context.lines.flat_map do |entry|
    case entry.type
    when :unchanged
      number = entry.line_number + 1
      [format_unified_line(number, number, " ", entry.content)]
    when :removed
      number = entry.line_number + 1
      flagged = entry.informative?
      [format_unified_line(number, nil, "-", entry.content,
                           shade.call(flagged, :red),
                           informative: flagged)]
    when :added
      number = entry.line_number + 1
      flagged = entry.informative?
      [format_unified_line(nil, number, "+", entry.content,
                           shade.call(flagged, :green),
                           informative: flagged)]
    when :changed
      number = entry.line_number + 1
      flagged = entry.informative?
      previous_text = lines1[entry.line_number]
      current_text = entry.content
      [
        format_unified_line(number, nil, "-", previous_text,
                            shade.call(flagged, :red),
                            informative: flagged),
        format_unified_line(nil, number, "+", current_text,
                            shade.call(flagged, :green),
                            informative: flagged),
      ]
    else
      []
    end
  end

  rows.join("\n")
end
219
+
220
+ private
221
+
222
# Decide whether the diff output should be suppressed entirely.
#
# Never skips when @differences is nil or empty, or when @show_diffs is
# anything other than :normative or :informative. Otherwise skips when no
# entry of @differences is a Canon::Diff::DiffNode of the requested kind:
#   :normative   -> no DiffNode answering true to normative?
#   :informative -> no DiffNode answering true to informative?
#
# @return [Boolean] true when nothing should be displayed
def should_skip_diff_display?
  return false if @differences.nil? || @differences.empty?

  wanted_kind = { normative: :normative?, informative: :informative? }[@show_diffs]
  # :all (or any other setting) never hides the diff
  return false unless wanted_kind

  @differences.none? do |entry|
    entry.is_a?(Canon::Diff::DiffNode) && entry.public_send(wanted_kind)
  end
end
245
+
246
# Format element matches for display (legacy DOM-guided path).
#
# Output layout: an optional character legend plus a blank line, followed by
# the diff sections ordered by their first-document start line (falling back
# to the second-document start line, then 0). Sections with the same start
# line keep the order in which #collect_diff_sections produced them.
# When @diff_grouping_lines is set the sections are grouped by proximity and
# rendered by #format_diff_groups; otherwise their formatted text is joined
# with blank lines.
#
# Nothing is written to stderr.
#
# @param matches [Array] element matches from the element matcher
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] pretty-printed lines of the first document
# @param lines2 [Array<String>] pretty-printed lines of the second document
# @return [String] formatted diff text
def format_element_matches(matches, map1, map2, lines1, lines2)
  output = []

  # Add a legend when the documents contain characters that need one
  non_ascii = Legend.detect_non_ascii((lines1 + lines2).join,
                                      @visualization_map)
  unless non_ascii.empty?
    output << Legend.build_legend(non_ascii, use_color: @use_color)
    output << ""
  end

  # Children of parents that already show a diff are not repeated
  elements_to_skip = build_skip_set(matches, map1, map2, lines1, lines2)

  # Still passed on because collect_diff_sections takes it as a parameter
  children_of_matched_parents = build_children_set(matches)

  diff_sections = collect_diff_sections(matches, map1, map2, lines1,
                                        lines2, elements_to_skip,
                                        children_of_matched_parents)

  # sort_by gives no stability guarantee, so the original index breaks ties
  ordered_sections = diff_sections.each_with_index.sort_by do |section, index|
    [section[:start_line1] || section[:start_line2] || 0, index]
  end.map(&:first)

  formatted_diffs =
    if @diff_grouping_lines
      groups = group_diff_sections(ordered_sections, @diff_grouping_lines)
      format_diff_groups(groups, lines1, lines2)
    else
      ordered_sections.map { |section| section[:formatted] }.compact.join("\n\n")
    end

  output << formatted_diffs
  output.join("\n")
end
297
+
298
# Build the set of elements to skip: elements whose own lines differ and
# that also have an ancestor whose lines differ (the ancestor's diff already
# shows them).
#
# Only :matched matches with a line range in both maps are considered, and
# only textual line differences are looked at here.
#
# @param matches [Array] element matches (status, elem1, elem2)
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] lines of the first document
# @param lines2 [Array<String>] lines of the second document
# @return [Set] first-document elements that should not get a section
def build_skip_set(matches, map1, map2, lines1, lines2)
  elements_with_diffs = Set.new

  # First pass: matched elements whose line ranges differ textually
  matches.each do |match|
    next unless match.status == :matched

    range1 = map1[match.elem1]
    range2 = map2[match.elem2]
    next unless range1 && range2

    elem_lines1 = lines1[range1.start_line..range1.end_line]
    elem_lines2 = lines2[range2.start_line..range2.end_line]
    elements_with_diffs.add(match.elem1) if elem_lines1 != elem_lines2
  end

  # Second pass: skip every such element that sits below another one
  elements_with_diffs.each_with_object(Set.new) do |elem, elements_to_skip|
    next unless elem.respond_to?(:parent)

    ancestor = elem.parent
    while ancestor
      if ancestor.respond_to?(:name) && elements_with_diffs.include?(ancestor)
        elements_to_skip.add(elem)
        break
      end
      ancestor = ancestor.respond_to?(:parent) ? ancestor.parent : nil
    end
  end
end
341
+
342
# Tell whether +element+, or anything below it, is in the given collection.
#
# Descends through +children+ for every node that responds to it.
#
# @param element [Object] node to start from
# @param elements_with_semantic_diffs [#include?] nodes known to differ
# @return [Boolean] true as soon as a member is found in the subtree
def has_semantic_diff_in_subtree?(element, elements_with_semantic_diffs)
  return true if elements_with_semantic_diffs.include?(element)
  return false unless element.respond_to?(:children)

  element.children.any? do |descendant|
    has_semantic_diff_in_subtree?(descendant, elements_with_semantic_diffs)
  end
end
356
+
357
# Collect the individual nodes (not pairs) referenced by the DiffNodes in
# @differences.
#
# Entries that are not Canon::Diff::DiffNode are ignored; for each DiffNode
# its node1 and node2 are added when present.
#
# @return [Set] nodes taking part in a semantic difference (empty when
#   @differences is nil or empty)
def build_elements_with_semantic_diffs_set
  return Set.new if @differences.nil? || @differences.empty?

  @differences.each_with_object(Set.new) do |diff, nodes|
    next unless diff.is_a?(Canon::Diff::DiffNode)

    [diff.node1, diff.node2].each { |node| nodes << node if node }
  end
end
373
+
374
# Collect the direct children of every matched element.
#
# For each :matched match, the children of elem1 and of elem2 are added,
# provided the parent responds to +children+ and the child responds to
# +name+.
#
# @param matches [Array] element matches (status, elem1, elem2)
# @return [Set] named children of matched parents
def build_children_set(matches)
  matches.each_with_object(Set.new) do |match, named_children|
    next unless match.status == :matched

    [match.elem1, match.elem2].each do |parent|
      next if parent.nil? || !parent.respond_to?(:children)

      parent.children.each do |child|
        named_children << child if child.respond_to?(:name)
      end
    end
  end
end
392
+
393
# Collect the diff sections (formatted text plus line metadata) for all
# element matches.
#
# :matched elements are dropped when they are in +elements_to_skip+. When
# @differences contains Canon::Diff::DiffNode objects they are also dropped
# unless the element or something in its subtree takes part in a semantic
# difference. :deleted and :inserted elements are never filtered. A match
# whose formatter returns nil adds no section.
#
# Nothing is written to stderr.
#
# @param matches [Array] element matches (status, elem1, elem2)
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] lines of the first document
# @param lines2 [Array<String>] lines of the second document
# @param elements_to_skip [#include?] first-document elements to leave out
# @param _children_of_matched_parents [Object] accepted but not used
# @return [Array<Hash>] sections in match order
def collect_diff_sections(matches, map1, map2, lines1, lines2,
                          elements_to_skip, _children_of_matched_parents)
  elements_with_semantic_diffs = build_elements_with_semantic_diffs_set

  # Semantic filtering only applies when DiffNode objects are available
  # (standalone use without DiffNodes shows every textual diff). The answer
  # is the same for every match, so it is computed once.
  semantic_filtering = @differences&.any?(Canon::Diff::DiffNode)

  matches.each_with_object([]) do |match, diff_sections|
    section =
      case match.status
      when :matched
        next if elements_to_skip.include?(match.elem1)

        if semantic_filtering
          # No semantic diffs at all: every text diff was normalized away
          next if elements_with_semantic_diffs.empty?

          # Nothing semantic in this element's subtree
          next unless has_semantic_diff_in_subtree?(match.elem1,
                                                    elements_with_semantic_diffs)
        end

        format_matched_element_with_metadata(match, map1, map2, lines1,
                                             lines2)
      when :deleted
        # Deleted elements are always shown
        format_deleted_element_with_metadata(match, map1, lines1)
      when :inserted
        # Inserted elements are always shown
        format_inserted_element_with_metadata(match, map2, lines2)
      end

    diff_sections << section if section
  end
end
451
+
452
# Wrap the formatted diff of a matched element together with its position.
#
# @param match [Object] match providing elem1, elem2 and path
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] lines of the first document
# @param lines2 [Array<String>] lines of the second document
# @return [Hash, nil] section with :formatted, the start/end lines on both
#   sides and the "/"-joined :path; nil when either range is missing or
#   #format_matched_element produced nothing
def format_matched_element_with_metadata(match, map1, map2, lines1,
                                         lines2)
  old_range = map1[match.elem1]
  new_range = map2[match.elem2]
  return unless old_range && new_range

  text = format_matched_element(match, map1, map2, lines1, lines2)
  return unless text

  section = { formatted: text }
  section[:start_line1] = old_range.start_line
  section[:end_line1] = old_range.end_line
  section[:start_line2] = new_range.start_line
  section[:end_line2] = new_range.end_line
  section[:path] = match.path.join("/")
  section
end
472
+
473
# Wrap the formatted text of a deleted element together with its position.
#
# A deleted element only exists in the first document, so the
# second-document line fields are nil.
#
# @param match [Object] match providing elem1 and path
# @param map1 [Hash] element => line range for the first document
# @param lines1 [Array<String>] lines of the first document
# @return [Hash, nil] section hash; nil when the element has no range or
#   #format_deleted_element produced nothing
def format_deleted_element_with_metadata(match, map1, lines1)
  old_range = map1[match.elem1]
  return unless old_range

  text = format_deleted_element(match, map1, lines1)
  return unless text

  section = { formatted: text }
  section[:start_line1] = old_range.start_line
  section[:end_line1] = old_range.end_line
  section[:start_line2] = nil
  section[:end_line2] = nil
  section[:path] = match.path.join("/")
  section
end
490
+
491
# Wrap the formatted text of an inserted element together with its position.
#
# An inserted element only exists in the second document, so the
# first-document line fields are nil.
#
# @param match [Object] match providing elem2 and path
# @param map2 [Hash] element => line range for the second document
# @param lines2 [Array<String>] lines of the second document
# @return [Hash, nil] section hash; nil when the element has no range or
#   #format_inserted_element produced nothing
def format_inserted_element_with_metadata(match, map2, lines2)
  new_range = map2[match.elem2]
  return unless new_range

  text = format_inserted_element(match, map2, lines2)
  return unless text

  section = { formatted: text }
  section[:start_line1] = nil
  section[:end_line1] = nil
  section[:start_line2] = new_range.start_line
  section[:end_line2] = new_range.end_line
  section[:path] = match.path.join("/")
  section
end
508
+
509
# Format the line-level differences inside one matched element.
#
# The element's line range is cut out of each document, the two slices are
# compared with Diff::LCS.sdiff, and the resulting diff blocks are grouped
# (using @diff_grouping_lines, or 0 when unset), widened by @context_lines
# and rendered with #format_context. Contexts are separated by a blank line.
#
# @param match [Object] match providing elem1 and elem2
# @param map1 [Hash] element => line range for the first document
# @param map2 [Hash] element => line range for the second document
# @param lines1 [Array<String>] lines of the first document
# @param lines2 [Array<String>] lines of the second document
# @return [String, nil] formatted contexts; nil when a range is missing, the
#   slices are identical, or no diff block was identified
def format_matched_element(match, map1, map2, lines1, lines2)
  old_range = map1[match.elem1]
  new_range = map2[match.elem2]
  return nil unless old_range && new_range

  # Lines belonging to the element on each side
  old_slice = lines1[old_range.start_line..old_range.end_line]
  new_slice = lines2[new_range.start_line..new_range.end_line]
  return nil if old_slice == new_slice

  diffs = ::Diff::LCS.sdiff(old_slice, new_slice)

  blocks = identify_diff_blocks(diffs)
  return nil if blocks.empty?

  grouped = group_diff_blocks_into_contexts(blocks,
                                            @diff_grouping_lines || 0)
  widened = expand_contexts_with_context_lines(grouped, @context_lines,
                                               diffs.length)

  rendered = widened.map do |context|
    format_context(context, diffs, old_range.start_line,
                   new_range.start_line)
  end
  rendered.join("\n\n")
end
548
+
549
# Format an element that exists only in the first document.
#
# Produces a bold red "[DELETED]" header naming the element path, followed
# by every line of the element's range as a red "-" row numbered from the
# range index plus one.
#
# @param match [Object] match providing elem1 and path
# @param map1 [Hash] element => line range for the first document
# @param lines1 [Array<String>] lines of the first document
# @return [String, nil] formatted block; nil when the element has no range
def format_deleted_element(match, map1, lines1)
  old_range = map1[match.elem1]
  return nil unless old_range

  header = colorize("Element: #{match.path.join('/')} [DELETED]", :red, :bold)
  rows = (old_range.start_line..old_range.end_line).map do |index|
    format_unified_line(index + 1, nil, "-", lines1[index], :red)
  end

  [header, *rows].join("\n")
end
565
+
566
# Format an element that exists only in the second document.
#
# Produces a bold green "[INSERTED]" header naming the element path,
# followed by every line of the element's range as a green "+" row numbered
# from the range index plus one.
#
# @param match [Object] match providing elem2 and path
# @param map2 [Hash] element => line range for the second document
# @param lines2 [Array<String>] lines of the second document
# @return [String, nil] formatted block; nil when the element has no range
def format_inserted_element(match, map2, lines2)
  new_range = map2[match.elem2]
  return nil unless new_range

  header = colorize("Element: #{match.path.join('/')} [INSERTED]", :green, :bold)
  rows = (new_range.start_line..new_range.end_line).map do |index|
    format_unified_line(nil, index + 1, "+", lines2[index], :green)
  end

  [header, *rows].join("\n")
end
582
+
583
# Split an ordered list of diff sections into groups of neighbours.
#
# A section joins the group of the section before it when the gap between
# them (next start line - previous end line - 1) is at most +grouping_lines+
# on BOTH documents; the larger of the two gaps decides.
#
# NOTE(review): a side whose line numbers are nil counts as an infinite gap,
# so a deleted or inserted section (which has only one side) always starts a
# new group and is never joined by the next section - confirm this is
# intended.
#
# @param sections [Array<Hash>] sections with :start_line1/:end_line1 and
#   :start_line2/:end_line2
# @param grouping_lines [Numeric] largest gap that still keeps sections
#   together
# @return [Array<Array<Hash>>] groups in input order ([] for no sections)
def group_diff_sections(sections, grouping_lines)
  distance = lambda do |previous_end, next_start|
    previous_end && next_start ? next_start - previous_end - 1 : Float::INFINITY
  end

  sections.chunk_while do |earlier, later|
    widest_gap = [
      distance.call(earlier[:end_line1], later[:start_line1]),
      distance.call(earlier[:end_line2], later[:start_line2]),
    ].max
    widest_gap <= grouping_lines
  end.to_a
end
619
+
620
# Render grouped diff sections.
#
# Groups are separated by a blank line. A group with more than one section
# starts with a bold yellow "Context block has N diffs" header and a blank
# line, followed by the formatted text of each section that has any. A
# single-section group contributes just that section's formatted text, if
# present.
#
# @param groups [Array<Array<Hash>>] groups of sections with :formatted
# @param _lines1 [Array<String>] accepted but not used
# @param _lines2 [Array<String>] accepted but not used
# @return [String] the rendered groups joined with "\n"
def format_diff_groups(groups, _lines1, _lines2)
  rendered = groups.each_with_index.flat_map do |members, position|
    separator = position.positive? ? [""] : []

    body =
      if members.length > 1
        header = colorize("Context block has #{members.length} diffs",
                          :yellow, :bold)
        texts = members.map { |member| member[:formatted] }.select { |text| text }
        [header, "", *texts]
      elsif members[0][:formatted]
        [members[0][:formatted]]
      else
        []
      end

    separator + body
  end

  rendered.join("\n")
end
641
+
642
# Check whether +element+ or any node reachable through +children+ is
# contained in +elements_with_semantic_diffs+.
#
# NOTE(review): this class defines a method with this name twice with the
# same logic; Ruby keeps the later definition, so one copy could be removed.
#
# @param element [Object] node to start from
# @param elements_with_semantic_diffs [#include?] nodes known to differ
# @return [Boolean] true when a member is found in the subtree
def has_semantic_diff_in_subtree?(element, elements_with_semantic_diffs)
  elements_with_semantic_diffs.include?(element) ||
    (element.respond_to?(:children) &&
      element.children.any? do |child|
        has_semantic_diff_in_subtree?(child, elements_with_semantic_diffs)
      end)
end
654
+
655
# Gather every node referenced by a Canon::Diff::DiffNode in @differences.
#
# NOTE(review): this class defines a method with this name twice with the
# same logic; Ruby keeps the later definition, so one copy could be removed.
#
# @return [Set] node1/node2 of each DiffNode, leaving out missing ones
#   (empty when @differences is nil or empty)
def build_elements_with_semantic_diffs_set
  nodes = Set.new
  return nodes if @differences.nil? || @differences.empty?

  diff_nodes = @differences.select { |entry| entry.is_a?(Canon::Diff::DiffNode) }
  diff_nodes.each do |diff_node|
    [diff_node.node1, diff_node.node2].each do |node|
      nodes.add(node) if node
    end
  end

  nodes
end
669
+ end
670
+ end
671
+ end
672
+ end