canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,860 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_formatter"
4
+ require_relative "../legend"
5
+ require "set"
6
+ require "strscan"
7
+
8
+ module Canon
9
+ class DiffFormatter
10
+ module ByLine
11
+ # XML formatter with DOM-guided diffing
12
+ # Uses DOM parsing and element matching for intelligent XML diffs
13
+ class XmlFormatter < BaseFormatter
14
# Produce a line-oriented diff of two XML documents.
#
# When @differences contains at least one Canon::Diff::DiffNode the
# DiffNode pipeline renders the diff (or "" is returned if the show_diffs
# filter leaves nothing to display). In every other case, including
# @differences being nil, the legacy DOM-guided formatter is used.
#
# @param doc1 [String] First XML document
# @param doc2 [String] Second XML document
# @return [String] Formatted diff
def format(doc1, doc2)
  diff_nodes_present = @differences&.any?(Canon::Diff::DiffNode)

  # LEGACY: without DiffNodes no comparison result was provided, so keep
  # the old behaviour for backward compatibility.
  return format_legacy(doc1, doc2) unless diff_nodes_present

  # Honour the show_diffs setting before doing any work.
  return "" if should_skip_diff_display?

  format_with_pipeline(doc1, doc2)
end
36
+
37
# Render the diff through the DiffNode -> DiffLine -> DiffReport pipeline.
#
# @param doc1 [String] First XML document
# @param doc2 [String] Second XML document
# @return [String] Formatted diff, or "" when display is skipped
def format_with_pipeline(doc1, doc2)
  return "" if should_skip_diff_display?

  # The pipeline classes are loaded lazily, on first use.
  require_relative "../../diff/diff_node_mapper"
  require_relative "../../diff/diff_report_builder"

  # Layer 2: turn DiffNodes into DiffLines.
  mapped_lines = Canon::Diff::DiffNodeMapper.map(@differences, doc1, doc2)

  # Layers 3-5: assemble the report from the mapped lines.
  builder_options = {
    show_diffs: @show_diffs,
    context_lines: @context_lines,
    grouping_lines: @diff_grouping_lines,
  }
  diff_report = Canon::Diff::DiffReportBuilder.build(mapped_lines,
                                                     **builder_options)

  # Layer 6: turn the report into text.
  format_report(diff_report, doc1, doc2)
end
61
+
62
# Turn a DiffReport into display text.
#
# A legend (followed by a blank line) is emitted first when
# Legend.detect_non_ascii finds anything in either document; the report's
# contexts follow, separated by blank lines.
#
# @param report [#contexts] Report whose contexts are rendered
# @param doc1 [String] First XML document
# @param doc2 [String] Second XML document
# @return [String] Formatted report ("" when the report has no contexts)
def format_report(report, doc1, doc2)
  return "" if report.contexts.empty?

  lines1 = doc1.split("\n")
  lines2 = doc2.split("\n")

  # Optional legend for visualized characters.
  legend_part = []
  non_ascii = Legend.detect_non_ascii((lines1 + lines2).join,
                                      @visualization_map)
  unless non_ascii.empty?
    legend_part = [Legend.build_legend(non_ascii, use_color: @use_color), ""]
  end

  # Each context after the first is preceded by an empty separator line.
  context_part = report.contexts.each_with_index.flat_map do |context, idx|
    rendered = format_context_from_lines(context, lines1, lines2)
    idx.positive? ? ["", rendered] : [rendered]
  end

  (legend_part + context_part).join("\n")
end
89
+
90
# Render every DiffLine of a context as unified-diff rows.
#
# Line numbers stored on a DiffLine are zero-based and are shown
# one-based. Informative lines are drawn in cyan; otherwise removals are
# red and additions green. A :changed line is shown as a removal (text
# taken from lines1) followed by an addition (the DiffLine's content).
# Lines of any other type produce no output.
#
# @param context [#lines] Context holding the DiffLines
# @param lines1 [Array<String>] Lines of the first document
# @param _lines2 [Array<String>] Lines of the second document (unused)
# @return [String] Rows joined by newlines
def format_context_from_lines(context, lines1, _lines2)
  rows = context.lines.flat_map do |diff_line|
    kind = diff_line.type
    next [] unless %i[unchanged removed added changed].include?(kind)

    number = diff_line.line_number + 1
    if kind == :unchanged
      next [format_unified_line(number, number, " ", diff_line.content)]
    end

    info = diff_line.informative?
    removal_color = info ? :cyan : :red
    addition_color = info ? :cyan : :green

    case kind
    when :removed
      [format_unified_line(number, nil, "-", diff_line.content,
                           removal_color, informative: info)]
    when :added
      [format_unified_line(nil, number, "+", diff_line.content,
                           addition_color, informative: info)]
    else
      # :changed - old text comes from the first document, new text from
      # the DiffLine itself.
      previous_text = lines1[diff_line.line_number]
      current_text = diff_line.content
      [
        format_unified_line(number, nil, "-", previous_text,
                            removal_color, informative: info),
        format_unified_line(nil, number, "+", current_text,
                            addition_color, informative: info),
      ]
    end
  end

  rows.join("\n")
end
134
+
135
# Legacy DOM-guided formatter, kept for backward compatibility.
#
# Both documents are parsed, their elements matched, and the matches are
# rendered against the ORIGINAL document lines. If anything in that
# process raises a StandardError, a warning, the error, and up to three
# backtrace frames mentioning "canon" are emitted, followed by the output
# of SimpleFormatter.
#
# @param doc1 [String] First XML document
# @param doc2 [String] Second XML document
# @return [String] Formatted diff, or "" when display is skipped
def format_legacy(doc1, doc2)
  return "" if should_skip_diff_display?

  require_relative "../../xml/data_model"
  require_relative "../../xml/element_matcher"
  require_relative "../../xml/line_range_mapper"

  rendered = []

  begin
    # Parse to DOM
    tree1 = Canon::Xml::DataModel.from_xml(doc1)
    tree2 = Canon::Xml::DataModel.from_xml(doc2)

    # Match elements semantically
    element_matches = Canon::Xml::ElementMatcher.new.match_trees(tree1,
                                                                 tree2)

    # Line range maps are built from the original documents
    range_mapper1 = Canon::Xml::LineRangeMapper.new(indent: 2)
    range_mapper2 = Canon::Xml::LineRangeMapper.new(indent: 2)
    ranges1 = range_mapper1.build_map(tree1, doc1)
    ranges2 = range_mapper2.build_map(tree2, doc2)

    rendered << format_element_matches(element_matches, ranges1, ranges2,
                                       doc1.split("\n"), doc2.split("\n"))
  rescue StandardError => e
    # Fall back to the simple line diff, telling the user why.
    rendered << colorize("Warning: DOM parsing failed, using simple diff",
                         :yellow)
    rendered << colorize("Error: #{e.class}: #{e.message}", :red)

    canon_frames = e.backtrace.select { |frame| frame.include?("canon") }
    canon_frames = canon_frames.first(3)
    unless canon_frames.empty?
      rendered << colorize("Backtrace:", :yellow)
      canon_frames.each do |frame|
        rendered << colorize(" #{frame}", :yellow)
      end
    end

    rendered << ""
    require_relative "simple_formatter"
    fallback = SimpleFormatter.new(
      use_color: @use_color,
      context_lines: @context_lines,
      diff_grouping_lines: @diff_grouping_lines,
      visualization_map: @visualization_map,
    )
    rendered << fallback.format(doc1, doc2)
  end

  rendered.join("\n")
end
200
+
201
+ private
202
+
203
# Decide whether the diff should be suppressed entirely.
#
# Returns true only when:
# 1. @show_diffs is :normative and no DiffNode in @differences is normative
# 2. @show_diffs is :informative and no DiffNode in @differences is
#    informative
# Entries that are not Canon::Diff::DiffNode never count. With nil or
# empty @differences, or any other @show_diffs value (e.g. :all), the
# diff is never skipped.
#
# @return [Boolean]
def should_skip_diff_display?
  return false if @differences.nil? || @differences.empty?

  predicate = { normative: :normative?, informative: :informative? }[@show_diffs]
  return false unless predicate

  @differences.none? do |diff|
    diff.is_a?(Canon::Diff::DiffNode) && diff.public_send(predicate)
  end
end
226
+
227
# Render the element matches produced by the legacy DOM matcher.
#
# Output is an optional character legend followed by the diff sections,
# ordered by their first-document start line (falling back to the
# second-document start line, then 0). When @diff_grouping_lines is set
# the sections are grouped by proximity; otherwise they are joined with
# blank lines.
#
# @param matches [Enumerable] Element match results
# @param map1 [Hash] Element => line range for the first document
# @param map2 [Hash] Element => line range for the second document
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @return [String] Formatted output
def format_element_matches(matches, map1, map2, lines1, lines2)
  rendered = []

  # Legend for visualized characters, only if any were detected.
  non_ascii = Legend.detect_non_ascii((lines1 + lines2).join,
                                      @visualization_map)
  unless non_ascii.empty?
    rendered.push(Legend.build_legend(non_ascii, use_color: @use_color), "")
  end

  # Children whose parent already shows a diff are not repeated.
  skipped = build_skip_set(matches, map1, map2, lines1, lines2)
  nested_children = build_children_set(matches)

  sections = collect_diff_sections(matches, map1, map2, lines1, lines2,
                                   skipped, nested_children)
  sections.sort_by! do |entry|
    entry[:start_line1] || entry[:start_line2] || 0
  end

  rendered << if @diff_grouping_lines
                grouped = group_diff_sections(sections, @diff_grouping_lines)
                format_diff_groups(grouped, lines1, lines2)
              else
                sections.map { |entry| entry[:formatted] }
                  .reject(&:nil?).join("\n\n")
              end

  rendered.join("\n")
end
272
+
273
# Build the set of elements that must not be reported separately because
# an ancestor already shows a diff.
#
# Pass 1 collects every matched elem1 whose line slice differs from the
# slice of its counterpart (semantic filtering happens later, in
# collect_diff_sections). Pass 2 walks each collected element's parent
# chain and marks the element as skipped as soon as an ancestor that
# responds to #name is itself in the collected set.
#
# @param matches [Enumerable] Match results (#status, #elem1, #elem2)
# @param map1 [Hash] Element => range (#start_line, #end_line), document 1
# @param map2 [Hash] Element => range (#start_line, #end_line), document 2
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @return [Set] Elements (from the first document) to skip
def build_skip_set(matches, map1, map2, lines1, lines2)
  elements_with_diffs = Set.new

  # First pass: matched elements whose line ranges differ textually.
  matches.each do |match|
    next unless match.status == :matched

    range1 = map1[match.elem1]
    range2 = map2[match.elem2]
    next unless range1 && range2

    old_slice = lines1[range1.start_line..range1.end_line]
    new_slice = lines2[range2.start_line..range2.end_line]
    elements_with_diffs.add(match.elem1) if old_slice != new_slice
  end

  # Second pass: skip any such element that has an ancestor in the set.
  elements_with_diffs.each_with_object(Set.new) do |elem, skipped|
    next unless elem.respond_to?(:parent)

    ancestor = elem.parent
    while ancestor
      if ancestor.respond_to?(:name) && elements_with_diffs.include?(ancestor)
        skipped.add(elem)
        break
      end
      ancestor = ancestor.respond_to?(:parent) ? ancestor.parent : nil
    end
  end
end
316
+
317
# Tell whether an element, or anything below it, is in the given set.
#
# The element itself is checked first; descendants are searched
# recursively through #children. Objects without #children are leaves.
#
# @param element [Object] Root of the subtree to inspect
# @param elements_with_semantic_diffs [Set] Elements known to carry diffs
# @return [Boolean]
def has_semantic_diff_in_subtree?(element, elements_with_semantic_diffs)
  return true if elements_with_semantic_diffs.include?(element)
  return false unless element.respond_to?(:children)

  element.children.any? do |descendant|
    has_semantic_diff_in_subtree?(descendant, elements_with_semantic_diffs)
  end
end
331
+
332
# Collect the individual nodes (not pairs) referenced by the DiffNodes in
# @differences.
#
# Entries that are not Canon::Diff::DiffNode are ignored, as are missing
# node1/node2 values.
#
# @return [Set] node1/node2 of every DiffNode; empty when @differences is
#   nil or empty
def build_elements_with_semantic_diffs_set
  return Set.new if @differences.nil? || @differences.empty?

  diff_nodes = @differences.grep(Canon::Diff::DiffNode)
  diff_nodes.each_with_object(Set.new) do |diff, touched|
    touched.add(diff.node1) if diff.node1
    touched.add(diff.node2) if diff.node2
  end
end
348
+
349
# Collect the direct children of every matched element.
#
# Both sides of each :matched pair contribute; only children that respond
# to #name are included.
#
# @param matches [Enumerable] Match results (#status, #elem1, #elem2)
# @return [Set] Named children of matched elements
def build_children_set(matches)
  matched_pairs = matches.select { |match| match.status == :matched }
  parents = matched_pairs.flat_map { |match| [match.elem1, match.elem2].compact }

  parents.each_with_object(Set.new) do |parent, named_children|
    next unless parent.respond_to?(:children)

    parent.children.each do |child|
      named_children.add(child) if child.respond_to?(:name)
    end
  end
end
367
+
368
# Collect one metadata section per element match that should be shown.
#
# * :matched  - skipped when the element is in elements_to_skip. When
#   @differences contains DiffNode objects, semantic filtering applies:
#   the element is shown only if it or a descendant is referenced by a
#   DiffNode. Without DiffNodes every textual diff is shown.
# * :deleted / :inserted - skipped when the element is a child of a
#   matched parent.
# Matches with any other status are ignored, as are matches whose
# formatter returns nil.
#
# @param matches [Enumerable] Match results (#status, #elem1, #elem2)
# @param map1 [Hash] Element => line range for the first document
# @param map2 [Hash] Element => line range for the second document
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @param elements_to_skip [Set] Elements already covered by an ancestor
# @param children_of_matched_parents [Set] Children of matched elements
# @return [Array<Hash>] Section hashes in match order
def collect_diff_sections(matches, map1, map2, lines1, lines2,
                          elements_to_skip, children_of_matched_parents)
  elements_with_semantic_diffs = build_elements_with_semantic_diffs_set

  # Loop-invariant, so computed once instead of once per matched element.
  # Safe navigation covers nil; an empty list yields false.
  semantic_filtering = @differences&.any?(Canon::Diff::DiffNode)

  matches.each_with_object([]) do |match, diff_sections|
    section =
      case match.status
      when :matched
        next if elements_to_skip.include?(match.elem1)

        if semantic_filtering
          # No semantic diffs at all: every textual diff was normalized.
          next if elements_with_semantic_diffs.empty?

          # Nothing semantic in this element's subtree.
          next unless has_semantic_diff_in_subtree?(match.elem1,
                                                    elements_with_semantic_diffs)
        end

        format_matched_element_with_metadata(match, map1, map2, lines1,
                                             lines2)
      when :deleted
        next if children_of_matched_parents.include?(match.elem1)

        format_deleted_element_with_metadata(match, map1, lines1)
      when :inserted
        next if children_of_matched_parents.include?(match.elem2)

        format_inserted_element_with_metadata(match, map2, lines2)
      end

    diff_sections << section if section
  end
end
413
+
414
# Format a matched element and attach its line-range metadata.
#
# @param match [Object] Match result (#elem1, #elem2, #path)
# @param map1 [Hash] Element => line range for the first document
# @param map2 [Hash] Element => line range for the second document
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @return [Hash, nil] Section hash, or nil when either range is missing
#   or format_matched_element produced nothing
def format_matched_element_with_metadata(match, map1, map2, lines1,
                                         lines2)
  old_span = map1[match.elem1]
  new_span = map2[match.elem2]
  return nil if !old_span || !new_span

  text = format_matched_element(match, map1, map2, lines1, lines2)
  return nil unless text

  {
    formatted: text,
    start_line1: old_span.start_line,
    end_line1: old_span.end_line,
    start_line2: new_span.start_line,
    end_line2: new_span.end_line,
    path: match.path.join("/"),
  }
end
434
+
435
# Format a deleted element and attach its line-range metadata.
#
# The second-document line fields are nil because the element exists only
# in the first document.
#
# @param match [Object] Match result (#elem1, #path)
# @param map1 [Hash] Element => line range for the first document
# @param lines1 [Array<String>] Lines of the first document
# @return [Hash, nil] Section hash, or nil when the range is missing or
#   format_deleted_element produced nothing
def format_deleted_element_with_metadata(match, map1, lines1)
  old_span = map1[match.elem1]
  return nil unless old_span

  text = format_deleted_element(match, map1, lines1)
  return nil unless text

  {
    formatted: text,
    start_line1: old_span.start_line,
    end_line1: old_span.end_line,
    start_line2: nil,
    end_line2: nil,
    path: match.path.join("/"),
  }
end
452
+
453
# Format an inserted element and attach its line-range metadata.
#
# The first-document line fields are nil because the element exists only
# in the second document.
#
# @param match [Object] Match result (#elem2, #path)
# @param map2 [Hash] Element => line range for the second document
# @param lines2 [Array<String>] Lines of the second document
# @return [Hash, nil] Section hash, or nil when the range is missing or
#   format_inserted_element produced nothing
def format_inserted_element_with_metadata(match, map2, lines2)
  new_span = map2[match.elem2]
  return nil unless new_span

  text = format_inserted_element(match, map2, lines2)
  return nil unless text

  {
    formatted: text,
    start_line1: nil,
    end_line1: nil,
    start_line2: new_span.start_line,
    end_line2: new_span.end_line,
    path: match.path.join("/"),
  }
end
470
+
471
# Render the line-level differences inside a matched element.
#
# The element's line slices from both documents are compared with
# Diff::LCS.sdiff; changed runs are grouped into contexts, widened by
# @context_lines, and rendered with a blank line between contexts.
#
# @param match [Object] Match result (#elem1, #elem2)
# @param map1 [Hash] Element => line range for the first document
# @param map2 [Hash] Element => line range for the second document
# @param lines1 [Array<String>] Lines of the first document
# @param lines2 [Array<String>] Lines of the second document
# @return [String, nil] Formatted diff, or nil when a range is missing,
#   the slices are identical, or no diff blocks are found
def format_matched_element(match, map1, map2, lines1, lines2)
  old_span = map1[match.elem1]
  new_span = map2[match.elem2]
  return nil unless old_span && new_span

  old_slice = lines1[old_span.start_line..old_span.end_line]
  new_slice = lines2[new_span.start_line..new_span.end_line]
  return nil if old_slice == new_slice

  line_changes = ::Diff::LCS.sdiff(old_slice, new_slice)

  changed_runs = identify_diff_blocks(line_changes)
  return nil if changed_runs.empty?

  grouped = group_diff_blocks_into_contexts(changed_runs,
                                            @diff_grouping_lines || 0)
  windows = expand_contexts_with_context_lines(grouped, @context_lines,
                                               line_changes.length)

  # Contexts are separated by one empty line.
  rendered = windows.map do |window|
    format_context(window, line_changes, old_span.start_line,
                   new_span.start_line)
  end
  rendered.join("\n\n")
end
510
+
511
# Render an element that exists only in the first document.
#
# A bold red header naming the element path is followed by every line of
# the element's range, each shown as a removal with a one-based number.
#
# @param match [Object] Match result (#elem1, #path)
# @param map1 [Hash] Element => line range for the first document
# @param lines1 [Array<String>] Lines of the first document
# @return [String, nil] Formatted block, or nil when the range is missing
def format_deleted_element(match, map1, lines1)
  span = map1[match.elem1]
  return nil unless span

  path_str = match.path.join("/")
  header = colorize("Element: #{path_str} [DELETED]", :red, :bold)

  removed_rows = (span.start_line..span.end_line).map do |index|
    format_unified_line(index + 1, nil, "-", lines1[index], :red)
  end

  [header, *removed_rows].join("\n")
end
527
+
528
# Render an element that exists only in the second document.
#
# A bold green header naming the element path is followed by every line
# of the element's range, each shown as an addition with a one-based
# number.
#
# @param match [Object] Match result (#elem2, #path)
# @param map2 [Hash] Element => line range for the second document
# @param lines2 [Array<String>] Lines of the second document
# @return [String, nil] Formatted block, or nil when the range is missing
def format_inserted_element(match, map2, lines2)
  span = map2[match.elem2]
  return nil unless span

  path_str = match.path.join("/")
  header = colorize("Element: #{path_str} [INSERTED]", :green, :bold)

  added_rows = (span.start_line..span.end_line).map do |index|
    format_unified_line(nil, index + 1, "+", lines2[index], :green)
  end

  [header, *added_rows].join("\n")
end
544
+
545
# Find the maximal runs of consecutive changed entries in an sdiff result.
#
# An entry is "changed" when its action is not "=". Each run becomes a
# Canon::Diff::DiffBlock carrying the run's first and last index and the
# distinct actions seen in it, in order of first appearance.
#
# @param diffs [Array] Change entries (each responding to #action)
# @return [Array<Canon::Diff::DiffBlock>] Blocks in ascending index order
def identify_diff_blocks(diffs)
  require_relative "../../diff/diff_block"

  # Pair each changed entry with its index, then cut wherever the indices
  # stop being consecutive.
  changed = diffs.each_with_index.reject { |change, _| change.action == "=" }
  runs = changed.slice_when { |(_, left), (_, right)| right != left + 1 }

  runs.map do |run|
    Canon::Diff::DiffBlock.new(
      start_idx: run.first.last,
      end_idx: run.last.last,
      types: run.map { |change, _| change.action }.uniq,
    )
  end
end
583
+
584
# Partition diff blocks into contexts of neighbouring blocks.
#
# Adjacent blocks stay in the same context while the number of entries
# between them (next start - previous end - 1) is at most grouping_lines.
#
# @param blocks [Array] Blocks responding to #start_idx and #end_idx
# @param grouping_lines [Integer] Largest gap that still joins two blocks
# @return [Array<Array>] Contexts, each an array of blocks
def group_diff_blocks_into_contexts(blocks, grouping_lines)
  return [] if blocks.empty?

  # Cut between two neighbours whenever the gap exceeds the threshold.
  blocks.slice_when do |previous, following|
    following.start_idx - previous.end_idx - 1 > grouping_lines
  end.to_a
end
606
+
607
# Widen each context by context_lines on both sides.
#
# The window starts context_lines before the first block and ends
# context_lines after the last block, clamped to [0, total_lines - 1].
#
# @param contexts [Array<Array>] Groups of blocks (#start_idx, #end_idx)
# @param context_lines [Integer] Extra entries to show around each group
# @param total_lines [Integer] Number of entries in the diff
# @return [Array<Canon::Diff::DiffContext>] One context per group
def expand_contexts_with_context_lines(contexts, context_lines,
                                       total_lines)
  require_relative "../../diff/diff_context"

  contexts.map do |group|
    window_start = group.first.start_idx - context_lines
    window_end = group.last.end_idx + context_lines
    last_index = total_lines - 1

    Canon::Diff::DiffContext.new(
      start_idx: window_start.negative? ? 0 : window_start,
      end_idx: window_end > last_index ? last_index : window_end,
      blocks: group,
    )
  end
end
626
+
627
# Render the sdiff entries covered by a context.
#
# Displayed line numbers are base + position + 1. Unchanged, removed and
# added entries become single unified rows; "!" entries are tokenized
# and rendered as an old/new pair with token-level highlighting.
#
# @param context [Object] Window (#start_idx, #end_idx) into diffs
# @param diffs [Array] sdiff change entries
# @param base_line1 [Integer] Start line of the element in document 1
# @param base_line2 [Integer] Start line of the element in document 2
# @return [String] Rows joined by newlines
def format_context(context, diffs, base_line1, base_line2)
  rows = (context.start_idx..context.end_idx).each_with_object([]) do |idx, acc|
    change = diffs[idx]

    old_no = change.old_position ? base_line1 + change.old_position + 1 : nil
    new_no = change.new_position ? base_line2 + change.new_position + 1 : nil

    case change.action
    when "="
      acc << format_unified_line(old_no, new_no, " ", change.old_element)
    when "-"
      acc << format_unified_line(old_no, nil, "-", change.old_element, :red)
    when "+"
      acc << format_unified_line(nil, new_no, "+", change.new_element, :green)
    when "!"
      # Token-level highlighting of what changed within the line.
      token_changes = ::Diff::LCS.sdiff(tokenize_xml(change.old_element),
                                        tokenize_xml(change.new_element))
      old_text = build_token_highlighted_text(token_changes, :old)
      new_text = build_token_highlighted_text(token_changes, :new)

      acc << format_token_diff_line(old_no, new_no, old_text, new_text)
    end
  end

  rows.join("\n")
end
663
+
664
# Partition diff sections into groups of nearby sections.
#
# Two neighbouring sections stay together only when the larger of their
# gaps in document 1 and document 2 is at most grouping_lines. A gap that
# cannot be measured (a missing end or start line) counts as infinite.
#
# @param sections [Array<Hash>] Sections with :start_line1, :end_line1,
#   :start_line2 and :end_line2 keys
# @param grouping_lines [Numeric] Largest gap that still joins sections
# @return [Array<Array<Hash>>] Groups of sections
def group_diff_sections(sections, grouping_lines)
  return [] if sections.empty?

  gap_between = lambda do |previous_end, next_start|
    previous_end && next_start ? next_start - previous_end - 1 : Float::INFINITY
  end

  sections.slice_when do |earlier, later|
    widest_gap = [
      gap_between.call(earlier[:end_line1], later[:start_line1]),
      gap_between.call(earlier[:end_line2], later[:start_line2]),
    ].max

    widest_gap > grouping_lines
  end.to_a
end
700
+
701
# Render groups of diff sections.
#
# Groups are separated by an empty line. A group holding more than one
# section is introduced by a bold yellow "Context block has N diffs"
# header and an empty line; sections without :formatted text are omitted.
#
# @param groups [Array<Array<Hash>>] Groups of sections (:formatted key)
# @param _lines1 [Array<String>] Lines of the first document (unused)
# @param _lines2 [Array<String>] Lines of the second document (unused)
# @return [String] Formatted groups joined by newlines
def format_diff_groups(groups, _lines1, _lines2)
  pieces = []

  groups.each_with_index do |group, position|
    pieces << "" unless position.zero?

    if group.length > 1
      pieces << colorize("Context block has #{group.length} diffs",
                         :yellow, :bold)
      pieces << ""
      pieces.concat(group.map { |section| section[:formatted] }.select(&:itself))
    else
      only_text = group[0][:formatted]
      pieces << only_text if only_text
    end
  end

  pieces.join("\n")
end
722
+
723
# Format one row of the unified diff.
#
# Row layout: old line number, "|", new line number, marker, " | ",
# content. Both number columns are four characters wide; a missing number
# is rendered as four spaces so that the pipes line up with rows whose
# numbers are present (which use "%4d").
#
# @param old_num [Integer, nil] Line number in the first document
# @param new_num [Integer, nil] Line number in the second document
# @param marker [String] Diff marker (" ", "-" or "+")
# @param content [String] Line text
# @param color [Symbol, nil] Colour for the marker; when given, the
#   content is also passed through apply_visualization
# @param informative [Boolean] Accepted for callers that pass it; not
#   read by this method
# @return [String] The formatted row
def format_unified_line(old_num, new_num, marker, content, color = nil,
                        informative: false)
  # String#% is used because this class defines its own #format, which
  # shadows Kernel#format.
  blank_number = " " * 4
  old_str = old_num ? "%4d" % old_num : blank_number
  new_str = new_num ? "%4d" % new_num : blank_number

  # Characters are only visualized on coloured (changed) rows.
  visualized_content = color ? apply_visualization(content, color) : content

  unless @use_color
    return "#{old_str}|#{new_str}#{marker} | #{visualized_content}"
  end

  yellow_old = colorize(old_str, :yellow)
  yellow_pipe1 = colorize("|", :yellow)
  yellow_new = colorize(new_str, :yellow)
  yellow_pipe2 = colorize("|", :yellow)
  shown_marker = color ? colorize(marker, color) : marker

  "#{yellow_old}#{yellow_pipe1}#{yellow_new}#{shown_marker} #{yellow_pipe2} #{visualized_content}"
end
753
+
754
# Format a changed line as an old/new pair of rows with pre-highlighted
# text.
#
# The rows use the same gutter layout as format_unified_line: two
# four-character number columns separated by "|", then the marker and
# " | ". The column that does not apply to a row is filled with four
# spaces, and a missing line number is rendered the same way instead of
# raising.
#
# @param old_line [Integer, nil] Line number in the first document
# @param new_line [Integer, nil] Line number in the second document
# @param old_highlighted [String] Highlighted text of the old line
# @param new_highlighted [String] Highlighted text of the new line
# @return [String] Two rows joined by a newline
def format_token_diff_line(old_line, new_line, old_highlighted,
                           new_highlighted)
  # String#% is used because this class defines its own #format, which
  # shadows Kernel#format.
  blank_number = " " * 4
  old_str = old_line ? "%4d" % old_line : blank_number
  new_str = new_line ? "%4d" % new_line : blank_number

  if @use_color
    yellow_old = colorize(old_str, :yellow)
    yellow_pipe1 = colorize("|", :yellow)
    yellow_new = colorize(new_str, :yellow)
    yellow_pipe2 = colorize("|", :yellow)
    red_marker = colorize("-", :red)
    green_marker = colorize("+", :green)

    old_row = "#{yellow_old}#{yellow_pipe1}#{blank_number}#{red_marker} #{yellow_pipe2} #{old_highlighted}"
    new_row = "#{blank_number}#{yellow_pipe1}#{yellow_new}#{green_marker} #{yellow_pipe2} #{new_highlighted}"
  else
    old_row = "#{old_str}|#{blank_number}- | #{old_highlighted}"
    new_row = "#{blank_number}|#{new_str}+ | #{new_highlighted}"
  end

  "#{old_row}\n#{new_row}"
end
776
+
777
# Split a line of XML text into tokens for token-level diffing.
#
# At each position the first matching pattern wins, tried in this order:
# whitespace run, tag opener ("<name" / "</name"), double-quoted
# attribute, single-quoted attribute, bare "name=", tag closer (">" or
# "/>"), run of other non-space, non-angle-bracket text. Anything else is
# consumed one character at a time. Concatenating the tokens gives back
# the input.
#
# @param line [String] Text to tokenize
# @return [Array<String>] Tokens in input order
def tokenize_xml(line)
  token_patterns = [
    /\s+/,
    /<\/?[\w:-]+/,
    /[\w:-]+="[^"]*"/,
    /[\w:-]+='[^']*'/,
    /[\w:-]+=/,
    /\/?>/,
    /[^<>\s]+/,
  ]

  scanner = StringScanner.new(line)
  tokens = []

  until scanner.eos?
    hit = token_patterns.find { |pattern| scanner.scan(pattern) }
    tokens << (hit ? scanner.matched : scanner.getch)
  end

  tokens
end
804
+
805
# Assemble one side of a token-level diff into display text.
#
# Unchanged tokens are mapped through @visualization_map (and coloured
# :default when colour is on). Removed tokens appear only on the :old
# side (red), added tokens only on the :new side (green); replaced tokens
# show their old text in red on the :old side and their new text in green
# otherwise.
#
# @param token_diffs [Enumerable] sdiff entries over tokens
# @param side [Symbol] :old or :new
# @return [String] Highlighted text
def build_token_highlighted_text(token_diffs, side)
  pieces = token_diffs.each_with_object([]) do |change, acc|
    case change.action
    when "="
      unchanged = (change.old_element || "").to_s
      shown = unchanged.each_char.map { |c| @visualization_map.fetch(c, c) }.join
      acc << (@use_color ? colorize(shown, :default) : shown)
    when "-"
      acc << apply_visualization(change.old_element, :red) if side == :old
    when "+"
      acc << apply_visualization(change.new_element, :green) if side == :new
    when "!"
      acc << if side == :old
               apply_visualization(change.old_element, :red)
             else
               apply_visualization(change.new_element, :green)
             end
    end
  end

  pieces.join
end
841
+
842
# Replace characters according to @visualization_map and optionally
# colour the result.
#
# Characters without a map entry are kept as they are. Colour is applied
# (bold, through the Paint gem, loaded on demand) only when both a color
# is given and @use_color is set.
#
# @param token [#to_s, nil] Text to visualize
# @param color [Symbol, nil] Colour to paint the text with
# @return [String] Visualized text ("" for a nil token)
def apply_visualization(token, color = nil)
  return "" if token.nil?

  visual = token.to_s.each_char.map do |char|
    @visualization_map.fetch(char, char)
  end.join
  return visual unless color && @use_color

  require "paint"
  Paint[visual, color, :bold]
end
857
+ end
858
+ end
859
+ end
860
+ end