canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,407 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "diff/lcs"
4
+ require "diff/lcs/hunk"
5
+ require_relative "../debug_output"
6
+
7
+ module Canon
8
+ class DiffFormatter
9
+ module ByLine
10
+ # Base formatter for line-by-line diffs
11
+ # Provides common LCS diff logic and hunk building
12
+ class BaseFormatter
13
+ attr_reader :use_color, :context_lines, :diff_grouping_lines,
14
+ :visualization_map, :show_diffs
15
+
16
# Factory: build the by-line formatter that matches a document format
#
# Each formatter's source file is loaded lazily, only when that format is
# requested. Any format not listed below falls back to SimpleFormatter.
#
# @param format [Symbol] Format type (:xml, :html, :html4, :html5, :json, :yaml, :simple)
# @param options [Hash] Formatting options, forwarded as keyword arguments
# @return [BaseFormatter] Format-specific formatter instance
def self.for_format(format, **options)
  case format
  when :html, :html4, :html5
    require_relative "html_formatter"
    # Only :html5 selects HTML5; bare :html is treated like :html4
    html_version = format == :html5 ? :html5 : :html4
    HtmlFormatter.new(html_version: html_version, **options)
  when :xml
    require_relative "xml_formatter"
    XmlFormatter.new(**options)
  when :json
    require_relative "json_formatter"
    JsonFormatter.new(**options)
  when :yaml
    require_relative "yaml_formatter"
    YamlFormatter.new(**options)
  else
    require_relative "simple_formatter"
    SimpleFormatter.new(**options)
  end
end
46
+
47
# Store the options shared by every by-line formatter
#
# @param use_color [Boolean] When truthy, colorize and apply_visualization emit ANSI colours
# @param context_lines [Integer] Stored as given (presumably the default
#   amount of context shown around changes; confirm against subclasses)
# @param diff_grouping_lines [Integer, nil] Stored as given; not read in this class
# @param visualization_map [Hash, nil] Character-to-replacement lookup used
#   by apply_visualization
# @param show_diffs [Symbol] :normative or :informative restrict the blocks
#   kept by filter_diff_blocks; any other value keeps all of them
# @param differences [Array] Stored as given; not read in this class
def initialize(use_color: true, context_lines: 3,
               diff_grouping_lines: nil, visualization_map: nil,
               show_diffs: :all, differences: [])
  # Rendering
  @use_color = use_color
  @visualization_map = visualization_map

  # Context and grouping
  @context_lines = context_lines
  @diff_grouping_lines = diff_grouping_lines

  # Which differences to report
  @show_diffs = show_diffs
  @differences = differences
end
57
+
58
# Render a line-by-line diff of two documents
#
# Abstract in this base class: it always raises, so every format-specific
# subclass has to provide its own implementation.
#
# @param doc1 [String] First document
# @param doc2 [String] Second document
# @return [String] Formatted diff
# @raise [NotImplementedError] always, when not overridden
def format(doc1, doc2)
  message = "Subclasses must implement the format method"
  raise NotImplementedError, message
end
68
+
69
+ protected
70
+
71
# Build hunks from an LCS diff, each holding changes plus nearby context
#
# A hunk opens at a changed entry, preceded by up to +context_lines+
# unchanged entries. It closes once more than +context_lines * 2+ entries
# have passed since its last change, and is then trimmed to at most
# +context_lines+ trailing unchanged entries. Unchanged entries that fall
# outside an open hunk are skipped, so every returned hunk contains at
# least one change.
#
# @param diffs [Array] LCS diff array; entries respond to #action, where "=" marks an unchanged line
# @param _lines1 [Array<String>] Lines from first document (unused)
# @param _lines2 [Array<String>] Lines from second document (unused)
# @param context_lines [Integer] Number of context lines
# @return [Array<Array>] Array of hunks
def build_hunks(diffs, _lines1, _lines2, context_lines: 3)
  hunks = []
  current_hunk = []
  last_change_index = nil

  diffs.each_with_index do |change, index|
    changed = change.action != "="

    # Close the open hunk once the gap since its last change is too wide.
    # An open hunk always contains a change, so last_change_index is set.
    if !current_hunk.empty? && index - last_change_index > context_lines * 2
      trim_trailing_context!(current_hunk, last_change_index,
                             context_lines)
      hunks << current_hunk
      current_hunk = []
    end

    if current_hunk.empty?
      # Unchanged lines outside a hunk are not collected here; the ones
      # right before the next change are added below as leading context.
      next unless changed

      start_context = [index - context_lines, 0].max
      (start_context...index).each { |i| current_hunk << diffs[i] }
    end

    current_hunk << change
    last_change_index = index if changed
  end

  # Trim trailing context lines and add final hunk if any
  unless current_hunk.empty?
    trim_trailing_context!(current_hunk, last_change_index,
                           context_lines)
    hunks << current_hunk
  end

  hunks
end

# Trim trailing context lines from a hunk
# Removes, in place, unchanged entries beyond context_lines after the last
# change. A hunk without any change is left untouched.
#
# @param hunk [Array] The hunk to trim
# @param _last_change_index [Integer] Index of last change in original diffs (unused)
# @param context_lines [Integer] Number of context lines to keep
def trim_trailing_context!(hunk, _last_change_index, context_lines)
  last_change_pos = hunk.rindex { |change| change.action != "=" }
  return if last_change_pos.nil?

  # First position that lies beyond the allowed trailing context
  cut_from = last_change_pos + context_lines + 1
  hunk.slice!(cut_from..-1) if cut_from < hunk.length
end
136
+
137
# Wrap text in Paint colours when colour output is enabled
#
# The result is prefixed with an ANSI reset so that colour codes already
# active in the terminal (the original comment names RSpec's initial red)
# do not interfere with the new colours. With colour disabled the text is
# returned as is.
#
# @param text [String] Text to colorize
# @param colors [Array<Symbol>] Paint color arguments
# @return [String] Colorized or plain text
def colorize(text, *colors)
  if @use_color
    # Loaded on demand so plain output never needs the paint gem
    require "paint"
    "\e[0m#{Paint[text, *colors]}"
  else
    text
  end
end
150
+
151
# Identify contiguous diff blocks
#
# A block is a maximal run of adjacent entries whose action is not "=".
# Each block records its first and last index in +diffs+ and the distinct
# actions seen in the run, in order of first appearance. The blocks are
# then passed through filter_diff_blocks.
#
# @param diffs [Array] LCS diff array
# @return [Array<Canon::Diff::DiffBlock>] Array of diff blocks
def identify_diff_blocks(diffs)
  require_relative "../../diff/diff_block"

  # [change, index] pairs for every entry that is not an unchanged line
  changed = diffs.each_with_index.reject do |change, _idx|
    change.action == "="
  end

  # Start a new run wherever two changed entries are not directly adjacent
  runs = changed.slice_when do |(_, left_idx), (_, right_idx)|
    right_idx != left_idx + 1
  end

  blocks = runs.map do |run|
    Canon::Diff::DiffBlock.new(
      start_idx: run.first.last,
      end_idx: run.last.last,
      types: run.map { |change, _idx| change.action }.uniq,
    )
  end

  # Filter blocks based on show_diffs setting
  filter_diff_blocks(blocks)
end
193
+
194
# Group diff blocks into contexts
#
# Consecutive blocks stay in the same group while the number of entries
# between them (next start - previous end - 1) is at most +grouping_lines+;
# a larger gap starts a new group.
#
# @param blocks [Array<Canon::Diff::DiffBlock>] Array of diff blocks
# @param grouping_lines [Integer] Maximum gap between blocks to group
# @return [Array<Array<Canon::Diff::DiffBlock>>] Array of block groups
def group_diff_blocks_into_contexts(blocks, grouping_lines)
  grouped = blocks.slice_when do |previous, following|
    following.start_idx - previous.end_idx - 1 > grouping_lines
  end

  grouped.to_a
end
220
+
221
# Expand contexts with context lines
#
# Each group of blocks becomes one DiffContext spanning from the first
# block's start minus +context_lines+ to the last block's end plus
# +context_lines+, clamped to the range 0..(total_lines - 1).
#
# @param contexts [Array<Array<Canon::Diff::DiffBlock>>] Block groups
# @param context_lines [Integer] Number of context lines to add
# @param total_lines [Integer] Total number of lines in diff
# @return [Array<Canon::Diff::DiffContext>] Array of diff contexts
def expand_contexts_with_context_lines(contexts, context_lines,
                                       total_lines)
  require_relative "../../diff/diff_context"

  contexts.map do |group|
    padded_start = group.first.start_idx - context_lines
    padded_end = group.last.end_idx + context_lines
    final_idx = total_lines - 1

    Canon::Diff::DiffContext.new(
      start_idx: padded_start.negative? ? 0 : padded_start,
      end_idx: padded_end > final_idx ? final_idx : padded_end,
      blocks: group,
    )
  end
end
245
+
246
# Format a context
#
# Renders every diff entry from context.start_idx through context.end_idx
# as one output line (two for a "!" change) and joins them with newlines.
# Displayed line numbers are base + position + 1; a side whose position is
# nil gets no number. Entries with an unrecognised action are skipped.
#
# @param context [Canon::Diff::DiffContext] The context to format
# @param diffs [Array] LCS diff array
# @param base_line1 [Integer] Base line number for old file
# @param base_line2 [Integer] Base line number for new file
# @return [String] Formatted context
def format_context(context, diffs, base_line1, base_line2)
  rendered = (context.start_idx..context.end_idx).flat_map do |idx|
    entry = diffs[idx]

    old_no = base_line1 + entry.old_position + 1 if entry.old_position
    new_no = base_line2 + entry.new_position + 1 if entry.new_position

    case entry.action
    when "="
      [format_unified_line(old_no, new_no, " ", entry.old_element)]
    when "-"
      [format_unified_line(old_no, nil, "-", entry.old_element, :red)]
    when "+"
      [format_unified_line(nil, new_no, "+", entry.new_element, :green)]
    when "!"
      [format_changed_line(old_no, new_no, entry.old_element,
                           entry.new_element)]
    else
      []
    end
  end

  rendered.join("\n")
end
282
+
283
# Filter diff blocks based on show_diffs setting
#
# :normative keeps blocks answering true to #normative?, :informative keeps
# those answering true to #informative?. Any other setting (including :all
# and nil) returns the blocks unfiltered.
#
# @param blocks [Array<Canon::Diff::DiffBlock>] Array of diff blocks
# @return [Array<Canon::Diff::DiffBlock>] Filtered array
def filter_diff_blocks(blocks)
  predicate = { normative: :normative?,
                informative: :informative? }[@show_diffs]
  return blocks unless predicate

  blocks.select(&predicate)
end
297
+
298
# Format a unified diff line
#
# Layout: old number, "|", new number, marker, " | ", content. A missing
# line number is padded with blanks to the same width as a present one, so
# the "|" separators stay in the same column on every line.
#
# Content is passed through apply_visualization only when the line has a
# colour (given, or cyan for informative lines); uncoloured context lines
# are emitted as is.
#
# @param old_num [Integer, nil] Line number in old file
# @param new_num [Integer, nil] Line number in new file
# @param marker [String] Diff marker (' ', '-', '+', '~')
# @param content [String] Line content
# @param color [Symbol, nil] Color for diff lines
# @param informative [Boolean] Whether this is an informative diff
# @return [String] Formatted line
def format_unified_line(old_num, new_num, marker, content, color = nil,
                        informative: false)
  # String#% is used instead of format(): this class defines its own
  # #format method, which would shadow Kernel#format here.
  blank = " " * 4 # same width as "%4d"
  old_str = old_num ? "%4d" % old_num : blank
  new_str = new_num ? "%4d" % new_num : blank

  # For informative diffs, use cyan color
  effective_color = informative ? :cyan : color

  visualized_content = if effective_color
                         apply_visualization(content, effective_color)
                       else
                         content
                       end

  unless @use_color
    return "#{old_str}|#{new_str}#{marker} | #{visualized_content}"
  end

  pipe = colorize("|", :yellow)
  shown_marker = effective_color ? colorize(marker, effective_color) : marker

  "#{colorize(old_str, :yellow)}#{pipe}#{colorize(new_str, :yellow)}" \
    "#{shown_marker} #{pipe} #{visualized_content}"
end
338
+
339
# Format changed lines (default implementation without token-level diff)
#
# Produces two rows joined by a newline: the old text with its number in
# the old-number column, then the new text with its number in the
# new-number column. The unused number column on each row is padded to the
# width of a line number, so both rows share the column layout produced by
# format_unified_line (old number, "|", new number, marker, " | ", content).
#
# @param old_line [Integer] Line number in old file
# @param new_line [Integer] Line number in new file
# @param old_text [String] Old line text
# @param new_text [String] New line text
# @param informative [Boolean] Whether this is an informative diff
# @return [String] Formatted change
def format_changed_line(old_line, new_line, old_text, new_text,
                        informative: false)
  # For informative diffs, use cyan color and ~ marker
  old_marker, new_marker, old_color, new_color =
    informative ? ["~", "~", :cyan, :cyan] : ["-", "+", :red, :green]

  old_visualized = apply_visualization(old_text, old_color)
  new_visualized = apply_visualization(new_text, new_color)

  # String#% is used instead of format(): this class defines its own
  # #format method, which would shadow Kernel#format here.
  blank = " " * 4 # same width as "%4d"
  old_num = "%4d" % old_line
  new_num = "%4d" % new_line

  rows =
    if @use_color
      pipe = colorize("|", :yellow)
      [
        "#{colorize(old_num, :yellow)}#{pipe}#{blank}" \
        "#{colorize(old_marker, old_color)} #{pipe} #{old_visualized}",
        "#{blank}#{pipe}#{colorize(new_num, :yellow)}" \
        "#{colorize(new_marker, new_color)} #{pipe} #{new_visualized}",
      ]
    else
      [
        "#{old_num}|#{blank}#{old_marker} | #{old_visualized}",
        "#{blank}|#{new_num}#{new_marker} | #{new_visualized}",
      ]
    end

  rows.join("\n")
end
384
+
385
# Apply character visualization
#
# Every character of the token that has an entry in the visualization map
# is replaced by that entry; other characters are kept. When no map is
# configured (the constructor default is nil) the text is returned
# unchanged. The result is painted bold in +color+ only when a colour is
# given and colour output is enabled.
#
# @param token [String] The token to apply visualization to
# @param color [Symbol, nil] Optional color to apply
# @return [String] Visualized and optionally colored token ("" for nil)
def apply_visualization(token, color = nil)
  return "" if token.nil?

  text = token.to_s
  map = @visualization_map
  visual = if map
             text.each_char.map { |char| map.fetch(char, char) }.join
           else
             # dup so the caller never receives the token object itself
             text.dup
           end

  return visual unless color && @use_color

  # Loaded on demand so plain output never needs the paint gem
  require "paint"
  Paint[visual, color, :bold]
end
404
+ end
405
+ end
406
+ end
407
+ end