canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,520 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "paint"
4
+ require "yaml"
5
+ require_relative "comparison"
6
+ require_relative "diff/diff_block"
7
+ require_relative "diff/diff_context"
8
+ require_relative "diff/diff_report"
9
+ require_relative "diff_formatter/debug_output"
10
+
11
+ module Canon
12
+ # Formatter for displaying semantic differences with color support
13
+ #
14
+ # This is a pure orchestrator class that delegates formatting to mode-specific
15
+ # and format-specific formatters. It provides a unified interface for generating
16
+ # both by-line and by-object diffs across multiple formats (XML, HTML, JSON, YAML).
17
+ #
18
+ # == Architecture
19
+ #
20
+ # DiffFormatter follows the orchestrator pattern with MECE (Mutually Exclusive,
21
+ # Collectively Exhaustive) delegation:
22
+ #
23
+ # 1. **Mode Selection**: Chooses by-line or by-object visualization
24
+ # 2. **Format Delegation**: Dispatches to format-specific formatter
25
+ # 3. **Customization**: Applies color, context, and visualization options
26
+ #
27
+ # == Diff Modes
28
+ #
29
+ # **By-Object Mode** (default for XML/JSON/YAML):
30
+ # - Tree-based semantic diff
31
+ # - Shows only what changed in the structure
32
+ # - Visual tree with box-drawing characters
33
+ # - Best for configuration files and structured data
34
+ #
35
+ # **By-Line Mode** (default for HTML):
36
+ # - Traditional line-by-line diff
37
+ # - Shows changes in document order with context
38
+ # - Syntax-aware token highlighting
39
+ # - Best for markup and when line context matters
40
+ #
41
+ # == Visualization Features
42
+ #
43
+ # - **Color support**: Red (deletions), green (additions), yellow (structure), cyan (informative)
44
+ # - **Whitespace visualization**: Makes invisible characters visible
45
+ # - **Context lines**: Shows unchanged lines around changes
46
+ # - **Diff grouping**: Groups nearby changes into blocks
47
+ # - **Character map customization**: CJK-safe Unicode symbols
48
+ #
49
+ # == Usage
50
+ #
51
+ # # Basic usage
52
+ # formatter = Canon::DiffFormatter.new(use_color: true, mode: :by_object)
53
+ # output = formatter.format(differences, :xml, doc1: xml1, doc2: xml2)
54
+ #
55
+ # # With options
56
+ # formatter = Canon::DiffFormatter.new(
57
+ # use_color: true,
58
+ # mode: :by_line,
59
+ # context_lines: 5,
60
+ # diff_grouping_lines: 10,
61
+ # show_diffs: :normative
62
+ # )
63
+ #
64
+ class DiffFormatter
65
# Namespace for the by-object formatters.
#
# Every constant is registered with autoload, so its file is only required
# the first time the constant is referenced.
module ByObject
  {
    BaseFormatter: "base_formatter",
    XmlFormatter: "xml_formatter",
    JsonFormatter: "json_formatter",
    YamlFormatter: "yaml_formatter",
  }.each do |const_name, file_name|
    autoload const_name, "canon/diff_formatter/by_object/#{file_name}"
  end
end
72
+
73
# Namespace for the by-line formatters.
#
# Every constant is registered with autoload, so its file is only required
# the first time the constant is referenced.
module ByLine
  {
    BaseFormatter: "base_formatter",
    SimpleFormatter: "simple_formatter",
    XmlFormatter: "xml_formatter",
    JsonFormatter: "json_formatter",
    YamlFormatter: "yaml_formatter",
  }.each do |const_name, file_name|
    autoload const_name, "canon/diff_formatter/by_line/#{file_name}"
  end
end
81
+
82
# Read the bundled character map (diff_formatter/character_map.yml, resolved
# relative to this file) and index its entries by character.
#
# Each entry under "characters" names its character either with a
# hexadecimal "unicode" code point or with a literal "character" string.
#
# @return [Hash] Hash with :visualization_map, :category_map,
#   :category_names and :character_metadata
def self.load_character_map
  config = YAML.load_file(
    File.join(__dir__, "diff_formatter", "character_map.yml"),
  )

  # One pass builds the full metadata index; the narrower maps are derived
  # from it below and therefore share its keys and ordering.
  metadata = config["characters"].each_with_object({}) do |entry, index|
    code_point = entry["unicode"]
    key = code_point ? [code_point.to_i(16)].pack("U") : entry["character"]

    index[key] = {
      visualization: entry["visualization"],
      category: entry["category"].to_sym,
      name: entry["name"],
    }
  end

  display_names = config["category_names"]
    .each_with_object({}) { |(key, label), names| names[key.to_sym] = label }

  {
    visualization_map: metadata.transform_values { |info| info[:visualization] },
    category_map: metadata.transform_values { |info| info[:category] },
    category_names: display_names,
    character_metadata: metadata,
  }
end
128
+
129
# Memoized accessor for the parsed character map.
#
# The map is loaded through load_character_map on first use; later calls
# return the same cached Hash.
def self.character_map_data
  return @character_map_data if @character_map_data

  @character_map_data = load_character_map
end
133
+
134
+ # Default character visualization map (loaded from YAML)
135
+ DEFAULT_VISUALIZATION_MAP = character_map_data[:visualization_map].freeze
136
+
137
+ # Character category map (loaded from YAML)
138
+ CHARACTER_CATEGORY_MAP = character_map_data[:category_map].freeze
139
+
140
+ # Category display names (loaded from YAML)
141
+ CHARACTER_CATEGORY_NAMES = character_map_data[:category_names].freeze
142
+
143
+ # Character metadata including names (loaded from YAML)
144
+ CHARACTER_METADATA = character_map_data[:character_metadata].freeze
145
+
146
+ # Map difference codes to human-readable descriptions
147
+ DIFF_DESCRIPTIONS = {
148
+ Comparison::EQUIVALENT => "Equivalent",
149
+ Comparison::MISSING_ATTRIBUTE => "Missing attribute",
150
+ Comparison::MISSING_NODE => "Missing node",
151
+ Comparison::UNEQUAL_ATTRIBUTES => "Unequal attributes",
152
+ Comparison::UNEQUAL_COMMENTS => "Unequal comments",
153
+ Comparison::UNEQUAL_DOCUMENTS => "Unequal documents",
154
+ Comparison::UNEQUAL_ELEMENTS => "Unequal elements",
155
+ Comparison::UNEQUAL_NODES_TYPES => "Unequal node types",
156
+ Comparison::UNEQUAL_TEXT_CONTENTS => "Unequal text contents",
157
+ Comparison::MISSING_HASH_KEY => "Missing hash key",
158
+ Comparison::UNEQUAL_HASH_VALUES => "Unequal hash values",
159
+ Comparison::UNEQUAL_ARRAY_LENGTHS => "Unequal array lengths",
160
+ Comparison::UNEQUAL_ARRAY_ELEMENTS => "Unequal array elements",
161
+ Comparison::UNEQUAL_TYPES => "Unequal types",
162
+ Comparison::UNEQUAL_PRIMITIVES => "Unequal primitive values",
163
+ }.freeze
164
+
165
# Configure the formatter.
#
# @param use_color [Boolean] Whether output is colorized
# @param mode [Symbol] :by_line selects line-by-line output; any other
#   value selects the by-object output
# @param context_lines [Integer] Context setting passed to by-line formatters
# @param diff_grouping_lines [Integer, nil] Grouping setting passed to
#   by-line formatters
# @param visualization_map [Hash, nil] Complete visualization map; when
#   given it replaces the defaults entirely
# @param character_map_file [String, nil] YAML file merged over the defaults
# @param character_definitions [Array<Hash>, nil] Individual definitions
#   merged over the defaults (after the file)
# @param show_diffs [Symbol] Passed through to by-line formatters
# @param verbose_diff [Boolean] Passed through to the verbose debug output
def initialize(use_color: true, mode: :by_object, context_lines: 3,
               diff_grouping_lines: nil, visualization_map: nil,
               character_map_file: nil, character_definitions: nil,
               show_diffs: :all, verbose_diff: false)
  # Presentation switches
  @use_color = use_color
  @mode = mode
  @show_diffs = show_diffs
  @verbose_diff = verbose_diff

  # By-line layout settings
  @context_lines = context_lines
  @diff_grouping_lines = diff_grouping_lines

  # Character visualization, resolved once from the customization options
  @visualization_map = build_visualization_map(
    visualization_map: visualization_map,
    character_map_file: character_map_file,
    character_definitions: character_definitions,
  )
end
181
+
182
# Overlay caller-supplied mappings on top of DEFAULT_VISUALIZATION_MAP.
#
# @param custom_map [Hash, nil] Custom character mappings; nil means none
# @return [Hash] New Hash holding the defaults overridden by custom_map
def self.merge_visualization_map(custom_map)
  overrides = custom_map || {}
  DEFAULT_VISUALIZATION_MAP.merge(overrides)
end
189
+
190
# Load character visualizations from a user-supplied YAML file.
#
# The file must contain a top-level "characters" list. Each entry names its
# character either with a hexadecimal "unicode" code point or with a literal
# "character" string, and gives its replacement under "visualization".
#
# @param file_path [String] Path to YAML file with character definitions
# @return [Hash] Character visualization map
# @raise [ArgumentError] if the file does not contain a "characters" list
def self.load_custom_character_map(file_path)
  data = YAML.load_file(file_path)

  # An empty or differently-shaped file used to fail with NoMethodError on
  # nil/false; report the actual problem instead.
  entries = data["characters"] if data.is_a?(Hash)
  unless entries.respond_to?(:each)
    raise ArgumentError,
          "Character map file #{file_path} must contain a \"characters\" list"
  end

  entries.each_with_object({}) do |char_data, visualization_map|
    code_point = char_data["unicode"]
    char = if code_point
             # An unquoted all-digit code point (e.g. 2003) is parsed by YAML
             # as an Integer, and Integer#to_i takes no radix. Going through
             # to_s keeps it readable as hex. Code points with a leading zero
             # should still be quoted in the file: the YAML parser may read
             # an unquoted 0020 as an octal number before it gets here.
             [code_point.to_s.to_i(16)].pack("U")
           else
             char_data["character"]
           end

    visualization_map[char] = char_data["visualization"]
  end
end
211
+
212
# Turn one character definition Hash into a single-entry visualization map.
#
# @param definition [Hash] Definition using the same fields as the YAML format:
#   - :unicode (hex code point) or :character (literal); one is required
#   - :visualization (required)
#   - :category (required)
#   - :name (required)
# @return [Hash] Single-entry map of character => visualization
# @raise [ArgumentError] if a required field is missing
def self.build_character_definition(definition)
  # Resolve the character first; :unicode wins when both are present
  char =
    if (hex = definition[:unicode])
      [hex.to_i(16)].pack("U")
    elsif (literal = definition[:character])
      literal
    else
      raise ArgumentError,
            "Character definition must include :character or :unicode"
    end

  # The remaining fields are checked in a fixed order so the first missing
  # one is the one reported
  %i[visualization category name].each do |field|
    next if definition[field]

    raise ArgumentError, "Character definition must include :#{field}"
  end

  { char => definition[:visualization] }
end
245
+
246
# Render a set of differences in the configured mode.
#
# @param differences [Array, #equivalent?] Legacy Array of differences, or
#   a comparison result object responding to #equivalent?
# @param format [Symbol] Format type (:xml, :html, :json, :yaml)
# @param doc1 [String, nil] First document content (for by-line mode)
# @param doc2 [String, nil] Second document content (for by-line mode)
# @param html_version [Symbol, nil] HTML version (:html4 or :html5)
# @return [String] Formatted output
def format(differences, format, doc1: nil, doc2: nil, html_version: nil)
  line_mode = @mode == :by_line
  render_lines = lambda do
    by_line_diff(doc1, doc2, format: format, html_version: html_version,
                             differences: differences)
  end

  # With both documents at hand, by-line mode always runs the diff, even
  # when no differences were reported.
  return render_lines.call if line_mode && doc1 && doc2

  # Comparison result objects answer #equivalent?; legacy callers pass a
  # plain Array, where "no differences" simply means empty.
  identical = if differences.respond_to?(:equivalent?)
                differences.equivalent?
              else
                differences.empty?
              end
  return success_message if identical

  line_mode ? render_lines.call : by_object_diff(differences, format)
end
280
+
281
# Build the complete diff output for a result of Canon::Comparison.equivalent?
#
# The output is assembled from up to three sections, joined by newlines:
# verbose tables from DebugOutput (when it returns any), a detail report of
# the differences (ComparisonResult input with differences only), and the
# main by-line or by-object diff.
#
# @param comparison_result [ComparisonResult, Hash, Array, Boolean] Result
#   from Canon::Comparison.equivalent?
# @param expected [Object] Expected value
# @param actual [Object] Actual value
# @return [String] Formatted diff output
def format_comparison_result(comparison_result, expected, actual)
  # The format is detected from the expected content only
  detected_format = Canon::Comparison.send(:detect_format, expected)
  sections = []

  # Section 1: verbose tables. DebugOutput decides whether anything is
  # produced (per the original note, only when CANON_VERBOSE=1).
  tables = DebugOutput.verbose_tables_only(
    comparison_result,
    {
      use_color: @use_color,
      mode: @mode,
      context_lines: @context_lines,
      diff_grouping_lines: @diff_grouping_lines,
      show_diffs: @show_diffs,
      verbose_diff: @verbose_diff,
    },
  )
  sections << tables unless tables.empty?

  structured = comparison_result.is_a?(Canon::Comparison::ComparisonResult)

  # Section 2: detail report, whenever a ComparisonResult carries differences
  if structured && comparison_result.differences.any?
    require_relative "diff_formatter/diff_detail_formatter"
    sections << DiffDetailFormatter.format_report(
      comparison_result.differences,
      use_color: @use_color,
    )
  end

  # Section 3: main diff. Pick the documents and differences to render.
  if structured
    # Reuse the strings the comparison already preprocessed
    left, right = comparison_result.preprocessed_strings
    diffs = comparison_result.differences
    version = comparison_result.html_version
  elsif comparison_result.is_a?(Hash) && comparison_result[:preprocessed]
    # Legacy Hash result carrying the same data under symbol keys
    left, right = comparison_result[:preprocessed]
    diffs = comparison_result[:differences]
    version = comparison_result[:html_version]
  else
    # Legacy path: normalize the raw inputs for display; an Array result is
    # itself the list of differences
    left, right = normalize_content_for_display(expected, actual,
                                                detected_format)
    diffs = comparison_result.is_a?(Array) ? comparison_result : []
    version = nil
  end

  sections << format(diffs, detected_format, doc1: left, doc2: right,
                                             html_version: version)

  sections.compact.join("\n")
end
347
+
348
+ private
349
+
350
# Convert both sides of a comparison into display strings for a diff.
#
# A single renderer is chosen from the format and applied to the expected
# value first, then to the actual value.
#
# @param expected [Object] Expected value
# @param actual [Object] Actual value
# @param format [Symbol] Detected format
# @return [Array<String, String>] Normalized [expected, actual] strings
def normalize_content_for_display(expected, actual, format)
  render =
    case format
    when :xml
      # Canonical form without comments, one element boundary per line
      lambda do |doc|
        Canon::Xml::C14n.canonicalize(doc, with_comments: false).gsub(
          /></, ">\n<"
        )
      end
    when :html
      require "nokogiri"
      ->(doc) { parse_and_format_html(doc) }
    when :json, :yaml
      ->(doc) { Canon.format(doc, format) }
    when :ruby_object
      # Ruby objects are shown as pretty-printed JSON
      require "json"
      ->(doc) { JSON.pretty_generate(doc) }
    else
      # Anything else, including :string, is shown via #to_s
      ->(doc) { doc.to_s }
    end

  [expected, actual].map(&render)
end
395
+
396
# Parse and format HTML for display.
#
# Already-parsed Nokogiri HTML documents are serialized directly; anything
# else is parsed with Nokogiri::HTML first. Formatting is best-effort: if
# parsing or serialization raises a StandardError, the input's #to_s is
# returned instead.
#
# @param html [Object] HTML content (String or Nokogiri document)
# @return [String] Formatted HTML
def parse_and_format_html(html)
  # Load Nokogiri before touching its constants. Referencing them first
  # raised NameError when the library was not loaded yet, and the rescue
  # below then silently returned the unformatted input.
  require "nokogiri"

  document_classes = [Nokogiri::HTML::Document]
  # Only test for the HTML5 document class when this Nokogiri provides it;
  # an unconditional reference would again end in the rescue below.
  if defined?(Nokogiri::HTML5::Document)
    document_classes << Nokogiri::HTML5::Document
  end
  return html.to_html if document_classes.any? { |klass| html.is_a?(klass) }

  Nokogiri::HTML(html).to_html
rescue StandardError
  html.to_s
end
409
+
410
# Resolve the visualization map from the customization options.
#
# A complete visualization_map takes precedence and is returned as given.
# Otherwise a copy of DEFAULT_VISUALIZATION_MAP is overlaid first with the
# entries from character_map_file, then with each character definition.
#
# @param visualization_map [Hash, nil] Complete custom visualization map
# @param character_map_file [String, nil] Path to custom YAML file
# @param character_definitions [Array<Hash>, nil] Individual character
#   definitions
# @return [Hash] Final visualization map
def build_visualization_map(visualization_map: nil, character_map_file: nil,
                            character_definitions: nil)
  return visualization_map if visualization_map

  # Collect the overlays in application order: file first, then definitions
  overlays = []
  if character_map_file
    overlays << self.class.load_custom_character_map(character_map_file)
  end
  character_definitions&.each do |definition|
    overlays << self.class.build_character_definition(definition)
  end

  overlays.reduce(DEFAULT_VISUALIZATION_MAP.dup) do |merged, overlay|
    merged.merge!(overlay)
  end
end
441
+
442
# Build the "no differences" banner for the current mode.
#
# The check-mark prefix is only added when color output is enabled.
#
# @return [String] Banner text, passed through colorize
def success_message
  text = if @mode == :by_line
           "Files are identical"
         else
           "Files are semantically equivalent"
         end
  prefix = @use_color ? "✅ " : ""

  colorize("#{prefix}#{text}\n", :green, :bold)
end
454
+
455
# Render the by-object diff: a "Visual Diff:" header followed by the output
# of the format-specific by-object formatter.
#
# @param differences [Object] Differences handed to the formatter
# @param format [Symbol] Format used to select the formatter
# @return [String] Header and formatter output joined by a newline
def by_object_diff(differences, format)
  require_relative "diff_formatter/by_object/base_formatter"

  header = colorize("Visual Diff:", :cyan, :bold)

  # BaseFormatter.for_format picks the formatter for the given format
  tree_formatter = ByObject::BaseFormatter.for_format(
    format,
    use_color: @use_color,
    visualization_map: @visualization_map,
  )

  [header, tree_formatter.format(differences, format)].join("\n")
end
474
+
475
# Render the by-line diff: a header naming the format, followed by the
# output of the format-specific by-line formatter.
#
# When either document is nil only the header is returned.
#
# @param doc1 [String, nil] First document content
# @param doc2 [String, nil] Second document content
# @param format [Symbol] Format used to select the formatter
# @param html_version [Symbol, nil] :html4 or :html5; when given together
#   with format :html it replaces the format
# @param differences [Object] Differences passed through to the formatter
# @return [String] Header and formatter output joined by a newline
def by_line_diff(doc1, doc2, format: :xml, html_version: nil,
                 differences: [])
  require_relative "diff_formatter/by_line/base_formatter"

  # A known HTML version stands in for the generic :html format; without
  # one the format is passed on unchanged.
  effective_format = (format == :html && html_version) ? html_version : format

  header = colorize(
    "Line-by-line diff (#{effective_format.to_s.upcase} mode):", :cyan, :bold
  )
  return [header].join("\n") if doc1.nil? || doc2.nil?

  # BaseFormatter.for_format picks the formatter for the effective format
  line_formatter = ByLine::BaseFormatter.for_format(
    effective_format,
    use_color: @use_color,
    context_lines: @context_lines,
    diff_grouping_lines: @diff_grouping_lines,
    visualization_map: @visualization_map,
    show_diffs: @show_diffs,
    differences: differences,
  )

  [header, line_formatter.format(doc1, doc2)].join("\n")
end
510
+
511
# Apply colors to text when color output is enabled.
#
# The result is prefixed with an ANSI reset so that a color already active
# on the terminal (the original note mentions RSpec's red) does not bleed
# into the text.
#
# @param text [String] Text to colorize
# @param colors [Array<Symbol>] Color/style arguments handed to Paint
# @return [String] Colorized text, or text unchanged when color is disabled
def colorize(text, *colors)
  @use_color ? "\e[0m#{Paint[text, *colors]}" : text
end
519
+ end
520
+ end
@@ -0,0 +1,56 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  # Common ancestor of the error classes defined below
  class Error < StandardError
  end

  # Raised when the two inputs of a comparison are in different formats
  class CompareFormatMismatchError < Error
    # @param format1 [Symbol] The first format
    # @param format2 [Symbol] The second format
    def initialize(format1, format2)
      super("Cannot compare different formats: #{format1} vs #{format2}")
    end
  end

  # Raised when input (XML, HTML, JSON, YAML) is malformed or fails a
  # validation check.
  #
  # Besides the message, the error exposes the format and, when known, the
  # line, column and further details of the problem.
  class ValidationError < Error
    attr_reader :format, :line, :column, :details

    # @param message [String] The error message
    # @param format [Symbol] The format being validated (:xml, :html, :json,
    #   :yaml)
    # @param line [Integer, nil] The line number where the error occurred
    # @param column [Integer, nil] The column number where the error occurred
    # @param details [String, nil] Additional details about the error
    def initialize(message, format:, line: nil, column: nil, details: nil)
      # The readers must be populated before the message is built from them
      @details = details
      @column = column
      @line = line
      @format = format
      super(build_message(message))
    end

    private

    # Compose the full message: a headline naming the format, followed by
    # one line for each location field that is set.
    #
    # @param msg [String] The base error message
    # @return [String] The formatted error message
    def build_message(msg)
      [
        "#{format.to_s.upcase} Validation Error: #{msg}",
        (" Line: #{line}" if line),
        (" Column: #{column}" if column),
        (" Details: #{details}" if details),
      ].compact.join("\n")
    end
  end
end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "html_formatter_base"
4
+
5
+ module Canon
6
+ module Formatters
7
+ # HTML4 formatter using Nokogiri::HTML parser
8
+ class Html4Formatter < HtmlFormatterBase
9
# Parse a document with Nokogiri's HTML4 parser.
#
# @param html [String] HTML document to parse
# @return [Nokogiri::HTML::Document] Parsed HTML4 document
def self.parse(html)
  Nokogiri::HTML::Document.parse(html)
end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "html_formatter_base"
4
+
5
+ module Canon
6
+ module Formatters
7
+ # HTML5 formatter using Nokogiri::HTML5 parser
8
+ class Html5Formatter < HtmlFormatterBase
9
# Parse a document with Nokogiri's HTML5 parser.
#
# @param html [String] HTML document to parse
# @return [Nokogiri::HTML5::Document] Parsed HTML5 document
def self.parse(html)
  Nokogiri::HTML5::Document.parse(html)
end
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,37 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "nokogiri"
4
+ require_relative "html_formatter_base"
5
+ require_relative "../pretty_printer/html"
6
+ require_relative "../validators/html_validator"
7
+
8
+ module Canon
9
+ module Formatters
10
+ # HTML formatter for HTML 4/5 and XHTML
11
+ class HtmlFormatter < HtmlFormatterBase
12
# Validate a document, then parse it with the parser matching its dialect.
#
# Documents recognized by xhtml? go through the XML parser; everything else
# goes through the HTML5 parser.
#
# @param html [String] HTML document to parse
# @return [Nokogiri::HTML::Document, Nokogiri::XML::Document]
#   Parsed HTML or XML document
def self.parse(html)
  # Validation runs first, so the parsers below only see validated input
  Canon::Validators::HtmlValidator.validate!(html)

  return Nokogiri::XML(html) if xhtml?(html)

  Nokogiri::HTML5(html)
end
26
+
27
# Decide whether a document should be treated as XHTML (parsed as XML)
# rather than as HTML.
#
# A document counts as XHTML when any of these holds:
# - its DOCTYPE declaration mentions "XHTML"
# - it declares the XHTML default namespace
# - it declares any prefixed XML namespace (xmlns:prefix)
#
# The "XHTML" marker is only looked for inside the DOCTYPE declaration.
# Searching the whole document misclassified ordinary HTML whose text merely
# mentioned the word, sending it to the XML parser.
#
# @param html [String] Document source
# @return [Boolean] true when the document looks like XHTML
def self.xhtml?(html)
  html.match?(/(?i:<!DOCTYPE)[^>]*XHTML/) ||
    html.include?('xmlns="http://www.w3.org/1999/xhtml"') ||
    html.match?(/xmlns:\w+/)
end
33
+
34
+ private_class_method :xhtml?
35
+ end
36
+ end
37
+ end