canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,292 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_formatter"
4
+ require_relative "../legend"
5
+ require "strscan"
6
+
7
+ module Canon
8
+ class DiffFormatter
9
+ module ByLine
10
+ # YAML formatter with semantic token-level highlighting
11
+ # Pretty-prints YAML before diffing for better structure awareness
12
+ class YamlFormatter < BaseFormatter
13
# Diff two YAML documents line by line with token-level highlighting.
#
# Both documents are first run through Canon.format(..., :yaml) and the
# resulting lines are compared with Diff::LCS.sdiff. If any StandardError
# is raised along the way, a warning line is emitted and the raw documents
# are diffed with SimpleFormatter instead.
#
# @param doc1 [String] First YAML document
# @param doc2 [String] Second YAML document
# @return [String] Formatted diff
def format(doc1, doc2)
  sections =
    begin
      # Pretty print both YAML files (canonicalized) before splitting
      require "canon"
      canonical_old = Canon.format(doc1, :yaml)
      canonical_new = Canon.format(doc2, :yaml)
      old_lines = canonical_old.split("\n")
      new_lines = canonical_new.split("\n")

      line_changes = ::Diff::LCS.sdiff(old_lines, new_lines)
      [format_semantic_diff(line_changes, old_lines, new_lines)]
    rescue StandardError => e
      # The warning is built before the fallback formatter is loaded
      warning = colorize(
        "Warning: YAML parsing failed (#{e.message}), using simple diff", :yellow
      )
      require_relative "simple_formatter"
      fallback = SimpleFormatter.new(
        use_color: @use_color,
        context_lines: @context_lines,
        diff_grouping_lines: @diff_grouping_lines,
        visualization_map: @visualization_map,
      )
      [warning, fallback.format(doc1, doc2)]
    end

  sections.join("\n")
end
51
+
52
+ private
53
+
54
# Render a line-level sdiff, highlighting changed lines token by token.
#
# A legend followed by an empty line is emitted first when
# Legend.detect_non_ascii reports anything for the combined text of both
# documents. Changes with an action other than "=", "-", "+" or "!"
# produce no output.
#
# @param diffs [Array] Line-level changes from Diff::LCS.sdiff
# @param lines1 [Array<String>] Lines from first document
# @param lines2 [Array<String>] Lines from second document
# @return [String] Formatted diff
def format_semantic_diff(diffs, lines1, lines2)
  rendered = []

  combined_text = (lines1 + lines2).join
  special_chars = Legend.detect_non_ascii(combined_text, @visualization_map)
  unless special_chars.empty?
    rendered.push(Legend.build_legend(special_chars, use_color: @use_color), "")
  end

  # sdiff positions are zero-based; displayed line numbers are one-based
  one_based = ->(position) { position ? position + 1 : nil }

  diffs.each do |change|
    old_no = one_based.call(change.old_position)
    new_no = one_based.call(change.new_position)

    case change.action
    when "="
      rendered << format_unified_line(old_no, new_no, " ", change.old_element)
    when "-"
      rendered << format_unified_line(old_no, nil, "-", change.old_element, :red)
    when "+"
      rendered << format_unified_line(nil, new_no, "+", change.new_element, :green)
    when "!"
      # Changed line: diff the two lines again at token granularity
      token_changes = ::Diff::LCS.sdiff(tokenize_yaml(change.old_element),
                                        tokenize_yaml(change.new_element))
      old_side = build_token_highlighted_text(token_changes, :old)
      new_side = build_token_highlighted_text(token_changes, :new)
      rendered << format_token_diff_line(old_no, new_no, old_side, new_side)
    end
  end

  rendered.join("\n")
end
115
+
116
# Format one line of unified-style diff output.
#
# The layout is "<old>|<new><marker> | <content>". Each line-number column
# is four characters wide; a missing number is rendered as four spaces so
# the pipes line up with numbered rows.
#
# @param old_num [Integer, nil] Line number in old file
# @param new_num [Integer, nil] Line number in new file
# @param marker [String] Diff marker
# @param content [String] Line content
# @param color [Symbol, nil] Color for diff lines
# @return [String] Formatted line
def format_unified_line(old_num, new_num, marker, content, color = nil)
  # String#% is used rather than Kernel#format because this class defines
  # its own #format(doc1, doc2).
  blank_column = " " * 4 # same width as "%4d"
  old_str = old_num ? "%4d" % old_num : blank_column
  new_str = new_num ? "%4d" % new_num : blank_column

  # Only lines given a color go through character visualization; other
  # lines are emitted verbatim.
  visualized_content = color ? apply_visualization(content, color) : content

  unless @use_color
    return "#{old_str}|#{new_str}#{marker} | #{visualized_content}"
  end

  pipe = colorize("|", :yellow)
  shown_marker = color ? colorize(marker, color) : marker
  numbers = "#{colorize(old_str, :yellow)}#{pipe}#{colorize(new_str, :yellow)}"
  "#{numbers}#{shown_marker} #{pipe} #{visualized_content}"
end
152
+
153
# Format a changed line as a removed/added pair.
#
# The first row carries the old line number and a "-" marker, the second
# row the new line number and a "+" marker. The unused number column in
# each row is four spaces wide, matching the "%4d" width of the numbered
# column so both rows share the same pipe positions.
#
# @param old_line [Integer] Old line number
# @param new_line [Integer] New line number
# @param old_highlighted [String] Highlighted old text
# @param new_highlighted [String] Highlighted new text
# @return [String] The two formatted rows joined by a newline
def format_token_diff_line(old_line, new_line, old_highlighted,
                           new_highlighted)
  blank_column = " " * 4 # same width as "%4d"
  old_no = "%4d" % old_line
  new_no = "%4d" % new_line

  unless @use_color
    return [
      "#{old_no}|#{blank_column}- | #{old_highlighted}",
      "#{blank_column}|#{new_no}+ | #{new_highlighted}",
    ].join("\n")
  end

  pipe = colorize("|", :yellow)
  removed_marker = colorize("-", :red)
  added_marker = colorize("+", :green)

  removed = "#{colorize(old_no, :yellow)}#{pipe}#{blank_column}" \
            "#{removed_marker} #{pipe} #{old_highlighted}"
  added = "#{blank_column}#{pipe}#{colorize(new_no, :yellow)}" \
          "#{added_marker} #{pipe} #{new_highlighted}"

  [removed, added].join("\n")
end
181
+
182
# Split a single YAML line into tokens for token-level diffing.
#
# Patterns are tried in order at the current scan position and the first
# one that matches wins. When none matches, a single character is consumed
# as its own token. Concatenating the returned tokens reproduces the line.
#
# @param line [String] YAML line to tokenize
# @return [Array<String>] Tokens
def tokenize_yaml(line)
  ordered_patterns = [
    /\s+/,                           # whitespace (preserved for indentation)
    /[\w-]+:/,                       # YAML key with colon
    /"(?:[^"\\]|\\.)*"/,             # double-quoted strings
    /'(?:[^'\\]|\\.)*'/,             # single-quoted strings
    /-?\d+\.?\d*/,                   # numbers
    /\b(?:true|false|yes|no)\b/,     # booleans
    /-\s/,                           # list markers
    /[^\s:]+/,                       # bare words (unquoted values)
  ]

  scanner = StringScanner.new(line)
  tokens = []

  until scanner.eos?
    hit = ordered_patterns.find { |pattern| scanner.scan(pattern) }
    # No pattern matched: take any other character on its own
    tokens << (hit ? scanner.matched : scanner.getch)
  end

  tokens
end
223
+
224
# Assemble one side of a changed line from a token-level diff.
#
# Unchanged tokens are mapped through @visualization_map and, when color
# is enabled, passed to colorize with :default. Removed tokens appear only
# for side :old, added tokens only for side :new. For replaced tokens the
# old element is used when side is :old and the new element otherwise.
#
# @param token_diffs [Array] Token-level diff
# @param side [Symbol] Which side (:old or :new)
# @return [String] Highlighted text
def build_token_highlighted_text(token_diffs, side)
  old_side = side == :old

  pieces = token_diffs.map do |change|
    case change.action
    when "="
      plain = change.old_element.each_char.map do |ch|
        @visualization_map.fetch(ch, ch)
      end.join
      @use_color ? colorize(plain, :default) : plain
    when "-"
      apply_visualization(change.old_element, :red) if old_side
    when "+"
      apply_visualization(change.new_element, :green) if side == :new
    when "!"
      if old_side
        apply_visualization(change.old_element, :red)
      else
        apply_visualization(change.new_element, :green)
      end
    end
  end

  # Tokens that do not belong to this side yielded nil above
  pieces.compact.join
end
271
+
272
# Substitute characters through @visualization_map and optionally paint
# the result.
#
# Characters without an entry in the map are kept as they are. The result
# is painted bold in the given color only when a color is passed and color
# output is enabled.
#
# @param token [String] Token to visualize
# @param color [Symbol, nil] Optional color
# @return [String] Visualized token
def apply_visualization(token, color = nil)
  rendered = token.each_char.map { |ch| @visualization_map.fetch(ch, ch) }.join
  return rendered unless color && @use_color

  require "paint"
  Paint[rendered, color, :bold]
end
289
+ end
290
+ end
291
+ end
292
+ end
@@ -0,0 +1,199 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ class DiffFormatter
5
+ module ByObject
6
+ # Base class for by-object diff formatters
7
+ # Provides tree visualization for semantic differences
8
+ class BaseFormatter
9
+ attr_reader :use_color, :visualization_map
10
+
11
# Create a by-object formatter.
#
# @param use_color [Boolean] Whether output is colorized
# @param visualization_map [Hash, nil] Character substitution map; when
#   nil (or false) DiffFormatter::DEFAULT_VISUALIZATION_MAP is used
def initialize(use_color: true, visualization_map: nil)
  @use_color = use_color
  @visualization_map =
    if visualization_map
      visualization_map
    else
      DiffFormatter::DEFAULT_VISUALIZATION_MAP
    end
end
16
+
17
# Format differences for display as a tree.
#
# Accepts either an object that responds to #equivalent? (its
# #differences are rendered) or a plain Array of differences. When the
# former reports equivalence, or the latter is empty, the success message
# is returned instead of a tree.
#
# @param differences [ComparisonResult, Array] ComparisonResult object or legacy Array
# @param _format [Symbol] Format type (:xml, :html, :json, :yaml); not
#   used by this method
# @return [String] Formatted output
def format(differences, _format)
  wraps_result = differences.respond_to?(:equivalent?)
  nothing_to_report = wraps_result ? differences.equivalent? : differences.empty?
  return success_message if nothing_to_report

  diff_list = wraps_result ? differences.differences : differences

  heading = colorize("Visual Diff:", :cyan, :bold)
  # Group differences by path, then render the resulting tree
  body = render_tree(build_diff_tree(diff_list))

  [heading, body].join("\n")
end
46
+
47
# Build the formatter that matches a document format.
#
# :xml and :html map to XmlFormatter, :json to JsonFormatter and :yaml to
# YamlFormatter; each file is loaded on first use. Any other value yields
# an instance of the receiver itself.
#
# @param format [Symbol] Format type
# @param use_color [Boolean] Forwarded to the formatter
# @param visualization_map [Hash, nil] Forwarded to the formatter
# @return [BaseFormatter] Formatter instance
def self.for_format(format, use_color: true, visualization_map: nil)
  formatter_class =
    case format
    when :xml, :html
      require_relative "xml_formatter"
      XmlFormatter
    when :json
      require_relative "json_formatter"
      JsonFormatter
    when :yaml
      require_relative "yaml_formatter"
      YamlFormatter
    else
      self
    end

  formatter_class.new(use_color: use_color,
                      visualization_map: visualization_map)
end
66
+
67
+ private
68
+
69
# Message shown when there is nothing to report.
#
# The emoji prefix is only added when color output is enabled.
#
# @return [String] Success message ending in a newline
def success_message
  prefix = @use_color ? "✅ " : ""
  colorize("#{prefix}Files are semantically equivalent\n", :green, :bold)
end
75
+
76
# Arrange differences into a nested Hash keyed by path segment.
#
# The path of each difference is taken from diff[:path] for Hashes that
# have that key, from extract_dom_path_from_diffnode for
# Canon::Diff::DiffNode instances, and from extract_dom_path otherwise.
#
# @param differences [Array] Differences to arrange
# @return [Hash] Tree as populated by add_to_tree
def build_diff_tree(differences)
  tree = {}

  differences.each do |diff|
    path =
      if diff.is_a?(Hash) && diff.key?(:path)
        # Ruby object difference (Hash format)
        diff[:path]
      elsif diff.is_a?(Canon::Diff::DiffNode)
        extract_dom_path_from_diffnode(diff)
      else
        # Legacy DOM difference (Hash format)
        extract_dom_path(diff)
      end

    add_to_tree(tree, path, diff)
  end

  tree
end
98
+
99
# Store a difference in the tree under its path.
#
# The path is split on ".", "[" and "]" and empty segments are discarded.
# Intermediate segments become nested Hashes; the difference is stored
# under :__diff__ in the Hash of the final segment. A later difference
# with the same path replaces the earlier one, and a path with no
# segments stores nothing.
#
# @param tree [Hash] Tree to update in place
# @param path [#to_s] Path such as "a.b[0].c"
# @param diff [Object] Difference to store
# @return [Array<String>] The path segments
def add_to_tree(tree, path, diff)
  segments = path.to_s.split(/[.\[\]]/).reject(&:empty?)
  *branches, leaf = segments

  unless leaf.nil?
    parent = branches.inject(tree) { |node, name| node[name] ||= {} }
    (parent[leaf] ||= {})[:__diff__] = diff
  end

  segments
end
113
+
114
# Build a dotted path for a legacy Hash-style DOM difference.
#
# Starts at diff[:node1] (or diff[:node2] when node1 is absent) and walks
# up through #parent, collecting every non-nil #name with the outermost
# ancestor first.
#
# @param diff [Hash] Difference with :node1 and/or :node2
# @return [String] Dotted path, or "" when neither node is present
def extract_dom_path(diff)
  node = diff[:node1] || diff[:node2]
  return "" unless node

  parts = []
  current = node

  while current.respond_to?(:name)
    name = current.name
    parts.unshift(name) if name

    # A node without #parent is the top of what can be walked. Stopping
    # here keeps the loop from revisiting the same node forever.
    break unless current.respond_to?(:parent)

    current = current.parent
  end

  parts.join(".")
end
129
+
130
# Build a dotted path for a DiffNode.
#
# Starts at diff_node.node1 (or node2 when node1 is absent) and walks up
# through #parent, collecting every non-nil #name with the outermost
# ancestor first. Falls back to the DiffNode's dimension when there is no
# node or no name was collected.
#
# @param diff_node [Canon::Diff::DiffNode] Difference to locate
# @return [String] Dotted path or the dimension as a string
def extract_dom_path_from_diffnode(diff_node)
  node = diff_node.node1 || diff_node.node2
  return diff_node.dimension.to_s unless node

  parts = []
  current = node

  while current.respond_to?(:name)
    name = current.name
    parts.unshift(name) if name

    # A node without #parent is the top of what can be walked. Stopping
    # here keeps the loop from revisiting the same node forever.
    break unless current.respond_to?(:parent)

    current = current.parent
  end

  parts.empty? ? diff_node.dimension.to_s : parts.join(".")
end
146
+
147
# Render the diff tree with box-drawing connectors.
#
# Keys are rendered in #to_s order. A node holding a :__diff__ entry is
# rendered through render_diff_node; any other node is rendered as a
# "key:" heading. In both cases the node's nested keys are rendered
# beneath it, so a difference recorded on a descendant of a node that has
# its own difference is still shown.
#
# @param tree [Hash] Nested Hash as populated by add_to_tree
# @param prefix [String] Indentation inherited from the ancestors
# @param is_last [Boolean] Not read by this method; accepted so existing
#   callers that pass it keep working
# @return [String] Rendered lines joined with newlines
def render_tree(tree, prefix: "", is_last: true)
  keys = tree.keys.reject { |k| k == :__diff__ }
  begin
    keys = keys.sort_by(&:to_s)
  rescue ArgumentError
    # If sorting fails, just use the keys as-is
  end

  lines = []

  keys.each_with_index do |key, index|
    last_item = index == keys.length - 1
    connector = last_item ? "└── " : "├── "
    # The continuation must be as wide as a connector (four columns),
    # otherwise nested levels do not line up under their parent.
    continuation = last_item ? "    " : "│   "

    value = tree[key]
    branch = value.is_a?(Hash)
    diff = value[:__diff__] if branch

    lines << if diff
               render_diff_node(key, diff, prefix, connector)
             else
               colorize("#{prefix}#{connector}#{key}:", :cyan)
             end

    # Descend whenever there are nested keys, including below a node
    # that carries its own difference.
    next unless branch && value.keys.any? { |k| k != :__diff__ }

    lines << render_tree(value, prefix: prefix + continuation,
                                is_last: last_item)
  end

  lines.join("\n")
end
182
+
183
# Render one difference as tree output.
#
# The base class has no rendering of its own: this always raises, so each
# concrete formatter has to override it.
#
# @param key [Object] Tree key the difference is stored under
# @param diff [Object] The difference stored under :__diff__
# @param prefix [String] Indentation inherited from the ancestors
# @param connector [String] Box-drawing connector for this entry
# @raise [NotImplementedError] always
def render_diff_node(key, diff, prefix, connector)
  raise NotImplementedError, "Subclasses must implement render_diff_node"
end
188
+
189
# Paint text with the given colors when color output is enabled.
#
# With color disabled the text is returned untouched. Otherwise the
# painted text is prefixed with an ANSI reset sequence.
#
# @param text [String] Text to colorize
# @param colors [Array<Symbol>] Arguments passed on to Paint
# @return [String] Colorized or original text
def colorize(text, *colors)
  if @use_color
    require "paint"
    "\e[0m#{Paint[text, *colors]}"
  else
    text
  end
end
196
+ end
197
+ end
198
+ end
199
+ end