canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,284 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_formatter"
4
+ require_relative "../legend"
5
+ require "strscan"
6
+
7
module Canon
  class DiffFormatter
    module ByLine
      # JSON formatter with semantic token-level highlighting.
      #
      # Both documents are pretty-printed before diffing so the line diff
      # follows the JSON structure. Lines reported as changed are diffed a
      # second time, token by token, so only the differing tokens are
      # highlighted.
      class JsonFormatter < BaseFormatter
        # Placeholder for a line-number column that has no number. It is as
        # wide as the "%4d" numbers so the gutter pipes stay aligned.
        BLANK_LINE_NUMBER = (" " * 4).freeze

        # A single JSON token. Alternatives are tried in this order:
        # whitespace run, quoted string, number, true/false/null literal,
        # structural character.
        JSON_TOKEN = Regexp.union(
          /\s+/,
          /"(?:[^"\\]|\\.)*"/,
          /-?\d+\.?\d*(?:[eE][+-]?\d+)?/,
          /\b(?:true|false|null)\b/,
          /[{}\[\]:,]/,
        ).freeze

        private_constant :BLANK_LINE_NUMBER, :JSON_TOKEN

        # Format semantic JSON diff with token-level highlighting.
        #
        # If any step of the semantic path raises a StandardError, a warning
        # line is emitted and the documents are diffed with SimpleFormatter
        # instead.
        #
        # @param doc1 [String] First JSON document
        # @param doc2 [String] Second JSON document
        # @return [String] Formatted diff
        def format(doc1, doc2)
          # Loaded lazily, only when a JSON diff is actually requested.
          require "canon/pretty_printer/json"
          printer = Canon::PrettyPrinter::Json.new(indent: 2)
          lines1 = printer.format(doc1).split("\n")
          lines2 = printer.format(doc2).split("\n")

          format_semantic_diff(::Diff::LCS.sdiff(lines1, lines2),
                               lines1, lines2)
        rescue StandardError => e
          warning = colorize(
            "Warning: JSON parsing failed (#{e.message}), using simple diff", :yellow
          )
          require_relative "simple_formatter"
          fallback = SimpleFormatter.new(
            use_color: @use_color,
            context_lines: @context_lines,
            diff_grouping_lines: @diff_grouping_lines,
            visualization_map: @visualization_map,
          )
          [warning, fallback.format(doc1, doc2)].join("\n")
        end

        private

        # Format semantic diff with token-level highlighting.
        #
        # Every line of the diff is emitted (no context trimming). Changed
        # lines ("!") are rendered as an old/new pair with only the
        # differing tokens highlighted.
        #
        # @param diffs [Array] LCS sdiff changes for the two line arrays
        # @param lines1 [Array<String>] Lines from first document
        # @param lines2 [Array<String>] Lines from second document
        # @return [String] Formatted diff
        def format_semantic_diff(diffs, lines1, lines2)
          output = []

          # Prepend a legend when the text contains characters that the
          # visualization map replaces.
          non_ascii = Legend.detect_non_ascii((lines1 + lines2).join,
                                              @visualization_map)
          unless non_ascii.empty?
            output << Legend.build_legend(non_ascii, use_color: @use_color)
            output << ""
          end

          diffs.each do |change|
            # sdiff positions are 0-based; displayed line numbers are 1-based.
            old_line = change.old_position&.succ
            new_line = change.new_position&.succ

            case change.action
            when "="
              output << format_unified_line(old_line, new_line, " ",
                                            change.old_element)
            when "-"
              output << format_unified_line(old_line, nil, "-",
                                            change.old_element, :red)
            when "+"
              output << format_unified_line(nil, new_line, "+",
                                            change.new_element, :green)
            when "!"
              token_diffs = ::Diff::LCS.sdiff(
                tokenize_json(change.old_element),
                tokenize_json(change.new_element),
              )
              output << format_token_diff_line(
                old_line, new_line,
                build_token_highlighted_text(token_diffs, :old),
                build_token_highlighted_text(token_diffs, :new)
              )
            end
          end

          output.join("\n")
        end

        # Format a unified diff line.
        #
        # @param old_num [Integer, nil] Line number in old file
        # @param new_num [Integer, nil] Line number in new file
        # @param marker [String] Diff marker
        # @param content [String] Line content
        # @param color [Symbol, nil] Color for diff lines
        # @return [String] Formatted line
        def format_unified_line(old_num, new_num, marker, content, color = nil)
          # Context lines (no color given) are emitted verbatim; only
          # added/removed lines go through character visualization.
          body = color ? apply_visualization(content, color) : content
          "#{gutter_prefix(old_num, new_num, marker, color)}#{body}"
        end

        # Format a changed line as a "-" row followed by a "+" row.
        #
        # @param old_line [Integer] Old line number
        # @param new_line [Integer] New line number
        # @param old_highlighted [String] Highlighted old text
        # @param new_highlighted [String] Highlighted new text
        # @return [String] The two formatted rows joined by a newline
        def format_token_diff_line(old_line, new_line, old_highlighted,
                                   new_highlighted)
          removed = gutter_prefix(old_line, nil, "-", :red)
          added = gutter_prefix(nil, new_line, "+", :green)

          "#{removed}#{old_highlighted}\n#{added}#{new_highlighted}"
        end

        # Build the "old|new marker | " gutter shared by every output row.
        #
        # @param old_num [Integer, nil] Old line number, nil for a blank column
        # @param new_num [Integer, nil] New line number, nil for a blank column
        # @param marker [String] Diff marker
        # @param marker_color [Symbol, nil] Color for the marker, if any
        # @return [String] Gutter including the trailing separator space
        def gutter_prefix(old_num, new_num, marker, marker_color = nil)
          # String#% is used on purpose: inside this class `format` is the
          # public diff entry point, not Kernel#format.
          old_col = old_num ? "%4d" % old_num : BLANK_LINE_NUMBER
          new_col = new_num ? "%4d" % new_num : BLANK_LINE_NUMBER
          return "#{old_col}|#{new_col}#{marker} | " unless @use_color

          pipe = colorize("|", :yellow)
          shown_marker = marker_color ? colorize(marker, marker_color) : marker
          "#{colorize(old_col, :yellow)}#{pipe}" \
            "#{colorize(new_col, :yellow)}#{shown_marker} #{pipe} "
        end

        # Tokenize a JSON line into meaningful tokens.
        #
        # Any character that does not start a recognized token becomes a
        # one-character token, so joining the tokens reproduces the line.
        #
        # @param line [String] JSON line to tokenize
        # @return [Array<String>] Tokens
        def tokenize_json(line)
          scanner = StringScanner.new(line)
          tokens = []
          tokens << (scanner.scan(JSON_TOKEN) || scanner.getch) until scanner.eos?
          tokens
        end

        # Build highlighted text for one side of a token-level diff.
        #
        # @param token_diffs [Array] Token-level sdiff changes
        # @param side [Symbol] Which side (:old or :new)
        # @return [String] Highlighted text
        def build_token_highlighted_text(token_diffs, side)
          old_side = side == :old

          parts = token_diffs.map do |change|
            case change.action
            when "="
              # Unchanged token: visualized, and wrapped in the :default
              # color when coloring is on.
              visual = apply_visualization(change.old_element)
              @use_color ? colorize(visual, :default) : visual
            when "-"
              # Deleted tokens only exist on the old side.
              apply_visualization(change.old_element, :red) if old_side
            when "+"
              # Added tokens only exist on the new side.
              apply_visualization(change.new_element, :green) if side == :new
            when "!"
              if old_side
                apply_visualization(change.old_element, :red)
              else
                apply_visualization(change.new_element, :green)
              end
            end
          end

          parts.compact.join
        end

        # Apply character visualization.
        #
        # Each character is replaced by its entry in the visualization map
        # (or kept when it has none). The result is painted bold in the
        # given color only when a color is given and coloring is enabled.
        #
        # @param token [String] Token to visualize
        # @param color [Symbol, nil] Optional color
        # @return [String] Visualized token
        def apply_visualization(token, color = nil)
          visual = token.each_char.map do |char|
            @visualization_map.fetch(char, char)
          end.join
          return visual unless color && @use_color

          require "paint"
          Paint[visual, color, :bold]
        end
      end
    end
  end
end
@@ -0,0 +1,190 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base_formatter"
4
+ require_relative "../legend"
5
+
6
module Canon
  class DiffFormatter
    module ByLine
      # Simple line-based formatter (fallback).
      #
      # Uses a plain LCS line diff without any format-specific
      # intelligence, grouped into hunks with surrounding context lines.
      class SimpleFormatter < BaseFormatter
        # Placeholder for a line-number column that has no number. It is as
        # wide as the "%4d" numbers so the gutter pipes stay aligned.
        BLANK_LINE_NUMBER = (" " * 4).freeze

        private_constant :BLANK_LINE_NUMBER

        # Format simple line-by-line diff.
        #
        # @param doc1 [String] First document
        # @param doc2 [String] Second document
        # @return [String] Formatted diff
        def format(doc1, doc2)
          output = []
          # Use split with -1 to preserve trailing empty strings
          # (from a trailing \n).
          lines1 = doc1.split("\n", -1)
          lines2 = doc2.split("\n", -1)

          # Prepend a legend when the text contains characters that the
          # visualization map replaces.
          non_ascii = Legend.detect_non_ascii((lines1 + lines2).join,
                                              @visualization_map)
          unless non_ascii.empty?
            output << Legend.build_legend(non_ascii, use_color: @use_color)
            output << ""
          end

          diffs = ::Diff::LCS.sdiff(lines1, lines2)
          hunks = build_hunks(diffs, lines1, lines2,
                              context_lines: @context_lines)
          hunks.each { |hunk| output << format_hunk(hunk) }

          output.join("\n")
        end

        private

        # Format a hunk of changes.
        #
        # Line counters start at the positions of the hunk's first change
        # and are advanced independently for the old and new side as rows
        # are emitted.
        #
        # @param hunk [Array] Hunk of diff changes
        # @return [String] Formatted hunk
        def format_hunk(hunk)
          output = []
          # sdiff positions are 0-based; displayed line numbers are 1-based.
          old_line = hunk.first.old_position + 1
          new_line = hunk.first.new_position + 1

          hunk.each do |change|
            case change.action
            when "="
              # Unchanged line (context)
              output << format_unified_line(old_line, new_line, " ",
                                            change.old_element)
              old_line += 1
              new_line += 1
            when "-"
              output << format_unified_line(old_line, nil, "-",
                                            change.old_element, :red)
              old_line += 1
            when "+"
              output << format_unified_line(nil, new_line, "+",
                                            change.new_element, :green)
              new_line += 1
            when "!"
              # Changed line: pass both counters, since they diverge after
              # any earlier addition or deletion in the hunk.
              output << format_changed_line(old_line, change.old_element,
                                            change.new_element, new_line)
              old_line += 1
              new_line += 1
            end
          end

          output.join("\n")
        end

        # Format a unified diff line.
        #
        # @param old_num [Integer, nil] Line number in old file
        # @param new_num [Integer, nil] Line number in new file
        # @param marker [String] Diff marker (' ', '-', '+')
        # @param content [String] Line content
        # @param color [Symbol, nil] Color for diff lines
        # @return [String] Formatted line
        def format_unified_line(old_num, new_num, marker, content, color = nil)
          # Context lines (no color given) are emitted verbatim; only
          # added/removed lines go through character visualization.
          body = color ? apply_visualization(content, color) : content
          "#{gutter_prefix(old_num, new_num, marker, color)}#{body}"
        end

        # Format a changed line as a "-" row followed by a "+" row.
        #
        # @param line_num [Integer] Line number in the old document
        # @param old_text [String] Old line text
        # @param new_text [String] New line text
        # @param new_line_num [Integer] Line number in the new document;
        #   defaults to line_num for callers that track a single counter
        # @return [String] The two formatted rows joined by a newline
        def format_changed_line(line_num, old_text, new_text,
                                new_line_num = line_num)
          removed = gutter_prefix(line_num, nil, "-", :red)
          added = gutter_prefix(nil, new_line_num, "+", :green)

          "#{removed}#{apply_visualization(old_text, :red)}\n" \
            "#{added}#{apply_visualization(new_text, :green)}"
        end

        # Build the "old|new marker | " gutter shared by every output row.
        #
        # @param old_num [Integer, nil] Old line number, nil for a blank column
        # @param new_num [Integer, nil] New line number, nil for a blank column
        # @param marker [String] Diff marker
        # @param marker_color [Symbol, nil] Color for the marker, if any
        # @return [String] Gutter including the trailing separator space
        def gutter_prefix(old_num, new_num, marker, marker_color = nil)
          # String#% is used on purpose: inside this class `format` is the
          # public diff entry point, not Kernel#format.
          old_col = old_num ? "%4d" % old_num : BLANK_LINE_NUMBER
          new_col = new_num ? "%4d" % new_num : BLANK_LINE_NUMBER
          return "#{old_col}|#{new_col}#{marker} | " unless @use_color

          # Line numbers and pipes are yellow; the marker takes the row color.
          pipe = colorize("|", :yellow)
          shown_marker = marker_color ? colorize(marker, marker_color) : marker
          "#{colorize(old_col, :yellow)}#{pipe}" \
            "#{colorize(new_col, :yellow)}#{shown_marker} #{pipe} "
        end

        # Apply character visualization using the configurable
        # visualization map.
        #
        # Each character is replaced by its entry in the map (or kept when
        # it has none). The result is painted bold in the given color only
        # when a color is given and coloring is enabled.
        #
        # @param token [String] The token to apply visualization to
        # @param color [Symbol, nil] Optional color to apply
        # @return [String] Visualized and optionally colored token
        def apply_visualization(token, color = nil)
          visual = token.each_char.map do |char|
            @visualization_map.fetch(char, char)
          end.join
          return visual unless color && @use_color

          require "paint"
          Paint[visual, color, :bold]
        end
      end
    end
  end
end