canon 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/.rubocop_todo.yml +276 -7
- data/README.adoc +203 -138
- data/_config.yml +116 -0
- data/docs/ADVANCED_TOPICS.adoc +20 -0
- data/docs/BASIC_USAGE.adoc +16 -0
- data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/docs/CLI.adoc +493 -0
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/docs/DIFF_FORMATTING.adoc +540 -0
- data/docs/FORMATS.adoc +447 -0
- data/docs/INDEX.adoc +222 -0
- data/docs/INPUT_VALIDATION.adoc +477 -0
- data/docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/docs/MATCH_OPTIONS.adoc +719 -0
- data/docs/MODES.adoc +432 -0
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/docs/OPTIONS.adoc +1387 -0
- data/docs/PREPROCESSING.adoc +491 -0
- data/docs/RSPEC.adoc +605 -0
- data/docs/RUBY_API.adoc +478 -0
- data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
- data/docs/UNDERSTANDING_CANON.adoc +17 -0
- data/docs/VERBOSE.adoc +482 -0
- data/exe/canon +7 -0
- data/lib/canon/cli.rb +179 -0
- data/lib/canon/commands/diff_command.rb +195 -0
- data/lib/canon/commands/format_command.rb +113 -0
- data/lib/canon/comparison/base_comparator.rb +39 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +410 -0
- data/lib/canon/comparison/json_comparator.rb +212 -0
- data/lib/canon/comparison/match_options.rb +616 -0
- data/lib/canon/comparison/xml_comparator.rb +566 -0
- data/lib/canon/comparison/yaml_comparator.rb +93 -0
- data/lib/canon/comparison.rb +239 -0
- data/lib/canon/config.rb +172 -0
- data/lib/canon/diff/diff_block.rb +71 -0
- data/lib/canon/diff/diff_block_builder.rb +105 -0
- data/lib/canon/diff/diff_classifier.rb +46 -0
- data/lib/canon/diff/diff_context.rb +85 -0
- data/lib/canon/diff/diff_context_builder.rb +107 -0
- data/lib/canon/diff/diff_line.rb +77 -0
- data/lib/canon/diff/diff_node.rb +56 -0
- data/lib/canon/diff/diff_node_mapper.rb +148 -0
- data/lib/canon/diff/diff_report.rb +133 -0
- data/lib/canon/diff/diff_report_builder.rb +62 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
- data/lib/canon/diff_formatter/character_map.yml +197 -0
- data/lib/canon/diff_formatter/debug_output.rb +431 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
- data/lib/canon/diff_formatter/legend.rb +141 -0
- data/lib/canon/diff_formatter.rb +520 -0
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html4_formatter.rb +17 -0
- data/lib/canon/formatters/html5_formatter.rb +17 -0
- data/lib/canon/formatters/html_formatter.rb +37 -0
- data/lib/canon/formatters/html_formatter_base.rb +163 -0
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/xml_formatter.rb +20 -55
- data/lib/canon/formatters/yaml_formatter.rb +4 -1
- data/lib/canon/pretty_printer/html.rb +57 -0
- data/lib/canon/pretty_printer/json.rb +25 -0
- data/lib/canon/pretty_printer/xml.rb +29 -0
- data/lib/canon/rspec_matchers.rb +222 -80
- data/lib/canon/validators/base_validator.rb +49 -0
- data/lib/canon/validators/html_validator.rb +138 -0
- data/lib/canon/validators/json_validator.rb +89 -0
- data/lib/canon/validators/xml_validator.rb +53 -0
- data/lib/canon/validators/yaml_validator.rb +73 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/attribute_handler.rb +80 -0
- data/lib/canon/xml/c14n.rb +36 -0
- data/lib/canon/xml/character_encoder.rb +38 -0
- data/lib/canon/xml/data_model.rb +225 -0
- data/lib/canon/xml/element_matcher.rb +196 -0
- data/lib/canon/xml/line_range_mapper.rb +158 -0
- data/lib/canon/xml/namespace_handler.rb +86 -0
- data/lib/canon/xml/node.rb +32 -0
- data/lib/canon/xml/nodes/attribute_node.rb +54 -0
- data/lib/canon/xml/nodes/comment_node.rb +23 -0
- data/lib/canon/xml/nodes/element_node.rb +56 -0
- data/lib/canon/xml/nodes/namespace_node.rb +38 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
- data/lib/canon/xml/nodes/root_node.rb +16 -0
- data/lib/canon/xml/nodes/text_node.rb +23 -0
- data/lib/canon/xml/processor.rb +151 -0
- data/lib/canon/xml/whitespace_normalizer.rb +72 -0
- data/lib/canon/xml/xml_base_handler.rb +188 -0
- data/lib/canon.rb +14 -3
- metadata +116 -21
|
@@ -0,0 +1,551 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "paint"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
class DiffFormatter
|
|
7
|
+
# Formats dimension-specific detail for individual differences
|
|
8
|
+
# Provides actionable, colorized output showing exactly what changed
|
|
9
|
+
module DiffDetailFormatter
|
|
10
|
+
class << self
|
|
11
|
+
# Render every difference as one banner-framed semantic diff report.
#
# @param differences [Array<DiffNode, Hash>] differences to render
# @param use_color [Boolean] whether to emit ANSI colors
# @return [String] the report, or "" when there are no differences
def format_report(differences, use_color: true)
  return "" if differences.empty?

  total = differences.length
  noun = total == 1 ? "difference" : "differences"
  rule = colorize("=" * 70, :cyan, use_color, bold: true)
  title = colorize(" SEMANTIC DIFF REPORT (#{total} #{noun})",
                   :cyan, use_color, bold: true)

  # Each entry is preceded by a blank separator line.
  entries = differences.each_with_index.flat_map do |diff, index|
    ["", format_single_diff(diff, index + 1, total, use_color)]
  end

  ["", rule, title, rule, *entries, "", rule, ""].join("\n")
end
|
|
39
|
+
|
|
40
|
+
private
|
|
41
|
+
|
|
42
|
+
# Render one difference: header, dimension, location, expected/actual
# details and (when present) a change summary.
#
# Accepts either a DiffNode-like object or a Hash diff. Any StandardError
# raised while formatting is rescued and reported as a single red header
# line carrying the error message.
#
# @param diff [DiffNode, Hash] the difference to render
# @param number [Integer] 1-based position of this difference
# @param total [Integer] total number of differences
# @param use_color [Boolean] whether to emit ANSI colors
# @return [String] multi-line description of the difference
def format_single_diff(diff, number, total, use_color)
  # Anything that cannot answer normative? (e.g. a Hash diff) is normative.
  status = "NORMATIVE"
  status = "INFORMATIVE" if diff.respond_to?(:normative?) && !diff.normative?
  heading_color = status == "NORMATIVE" ? :green : :yellow

  lines = []
  lines << colorize("🔍 DIFFERENCE ##{number}/#{total} [#{status}]",
                    heading_color, use_color, bold: true)
  lines << colorize("─" * 70, :cyan, use_color)

  dimension =
    if diff.respond_to?(:dimension)
      diff.dimension
    elsif diff.is_a?(Hash)
      diff[:diff_code] || diff[:dimension] || "unknown"
    else
      "unknown"
    end
  dimension_label = colorize("Dimension:", :cyan, use_color, bold: true)
  lines << "#{dimension_label} #{colorize(dimension.to_s, :magenta, use_color)}"

  # XPath for XML/HTML nodes, path for Hash diffs
  location = extract_location(diff)
  location_label = colorize("Location:", :cyan, use_color, bold: true)
  lines << "#{location_label} #{colorize(location, :blue, use_color)}"
  lines << ""

  expected, actual, changes = format_dimension_details(diff, use_color)

  lines << colorize("⊖ Expected (File 1):", :red, use_color, bold: true)
  lines << " #{expected}"
  lines << ""
  lines << colorize("⊕ Actual (File 2):", :green, use_color, bold: true)
  lines << " #{actual}"

  if changes && !changes.empty?
    lines << ""
    lines << colorize("✨ Changes:", :yellow, use_color, bold: true)
    lines << " #{changes}"
  end

  lines.join("\n")
rescue StandardError => e
  # Never let one malformed diff abort the whole report.
  colorize(
    "🔍 DIFFERENCE ##{number}/#{total} [Error formatting: #{e.message}]",
    :red, use_color, bold: true
  )
end
|
|
101
|
+
|
|
102
|
+
# Work out where a difference is located.
#
# Hash diffs report their :path (or "(root)"). Objects exposing node1
# use the XPath of node1, falling back to node2. Otherwise the dimension
# name is used when available, else "(unknown)".
#
# @param diff [DiffNode, Hash] the difference
# @return [String] location description
def extract_location(diff)
  return diff[:path] || "(root)" if diff.is_a?(Hash)

  node = (diff.node1 || diff.node2) if diff.respond_to?(:node1)
  return extract_xpath(node) if node.respond_to?(:name)

  diff.respond_to?(:dimension) ? diff.dimension.to_s : "(unknown)"
end
|
|
124
|
+
|
|
125
|
+
# Build a simple slash-separated path of element names from the root
# down to +node+ by walking parent links.
#
# Returns "/" for nil and for document nodes. The walk is capped at 100
# levels and stops at a document node, a node named "document" or
# "#document", a missing parent, or a parent equal to the node itself.
# Errors during the walk are swallowed and the partial path is returned.
#
# @param node [Object, nil] node responding to #name / #parent
# @return [String] path such as "/root/child"
def extract_xpath(node)
  return "/" if node.nil?

  # Document nodes don't have meaningful XPaths
  if node.is_a?(Nokogiri::XML::Document) ||
      node.is_a?(Nokogiri::HTML::Document) ||
      node.is_a?(Nokogiri::HTML4::Document) ||
      node.is_a?(Nokogiri::HTML5::Document)
    return "/"
  end

  segments = []
  cursor = node

  begin
    # Hard cap on depth guards against cyclic parent chains.
    100.times do
      break unless cursor.respond_to?(:name) && cursor.name
      break if ["document", "#document"].include?(cursor.name)
      break if cursor.is_a?(Nokogiri::XML::Document) ||
        cursor.is_a?(Nokogiri::HTML::Document)

      segments.unshift(cursor.name)
      break unless cursor.respond_to?(:parent)

      ancestor = begin
        cursor.parent
      rescue StandardError
        nil
      end

      break unless ancestor
      break if ancestor == cursor

      cursor = ancestor
    end
  rescue StandardError
    # Keep whatever part of the path was collected before the failure.
  end

  "/#{segments.join('/')}"
end
|
|
173
|
+
|
|
174
|
+
# Dispatch to the detail formatter matching the diff's dimension.
#
# Hash diffs go to format_hash_diff_details. For other objects the
# dimension symbol selects a formatter; unrecognised or missing
# dimensions use format_fallback_details.
#
# @param diff [DiffNode, Hash] the difference
# @param use_color [Boolean] whether to emit ANSI colors
# @return [Array] [expected detail, actual detail, changes summary or nil]
def format_dimension_details(diff, use_color)
  return format_hash_diff_details(diff, use_color) if diff.is_a?(Hash)

  formatters = {
    attribute_presence: :format_attribute_presence_details,
    attribute_values: :format_attribute_values_details,
    text_content: :format_text_content_details,
    structural_whitespace: :format_structural_whitespace_details,
    comments: :format_comments_details,
  }

  dimension = diff.respond_to?(:dimension) ? diff.dimension : nil
  send(formatters.fetch(dimension, :format_fallback_details), diff, use_color)
end
|
|
199
|
+
|
|
200
|
+
# Describe an attribute_presence difference: which attribute names each
# node carries and which were added or removed.
#
# "Added" lists names present only on node2; "Removed" lists names present
# only on node1. Both nodes must respond to #name.
#
# @param diff [DiffNode] difference exposing node1 / node2
# @param use_color [Boolean] whether to emit ANSI colors
# @return [Array<String>] [expected detail, actual detail, changes summary]
def format_attribute_presence_details(diff, use_color)
  node1 = diff.node1
  node2 = diff.node2

  attrs1 = get_attribute_names(node1)
  attrs2 = get_attribute_names(node2)

  missing = attrs1 - attrs2 # In node1 but not node2
  extra = attrs2 - attrs1   # In node2 but not node1

  detail1 = "<#{node1.name}> with #{attrs1.length} #{attrs1.length == 1 ? 'attribute' : 'attributes'}: #{attrs1.join(', ')}"
  detail2 = "<#{node2.name}> with #{attrs2.length} #{attrs2.length == 1 ? 'attribute' : 'attributes'}: #{attrs2.join(', ')}"

  changes_parts = []
  if extra.any?
    added = extra.map { |a| colorize("+#{a}", :green, use_color) }.join(", ")
    changes_parts << "Added: #{added}"
  end
  if missing.any?
    removed = missing.map { |a| colorize("-#{a}", :red, use_color) }.join(", ")
    changes_parts << "Removed: #{removed}"
  end

  [detail1, detail2, changes_parts.join(" | ")]
end
|
|
237
|
+
|
|
238
|
+
# Describe an attribute_values difference by showing the first shared
# attribute whose value differs, plus a classification of the change
# (whitespace-only, whitespace normalization, or a real value change).
#
# When no differing shared attribute is found, a generic
# "(values differ)" pair is returned with no change summary.
#
# @param diff [DiffNode] difference exposing node1 / node2
# @param use_color [Boolean] whether to emit ANSI colors
# @return [Array] [expected detail, actual detail, changes summary or nil]
def format_attribute_values_details(diff, use_color)
  left = diff.node1
  right = diff.node2

  attr_name = find_differing_attribute(left, right)
  unless attr_name
    return ["<#{left.name}> (values differ)",
            "<#{right.name}> (values differ)", nil]
  end

  val1 = get_attribute_value(left, attr_name)
  val2 = get_attribute_value(right, attr_name)

  expected = "<#{left.name}> #{colorize(attr_name, :cyan, use_color)}=\"#{escape_quotes(val1)}\""
  actual = "<#{right.name}> #{colorize(attr_name, :cyan, use_color)}=\"#{escape_quotes(val2)}\""

  summary =
    if val1.strip == val2.strip && val1 != val2
      # Values differ only in leading/trailing whitespace
      "Whitespace difference only"
    elsif val1.gsub(/\s+/, " ") == val2.gsub(/\s+/, " ")
      # Values match once whitespace runs are collapsed
      "Whitespace normalization difference"
    else
      "Value changed"
    end

  [expected, actual, summary]
end
|
|
270
|
+
|
|
271
|
+
# Describe a text_content difference with a quoted preview (at most 100
# characters) of each node's text.
#
# The element label is taken from node1's name, or "(text)" when node1
# has no #name. The change summary carries a warning when either node
# sits inside a whitespace-preserving element.
#
# @param diff [DiffNode] difference exposing node1 / node2
# @param use_color [Boolean] whether to emit ANSI colors
# @return [Array<String>] [expected detail, actual detail, changes summary]
def format_text_content_details(diff, use_color)
  left = diff.node1
  right = diff.node2

  left_text = get_node_text(left)
  right_text = get_node_text(right)
  left_preview = truncate_text(left_text, 100)
  right_preview = truncate_text(right_text, 100)

  tag = left.respond_to?(:name) ? left.name : "(text)"
  expected = "<#{tag}> \"#{escape_quotes(left_preview)}\""
  actual = "<#{tag}> \"#{escape_quotes(right_preview)}\""

  preserved = inside_preserve_element?(left) || inside_preserve_element?(right)
  summary =
    if preserved
      warning = colorize("⚠️ Whitespace preserved", :yellow, use_color,
                         bold: true)
      warning + " (inside <pre>, <code>, etc. - whitespace is significant)"
    else
      "Text content changed"
    end

  [expected, actual, summary]
end
|
|
299
|
+
|
|
300
|
+
# Describe a structural_whitespace difference, showing each node's text
# (at most 80 characters) with whitespace characters made visible.
#
# @param diff [DiffNode] difference exposing node1 / node2
# @param _use_color [Boolean] unused
# @return [Array<String>] [expected detail, actual detail, changes summary]
def format_structural_whitespace_details(diff, _use_color)
  left = diff.node1
  right = diff.node2

  left_text = get_node_text(left)
  right_text = get_node_text(right)

  # Truncate first, then substitute visible markers for whitespace.
  left_preview = visualize_whitespace(truncate_text(left_text, 80))
  right_preview = visualize_whitespace(truncate_text(right_text, 80))

  tag = left.respond_to?(:name) ? left.name : "(text)"

  [
    "<#{tag}> \"#{left_preview}\"",
    "<#{tag}> \"#{right_preview}\"",
    "Whitespace-only difference (informative)",
  ]
end
|
|
321
|
+
|
|
322
|
+
# Describe a comments difference, rendering each node's content (at most
# 80 characters) inside comment delimiters. Nodes without #content are
# shown as an empty comment.
#
# @param diff [DiffNode] difference exposing node1 / node2
# @param _use_color [Boolean] unused
# @return [Array<String>] [expected detail, actual detail, changes summary]
def format_comments_details(diff, _use_color)
  expected, actual = [diff.node1, diff.node2].map do |node|
    body = node.respond_to?(:content) ? node.content.to_s : ""
    "<!-- #{truncate_text(body, 80)} -->"
  end

  [expected, actual, "Comment content differs"]
end
|
|
337
|
+
|
|
338
|
+
# Describe a Hash diff: the path with each side's value, plus a summary
# chosen from the diff's :diff_code.
#
# @param diff [Hash] diff with :path, :value1, :value2 and :diff_code keys
# @param _use_color [Boolean] unused
# @return [Array<String>] [expected detail, actual detail, changes summary]
def format_hash_diff_details(diff, _use_color)
  location = diff[:path] || "(root)"

  expected, actual = diff.values_at(:value1, :value2).map do |value|
    "#{location} = #{format_json_value(value)}"
  end

  summary =
    case diff[:diff_code]
    when Canon::Comparison::MISSING_HASH_KEY then "Key missing"
    when Canon::Comparison::UNEQUAL_PRIMITIVES then "Value changed"
    when Canon::Comparison::UNEQUAL_ARRAY_LENGTHS then "Array length differs"
    else "Difference detected"
    end

  [expected, actual, summary]
end
|
|
360
|
+
|
|
361
|
+
# Generic formatter for dimensions without a dedicated formatter.
#
# Shows a brief description of each node when the diff exposes node1 and
# node2; otherwise both sides are "(unknown)". No change summary is given.
#
# @param diff [Object] the difference
# @param _use_color [Boolean] unused
# @return [Array] [expected detail, actual detail, nil]
def format_fallback_details(diff, _use_color)
  has_nodes = diff.respond_to?(:node1) && diff.respond_to?(:node2)
  return ["(unknown)", "(unknown)", nil] unless has_nodes

  [format_node_brief(diff.node1), format_node_brief(diff.node2), nil]
end
|
|
371
|
+
|
|
372
|
+
# Produce a compact, single-line rendering of a value for display.
#
# nil becomes "nil"; strings are quoted and cut to 50 characters; hashes
# and arrays are summarised by their size; everything else uses #to_s.
#
# @param value [Object] value to render
# @return [String] short display form
def format_json_value(value)
  case value
  when nil then "nil"
  when String then "\"#{truncate_text(value, 50)}\""
  when Hash
    value.empty? ? "{...}" : "{...} (#{value.keys.length} keys)"
  when Array
    value.empty? ? "[...]" : "[...] (#{value.length} items)"
  else
    value.to_s
  end
end
|
|
387
|
+
|
|
388
|
+
# Helper: List a node's attribute names in sorted order.
#
# String keys are used as-is; other keys contribute their #name when they
# have one, otherwise their #to_s. Returns [] for objects without
# #attributes.
#
# @param node [Object] node that may respond to #attributes
# @return [Array<String>] sorted attribute names
def get_attribute_names(node)
  return [] unless node.respond_to?(:attributes)

  names = node.attributes.map do |key, _value|
    next key if key.is_a?(String)

    key.respond_to?(:name) ? key.name : key.to_s
  end
  names.sort
end
|
|
400
|
+
|
|
401
|
+
# Helper: Find the first attribute present on both nodes whose values
# are not equal.
#
# @param node1 [Object] first node
# @param node2 [Object] second node
# @return [String, nil] attribute name, or nil when none differs or
#   either node lacks #attributes
def find_differing_attribute(node1, node2)
  both_have_attrs = node1.respond_to?(:attributes) &&
    node2.respond_to?(:attributes)
  return nil unless both_have_attrs

  left = get_attributes_hash(node1)
  right = get_attributes_hash(node2)

  # Walk node1's names in order, considering only those node2 also has.
  left.each_key.find { |name| right.key?(name) && left[name] != right[name] }
end
|
|
412
|
+
|
|
413
|
+
# Helper: Convert a node's attributes into a name => value Hash.
#
# Names are derived as in get_attribute_names (String key, else #name,
# else #to_s). Values use #value when available, otherwise #to_s.
# Returns {} for objects without #attributes.
#
# @param node [Object] node that may respond to #attributes
# @return [Hash] attribute values keyed by attribute name
def get_attributes_hash(node)
  return {} unless node.respond_to?(:attributes)

  node.attributes.each_with_object({}) do |(key, val), result|
    name =
      if key.is_a?(String)
        key
      elsif key.respond_to?(:name)
        key.name
      else
        key.to_s
      end
    result[name] = val.respond_to?(:value) ? val.value : val.to_s
  end
end
|
|
429
|
+
|
|
430
|
+
# Helper: Look up a single attribute value on a node.
#
# @param node [Object] node that may respond to #attributes
# @param attr_name [String] attribute name to look up
# @return [Object] the value, or "" when the node has no #attributes or
#   the attribute is absent
def get_attribute_value(node, attr_name)
  found = node.respond_to?(:attributes) &&
    get_attributes_hash(node)[attr_name]
  found || ""
end
|
|
437
|
+
|
|
438
|
+
# Helper: Read a node's textual content.
#
# Prefers #content, then #text; returns "" when the node offers neither.
#
# @param node [Object] node to read
# @return [String] the node's text
def get_node_text(node)
  return node.content.to_s if node.respond_to?(:content)
  return node.text.to_s if node.respond_to?(:text)

  ""
end
|
|
448
|
+
|
|
449
|
+
# Helper: Shorten text so it never exceeds +max_length+ characters.
#
# Text that already fits is returned unchanged. Longer text is cut and
# suffixed with "..." so the result is exactly +max_length+ long. When
# +max_length+ is too small to hold the ellipsis (below 3), the text is
# simply hard-cut to +max_length+ characters (negative limits count as 0).
#
# @param text [String] text to shorten
# @param max_length [Integer] maximum length of the result
# @return [String] text of at most +max_length+ characters
def truncate_text(text, max_length)
  return text if text.length <= max_length

  ellipsis = "..."
  # A negative slice end would otherwise keep almost the whole string.
  return text[0, [max_length, 0].max] if max_length < ellipsis.length

  "#{text[0...(max_length - ellipsis.length)]}#{ellipsis}"
end
|
|
455
|
+
|
|
456
|
+
# Helper: Replace whitespace characters with visible stand-ins:
# space becomes "␣", tab becomes "→" and newline becomes "↵".
#
# @param text [String] text to annotate
# @return [String] copy of text with whitespace made visible
def visualize_whitespace(text)
  markers = { " " => "␣", "\t" => "→", "\n" => "↵" }
  markers.reduce(text) { |result, (plain, marker)| result.gsub(plain, marker) }
end
|
|
463
|
+
|
|
464
|
+
# Helper: Backslash-escape double quotes so text can be shown inside a
# double-quoted display string.
#
# Only double quotes are escaped; backslashes are deliberately left
# untouched. The result is for human-readable output only; it is not
# used for code generation or input sanitization.
#
# @param text [String] text to escape
# @return [String] text with every " replaced by \"
def escape_quotes(text)
  text.gsub('"') { '\\"' }
end
|
|
475
|
+
|
|
476
|
+
# Helper: Tell whether a node has an ancestor in which whitespace is
# significant (pre, code, textarea, script or style; names compared
# case-insensitively).
#
# Only ancestors are examined, not the node itself. nil, document nodes
# and document fragments yield false. The upward walk is capped at 50
# levels, and any error raised during it results in false.
#
# @param node [Object, nil] node to inspect
# @return [Boolean] true when a whitespace-preserving ancestor is found
def inside_preserve_element?(node)
  return false if node.nil?

  # Document nodes and fragments don't have meaningful parents
  return false if node.is_a?(Nokogiri::XML::Document) ||
    node.is_a?(Nokogiri::HTML::Document) ||
    node.is_a?(Nokogiri::HTML4::Document) ||
    node.is_a?(Nokogiri::HTML5::Document) ||
    node.is_a?(Nokogiri::XML::DocumentFragment)

  whitespace_sensitive = %w[pre code textarea script style]

  begin
    cursor = node

    # Depth cap guards against cyclic parent chains.
    50.times do
      break unless cursor
      break if cursor.is_a?(Nokogiri::XML::Document) ||
        cursor.is_a?(Nokogiri::HTML::Document)
      break unless cursor.respond_to?(:parent)

      ancestor = begin
        cursor.parent
      rescue StandardError
        nil
      end

      break unless ancestor
      break if ancestor == cursor

      if ancestor.respond_to?(:name) &&
          whitespace_sensitive.include?(ancestor.name.to_s.downcase)
        return true
      end

      cursor = ancestor
    end
  rescue StandardError
    # Traversal failed; treat as not preserved.
    return false
  end

  false
end
|
|
526
|
+
|
|
527
|
+
# Helper: One-token description of a node.
#
# @param node [Object, nil] node to describe
# @return [String] "(nil)" for nil, "<name>" for objects responding to
#   #name, otherwise the object's class name
def format_node_brief(node)
  return "(nil)" if node.nil?

  node.respond_to?(:name) ? "<#{node.name}>" : node.class.name
end
|
|
537
|
+
|
|
538
|
+
# Helper: Wrap text in ANSI color codes via Paint when colors are enabled.
#
# @param text [String] text to colorize
# @param color [Symbol] Paint color name
# @param use_color [Boolean] when falsy, text is returned untouched
# @param bold [Boolean] whether to add the :bold style
# @return [String] colorized or plain text
def colorize(text, color, use_color, bold: false)
  return text unless use_color

  styles = bold ? [color, :bold] : [color]
  Paint[text, *styles]
end
|
|
548
|
+
end
|
|
549
|
+
end
|
|
550
|
+
end
|
|
551
|
+
end
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "unicode/name"
|
|
4
|
+
|
|
5
|
+
module Canon
|
|
6
|
+
class DiffFormatter
|
|
7
|
+
# Module for building Unicode character visualization legends
|
|
8
|
+
module Legend
|
|
9
|
+
# Collect the distinct non-ASCII characters of +text+ that have a
# visualization mapping, together with display metadata.
#
# Characters with code point <= 127, and characters that map to
# themselves (or are absent from +visualization_map+), are skipped.
# The name comes from DiffFormatter::CHARACTER_METADATA when available,
# otherwise from Unicode::Name, falling back to "UNKNOWN". The category
# defaults to :control when the character is not in
# DiffFormatter::CHARACTER_CATEGORY_MAP.
#
# @param text [String] Text to analyze
# @param visualization_map [Hash] Character => visualization glyph
# @return [Hash] character => { visualization:, codepoint:, name:, category: }
def self.detect_non_ascii(text, visualization_map)
  category_map = DiffFormatter::CHARACTER_CATEGORY_MAP
  metadata = DiffFormatter::CHARACTER_METADATA

  text.each_char.with_object({}) do |char, found|
    next if char.ord <= 127 || found.key?(char)

    glyph = visualization_map.fetch(char, char)
    next if glyph == char # no visualization mapping

    known = metadata[char]
    label = (known && known[:name]) || Unicode::Name.of(char) || "UNKNOWN"

    found[char] = {
      visualization: glyph,
      codepoint: format("U+%04X", char.ord),
      name: label,
      category: category_map.fetch(char, :control),
    }
  end
end
|
|
45
|
+
|
|
46
|
+
# Build formatted legend from detected characters
#
# Characters are grouped by category and listed, sorted by code point,
# under each category heading in the order given by
# DiffFormatter::CHARACTER_CATEGORY_NAMES. Characters whose category is
# not listed there are not printed. Only headings and separators are
# colorized; the per-character lines are always plain text.
#
# @param detected_chars [Hash] Hash from detect_non_ascii
# @param use_color [Boolean] Whether to use colors
# @return [String, nil] Formatted legend or nil if no characters
def self.build_legend(detected_chars, use_color: true)
  return nil if detected_chars.empty?

  # Each group is an array of [char, info] pairs
  grouped = detected_chars.group_by { |_char, info| info[:category] }

  separator = "━" * 60
  output = [
    colorize("Character Visualization Legend:", :cyan, :bold, use_color),
    colorize(separator, :cyan, :bold, use_color),
  ]

  DiffFormatter::CHARACTER_CATEGORY_NAMES.each do |category_key, category_name|
    chars = grouped[category_key]
    next unless chars

    output << colorize("#{category_name}:", :yellow, :bold, use_color)

    chars.sort_by { |char, _info| char.ord }.each do |char, info|
      # Line layout: '<visualization>': <codepoint> ('<original>') <name>
      name = format_name(info[:name])
      original = format_original_char(char)
      output << " '#{info[:visualization]}': #{info[:codepoint]} ('#{original}') #{name}"
    end
    output << ""
  end

  output << colorize(separator, :cyan, :bold, use_color)
  output.join("\n")
end
|
|
90
|
+
|
|
91
|
+
# Format character name for display
#
# Each word (a run of characters other than hyphens and whitespace) is
# capitalized, except words of one or two characters, which are
# upper-cased. The separators between words are kept exactly as given,
# e.g. "FOUR-PER-EM SPACE" becomes "Four-Per-EM Space".
#
# @param name [String] Unicode character name
# @return [String] Formatted name
def self.format_name(name)
  name.gsub(/[^-\s]+/) do |word|
    word.length <= 2 ? word.upcase : word.capitalize
  end
end
|
|
105
|
+
|
|
106
|
+
# Format original character for display in legend
#
# Newline, carriage return, tab and NUL are shown as their backslash
# escape sequences; any other character is returned unchanged.
#
# @param char [String] Original character
# @return [String] Formatted for display
def self.format_original_char(char)
  escapes = {
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t",
    "\u0000" => "\\0",
  }
  escapes.fetch(char, char)
end
|
|
124
|
+
|
|
125
|
+
# Colorize text if color is enabled
#
# When enabled, the paint library is loaded on demand and the painted
# text is prefixed with an ANSI reset sequence.
#
# @param text [String] Text to colorize
# @param colors [Array<Symbol>] Colors/styles passed through to Paint
# @param use_color [Boolean] Whether to use colors (last positional argument)
# @return [String] Colorized or plain text
def self.colorize(text, *colors, use_color)
  if use_color
    require "paint"
    "\e[0m#{Paint[text, *colors]}"
  else
    text
  end
end
|
|
137
|
+
|
|
138
|
+
private_class_method :format_name, :format_original_char, :colorize
|
|
139
|
+
end
|
|
140
|
+
end
|
|
141
|
+
end
|