canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,551 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "paint"
4
+
5
+ module Canon
6
+ class DiffFormatter
7
+ # Formats dimension-specific detail for individual differences
8
+ # Provides actionable, colorized output showing exactly what changed
9
+ module DiffDetailFormatter
10
+ class << self
11
+ # Format all differences as a semantic diff report
12
+ #
13
+ # @param differences [Array<DiffNode>] Array of differences
14
+ # @param use_color [Boolean] Whether to use colors
15
+ # @return [String] Formatted semantic diff report
16
# Render every difference as one banner-framed semantic diff report.
#
# @param differences [Array] differences to render (DiffNode-like objects or Hashes)
# @param use_color [Boolean] whether to colorize the output
# @return [String] the report, or "" when there are no differences
def format_report(differences, use_color: true)
  return "" if differences.empty?

  total = differences.length
  noun = total == 1 ? "difference" : "differences"
  rule = colorize("=" * 70, :cyan, use_color, bold: true)
  title = colorize(" SEMANTIC DIFF REPORT (#{total} #{noun})", :cyan,
                   use_color, bold: true)

  lines = ["", rule, title, rule]
  # Each entry is preceded by a blank line and numbered from 1.
  differences.each.with_index(1) do |diff, number|
    lines.push("", format_single_diff(diff, number, total, use_color))
  end
  lines.push("", rule, "")

  lines.join("\n")
end
39
+
40
+ private
41
+
42
+ # Format a single difference with dimension-specific details
43
# Render one difference: status header, dimension, location, both sides
# and an optional change summary. Works for DiffNode-like objects and for
# plain Hashes. Any StandardError raised while formatting is converted
# into a single red "Error formatting" line instead of propagating.
def format_single_diff(diff, number, total, use_color)
  # Objects without #normative? (e.g. Hash diffs) are always normative.
  normative = diff.respond_to?(:normative?) ? diff.normative? : true
  status = normative ? "NORMATIVE" : "INFORMATIVE"
  status_color = normative ? :green : :yellow

  dimension =
    if diff.respond_to?(:dimension)
      diff.dimension
    elsif diff.is_a?(Hash)
      diff[:diff_code] || diff[:dimension] || "unknown"
    else
      "unknown"
    end

  lines = [
    colorize("🔍 DIFFERENCE ##{number}/#{total} [#{status}]",
             status_color, use_color, bold: true),
    colorize("─" * 70, :cyan, use_color),
  ]

  dimension_label = colorize("Dimension:", :cyan, use_color, bold: true)
  lines << "#{dimension_label} #{colorize(dimension.to_s, :magenta, use_color)}"

  # Location is whatever extract_location reports for this diff.
  location = extract_location(diff)
  location_label = colorize("Location:", :cyan, use_color, bold: true)
  lines << "#{location_label} #{colorize(location, :blue, use_color)}"
  lines << ""

  expected, actual, changes = format_dimension_details(diff, use_color)

  lines << colorize("⊖ Expected (File 1):", :red, use_color, bold: true)
  lines << " #{expected}"
  lines << ""
  lines << colorize("⊕ Actual (File 2):", :green, use_color, bold: true)
  lines << " #{actual}"

  # The summary section is omitted when there is nothing to say.
  if changes && !changes.empty?
    lines << ""
    lines << colorize("✨ Changes:", :yellow, use_color, bold: true)
    lines << " #{changes}"
  end

  lines.join("\n")
rescue StandardError => e
  message = "🔍 DIFFERENCE ##{number}/#{total} [Error formatting: #{e.message}]"
  colorize(message, :red, use_color, bold: true)
end
101
+
102
+ # Extract XPath or JSON path for the difference location
103
# Work out a location label for a difference.
#
# Hash diffs use their :path entry (or "(root)"). Other diffs use the
# XPath of node1 (or node2 when node1 is nil) if that node has a name.
# Otherwise the dimension, or "(unknown)", is returned.
def extract_location(diff)
  return diff[:path] || "(root)" if diff.is_a?(Hash)

  # node stays nil when the diff exposes no #node1.
  node = (diff.node1 || diff.node2) if diff.respond_to?(:node1)
  return extract_xpath(node) if node.respond_to?(:name)

  diff.respond_to?(:dimension) ? diff.dimension.to_s : "(unknown)"
end
124
+
125
+ # Extract XPath from an XML/HTML node
126
# Build a simple slash-separated path of element names from the document
# root down to +node+ (no positional predicates are added).
#
# Only Nokogiri::XML::Document is checked: Nokogiri's HTML4 and HTML5
# document classes inherit from it, so the single check covers them
# without referencing constants that may be undefined on some
# platforms or Nokogiri versions.
#
# @param node [#name, #parent, nil] node to locate
# @return [String] "/" for nil or document nodes, otherwise "/a/b/c"
def extract_xpath(node)
  return "/" if node.nil? || node.is_a?(Nokogiri::XML::Document)

  parts = []
  current = node
  max_depth = 100 # hard cap on the ancestor walk

  begin
    max_depth.times do
      break unless current.respond_to?(:name) && current.name
      # Stop at document-level nodes
      break if ["document", "#document"].include?(current.name)
      break if current.is_a?(Nokogiri::XML::Document)

      parts.unshift(current.name)
      break unless current.respond_to?(:parent)

      # A failing #parent is treated as "no parent".
      parent = begin
        current.parent
      rescue StandardError
        nil
      end
      break if !parent || parent == current

      current = parent
    end
  rescue StandardError
    # Best effort: fall through and return whatever was collected.
  end

  "/#{parts.join('/')}"
end
173
+
174
+ # Format details based on dimension type
175
# Dispatch to the detail formatter that matches the diff's dimension.
# Hash diffs go to the Hash formatter. Unrecognized or missing
# dimensions go to the fallback formatter.
#
# @return [Array] whatever the selected formatter returns
def format_dimension_details(diff, use_color)
  return format_hash_diff_details(diff, use_color) if diff.is_a?(Hash)

  handlers = {
    attribute_presence: :format_attribute_presence_details,
    attribute_values: :format_attribute_values_details,
    text_content: :format_text_content_details,
    structural_whitespace: :format_structural_whitespace_details,
    comments: :format_comments_details,
  }

  # dimension stays nil when the diff does not expose #dimension.
  dimension = diff.dimension if diff.respond_to?(:dimension)
  handler = handlers.fetch(dimension, :format_fallback_details)
  send(handler, diff, use_color)
end
199
+
200
+ # Format attribute_presence dimension details
201
# Describe an attribute_presence difference: the attribute list on each
# side plus which attribute names were added or removed.
#
# @param diff [#node1, #node2] difference whose nodes respond to #name
# @param use_color [Boolean] whether to colorize added/removed names
# @return [Array(String, String, String)] expected, actual, change summary
#   (the summary is "" when the attribute name sets are equal)
def format_attribute_presence_details(diff, use_color)
  node1 = diff.node1
  node2 = diff.node2

  attrs1 = get_attribute_names(node1)
  attrs2 = get_attribute_names(node2)

  describe = lambda do |node, attrs|
    noun = attrs.length == 1 ? "attribute" : "attributes"
    "<#{node.name}> with #{attrs.length} #{noun}: #{attrs.join(', ')}"
  end

  added = attrs2 - attrs1   # in node2 but not node1
  removed = attrs1 - attrs2 # in node1 but not node2

  changes_parts = []
  unless added.empty?
    listed = added.map { |a| colorize("+#{a}", :green, use_color) }
    changes_parts << "Added: #{listed.join(', ')}"
  end
  unless removed.empty?
    listed = removed.map { |a| colorize("-#{a}", :red, use_color) }
    changes_parts << "Removed: #{listed.join(', ')}"
  end

  [describe.call(node1, attrs1), describe.call(node2, attrs2),
   changes_parts.join(" | ")]
end
237
+
238
+ # Format attribute_values dimension details
239
# Describe an attribute_values difference by showing the first attribute
# whose value differs and classifying the change.
#
# @return [Array] expected, actual, and a change label (nil when no
#   differing attribute could be identified)
def format_attribute_values_details(diff, use_color)
  node1 = diff.node1
  node2 = diff.node2
  attr_name = find_differing_attribute(node1, node2)

  unless attr_name
    return ["<#{node1.name}> (values differ)",
            "<#{node2.name}> (values differ)", nil]
  end

  val1 = get_attribute_value(node1, attr_name)
  val2 = get_attribute_value(node2, attr_name)
  label = colorize(attr_name, :cyan, use_color)

  detail1 = "<#{node1.name}> #{label}=\"#{escape_quotes(val1)}\""
  detail2 = "<#{node2.name}> #{label}=\"#{escape_quotes(val2)}\""

  # Classify: differs only by leading/trailing whitespace, only by
  # whitespace runs, or by actual content.
  summary =
    if val1 != val2 && val1.strip == val2.strip
      "Whitespace difference only"
    elsif val1.gsub(/\s+/, " ") == val2.gsub(/\s+/, " ")
      "Whitespace normalization difference"
    else
      "Value changed"
    end

  [detail1, detail2, summary]
end
270
+
271
+ # Format text_content dimension details
272
# Describe a text_content difference with a quoted preview of each side
# (truncated to 100 characters). Both previews are labelled with node1's
# name, or "(text)" when node1 has no #name.
#
# @return [Array(String, String, String)] expected, actual, change note
def format_text_content_details(diff, use_color)
  node1 = diff.node1
  node2 = diff.node2
  tag = node1.respond_to?(:name) ? node1.name : "(text)"

  detail1, detail2 = [node1, node2].map do |node|
    preview = truncate_text(get_node_text(node), 100)
    "<#{tag}> \"#{escape_quotes(preview)}\""
  end

  # Flag the change when either node sits under a whitespace-preserving element.
  preserved = inside_preserve_element?(node1) || inside_preserve_element?(node2)
  note =
    if preserved
      warning = colorize("⚠️ Whitespace preserved", :yellow, use_color,
                         bold: true)
      "#{warning} (inside <pre>, <code>, etc. - whitespace is significant)"
    else
      "Text content changed"
    end

  [detail1, detail2, note]
end
299
+
300
+ # Format structural_whitespace dimension details
301
# Describe a structural_whitespace difference. Each side is truncated to
# 80 characters and then has its whitespace made visible.
#
# @return [Array(String, String, String)] expected, actual, fixed note
def format_structural_whitespace_details(diff, _use_color)
  node1 = diff.node1
  node2 = diff.node2
  tag = node1.respond_to?(:name) ? node1.name : "(text)"

  detail1, detail2 = [node1, node2].map do |node|
    shortened = truncate_text(get_node_text(node), 80)
    "<#{tag}> \"#{visualize_whitespace(shortened)}\""
  end

  [detail1, detail2, "Whitespace-only difference (informative)"]
end
321
+
322
+ # Format comments dimension details
323
# Describe a comments difference by rendering each side's content
# (truncated to 80 characters) inside comment delimiters. A node without
# #content renders as an empty comment.
#
# @return [Array(String, String, String)] expected, actual, fixed note
def format_comments_details(diff, _use_color)
  detail1, detail2 = [diff.node1, diff.node2].map do |node|
    body = node.respond_to?(:content) ? node.content.to_s : ""
    "<!-- #{truncate_text(body, 80)} -->"
  end

  [detail1, detail2, "Comment content differs"]
end
337
+
338
+ # Format Hash diff details (JSON/YAML)
339
# Describe a Hash-shaped diff as "path = value" for each side, plus a
# label chosen from the diff's :diff_code.
#
# @return [Array(String, String, String)] expected, actual, change label
def format_hash_diff_details(diff, _use_color)
  path = diff[:path] || "(root)"

  detail1, detail2 = [diff[:value1], diff[:value2]].map do |value|
    "#{path} = #{format_json_value(value)}"
  end

  label =
    case diff[:diff_code]
    when Canon::Comparison::MISSING_HASH_KEY then "Key missing"
    when Canon::Comparison::UNEQUAL_PRIMITIVES then "Value changed"
    when Canon::Comparison::UNEQUAL_ARRAY_LENGTHS then "Array length differs"
    else "Difference detected"
    end

  [detail1, detail2, label]
end
360
+
361
+ # Fallback formatter for unknown dimensions
362
# Generic description used when no dimension-specific formatter applies:
# a brief label for each node and no change summary.
#
# @return [Array(String, String, nil)]
def format_fallback_details(diff, _use_color)
  has_nodes = diff.respond_to?(:node1) && diff.respond_to?(:node2)
  return ["(unknown)", "(unknown)", nil] unless has_nodes

  [format_node_brief(diff.node1), format_node_brief(diff.node2), nil]
end
371
+
372
+ # Format JSON value for display
373
# Produce a short display form of a value: strings are quoted and
# truncated to 50 characters, Hashes and Arrays are summarized by size,
# nil is shown as "nil", and anything else uses #to_s.
def format_json_value(value)
  case value
  when nil then "nil"
  when String then "\"#{truncate_text(value, 50)}\""
  when Hash
    value.empty? ? "{...}" : "{...} (#{value.keys.length} keys)"
  when Array
    value.empty? ? "[...]" : "[...] (#{value.length} items)"
  else value.to_s
  end
end
387
+
388
+ # Helper: Get attribute names from a node
389
# Collect a node's attribute names as a sorted Array of Strings.
# Returns [] for objects that do not respond to #attributes. String
# keys are used as-is; other keys use #name when available, else #to_s.
def get_attribute_names(node)
  return [] unless node.respond_to?(:attributes)

  names = node.attributes.map do |key, _value|
    next key if key.is_a?(String)

    key.respond_to?(:name) ? key.name : key.to_s
  end
  names.sort
end
400
+
401
+ # Helper: Find which attribute has different value
402
# Find the first attribute name present on both nodes whose values
# differ, scanning in node1's attribute order.
#
# @return [String, nil] the attribute name, or nil when none differs or
#   either node does not respond to #attributes
def find_differing_attribute(node1, node2)
  return nil unless [node1, node2].all? { |n| n.respond_to?(:attributes) }

  left = get_attributes_hash(node1)
  right = get_attributes_hash(node2)

  left.each_key.find { |name| right.key?(name) && left[name] != right[name] }
end
412
+
413
+ # Helper: Get attributes as hash
414
# Convert a node's attributes into a plain name => value Hash.
# Returns {} for objects without #attributes. Names are derived the same
# way as in get_attribute_names; values use #value when the attribute
# object provides it, else #to_s.
def get_attributes_hash(node)
  return {} unless node.respond_to?(:attributes)

  node.attributes.each_with_object({}) do |(key, val), result|
    name =
      if key.is_a?(String)
        key
      elsif key.respond_to?(:name)
        key.name
      else
        key.to_s
      end

    result[name] = val.respond_to?(:value) ? val.value : val.to_s
  end
end
429
+
430
+ # Helper: Get attribute value
431
# Look up one attribute's value on a node.
#
# @return [String] the value, or "" when the node has no #attributes or
#   the attribute is absent
def get_attribute_value(node, attr_name)
  return "" unless node.respond_to?(:attributes)

  found = get_attributes_hash(node)[attr_name]
  found || ""
end
437
+
438
+ # Helper: Get text content from node
439
# Read a node's text as a String: prefers #content, then #text, and
# yields "" when the node offers neither.
def get_node_text(node)
  reader = %i[content text].find { |candidate| node.respond_to?(candidate) }
  return "" unless reader

  node.public_send(reader).to_s
end
448
+
449
+ # Helper: Truncate text to max length
450
# Shorten text to at most +max_length+ characters, marking the cut with
# a trailing "...".
#
# @param text [String, nil] text to shorten (nil is treated as "")
# @param max_length [Integer] maximum length of the returned string
# @return [String] the original text when it fits, otherwise a truncated copy
def truncate_text(text, max_length)
  text = text.to_s
  return text if text.length <= max_length

  # Below 3 there is no room for the ellipsis (and the range end would go
  # negative, yielding a result longer than the limit), so hard-cut.
  return text[0, [max_length, 0].max] if max_length < 3

  "#{text[0...max_length - 3]}..."
end
455
+
456
+ # Helper: Visualize whitespace characters
457
# Make whitespace visible: space becomes "␣", tab becomes "→" and
# newline becomes "↵". All other characters are left unchanged.
def visualize_whitespace(text)
  text.tr(" \t\n", "␣→↵")
end
463
+
464
# Helper: Escape double quotes so text can be shown inside a quoted string.
#
# This is display formatting only, not input sanitization: the result is
# never executed or used to generate code, so backslashes are
# deliberately left untouched (static analysers may flag this as
# incomplete escaping; that is a false positive here).
#
# @param text [String, nil] text to escape (nil is treated as "")
# @return [String] text with each double quote preceded by a backslash
def escape_quotes(text)
  text.to_s.gsub('"', '\\"')
end
475
+
476
+ # Helper: Check if node is inside a whitespace-preserving element
477
# Report whether any ancestor of +node+ is a whitespace-preserving
# element (pre, code, textarea, script, style; compared
# case-insensitively). The node's own name is not considered.
#
# Document checks use only Nokogiri::XML::Document: Nokogiri's HTML4 and
# HTML5 document classes inherit from it, so one check covers them
# without referencing constants that may be undefined on some
# platforms or Nokogiri versions.
#
# @param node [#parent, nil] node whose ancestors are inspected
# @return [Boolean] true when a preserving ancestor is found; false
#   otherwise, including when the walk raises an error
def inside_preserve_element?(node)
  return false if node.nil?

  # Documents and fragments are treated as having no meaningful ancestors.
  return false if node.is_a?(Nokogiri::XML::Document) ||
                  node.is_a?(Nokogiri::XML::DocumentFragment)

  preserve_elements = %w[pre code textarea script style]
  current = node
  max_depth = 50 # hard cap on the ancestor walk

  begin
    max_depth.times do
      break if current.is_a?(Nokogiri::XML::Document)
      break unless current.respond_to?(:parent)

      # A failing #parent is treated as "no parent".
      parent = begin
        current.parent
      rescue StandardError
        nil
      end
      break if !parent || parent == current

      if parent.respond_to?(:name) &&
         preserve_elements.include?(parent.name.to_s.downcase)
        return true
      end

      current = parent
    end
  rescue StandardError
    # If any error occurs during traversal, safely return false
    return false
  end

  false
end
526
+
527
+ # Helper: Format node briefly
528
# One-token description of a node: "(nil)" for nil, "<name>" for
# anything with #name, otherwise the object's class name.
def format_node_brief(node)
  if node.nil?
    "(nil)"
  elsif node.respond_to?(:name)
    "<#{node.name}>"
  else
    node.class.name
  end
end
537
+
538
+ # Helper: Colorize text
539
# Wrap text in the given Paint color (plus :bold when requested).
# Returns the text unchanged when use_color is falsy.
def colorize(text, color, use_color, bold: false)
  return text unless use_color

  styles = bold ? [color, :bold] : [color]
  Paint[text, *styles]
end
548
+ end
549
+ end
550
+ end
551
+ end
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "unicode/name"
4
+
5
+ module Canon
6
+ class DiffFormatter
7
+ # Module for building Unicode character visualization legends
8
+ module Legend
9
+ # Detect non-ASCII characters in text and return their information
10
+ #
11
+ # @param text [String] Text to analyze
12
+ # @param visualization_map [Hash] Character visualization map
13
+ # @return [Hash] Hash of characters with their metadata
14
# Scan text for non-ASCII characters that have a visualization mapping
# and describe each distinct one once.
#
# @param text [String] text to scan
# @param visualization_map [Hash] character => visualization glyph
# @return [Hash] character => { visualization:, codepoint:, name:, category: }
def self.detect_non_ascii(text, visualization_map)
  category_map = DiffFormatter::CHARACTER_CATEGORY_MAP
  metadata = DiffFormatter::CHARACTER_METADATA

  text.each_char.with_object({}) do |char, detected|
    # ASCII and already-recorded characters are skipped.
    next if char.ord <= 127 || detected.key?(char)

    visualization = visualization_map.fetch(char, char)
    next if visualization == char # no mapping for this character

    # Prefer the metadata name, then Unicode::Name, then a placeholder.
    entry = metadata[char]
    name = (entry && entry[:name]) || Unicode::Name.of(char) || "UNKNOWN"

    detected[char] = {
      visualization: visualization,
      codepoint: format("U+%04X", char.ord),
      name: name,
      category: category_map.fetch(char, :control),
    }
  end
end
45
+
46
+ # Build formatted legend from detected characters
47
+ #
48
+ # @param detected_chars [Hash] Hash from detect_non_ascii
49
+ # @param use_color [Boolean] Whether to use colors
50
+ # @return [String, nil] Formatted legend or nil if no characters
51
# Build the legend text for a set of detected characters, grouped by
# category in the order given by DiffFormatter::CHARACTER_CATEGORY_NAMES
# and sorted by codepoint within each category.
#
# Only headings and separators are colorized; entry lines are always
# plain text.
#
# @param detected_chars [Hash] Hash from detect_non_ascii
# @param use_color [Boolean] Whether to use colors
# @return [String, nil] Formatted legend or nil if no characters
def self.build_legend(detected_chars, use_color: true)
  return nil if detected_chars.empty?

  grouped = detected_chars.group_by { |_char, info| info[:category] }
  separator = "━" * 60

  output = [
    colorize("Character Visualization Legend:", :cyan, :bold, use_color),
    colorize(separator, :cyan, :bold, use_color),
  ]

  DiffFormatter::CHARACTER_CATEGORY_NAMES.each do |category_key, category_name|
    chars = grouped[category_key]
    next unless chars

    output << colorize("#{category_name}:", :yellow, :bold, use_color)

    chars.sort_by { |char, _info| char.ord }.each do |char, info|
      # Entry layout: '<visualization>': <codepoint> ('<original>') <name>
      original = format_original_char(char)
      name = format_name(info[:name])
      output << " '#{info[:visualization]}': #{info[:codepoint]} ('#{original}') #{name}"
    end
    output << ""
  end

  output << colorize(separator, :cyan, :bold, use_color)
  output.join("\n")
end
90
+
91
+ # Format character name for display
92
+ #
93
+ # @param name [String] Unicode character name
94
+ # @return [String] Formatted name
95
# Convert an upper-case Unicode name to display case while keeping its
# original separators, e.g. "ZERO WIDTH SPACE" -> "Zero Width Space".
#
# Words of one or two characters are upper-cased instead of capitalized,
# so "FOUR-PER-EM SPACE" becomes "Four-Per-EM Space".
#
# @param name [String] Unicode character name
# @return [String] Formatted name
def self.format_name(name)
  # Rewrite each run of non-separator characters in place so hyphens and
  # spaces survive exactly as they appeared in the input.
  name.gsub(/[^-\s]+/) do |word|
    word.length <= 2 ? word.upcase : word.capitalize
  end
end
105
+
106
+ # Format original character for display in legend
107
+ #
108
+ # @param char [String] Original character
109
+ # @return [String] Formatted for display
110
# Return a printable form of a character for the legend: newline,
# carriage return, tab and NUL are shown as backslash escapes, and every
# other character is returned as-is.
#
# @param char [String] Original character
# @return [String] Formatted for display
def self.format_original_char(char)
  escapes = {
    "\n" => "\\n",
    "\r" => "\\r",
    "\t" => "\\t",
    "\u0000" => "\\0",
  }
  escapes.fetch(char, char)
end
124
+
125
+ # Colorize text if color is enabled
126
+ #
127
+ # @param text [String] Text to colorize
128
+ # @param colors [Array<Symbol>] Colors to apply
129
+ # @param use_color [Boolean] Whether to use colors
130
+ # @return [String] Colorized or plain text
131
# Colorize text with Paint when color is enabled, prefixing the result
# with an ANSI reset sequence. Returns the text unchanged otherwise.
#
# @param text [String] Text to colorize
# @param colors [Array<Symbol>] Paint options to apply
# @param use_color [Boolean] Whether to use colors
# @return [String] Colorized or plain text
def self.colorize(text, *colors, use_color)
  if use_color
    # Paint is loaded only when color is actually requested.
    require "paint"
    painted = Paint[text, *colors]
    "\e[0m#{painted}"
  else
    text
  end
end
137
+
138
+ private_class_method :format_name, :format_original_char, :colorize
139
+ end
140
+ end
141
+ end