canon 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +69 -92
  3. data/README.adoc +13 -13
  4. data/docs/.lycheeignore +69 -0
  5. data/docs/Gemfile +1 -0
  6. data/docs/_config.yml +90 -1
  7. data/docs/advanced/diff-classification.adoc +82 -2
  8. data/docs/advanced/extending-canon.adoc +193 -0
  9. data/docs/features/match-options/index.adoc +239 -1
  10. data/docs/internals/diffnode-enrichment.adoc +611 -0
  11. data/docs/internals/index.adoc +251 -0
  12. data/docs/lychee.toml +13 -6
  13. data/docs/understanding/architecture.adoc +749 -33
  14. data/docs/understanding/comparison-pipeline.adoc +122 -0
  15. data/lib/canon/cache.rb +129 -0
  16. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +68 -0
  17. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +68 -0
  18. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +171 -0
  19. data/lib/canon/comparison/dimensions/base_dimension.rb +107 -0
  20. data/lib/canon/comparison/dimensions/comments_dimension.rb +121 -0
  21. data/lib/canon/comparison/dimensions/element_position_dimension.rb +90 -0
  22. data/lib/canon/comparison/dimensions/registry.rb +77 -0
  23. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +119 -0
  24. data/lib/canon/comparison/dimensions/text_content_dimension.rb +96 -0
  25. data/lib/canon/comparison/dimensions.rb +54 -0
  26. data/lib/canon/comparison/format_detector.rb +87 -0
  27. data/lib/canon/comparison/html_comparator.rb +70 -26
  28. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  29. data/lib/canon/comparison/html_parser.rb +80 -0
  30. data/lib/canon/comparison/json_comparator.rb +12 -0
  31. data/lib/canon/comparison/json_parser.rb +19 -0
  32. data/lib/canon/comparison/markup_comparator.rb +293 -0
  33. data/lib/canon/comparison/match_options/base_resolver.rb +150 -0
  34. data/lib/canon/comparison/match_options/json_resolver.rb +82 -0
  35. data/lib/canon/comparison/match_options/xml_resolver.rb +151 -0
  36. data/lib/canon/comparison/match_options/yaml_resolver.rb +87 -0
  37. data/lib/canon/comparison/match_options.rb +68 -463
  38. data/lib/canon/comparison/profile_definition.rb +149 -0
  39. data/lib/canon/comparison/ruby_object_comparator.rb +180 -0
  40. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +7 -10
  41. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  42. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +177 -0
  43. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +136 -0
  44. data/lib/canon/comparison/xml_comparator/child_comparison.rb +197 -0
  45. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +115 -0
  46. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +186 -0
  47. data/lib/canon/comparison/xml_comparator/node_parser.rb +79 -0
  48. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +102 -0
  49. data/lib/canon/comparison/xml_comparator.rb +97 -684
  50. data/lib/canon/comparison/xml_node_comparison.rb +319 -0
  51. data/lib/canon/comparison/xml_parser.rb +19 -0
  52. data/lib/canon/comparison/yaml_comparator.rb +3 -3
  53. data/lib/canon/comparison.rb +265 -110
  54. data/lib/canon/diff/diff_classifier.rb +101 -2
  55. data/lib/canon/diff/diff_node.rb +32 -2
  56. data/lib/canon/diff/formatting_detector.rb +1 -1
  57. data/lib/canon/diff/node_serializer.rb +191 -0
  58. data/lib/canon/diff/path_builder.rb +143 -0
  59. data/lib/canon/diff_formatter/by_line/base_formatter.rb +251 -0
  60. data/lib/canon/diff_formatter/by_line/html_formatter.rb +6 -248
  61. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +38 -229
  62. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +30 -0
  63. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +579 -0
  64. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +121 -0
  65. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +253 -0
  66. data/lib/canon/diff_formatter/diff_detail_formatter/text_utils.rb +61 -0
  67. data/lib/canon/diff_formatter/diff_detail_formatter.rb +31 -1028
  68. data/lib/canon/diff_formatter.rb +1 -1
  69. data/lib/canon/rspec_matchers.rb +38 -9
  70. data/lib/canon/tree_diff/operation_converter.rb +92 -338
  71. data/lib/canon/tree_diff/operation_converter_helpers/metadata_enricher.rb +71 -0
  72. data/lib/canon/tree_diff/operation_converter_helpers/post_processor.rb +103 -0
  73. data/lib/canon/tree_diff/operation_converter_helpers/reason_builder.rb +168 -0
  74. data/lib/canon/tree_diff/operation_converter_helpers/update_change_handler.rb +188 -0
  75. data/lib/canon/version.rb +1 -1
  76. data/lib/canon/xml/data_model.rb +24 -13
  77. metadata +48 -2
@@ -2,6 +2,12 @@
2
2
 
3
3
  require "paint"
4
4
  require_relative "../xml/namespace_helper"
5
+ # DiffDetailFormatter helper modules
6
+ require_relative "diff_detail_formatter/text_utils"
7
+ require_relative "diff_detail_formatter/color_helper"
8
+ require_relative "diff_detail_formatter/location_extractor"
9
+ require_relative "diff_detail_formatter/node_utils"
10
+ require_relative "diff_detail_formatter/dimension_formatter"
5
11
 
6
12
  module Canon
7
13
  class DiffFormatter
@@ -99,15 +105,16 @@ module Canon
99
105
  :magenta, use_color)}"
100
106
 
101
107
  # Location (XPath for XML/HTML, Path for JSON/YAML)
102
- location = extract_location(diff)
108
+ location = DiffDetailFormatterHelpers::LocationExtractor.extract_location(diff)
103
109
  output << "#{colorize('Location:', :cyan, use_color,
104
110
  bold: true)} #{colorize(location, :blue,
105
111
  use_color)}"
106
112
  output << ""
107
113
 
108
114
  # Dimension-specific details
109
- detail1, detail2, changes = format_dimension_details(diff,
110
- use_color)
115
+ detail1, detail2, changes = DiffDetailFormatterHelpers::DimensionFormatter.format_dimension_details(
116
+ diff, use_color
117
+ )
111
118
 
112
119
  output << colorize("⊖ Expected (File 1):", :red, use_color,
113
120
  bold: true)
@@ -125,1042 +132,38 @@ module Canon
125
132
 
126
133
  output.join("\n")
127
134
  rescue StandardError => e
128
- # Safe fallback if formatting fails
129
- colorize(
130
- "🔍 DIFFERENCE ##{number}/#{total} [Error formatting: #{e.message}]", :red, use_color, bold: true
131
- )
132
- end
133
-
134
- # Extract XPath or JSON path for the difference location
135
- def extract_location(diff)
136
- # For Hash diffs (JSON/YAML)
137
- if diff.is_a?(Hash)
138
- return diff[:path] || "(root)"
139
- end
140
-
141
- # For DiffNode (XML/HTML)
142
- node = diff.respond_to?(:node1) ? (diff.node1 || diff.node2) : nil
143
-
144
- # For XML/HTML element nodes
145
- if node.respond_to?(:name)
146
- return extract_xpath(node)
147
- end
148
-
149
- # Fallback
150
- if diff.respond_to?(:dimension)
151
- diff.dimension.to_s
152
- else
153
- "(unknown)"
154
- end
155
- end
156
-
157
- # Extract XPath from an XML/HTML node
158
- def extract_xpath(node)
159
- return "/" if node.nil?
160
-
161
- # Document nodes don't have meaningful XPaths
162
- if node.is_a?(Nokogiri::XML::Document) ||
163
- node.is_a?(Nokogiri::HTML::Document) ||
164
- node.is_a?(Nokogiri::HTML4::Document) ||
165
- node.is_a?(Nokogiri::HTML5::Document)
166
- return "/"
167
- end
168
-
169
- parts = []
170
- current = node
171
- max_depth = 100
172
- depth = 0
173
-
174
- begin
175
- while current.respond_to?(:name) && current.name && depth < max_depth
176
- # Stop at document-level nodes
177
- break if ["document", "#document"].include?(current.name)
178
- break if current.is_a?(Nokogiri::XML::Document) ||
179
- current.is_a?(Nokogiri::HTML::Document)
180
-
181
- parts.unshift(current.name)
182
-
183
- # Move to parent safely
184
- break unless current.respond_to?(:parent)
185
-
186
- parent = begin
187
- current.parent
188
- rescue StandardError
189
- nil
190
- end
191
-
192
- break unless parent
193
- break if parent == current
194
-
195
- current = parent
196
- depth += 1
197
- end
135
+ # Safe fallback if formatting fails - provide detailed context
136
+ location = begin
137
+ DiffDetailFormatterHelpers::LocationExtractor.extract_location(diff)
198
138
  rescue StandardError
199
- # If any error, return what we have
200
- return "/#{parts.join('/')}"
201
- end
202
-
203
- "/#{parts.join('/')}"
204
- end
205
-
206
- # Format details based on dimension type
207
- def format_dimension_details(diff, use_color)
208
- # Handle Hash diffs (JSON/YAML)
209
- if diff.is_a?(Hash)
210
- return format_hash_diff_details(diff, use_color)
211
- end
212
-
213
- # Handle DiffNode (XML/HTML)
214
- dimension = diff.respond_to?(:dimension) ? diff.dimension : nil
215
-
216
- case dimension
217
- when :element_structure
218
- format_element_structure_details(diff, use_color)
219
- when :attribute_presence
220
- format_attribute_presence_details(diff, use_color)
221
- when :attribute_values
222
- format_attribute_values_details(diff, use_color)
223
- when :attribute_order
224
- format_attribute_order_details(diff, use_color)
225
- when :namespace_uri
226
- format_namespace_uri_details(diff, use_color)
227
- when :namespace_declarations
228
- format_namespace_declarations_details(diff, use_color)
229
- when :text_content
230
- format_text_content_details(diff, use_color)
231
- when :structural_whitespace
232
- format_structural_whitespace_details(diff, use_color)
233
- when :comments
234
- format_comments_details(diff, use_color)
235
- else
236
- format_fallback_details(diff, use_color)
237
- end
238
- end
239
-
240
- # Format namespace_uri dimension details
241
- def format_namespace_uri_details(diff, use_color)
242
- node1 = diff.node1
243
- node2 = diff.node2
244
-
245
- # Use NamespaceHelper for consistent formatting
246
- ns1_display = Canon::Xml::NamespaceHelper.format_namespace(
247
- node1.respond_to?(:namespace_uri) ? node1.namespace_uri : nil,
248
- )
249
- ns2_display = Canon::Xml::NamespaceHelper.format_namespace(
250
- node2.respond_to?(:namespace_uri) ? node2.namespace_uri : nil,
251
- )
252
-
253
- element_name = if node1.respond_to?(:name)
254
- node1.name
255
- else
256
- node2.respond_to?(:name) ? node2.name : "element"
257
- end
258
-
259
- detail1 = "<#{element_name}> #{colorize(ns1_display, :cyan,
260
- use_color)}"
261
- detail2 = "<#{element_name}> #{colorize(ns2_display, :cyan,
262
- use_color)}"
263
-
264
- changes = "Namespace differs: #{colorize(ns1_display, :red,
265
- use_color)} → #{colorize(
266
- ns2_display, :green, use_color
267
- )}"
268
-
269
- [detail1, detail2, changes]
270
- end
271
-
272
- # Format namespace_declarations dimension details
273
- def format_namespace_declarations_details(diff, use_color)
274
- node1 = diff.node1
275
- node2 = diff.node2
276
-
277
- # Extract namespace declarations from both nodes
278
- ns_decls1 = extract_namespace_declarations_from_node(node1)
279
- ns_decls2 = extract_namespace_declarations_from_node(node2)
280
-
281
- element_name = if node1.respond_to?(:name)
282
- node1.name
283
- else
284
- node2.respond_to?(:name) ? node2.name : "element"
285
- end
286
-
287
- # Format namespace declarations for display
288
- detail1 = if ns_decls1.empty?
289
- "<#{element_name}> #{colorize(
290
- '(no namespace declarations)', :red, use_color
291
- )}"
292
- else
293
- ns_str = ns_decls1.map do |prefix, uri|
294
- attr_name = prefix.empty? ? "xmlns" : "xmlns:#{prefix}"
295
- "#{attr_name}=\"#{uri}\""
296
- end.join(" ")
297
- "<#{element_name}> #{ns_str}"
298
- end
299
-
300
- detail2 = if ns_decls2.empty?
301
- "<#{element_name}> #{colorize(
302
- '(no namespace declarations)', :green, use_color
303
- )}"
304
- else
305
- ns_str = ns_decls2.map do |prefix, uri|
306
- attr_name = prefix.empty? ? "xmlns" : "xmlns:#{prefix}"
307
- "#{attr_name}=\"#{uri}\""
308
- end.join(" ")
309
- "<#{element_name}> #{ns_str}"
310
- end
311
-
312
- # Analyze changes
313
- missing = ns_decls1.keys - ns_decls2.keys # In node1 but not node2
314
- extra = ns_decls2.keys - ns_decls1.keys # In node2 but not node1
315
- changed = ns_decls1.select do |prefix, uri|
316
- ns_decls2[prefix] && ns_decls2[prefix] != uri
317
- end.keys
318
-
319
- # Format changes
320
- changes_parts = []
321
- if missing.any?
322
- missing_str = missing.map do |prefix|
323
- attr_name = prefix.empty? ? "xmlns" : "xmlns:#{prefix}"
324
- colorize("-#{attr_name}=\"#{ns_decls1[prefix]}\"", :red,
325
- use_color)
326
- end.join(", ")
327
- changes_parts << "Removed: #{missing_str}"
328
- end
329
- if extra.any?
330
- extra_str = extra.map do |prefix|
331
- attr_name = prefix.empty? ? "xmlns" : "xmlns:#{prefix}"
332
- colorize("+#{attr_name}=\"#{ns_decls2[prefix]}\"", :green,
333
- use_color)
334
- end.join(", ")
335
- changes_parts << "Added: #{extra_str}"
336
- end
337
- if changed.any?
338
- changed_str = changed.map do |prefix|
339
- attr_name = prefix.empty? ? "xmlns" : "xmlns:#{prefix}"
340
- "#{colorize(attr_name, :cyan,
341
- use_color)}: \"#{ns_decls1[prefix]}\" → \"#{ns_decls2[prefix]}\""
342
- end.join(", ")
343
- changes_parts << "Changed: #{changed_str}"
344
- end
345
-
346
- changes = changes_parts.join(" | ")
347
-
348
- [detail1, detail2, changes]
349
- end
350
-
351
- # Extract namespace declarations from a node (helper for formatter)
352
- # @param node [Object] Node to extract namespace declarations from
353
- # @return [Hash] Hash of prefix => URI mappings
354
- def extract_namespace_declarations_from_node(node)
355
- return {} if node.nil?
356
-
357
- declarations = {}
358
-
359
- # Handle Canon::Xml::Node (uses namespace_nodes)
360
- if node.respond_to?(:namespace_nodes)
361
- node.namespace_nodes.each do |ns|
362
- # Skip the implicit xml namespace (always present)
363
- next if ns.prefix == "xml" && ns.uri == "http://www.w3.org/XML/1998/namespace"
364
-
365
- prefix = ns.prefix || ""
366
- declarations[prefix] = ns.uri
367
- end
368
- return declarations
369
- end
370
-
371
- # Handle Nokogiri/Moxml nodes (use attributes)
372
- # Get raw attributes
373
- raw_attrs = if node.respond_to?(:attribute_nodes)
374
- node.attribute_nodes
375
- elsif node.respond_to?(:attributes)
376
- node.attributes
377
- else
378
- return {}
379
- end
380
-
381
- # Handle Canon::Xml::Node attribute format (array of AttributeNode)
382
- if raw_attrs.is_a?(Array)
383
- raw_attrs.each do |attr|
384
- name = attr.name
385
- value = attr.value
386
-
387
- if name == "xmlns" || name.start_with?("xmlns:")
388
- # Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
389
- prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
390
- declarations[prefix] = value
391
- end
392
- end
393
- else
394
- # Handle Nokogiri and Moxml attribute formats (Hash-like)
395
- raw_attrs.each do |key, val|
396
- if key.is_a?(String)
397
- # Nokogiri format: key=name (String), val=attr object
398
- name = key
399
- value = val.respond_to?(:value) ? val.value : val.to_s
400
- else
401
- # Moxml format: key=attr object, val=nil
402
- name = key.respond_to?(:name) ? key.name : key.to_s
403
- value = key.respond_to?(:value) ? key.value : key.to_s
404
- end
405
-
406
- if name == "xmlns" || name.start_with?("xmlns:")
407
- # Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
408
- prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
409
- declarations[prefix] = value
410
- end
411
- end
412
- end
413
-
414
- declarations
415
- end
416
-
417
- # Format element_structure dimension details (INSERT/DELETE operations)
418
- def format_element_structure_details(diff, use_color)
419
- node1 = diff.node1
420
- node2 = diff.node2
421
-
422
- # Determine operation type
423
- if node1.nil? && !node2.nil?
424
- # INSERT operation - show content preview
425
- node2.respond_to?(:name) ? node2.name : "element"
426
- content_preview = extract_content_preview(node2, 50)
427
- detail1 = colorize("(not present)", :red, use_color)
428
- detail2 = content_preview
429
- changes = "Element inserted"
430
- elsif !node1.nil? && node2.nil?
431
- # DELETE operation - show content preview
432
- node1.respond_to?(:name) ? node1.name : "element"
433
- content_preview = extract_content_preview(node1, 50)
434
- detail1 = content_preview
435
- detail2 = colorize("(not present)", :green, use_color)
436
- changes = "Element deleted"
437
- elsif !node1.nil? && !node2.nil?
438
- # STRUCTURAL CHANGE (both nodes present) - show both previews
439
- name1 = node1.respond_to?(:name) ? node1.name : "element"
440
- name2 = node2.respond_to?(:name) ? node2.name : "element"
441
- detail1 = extract_content_preview(node1, 50)
442
- detail2 = extract_content_preview(node2, 50)
443
-
444
- changes = if name1 == name2
445
- "Element structure changed"
446
- else
447
- "Element type changed: #{name1} → #{name2}"
448
- end
449
- else
450
- # Both nil (shouldn't happen)
451
- detail1 = "(nil)"
452
- detail2 = "(nil)"
453
- changes = "Unknown structural change"
454
- end
455
-
456
- [detail1, detail2, changes]
457
- end
458
-
459
- # Format attribute_presence dimension details
460
- def format_attribute_presence_details(diff, use_color)
461
- node1 = diff.node1
462
- node2 = diff.node2
463
-
464
- attrs1 = get_attribute_names(node1)
465
- attrs2 = get_attribute_names(node2)
466
-
467
- attrs1 & attrs2
468
- missing = attrs1 - attrs2 # In node1 but not node2
469
- extra = attrs2 - attrs1 # In node2 but not node1
470
-
471
- # Format expected
472
- detail1 = "<#{node1.name}> with #{attrs1.length} #{attrs1.length == 1 ? 'attribute' : 'attributes'}: #{attrs1.join(', ')}"
473
-
474
- # Format actual
475
- detail2 = "<#{node2.name}> with #{attrs2.length} #{attrs2.length == 1 ? 'attribute' : 'attributes'}: #{attrs2.join(', ')}"
476
-
477
- # Format changes
478
- changes_parts = []
479
- if extra.any?
480
- extra_str = extra.map do |a|
481
- colorize("+#{a}", :green, use_color)
482
- end.join(", ")
483
- changes_parts << "Added: #{extra_str}"
484
- end
485
- if missing.any?
486
- missing_str = missing.map do |a|
487
- colorize("-#{a}", :red, use_color)
488
- end.join(", ")
489
- changes_parts << "Removed: #{missing_str}"
490
- end
491
-
492
- changes = changes_parts.join(" | ")
493
-
494
- [detail1, detail2, changes]
495
- end
496
-
497
- # Format attribute_values dimension details
498
- def format_attribute_values_details(diff, use_color)
499
- node1 = diff.node1
500
- node2 = diff.node2
501
-
502
- # Find ALL attributes with different values
503
- differing_attrs = find_all_differing_attributes(node1, node2)
504
-
505
- if differing_attrs.any?
506
- # Show element name with all differing attributes
507
- attrs1_str = differing_attrs.map do |attr|
508
- val1 = get_attribute_value(node1, attr)
509
- "#{colorize(attr, :cyan, use_color)}=\"#{escape_quotes(val1)}\""
510
- end.join(" ")
511
-
512
- attrs2_str = differing_attrs.map do |attr|
513
- val2 = get_attribute_value(node2, attr)
514
- "#{colorize(attr, :cyan, use_color)}=\"#{escape_quotes(val2)}\""
515
- end.join(" ")
516
-
517
- detail1 = "<#{node1.name}> #{attrs1_str}"
518
- detail2 = "<#{node2.name}> #{attrs2_str}"
519
-
520
- # List all attribute changes
521
- changes_parts = differing_attrs.map do |attr|
522
- val1 = get_attribute_value(node1, attr)
523
- val2 = get_attribute_value(node2, attr)
524
-
525
- if val1.empty? && !val2.empty?
526
- "#{colorize(attr, :cyan,
527
- use_color)}: (added) → \"#{escape_quotes(val2)}\""
528
- elsif !val1.empty? && val2.empty?
529
- "#{colorize(attr, :cyan,
530
- use_color)}: \"#{escape_quotes(val1)}\" → (removed)"
531
- else
532
- "#{colorize(attr, :cyan,
533
- use_color)}: \"#{escape_quotes(val1)}\" → \"#{escape_quotes(val2)}\""
534
- end
535
- end
536
-
537
- changes = changes_parts.join("; ")
538
-
539
- [detail1, detail2, changes]
540
- else
541
- ["<#{node1.name}> (values differ)",
542
- "<#{node2.name}> (values differ)", nil]
543
- end
544
- end
545
-
546
- # Format attribute_order dimension details
547
- def format_attribute_order_details(diff, use_color)
548
- node1 = diff.node1
549
- node2 = diff.node2
550
-
551
- # Get attribute names in order
552
- attrs1 = get_attribute_names_in_order(node1)
553
- attrs2 = get_attribute_names_in_order(node2)
554
-
555
- # Format as ordered list
556
- attrs1_str = "[#{attrs1.join(', ')}]"
557
- attrs2_str = "[#{attrs2.join(', ')}]"
558
-
559
- detail1 = "<#{node1.name}> attributes in order: #{colorize(
560
- attrs1_str, :cyan, use_color
561
- )}"
562
- detail2 = "<#{node2.name}> attributes in order: #{colorize(
563
- attrs2_str, :cyan, use_color
564
- )}"
565
-
566
- changes = "Attribute order changed: #{attrs1_str} → #{attrs2_str}"
567
-
568
- [detail1, detail2, changes]
569
- end
570
-
571
- # Format text content dimension details
572
- def format_text_content_details(diff, use_color)
573
- node1 = diff.node1
574
- node2 = diff.node2
575
-
576
- text1 = get_node_text(node1)
577
- text2 = get_node_text(node2)
578
-
579
- # Truncate long text
580
- preview1 = truncate_text(text1, 100)
581
- preview2 = truncate_text(text2, 100)
582
-
583
- # Get element names - for text nodes, use parent element name
584
- # When one node is nil, use the other's name for context
585
- element_name1 = get_element_name_for_display(node1)
586
- element_name2 = get_element_name_for_display(node2)
587
-
588
- # If one shows nil-node, try to use the other's name for context
589
- if element_name1.include?("nil") && !element_name2.include?("nil")
590
- # Use node2's name as a hint for what node1 should be
591
- element_name1 = element_name2
592
- elsif element_name2.include?("nil") && !element_name1.include?("nil")
593
- # Use node1's name as a hint for what node2 should be
594
- element_name2 = element_name1
595
- end
596
-
597
- # Get namespace URIs
598
- ns1 = get_namespace_uri_for_display(node1)
599
- ns2 = get_namespace_uri_for_display(node2)
600
-
601
- # Build namespace display strings using NamespaceHelper
602
- ns1_info = if ns1 && !ns1.empty?
603
- " #{Canon::Xml::NamespaceHelper.format_namespace(ns1)}"
604
- else
605
- ""
606
- end
607
-
608
- ns2_info = if ns2 && !ns2.empty?
609
- " #{Canon::Xml::NamespaceHelper.format_namespace(ns2)}"
610
- else
611
- ""
612
- end
613
-
614
- detail1 = "<#{element_name1}>#{ns1_info} \"#{escape_quotes(preview1)}\""
615
- detail2 = "<#{element_name2}>#{ns2_info} \"#{escape_quotes(preview2)}\""
616
-
617
- # Check if diff contains namespace information in reason
618
- # If so, display it prominently
619
- changes = if diff.respond_to?(:reason) && diff.reason&.include?("namespace")
620
- diff.reason
621
- # Check if inside whitespace-preserving element
622
- elsif inside_preserve_element?(node1) || inside_preserve_element?(node2)
623
- colorize("⚠️ Whitespace preserved", :yellow, use_color,
624
- bold: true) +
625
- " (inside <pre>, <code>, etc. - whitespace is significant)"
626
- else
627
- "Text content changed"
628
- end
629
-
630
- [detail1, detail2, changes]
631
- end
632
-
633
- # Format structural whitespace dimension details
634
- def format_structural_whitespace_details(diff, _use_color)
635
- node1 = diff.node1
636
- node2 = diff.node2
637
-
638
- text1 = get_node_text(node1)
639
- text2 = get_node_text(node2)
640
-
641
- # Show whitespace explicitly
642
- preview1 = visualize_whitespace(truncate_text(text1, 80))
643
- preview2 = visualize_whitespace(truncate_text(text2, 80))
644
-
645
- element_name = node1.respond_to?(:name) ? node1.name : "(text)"
646
-
647
- detail1 = "<#{element_name}> \"#{preview1}\""
648
- detail2 = "<#{element_name}> \"#{preview2}\""
649
-
650
- changes = "Whitespace-only difference (informative)"
651
-
652
- [detail1, detail2, changes]
653
- end
654
-
655
- # Format comments dimension details
656
- def format_comments_details(diff, _use_color)
657
- node1 = diff.node1
658
- node2 = diff.node2
659
-
660
- content1 = node1.respond_to?(:content) ? node1.content.to_s : ""
661
- content2 = node2.respond_to?(:content) ? node2.content.to_s : ""
662
-
663
- detail1 = "<!-- #{truncate_text(content1, 80)} -->"
664
- detail2 = "<!-- #{truncate_text(content2, 80)} -->"
665
-
666
- changes = "Comment content differs"
667
-
668
- [detail1, detail2, changes]
669
- end
670
-
671
- # Format Hash diff details (JSON/YAML)
672
- def format_hash_diff_details(diff, _use_color)
673
- path = diff[:path] || "(root)"
674
- val1 = diff[:value1]
675
- val2 = diff[:value2]
676
-
677
- detail1 = "#{path} = #{format_json_value(val1)}"
678
- detail2 = "#{path} = #{format_json_value(val2)}"
679
-
680
- changes = case diff[:diff_code]
681
- when Canon::Comparison::MISSING_HASH_KEY
682
- "Key missing"
683
- when Canon::Comparison::UNEQUAL_PRIMITIVES
684
- "Value changed"
685
- when Canon::Comparison::UNEQUAL_ARRAY_LENGTHS
686
- "Array length differs"
687
- else
688
- "Difference detected"
689
- end
690
-
691
- [detail1, detail2, changes]
692
- end
693
-
694
- # Fallback formatter for unknown dimensions
695
- def format_fallback_details(diff, _use_color)
696
- if diff.respond_to?(:node1) && diff.respond_to?(:node2)
697
- node1_desc = format_node_brief(diff.node1)
698
- node2_desc = format_node_brief(diff.node2)
699
- [node1_desc, node2_desc, nil]
700
- else
701
- ["(unknown)", "(unknown)", nil]
702
- end
703
- end
704
-
705
- # Format JSON value for display
706
- def format_json_value(value)
707
- case value
708
- when nil
709
- "nil"
710
- when String
711
- "\"#{truncate_text(value, 50)}\""
712
- when Hash
713
- "{...}#{value.empty? ? '' : " (#{value.keys.length} keys)"}"
714
- when Array
715
- "[...]#{value.empty? ? '' : " (#{value.length} items)"}"
716
- else
717
- value.to_s
718
- end
719
- end
720
-
721
- # Helper: Get attribute names from a node
722
- def get_attribute_names(node)
723
- # Handle Canon::Xml::Nodes::ElementNode (uses attribute_nodes array)
724
- if node.respond_to?(:attribute_nodes) && node.attribute_nodes.is_a?(Array)
725
- return node.attribute_nodes.map(&:qname).sort
726
- end
727
-
728
- return [] unless node.respond_to?(:attributes)
729
-
730
- attrs = node.attributes
731
-
732
- # Handle Moxml::Element (attributes is an Array)
733
- if attrs.is_a?(Array)
734
- attrs.map do |attr|
735
- if attr.respond_to?(:qname)
736
- attr.qname
737
- elsif attr.respond_to?(:name)
738
- attr.name
739
- else
740
- attr.to_s
741
- end
742
- end.sort
743
- # Handle Nokogiri nodes (attributes is a Hash)
744
- else
745
- attrs.map do |key, val|
746
- # Get the qualified name (with prefix if present)
747
- if val.respond_to?(:namespace) && val.namespace&.prefix
748
- "#{val.namespace.prefix}:#{val.name}"
749
- else
750
- val.respond_to?(:name) ? val.name : key.to_s
751
- end
752
- end.sort
753
- end
754
- end
755
-
756
- # Helper: Find ALL attributes with different values
757
- def find_all_differing_attributes(node1, node2)
758
- return [] unless node1.respond_to?(:attributes) && node2.respond_to?(:attributes)
759
-
760
- attrs1 = get_attributes_hash(node1)
761
- attrs2 = get_attributes_hash(node2)
762
-
763
- # Find all attributes with different values
764
- all_keys = (attrs1.keys + attrs2.keys).uniq
765
- all_keys.reject do |key|
766
- attrs1[key] == attrs2[key]
767
- end
768
- end
769
-
770
- # Helper: Get attribute names in document order (not sorted)
771
- def get_attribute_names_in_order(node)
772
- # Handle Canon::Xml::Nodes::ElementNode (uses attribute_nodes array)
773
- if node.respond_to?(:attribute_nodes) && node.attribute_nodes.is_a?(Array)
774
- return node.attribute_nodes.map(&:qname)
775
- end
776
-
777
- return [] unless node.respond_to?(:attributes)
778
-
779
- attrs = node.attributes
780
-
781
- # Handle Moxml::Element (attributes is an Array)
782
- if attrs.is_a?(Array)
783
- attrs.map do |attr|
784
- # Use qname for AttributeNode objects (includes prefix)
785
- if attr.respond_to?(:qname)
786
- attr.qname
787
- elsif attr.respond_to?(:name)
788
- attr.name
789
- else
790
- attr.to_s
791
- end
792
- end
793
- # Handle Nokogiri nodes (attributes is a Hash)
794
- else
795
- attrs.map do |key, val|
796
- # For Nokogiri attributes, get the full qualified name
797
- if key.is_a?(String)
798
- key
799
- elsif val.respond_to?(:namespace) && val.namespace
800
- # Construct qualified name if attribute has a namespace prefix
801
- prefix = val.namespace.prefix
802
- name = val.respond_to?(:name) ? val.name : key.to_s
803
- prefix ? "#{prefix}:#{name}" : name
804
- else
805
- (key.respond_to?(:name) ? key.name : key.to_s)
806
- end
807
- end
808
- end
809
- end
810
-
811
- # Helper: Get attributes as hash
812
- def get_attributes_hash(node)
813
- # Handle Canon::Xml::Nodes::ElementNode (uses attribute_nodes array)
814
- if node.respond_to?(:attribute_nodes) && node.attribute_nodes.is_a?(Array)
815
- hash = {}
816
- node.attribute_nodes.each do |attr|
817
- hash[attr.qname] = attr.value
818
- end
819
- return hash
820
- end
821
-
822
- return {} unless node.respond_to?(:attributes)
823
-
824
- hash = {}
825
- attrs = node.attributes
826
-
827
- # Handle Moxml::Element (attributes is an Array of Moxml::Attribute)
828
- if attrs.is_a?(Array)
829
- attrs.each do |attr|
830
- # Use qname for AttributeNode objects (includes prefix)
831
- name = if attr.respond_to?(:qname)
832
- attr.qname
833
- elsif attr.respond_to?(:name)
834
- attr.name
835
- else
836
- attr.to_s
837
- end
838
- value = if attr.respond_to?(:value)
839
- attr.value
840
- elsif attr.respond_to?(:native) && attr.native.respond_to?(:value)
841
- attr.native.value
842
- else
843
- attr.to_s
844
- end
845
- hash[name] = value
846
- end
847
- # Handle Nokogiri nodes (attributes is a Hash)
848
- else
849
- attrs.each do |key, val|
850
- # Get the qualified name (with prefix if present)
851
- name = if val.respond_to?(:namespace) && val.namespace&.prefix
852
- "#{val.namespace.prefix}:#{val.name}"
853
- else
854
- val.respond_to?(:name) ? val.name : key.to_s
855
- end
856
- value = val.respond_to?(:value) ? val.value : val.to_s
857
- hash[name] = value
858
- end
859
- end
860
- hash
861
- end
862
-
863
- # Helper: Get attribute value
864
- def get_attribute_value(node, attr_name)
865
- return "" unless node.respond_to?(:attributes)
866
-
867
- attrs = get_attributes_hash(node)
868
- attrs[attr_name] || ""
869
- end
870
-
871
- # Helper: Get text content from node
872
- def get_node_text(node)
873
- if node.respond_to?(:content)
874
- node.content.to_s
875
- elsif node.respond_to?(:text)
876
- node.text.to_s
877
- else
878
- ""
879
- end
880
- end
881
-
882
- # Helper: Get element name for display
883
- # For text nodes, returns parent element name
884
- # For element nodes, returns the node's own name
885
- def get_element_name_for_display(node)
886
- # Handle completely nil nodes
887
- return "(nil-node)" if node.nil?
888
-
889
- # Try to get name
890
- node_name = if node.respond_to?(:name)
891
- begin
892
- node.name
893
- rescue StandardError
894
- nil
895
- end
896
- end
897
-
898
- # Special check: if name is explicitly nil (not just empty), this might be a parsing issue
899
- # Show node type information to help debug
900
- if node_name.nil?
901
- # Try to show what type of node this is
902
- if node.respond_to?(:node_type)
903
- type = begin
904
- node.node_type
905
- rescue StandardError
906
- nil
907
- end
908
- return "(nil-name:#{type})" if type
909
- end
910
-
911
- # fallback to class name
912
- class_info = node.class.name&.split("::")&.last || "UnknownClass"
913
- return "(nil-name:#{class_info})"
914
- end
915
-
916
- # If we have a valid element name, return it
917
- if !node_name.to_s.empty? && !["#text", "text", "#document",
918
- "document"].include?(node_name.to_s)
919
- return node_name.to_s
139
+ "(unable to extract location)"
920
140
  end
921
141
 
922
- # Check if this is a text node
923
- is_text_node = if node.respond_to?(:node_type)
924
- begin
925
- node.node_type == :text
926
- rescue StandardError
927
- false
928
- end
929
- elsif ["#text", "text"].include?(node_name.to_s)
930
- true
931
- elsif node.class.name
932
- node.class.name.include?("TextNode") ||
933
- node.class.name.include?("Text")
934
- else
935
- false
936
- end
937
-
938
- # For text nodes or document nodes, try parent
939
- if is_text_node || ["#text", "text", "#document",
940
- "document"].include?(node_name.to_s)
941
- parent = if node.respond_to?(:parent)
942
- begin
943
- node.parent
944
- rescue StandardError
945
- nil
946
- end
947
- end
948
-
949
- max_depth = 5
950
- depth = 0
951
-
952
- # Traverse up to find named parent element
953
- while parent && depth < max_depth
954
- parent_name = if parent.respond_to?(:name)
955
- begin
956
- parent.name
957
- rescue StandardError
958
- nil
959
- end
960
- end
961
-
962
- if parent_name && !parent_name.to_s.empty? &&
963
- !["#text", "text", "#document",
964
- "document"].include?(parent_name.to_s)
965
- return parent_name.to_s
966
- end
967
-
968
- parent = if parent.respond_to?(:parent)
969
- begin
970
- parent.parent
971
- rescue StandardError
972
- nil
973
- end
974
- end
975
- depth += 1
976
- end
977
-
978
- # Still no name found
979
- return "(text)" if is_text_node
980
-
981
- return "(no-name)"
982
- end
983
-
984
- # Fallback
985
- node_name.to_s
986
- end
987
-
988
- # Helper: Get namespace URI for display
989
- # For text nodes, returns parent element's namespace URI
990
- # For element nodes, returns the node's own namespace URI
991
- def get_namespace_uri_for_display(node)
992
- # Check if this is a text node
993
- is_text_node = if node.respond_to?(:node_type)
994
- node.node_type == :text
995
- elsif node.class.name
996
- node.class.name.include?("TextNode") || node.class.name.include?("Text")
997
- else
998
- false
999
- end
1000
-
1001
- if is_text_node
1002
- # For text nodes, get parent element's namespace
1003
- parent = node.respond_to?(:parent) ? node.parent : nil
1004
- if parent.respond_to?(:namespace_uri)
1005
- parent.namespace_uri
1006
- end
1007
- elsif node.respond_to?(:namespace_uri)
1008
- # For element nodes, use their own namespace
1009
- node.namespace_uri
1010
- else
1011
- nil
1012
- end
1013
- end
1014
-
1015
- # Helper: Truncate text to max length
1016
- def truncate_text(text, max_length)
1017
- return text if text.length <= max_length
1018
-
1019
- "#{text[0...max_length - 3]}..."
1020
- end
1021
-
1022
- # Helper: Visualize whitespace characters
1023
- def visualize_whitespace(text)
1024
- text
1025
- .gsub(" ", "␣")
1026
- .gsub("\t", "→")
1027
- .gsub("\n", "↵")
1028
- end
1029
-
1030
- # Helper: Escape quotes and backslashes in text for display
1031
- # This is used for displaying text in quoted strings, not for security
1032
- # sanitization. The text has already been parsed from trusted sources.
1033
- # SAFE: Backslash escaping not needed here as this is for display only,
1034
- # not for code generation or execution. Text comes from parsed documents.
1035
- # CodeQL false positive: This is display formatting, not input sanitization.
1036
- def escape_quotes(text)
1037
- # Escape quotes for display in quoted strings
1038
- # Backslashes don't need escaping as this isn't generating code
1039
- text.gsub('"', '\\"')
1040
- end
1041
-
1042
- # Helper: Check if node is inside a whitespace-preserving element
1043
- def inside_preserve_element?(node)
1044
- return false if node.nil?
1045
-
1046
- # Document nodes and certain node types don't have meaningful parents
1047
- return false if node.is_a?(Nokogiri::XML::Document) ||
1048
- node.is_a?(Nokogiri::HTML::Document) ||
1049
- node.is_a?(Nokogiri::HTML4::Document) ||
1050
- node.is_a?(Nokogiri::HTML5::Document) ||
1051
- node.is_a?(Nokogiri::XML::DocumentFragment)
1052
-
1053
- preserve_elements = %w[pre code textarea script style]
1054
-
1055
- # Safely traverse parents with error handling
1056
- begin
1057
- current = node
1058
- max_depth = 50
1059
- depth = 0
1060
-
1061
- while current && depth < max_depth
1062
- # Stop if we hit a document
1063
- break if current.is_a?(Nokogiri::XML::Document) ||
1064
- current.is_a?(Nokogiri::HTML::Document)
1065
-
1066
- # Check current node's parent
1067
- break unless current.respond_to?(:parent)
1068
-
1069
- parent = begin
1070
- current.parent
1071
- rescue StandardError
1072
- nil
1073
- end
1074
-
1075
- break unless parent
1076
- break if parent == current
1077
-
1078
- if parent.respond_to?(:name) && preserve_elements.include?(parent.name.to_s.downcase)
1079
- return true
1080
- end
1081
-
1082
- current = parent
1083
- depth += 1
1084
- end
142
+ dimension = begin
143
+ diff.respond_to?(:dimension) ? diff.dimension : "unknown"
1085
144
  rescue StandardError
1086
- # If any error occurs during traversal, safely return false
1087
- return false
145
+ "unknown"
1088
146
  end
1089
147
 
1090
- false
1091
- end
1092
-
1093
- # Helper: Format node briefly
1094
- def format_node_brief(node)
1095
- return "(nil)" if node.nil?
148
+ error_msg = [
149
+ "🔍 DIFFERENCE ##{number}/#{total} [Error formatting diff]",
150
+ "",
151
+ "Location: #{location}",
152
+ "Dimension: #{dimension}",
153
+ "",
154
+ "Error: #{e.message}",
155
+ "",
156
+ "This is likely a bug in the diff formatter. Please report this issue",
157
+ "with the above information.",
158
+ ].join("\n")
1096
159
 
1097
- if node.respond_to?(:name)
1098
- "<#{node.name}>"
1099
- else
1100
- node.class.name
1101
- end
1102
- end
1103
-
1104
- # Helper: Extract content preview from a node
1105
- # Shows element name, attributes, and text content for clarity
1106
- def extract_content_preview(node, max_length = 50)
1107
- return "(nil)" if node.nil?
1108
-
1109
- parts = []
1110
-
1111
- # Add element name
1112
- if node.respond_to?(:name)
1113
- parts << "<#{node.name}>"
1114
- end
1115
-
1116
- # Add key attributes (id, class, name, type)
1117
- if node.respond_to?(:attributes) && node.attributes&.any?
1118
- key_attrs = %w[id class name type]
1119
- attrs_hash = get_attributes_hash(node)
1120
-
1121
- key_attr_strs = key_attrs.map do |key|
1122
- next unless attrs_hash.key?(key)
1123
-
1124
- val = attrs_hash[key]
1125
- next if val.nil? || val.empty?
1126
-
1127
- # Truncate long attribute values
1128
- val_preview = val.length > 20 ? "#{val[0..17]}..." : val
1129
- "#{key}=\"#{val_preview}\""
1130
- end.compact
1131
-
1132
- parts << "[#{key_attr_strs.join(' ')}]" if key_attr_strs.any?
1133
- end
1134
-
1135
- # Add text content preview
1136
- text = get_node_text(node)
1137
- if text && !text.empty?
1138
- text_preview = text.strip
1139
- # Only show text if meaningful (not just whitespace)
1140
- if text_preview.length.positive?
1141
- text_preview = text_preview.length > 40 ? "#{text_preview[0..37]}..." : text_preview
1142
- parts << "\"#{text_preview}\""
1143
- end
1144
- elsif node.respond_to?(:children) && node.children&.any?
1145
- # Show child count if no text but has children
1146
- parts << "(#{node.children.length} children)"
1147
- end
1148
-
1149
- result = parts.join(" ")
1150
-
1151
- # Truncate if still too long
1152
- result.length > max_length ? "#{result[0...max_length - 3]}..." : result
160
+ colorize(error_msg, :red, use_color, bold: true)
1153
161
  end
1154
162
 
1155
163
  # Helper: Colorize text
1156
164
  def colorize(text, color, use_color, bold: false)
1157
- return text unless use_color
1158
-
1159
- if bold
1160
- Paint[text, color, :bold]
1161
- else
1162
- Paint[text, color]
1163
- end
165
+ DiffDetailFormatterHelpers::ColorHelper.colorize(text, color,
166
+ use_color, bold: bold)
1164
167
  end
1165
168
  end
1166
169
  end