canon 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +14 -71
  4. data/Rakefile +17 -0
  5. data/lib/canon/cli.rb +1 -1
  6. data/lib/canon/color_detector.rb +3 -5
  7. data/lib/canon/comparison/compare_profile.rb +1 -4
  8. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  9. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  10. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  11. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  15. data/lib/canon/comparison/format_detector.rb +29 -20
  16. data/lib/canon/comparison/html_comparator.rb +18 -29
  17. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  18. data/lib/canon/comparison/html_parser.rb +1 -1
  19. data/lib/canon/comparison/json_comparator.rb +8 -0
  20. data/lib/canon/comparison/node_inspector.rb +146 -80
  21. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  22. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  23. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  24. data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
  25. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
  26. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  27. data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
  28. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  29. data/lib/canon/comparison/xml_comparator.rb +61 -83
  30. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  31. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  32. data/lib/canon/comparison.rb +23 -23
  33. data/lib/canon/config/profile_loader.rb +13 -13
  34. data/lib/canon/config.rb +29 -5
  35. data/lib/canon/diff/diff_classifier.rb +7 -41
  36. data/lib/canon/diff/diff_line.rb +1 -1
  37. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  38. data/lib/canon/diff/node_serializer.rb +23 -30
  39. data/lib/canon/diff/path_builder.rb +24 -37
  40. data/lib/canon/diff/source_locator.rb +0 -3
  41. data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
  42. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  43. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  44. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  45. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  46. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  47. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  48. data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
  49. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
  50. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
  51. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  52. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  53. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  54. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  55. data/lib/canon/diff_formatter/legend.rb +2 -2
  56. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  57. data/lib/canon/diff_formatter/theme.rb +4 -4
  58. data/lib/canon/diff_formatter.rb +2 -2
  59. data/lib/canon/formatters/html_formatter.rb +1 -1
  60. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  61. data/lib/canon/formatters/xml_formatter.rb +7 -32
  62. data/lib/canon/html/data_model.rb +1 -1
  63. data/lib/canon/pretty_printer/html.rb +1 -1
  64. data/lib/canon/pretty_printer/xml.rb +16 -7
  65. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  66. data/lib/canon/rspec_matchers.rb +2 -2
  67. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  68. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  69. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  70. data/lib/canon/validators/html_validator.rb +1 -1
  71. data/lib/canon/validators/xml_validator.rb +1 -1
  72. data/lib/canon/version.rb +1 -1
  73. data/lib/canon/xml/data_model.rb +131 -137
  74. data/lib/canon/xml/namespace_helper.rb +5 -0
  75. data/lib/canon/xml/node.rb +2 -1
  76. data/lib/canon/xml/nodes/root_node.rb +4 -0
  77. data/lib/canon/xml/nodes/text_node.rb +6 -1
  78. data/lib/canon/xml/sax_builder.rb +4 -6
  79. data/lib/canon/xml_backend.rb +49 -0
  80. data/lib/canon/xml_parsing.rb +271 -0
  81. data/lib/canon.rb +3 -1
  82. data/lib/tasks/benchmark_runner.rb +1 -1
  83. data/lib/tasks/performance_helpers.rb +1 -1
  84. metadata +5 -2
@@ -77,21 +77,22 @@ module Canon
77
77
  # @return [Canon::Xml::Node] Converted node
78
78
  def self.convert_from_node(node, preserve_whitespace: false,
79
79
  parser: nil)
80
- # FAST PATH: Convert Nokogiri/Moxml nodes directly without string round-trip
81
- if defined?(Nokogiri::XML::Node) && node.is_a?(Nokogiri::XML::Node)
82
- return Canon::Xml::DataModel.build_from_nokogiri(
80
+ if Canon::XmlBackend.nokogiri?
81
+ if node.is_a?(Nokogiri::XML::Node)
82
+ return Canon::Xml::DataModel.build_from_nokogiri(
83
+ node, preserve_whitespace: preserve_whitespace
84
+ )
85
+ end
86
+ elsif node.is_a?(Moxml::Node)
87
+ return Canon::Xml::DataModel.build_from_moxml(
83
88
  node, preserve_whitespace: preserve_whitespace
84
89
  )
85
90
  end
86
91
 
87
- # SLOW PATH: Fallback to string serialization for unknown node types
88
- xml_str = if node.respond_to?(:to_xml)
89
- node.to_xml
90
- elsif node.respond_to?(:to_s)
91
- node.to_s
92
+ xml_str = if node.is_a?(String)
93
+ node
92
94
  else
93
- raise Canon::Error,
94
- "Unable to convert node to string: #{node.class}"
95
+ node.to_xml
95
96
  end
96
97
 
97
98
  resolved_parser = parser || resolve_parser_config
@@ -112,7 +113,7 @@ parser: nil)
112
113
  def self.resolve_parser_config
113
114
  Canon::Config.instance.xml.diff.parser
114
115
  rescue StandardError
115
- :sax
116
+ Canon::XmlBackend.nokogiri? ? :sax : :dom
116
117
  end
117
118
  end
118
119
  end
@@ -7,90 +7,62 @@ module Canon
7
7
  #
8
8
  # Handles dispatching comparison logic based on node type.
9
9
  # Supports both Canon::Xml::Node (with symbolic node_type) and
10
- # Moxml/Nokogiri nodes (with predicate methods like element?, text?, etc.)
11
- #
12
- # This module encapsulates the complex node type detection and dispatch
13
- # logic, making the main XmlComparator cleaner and more maintainable.
10
+ # backend nodes (Nokogiri/Moxml) via XmlParsing type checks.
14
11
  module NodeTypeComparator
15
12
  class << self
16
- # Compare two nodes by dispatching to appropriate comparison method
17
- #
18
- # @param node1 [Object] First node
19
- # @param node2 [Object] Second node
20
- # @param comparator [XmlComparator] The comparator instance for method delegation
21
- # @param opts [Hash] Comparison options
22
- # @param child_opts [Hash] Options for child comparison
23
- # @param diff_children [Boolean] Whether to diff children
24
- # @param differences [Array] Array to collect differences
25
- # @return [Integer] Comparison result code
26
13
  def compare(node1, node2, comparator, opts, child_opts,
27
14
  diff_children, differences)
28
- # Dispatch based on node type
29
- # Canon::Xml::Node types use .node_type method that returns symbols
30
- # Nokogiri also has .node_type but returns integers, so check for Symbol
31
- if node1.respond_to?(:node_type) && node2.respond_to?(:node_type) &&
32
- node1.node_type.is_a?(Symbol) && node2.node_type.is_a?(Symbol)
15
+ if node1.is_a?(Canon::Xml::Node) && node2.is_a?(Canon::Xml::Node)
33
16
  compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
34
17
  diff_children, differences)
35
- # Moxml/Nokogiri types use .element?, .text?, etc. methods
36
18
  else
37
- compare_by_predicate_methods(node1, node2, comparator, opts, child_opts,
38
- diff_children, differences)
19
+ compare_by_backend_type(node1, node2, comparator, opts, child_opts,
20
+ diff_children, differences)
39
21
  end
40
22
  end
41
23
 
42
24
  private
43
25
 
44
- # Compare nodes using symbolic node_type (Canon::Xml::Node)
45
26
  def compare_by_symbolic_type(node1, node2, comparator, opts, child_opts,
46
27
  diff_children, differences)
47
28
  case node1.node_type
48
29
  when :root
49
- comparator.send(:compare_children, node1, node2, opts, child_opts,
50
- diff_children, differences)
30
+ comparator.compare_children(node1, node2, opts, child_opts,
31
+ diff_children, differences)
51
32
  when :element
52
- comparator.send(:compare_element_nodes, node1, node2, opts, child_opts,
53
- diff_children, differences)
33
+ comparator.compare_element_nodes(node1, node2, opts, child_opts,
34
+ diff_children, differences)
54
35
  when :text
55
- comparator.send(:compare_text_nodes, node1, node2, opts,
56
- differences)
36
+ comparator.compare_text_nodes(node1, node2, opts, differences)
57
37
  when :comment
58
- comparator.send(:compare_comment_nodes, node1, node2, opts,
59
- differences)
38
+ comparator.compare_comment_nodes(node1, node2, opts, differences)
60
39
  when :cdata
61
- comparator.send(:compare_text_nodes, node1, node2, opts,
62
- differences)
40
+ comparator.compare_text_nodes(node1, node2, opts, differences)
63
41
  when :processing_instruction
64
- comparator.send(:compare_processing_instruction_nodes, node1, node2, opts,
65
- differences)
42
+ comparator.compare_processing_instruction_nodes(node1, node2, opts,
43
+ differences)
66
44
  else
67
45
  Comparison::EQUIVALENT
68
46
  end
69
47
  end
70
48
 
71
- # Compare nodes using predicate methods (Moxml/Nokogiri)
72
- def compare_by_predicate_methods(node1, node2, comparator, opts, child_opts,
73
- diff_children, differences)
74
- if node1.respond_to?(:element?) && node1.element?
75
- comparator.send(:compare_element_nodes, node1, node2, opts, child_opts,
76
- diff_children, differences)
77
- elsif node1.respond_to?(:text?) && node1.text?
78
- comparator.send(:compare_text_nodes, node1, node2, opts,
79
- differences)
80
- elsif node1.respond_to?(:comment?) && node1.comment?
81
- comparator.send(:compare_comment_nodes, node1, node2, opts,
82
- differences)
83
- elsif node1.respond_to?(:cdata?) && node1.cdata?
84
- comparator.send(:compare_text_nodes, node1, node2, opts,
85
- differences)
86
- elsif node1.respond_to?(:processing_instruction?) &&
87
- node1.processing_instruction?
88
- comparator.send(:compare_processing_instruction_nodes, node1, node2, opts,
89
- differences)
90
- elsif node1.respond_to?(:root)
91
- # Document node (Moxml/Nokogiri - legacy path)
92
- comparator.send(:compare_document_nodes, node1, node2, opts, child_opts,
93
- diff_children, differences)
49
+ def compare_by_backend_type(node1, node2, comparator, opts, child_opts,
50
+ diff_children, differences)
51
+ if Canon::XmlParsing.element?(node1)
52
+ comparator.compare_element_nodes(node1, node2, opts, child_opts,
53
+ diff_children, differences)
54
+ elsif Canon::XmlParsing.text_node?(node1)
55
+ comparator.compare_text_nodes(node1, node2, opts, differences)
56
+ elsif Canon::XmlParsing.comment?(node1)
57
+ comparator.compare_comment_nodes(node1, node2, opts, differences)
58
+ elsif Canon::XmlParsing.cdata?(node1)
59
+ comparator.compare_text_nodes(node1, node2, opts, differences)
60
+ elsif Canon::XmlParsing.processing_instruction?(node1)
61
+ comparator.compare_processing_instruction_nodes(node1, node2, opts,
62
+ differences)
63
+ elsif Canon::XmlParsing.document?(node1)
64
+ comparator.compare_document_nodes(node1, node2, opts, child_opts,
65
+ diff_children, differences)
94
66
  else
95
67
  Comparison::EQUIVALENT
96
68
  end
@@ -122,16 +122,8 @@ module Canon
122
122
  preserve_whitespace: preserve_whitespace)
123
123
 
124
124
  # Store original strings for line diff display (before preprocessing)
125
- original1 = if n1.is_a?(String)
126
- n1
127
- else
128
- (n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
129
- end
130
- original2 = if n2.is_a?(String)
131
- n2
132
- else
133
- (n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
134
- end
125
+ original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
126
+ original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
135
127
 
136
128
  differences = []
137
129
  diff_children = opts[:diff_children] || false
@@ -187,16 +179,9 @@ module Canon
187
179
  # @return [Boolean, ComparisonResult] Result of tree diff comparison
188
180
  def perform_semantic_tree_diff(n1, n2, opts, match_opts_hash)
189
181
  # Store original strings for line diff display (before preprocessing)
190
- original1 = if n1.is_a?(String)
191
- n1
192
- else
193
- (n1.respond_to?(:to_xml) ? n1.to_xml : n1.to_s)
194
- end
195
- original2 = if n2.is_a?(String)
196
- n2
197
- else
198
- (n2.respond_to?(:to_xml) ? n2.to_xml : n2.to_s)
199
- end
182
+ # Store original strings for line diff display (before preprocessing)
183
+ original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
184
+ original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
200
185
 
201
186
  # Parse to Canon::Xml::Node (preserves preprocessing)
202
187
  node1 = parse_node(n1, match_opts_hash[:preprocessing])
@@ -262,20 +247,8 @@ module Canon
262
247
  serialize_node(node1).gsub("><", ">\n<"),
263
248
  serialize_node(node2).gsub("><", ">\n<"),
264
249
  ]
265
- original1 = if n1.is_a?(String)
266
- n1
267
- elsif n1.respond_to?(:to_xml)
268
- n1.to_xml
269
- else
270
- n1.to_s
271
- end
272
- original2 = if n2.is_a?(String)
273
- n2
274
- elsif n2.respond_to?(:to_xml)
275
- n2.to_xml
276
- else
277
- n2.to_s
278
- end
250
+ original1 = n1.is_a?(String) ? n1 : serialize_node(n1)
251
+ original2 = n2.is_a?(String) ? n2 : serialize_node(n2)
279
252
 
280
253
  ComparisonResult.new(
281
254
  differences: [],
@@ -289,14 +262,20 @@ module Canon
289
262
 
290
263
  public
291
264
 
265
+ # Public parsing API for external callers
266
+ def parse(node, preprocessing = :none, preserve_whitespace: false)
267
+ parse_node(node, preprocessing,
268
+ preserve_whitespace: preserve_whitespace)
269
+ end
270
+
292
271
  # Main comparison dispatcher
293
272
  def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
294
273
  # FAST PATH: Object identity - same object is always equivalent
295
274
  return Comparison::EQUIVALENT if n1.equal?(n2)
296
275
 
297
276
  # Handle DocumentFragment nodes - compare their children instead
298
- if n1.is_a?(Nokogiri::XML::DocumentFragment) &&
299
- n2.is_a?(Nokogiri::XML::DocumentFragment)
277
+ if Canon::XmlParsing.document_fragment?(n1) &&
278
+ Canon::XmlParsing.document_fragment?(n2)
300
279
  children1 = n1.children.to_a
301
280
  children2 = n2.children.to_a
302
281
 
@@ -392,8 +371,8 @@ module Canon
392
371
  end
393
372
 
394
373
  # Compare namespace URIs - elements with different namespaces are different elements
395
- ns1 = n1.respond_to?(:namespace_uri) ? n1.namespace_uri : nil
396
- ns2 = n2.respond_to?(:namespace_uri) ? n2.namespace_uri : nil
374
+ ns1 = Canon::XmlParsing.namespace_uri(n1)
375
+ ns2 = Canon::XmlParsing.namespace_uri(n2)
397
376
 
398
377
  unless ns1 == ns2
399
378
  # Create descriptive reason showing the actual namespace URIs
@@ -410,18 +389,30 @@ module Canon
410
389
  return Comparison::UNEQUAL_ELEMENTS
411
390
  end
412
391
 
392
+ # Track the worst result across namespace, attribute, and children
393
+ # comparisons. Do NOT return early on attribute/namespace mismatches —
394
+ # children must still be compared so structural differences in the
395
+ # subtree are reported. Early returns caused the comparator to skip
396
+ # entire subtrees when a root or intermediate element had different
397
+ # attributes, missing all nested structural changes.
398
+ worst_result = Comparison::EQUIVALENT
399
+
413
400
  # Compare namespace declarations (xmlns and xmlns:* attributes)
414
401
  ns_result = compare_namespace_declarations(n1, n2, opts, differences)
415
- return ns_result unless ns_result == Comparison::EQUIVALENT
402
+ worst_result = ns_result unless ns_result == Comparison::EQUIVALENT
416
403
 
417
404
  # Compare attributes
418
405
  attr_result = compare_attribute_sets(n1, n2, opts, differences)
419
- return attr_result unless attr_result == Comparison::EQUIVALENT
406
+ worst_result = attr_result unless attr_result == Comparison::EQUIVALENT
420
407
 
421
408
  # Compare children if not ignored
422
- return Comparison::EQUIVALENT if opts[:ignore_children]
409
+ unless opts[:ignore_children]
410
+ child_result = compare_children(n1, n2, opts, child_opts,
411
+ diff_children, differences)
412
+ worst_result = child_result unless child_result == Comparison::EQUIVALENT
413
+ end
423
414
 
424
- compare_children(n1, n2, opts, child_opts, diff_children, differences)
415
+ worst_result
425
416
  end
426
417
 
427
418
  # Compare attribute sets
@@ -500,7 +491,7 @@ module Canon
500
491
  def should_preserve_whitespace_strictly?(n1, n2, opts)
501
492
  # Check both n1 and n2 - if either is in a preserve whitespace element, preserve strictly
502
493
  [n1, n2].each do |node|
503
- next unless node.respond_to?(:parent)
494
+ next unless Canon::XmlParsing.xml_node?(node) || node.is_a?(Canon::Xml::Node)
504
495
 
505
496
  parent = node.parent
506
497
  next unless parent
@@ -516,15 +507,12 @@ module Canon
516
507
  # Check if a node is inside a whitespace-preserving element
517
508
  def in_preserve_element?(node, preserve_list)
518
509
  current = node.parent
519
- while current.respond_to?(:name)
510
+ while Canon::XmlParsing.xml_node?(current) || current.is_a?(Canon::Xml::Node)
520
511
  return true if preserve_list.include?(current.name.downcase)
521
512
 
522
- # Stop at document root
523
- break if current.is_a?(Nokogiri::XML::Document) ||
524
- current.is_a?(Nokogiri::HTML4::Document) ||
525
- current.is_a?(Nokogiri::HTML5::Document)
513
+ break if Canon::XmlParsing.document?(current)
526
514
 
527
- current = current.parent if current.respond_to?(:parent)
515
+ current = current.parent
528
516
  break unless current
529
517
  end
530
518
  false
@@ -567,8 +555,8 @@ module Canon
567
555
  return Comparison::UNEQUAL_NODES_TYPES
568
556
  end
569
557
 
570
- content1 = n1.respond_to?(:content) ? n1.content.to_s.strip : ""
571
- content2 = n2.respond_to?(:content) ? n2.content.to_s.strip : ""
558
+ content1 = Canon::XmlParsing.xml_node?(n1) ? n1.content.to_s.strip : ""
559
+ content2 = Canon::XmlParsing.xml_node?(n2) ? n2.content.to_s.strip : ""
572
560
 
573
561
  if content1 == content2
574
562
  Comparison::EQUIVALENT
@@ -618,17 +606,19 @@ differences)
618
606
  depth = 0
619
607
 
620
608
  while current && depth < max_depth
621
- if current.respond_to?(:name) && current.name
622
- path.unshift(current.name)
623
- end
609
+ n = if current.is_a?(Canon::Xml::Node)
610
+ current.name
611
+ elsif Canon::XmlParsing.xml_node?(current)
612
+ current.name
613
+ end
614
+ path.unshift(n) if n
624
615
 
625
- break unless current.respond_to?(:parent)
616
+ break unless Canon::XmlParsing.xml_node?(current) || current.is_a?(Canon::Xml::Node)
626
617
 
627
618
  current = current.parent
628
619
  depth += 1
629
620
 
630
- # Stop at document root
631
- break if current.respond_to?(:root)
621
+ break if Canon::XmlParsing.document?(current)
632
622
  end
633
623
 
634
624
  path
@@ -665,8 +655,8 @@ differences)
665
655
  # For deleted/inserted nodes, include namespace information if available
666
656
  if dimension == :text_content && (node1.nil? || node2.nil?)
667
657
  node = node1 || node2
668
- if node.respond_to?(:name) && node.respond_to?(:namespace_uri)
669
- ns = node.namespace_uri
658
+ if Canon::XmlParsing.xml_node?(node)
659
+ ns = Canon::XmlParsing.namespace_uri(node)
670
660
  ns_info = if ns.nil? || ns.empty?
671
661
  ""
672
662
  else
@@ -674,9 +664,8 @@ differences)
674
664
  end
675
665
  label = Canon::Comparison.code_pair_label(diff1, diff2)
676
666
  return "element '#{node.name}'#{ns_info}: #{label}"
677
- elsif node.respond_to?(:name) && !node.respond_to?(:namespace_uri)
678
- # TextNode and other nodes without namespace_uri
679
- display = if node.respond_to?(:value) && node.node_type == :text
667
+ elsif node.is_a?(Canon::Xml::Node)
668
+ display = if node.is_a?(Canon::Xml::Nodes::TextNode)
680
669
  "\"#{truncate_text(node.value)}\""
681
670
  else
682
671
  node.name.to_s
@@ -726,8 +715,8 @@ differences)
726
715
  elsif dimension == :element_structure &&
727
716
  diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
728
717
  diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
729
- (node1.is_a?(Canon::Xml::Node) || node1.is_a?(Nokogiri::XML::Node)) &&
730
- (node2.is_a?(Canon::Xml::Node) || node2.is_a?(Nokogiri::XML::Node)) &&
718
+ (node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
719
+ (node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
731
720
  node1.name && node2.name && node1.name != node2.name
732
721
  # Most common case: differing element names. Surface the
733
722
  # actual names rather than a generic "elements differ".
@@ -798,27 +787,16 @@ differences)
798
787
  # @return [String, nil] Text content or nil
799
788
  def extract_text_from_node(node)
800
789
  return nil if node.nil?
801
-
802
- # For Canon::Xml::Nodes::TextNode
803
- return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
804
-
805
- # For XML/HTML nodes with text_content method
806
- return node.text_content if node.respond_to?(:text_content)
807
-
808
- # For nodes with text method
809
- return node.text if node.respond_to?(:text)
810
-
811
- # For nodes with content method (Moxml::Text)
812
- return node.content if node.respond_to?(:content)
813
-
814
- # For nodes with value method (other types)
815
- return node.value if node.respond_to?(:value)
816
-
817
- # For simple text nodes or strings
818
790
  return node.to_s if node.is_a?(String)
819
791
 
820
- # For other node types, try to_s
821
- node.to_s
792
+ case node
793
+ when Canon::Xml::Nodes::TextNode
794
+ node.value
795
+ when Canon::Xml::Node
796
+ node.text_content
797
+ else
798
+ Canon::XmlParsing.xml_node?(node) ? Canon::XmlParsing.text_content(node).to_s : node.to_s
799
+ end
822
800
  rescue StandardError
823
801
  nil
824
802
  end
@@ -27,8 +27,8 @@ module Canon
27
27
  def self.compare_nodes(node1, node2, opts, child_opts, diff_children,
28
28
  differences)
29
29
  # Handle DocumentFragment nodes - compare their children instead
30
- if node1.is_a?(Nokogiri::XML::DocumentFragment) &&
31
- node2.is_a?(Nokogiri::XML::DocumentFragment)
30
+ if Canon::XmlParsing.document_fragment?(node1) &&
31
+ Canon::XmlParsing.document_fragment?(node2)
32
32
  return compare_document_fragments(node1, node2, opts, child_opts,
33
33
  diff_children, differences)
34
34
  end
@@ -285,10 +285,14 @@ diff_children, differences)
285
285
  return false if node1.class != node2.class
286
286
 
287
287
  case node1
288
- when Canon::Xml::Node, Nokogiri::XML::Node
288
+ when Canon::Xml::Node
289
289
  node1.node_type == node2.node_type
290
290
  else
291
- true
291
+ if Canon::XmlBackend.nokogiri?
292
+ node1.is_a?(Nokogiri::XML::Node) && node1.node_type == node2.node_type
293
+ else
294
+ Canon::XmlParsing.xml_node?(node1) && Canon::XmlParsing.node_type(node1) == Canon::XmlParsing.node_type(node2)
295
+ end
292
296
  end
293
297
  end
294
298
 
@@ -305,7 +309,7 @@ diff_children, differences)
305
309
  def self.comment_node?(node, check_children: false)
306
310
  return true if NodeInspector.comment_node?(node)
307
311
 
308
- if check_children && node.is_a?(Nokogiri::XML::Element) && !node.children.empty?
312
+ if check_children && Canon::XmlParsing.element?(node) && !Canon::XmlParsing.children(node).empty?
309
313
  node.children.any? { |child| NodeInspector.comment_node?(child) }
310
314
  else
311
315
  false
@@ -360,24 +364,20 @@ diff_children, differences)
360
364
  # Dispatch by legacy Nokogiri/Moxml node type
361
365
  def self.dispatch_legacy_node_type(node1, node2, opts, child_opts,
362
366
  diff_children, differences)
363
- # Import XmlComparator to use its comparison methods
364
367
  require_relative "xml_comparator"
365
368
 
366
- case node1
367
- when Nokogiri::XML::Document
369
+ if Canon::XmlParsing.document?(node1)
368
370
  XmlComparator.compare_document_nodes(node1, node2, opts, child_opts,
369
371
  diff_children, differences)
370
- when Nokogiri::XML::Node
371
- if node1.element?
372
+ elsif Canon::XmlParsing.xml_node?(node1)
373
+ if Canon::XmlParsing.element?(node1)
372
374
  XmlComparator.compare_element_nodes(node1, node2, opts, child_opts,
373
375
  diff_children, differences)
374
- elsif node1.text?
376
+ elsif Canon::XmlParsing.text_node?(node1) || Canon::XmlParsing.cdata?(node1)
375
377
  XmlComparator.compare_text_nodes(node1, node2, opts, differences)
376
- elsif node1.comment?
378
+ elsif Canon::XmlParsing.comment?(node1)
377
379
  XmlComparator.compare_comment_nodes(node1, node2, opts, differences)
378
- elsif node1.cdata?
379
- XmlComparator.compare_text_nodes(node1, node2, opts, differences)
380
- elsif node1.processing_instruction?
380
+ elsif Canon::XmlParsing.processing_instruction?(node1)
381
381
  XmlComparator.compare_processing_instruction_nodes(node1, node2,
382
382
  opts, differences)
383
383
  else
@@ -27,6 +27,14 @@ module Canon
27
27
  }.freeze
28
28
 
29
29
  class << self
30
+ # Parse YAML from string or return as-is
31
+ #
32
+ # @param obj [String, Hash, Array] YAML string or parsed object
33
+ # @return [Object] Parsed YAML object
34
+ def parse(obj)
35
+ parse_yaml(obj)
36
+ end
37
+
30
38
  # Compare two YAML objects for equivalence
31
39
  #
32
40
  # @param yaml1 [String, Hash, Array] First YAML
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "moxml"
4
- require "nokogiri"
4
+ require "nokogiri" if Canon::XmlBackend.nokogiri?
5
5
  require_relative "xml/whitespace_normalizer"
6
6
  require_relative "comparison/xml_comparator"
7
7
  require_relative "comparison/html_comparator"
@@ -316,7 +316,8 @@ module Canon
316
316
 
317
317
  # Get global config options if not defined in opts
318
318
  # This is needed because semantic_diff doesn't go through dom_diff's config handling
319
- if !(opts[:match_profile] || opts[:global_options]) && Canon::Config.instance.respond_to?(format1)
319
+ if !(opts[:match_profile] || opts[:global_options]) && %i[xml html json
320
+ yaml string].include?(format1)
320
321
  format_config = Canon::Config.instance.public_send(format1)
321
322
  if format_config.match.profile
322
323
  opts[:match_profile] =
@@ -333,7 +334,8 @@ module Canon
333
334
 
334
335
  # Also read diff options from config (e.g., max_node_count for large documents)
335
336
  # This is independent of match options and needs to be passed to TreeDiffIntegrator
336
- if !match_opts_hash[:max_node_count] && Canon::Config.instance.respond_to?(format1)
337
+ if !match_opts_hash[:max_node_count] && %i[xml html json yaml
338
+ string].include?(format1)
337
339
  diff_max_node = Canon::Config.instance.public_send(format1).diff.max_node_count
338
340
  if diff_max_node > 10_000
339
341
  match_opts_hash[:max_node_count] =
@@ -564,43 +566,39 @@ module Canon
564
566
 
565
567
  case format
566
568
  when :xml
567
- # Delegate to XmlComparator's parse_node - returns Canon::Xml::Node
568
- # Adapter now handles Canon::Xml::Node directly
569
+ # Delegate to XmlComparator's parse - returns Canon::Xml::Node
569
570
  doc1 = parse_with_cache(obj1, format, preprocessing) do |doc|
570
- XmlComparator.send(:parse_node, doc, preprocessing)
571
+ XmlComparator.parse(doc, preprocessing)
571
572
  end
572
573
  doc2 = parse_with_cache(obj2, format, preprocessing) do |doc|
573
- XmlComparator.send(:parse_node, doc, preprocessing)
574
+ XmlComparator.parse(doc, preprocessing)
574
575
  end
575
576
  [doc1, doc2]
576
577
  when :html, :html4, :html5
577
- # Delegate to HtmlComparator's parse_node_for_semantic for Canon::Xml::Node
578
578
  [
579
579
  parse_with_cache(obj1, format, preprocessing) do |doc|
580
- HtmlComparator.send(:parse_node_for_semantic, doc, preprocessing)
580
+ HtmlComparator.parse(doc, preprocessing)
581
581
  end,
582
582
  parse_with_cache(obj2, format, preprocessing) do |doc|
583
- HtmlComparator.send(:parse_node_for_semantic, doc, preprocessing)
583
+ HtmlComparator.parse(doc, preprocessing)
584
584
  end,
585
585
  ]
586
586
  when :json
587
- # Delegate to JsonComparator's parse_json
588
587
  [
589
588
  parse_with_cache(obj1, format, :none) do |doc|
590
- JsonComparator.send(:parse_json, doc)
589
+ JsonComparator.parse(doc)
591
590
  end,
592
591
  parse_with_cache(obj2, format, :none) do |doc|
593
- JsonComparator.send(:parse_json, doc)
592
+ JsonComparator.parse(doc)
594
593
  end,
595
594
  ]
596
595
  when :yaml
597
- # Delegate to YamlComparator's parse_yaml
598
596
  [
599
597
  parse_with_cache(obj1, format, :none) do |doc|
600
- YamlComparator.send(:parse_yaml, doc)
598
+ YamlComparator.parse(doc)
601
599
  end,
602
600
  parse_with_cache(obj2, format, :none) do |doc|
603
- YamlComparator.send(:parse_yaml, doc)
601
+ YamlComparator.parse(doc)
604
602
  end,
605
603
  ]
606
604
  else
@@ -651,12 +649,10 @@ module Canon
651
649
  obj
652
650
  when Nokogiri::XML::Document, Nokogiri::HTML::Document,
653
651
  Nokogiri::XML::DocumentFragment, Nokogiri::HTML::DocumentFragment
654
- obj.respond_to?(:to_html) ? obj.to_html : obj.to_xml
652
+ obj.to_html
655
653
  else
656
- if obj.respond_to?(:to_html)
657
- obj.to_html
658
- elsif obj.respond_to?(:to_xml)
659
- obj.to_xml
654
+ if Canon::XmlParsing.xml_node?(obj) || obj.is_a?(Canon::Xml::Node)
655
+ Canon::XmlParsing.serialize(obj)
660
656
  else
661
657
  obj.to_s
662
658
  end
@@ -667,7 +663,11 @@ module Canon
667
663
  def serialize_document(doc, format)
668
664
  case format
669
665
  when :xml, :html, :html4, :html5
670
- doc.respond_to?(:to_html) ? doc.to_html : doc.to_xml
666
+ if Canon::XmlParsing.xml_node?(doc) || doc.is_a?(Canon::Xml::Node)
667
+ Canon::XmlParsing.serialize(doc)
668
+ else
669
+ doc.to_s
670
+ end
671
671
  when :json
672
672
  require "json"
673
673
  JSON.pretty_generate(doc)
@@ -750,7 +750,7 @@ module Canon
750
750
 
751
751
  # get match_profile if it is not defined in options
752
752
  # but defined in config
753
- if Canon::Config.instance.respond_to?(comparison_format)
753
+ if %i[xml html json yaml string].include?(comparison_format)
754
754
  format_config = Canon::Config.instance.public_send(comparison_format)
755
755
  if opts[:global_profile].nil? && format_config.match.profile
756
756
  # Config-sourced profile has *global* priority (applied before