canon 0.2.8 → 0.2.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +25 -73
- data/Rakefile +37 -0
- data/lib/canon/cache.rb +16 -27
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +20 -29
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +117 -86
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -2
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +40 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +14 -13
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +63 -85
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +24 -24
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +7 -41
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_line_builder.rb +2 -0
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/diff_node_mapper.rb +10 -8
- data/lib/canon/diff/formatting_detector.rb +3 -2
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -84
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +23 -17
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +127 -11
- data/lib/canon/diff_formatter/by_object_formatter.rb +2 -6
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +3 -3
- data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -27
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +17 -13
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +2 -2
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/tree_diff/operation_converter.rb +7 -7
- data/lib/canon/tree_diff/operations/operation_detector.rb +4 -0
- data/lib/canon/validators/base_validator.rb +5 -8
- data/lib/canon/validators/html_validator.rb +3 -8
- data/lib/canon/validators/xml_validator.rb +3 -8
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +132 -138
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +5 -7
- data/lib/canon/xml/whitespace_normalizer.rb +2 -2
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +283 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +9 -6
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "nokogiri"
|
|
3
|
+
require "nokogiri" unless RUBY_ENGINE == "opal"
|
|
4
4
|
require_relative "../comparison" # Load base module with constants first
|
|
5
5
|
require_relative "markup_comparator"
|
|
6
6
|
require_relative "xml_comparator"
|
|
@@ -167,6 +167,11 @@ module Canon
|
|
|
167
167
|
end
|
|
168
168
|
end
|
|
169
169
|
|
|
170
|
+
# Public parsing API for external callers
|
|
171
|
+
def parse(html, preprocessing = :none)
|
|
172
|
+
parse_node_for_semantic(html, preprocessing)
|
|
173
|
+
end
|
|
174
|
+
|
|
170
175
|
private
|
|
171
176
|
|
|
172
177
|
# Check if both nodes are document fragments
|
|
@@ -337,13 +342,10 @@ module Canon
|
|
|
337
342
|
# Convert to string if needed
|
|
338
343
|
html_string = if html.is_a?(String)
|
|
339
344
|
html
|
|
340
|
-
elsif
|
|
345
|
+
elsif Canon::XmlParsing.xml_node?(html)
|
|
341
346
|
html.to_html
|
|
342
|
-
elsif html.respond_to?(:to_s)
|
|
343
|
-
html.to_s
|
|
344
347
|
else
|
|
345
|
-
|
|
346
|
-
"Unable to convert HTML to string: #{html.class}"
|
|
348
|
+
html.to_s
|
|
347
349
|
end
|
|
348
350
|
|
|
349
351
|
# Strip DOCTYPE for consistent parsing
|
|
@@ -492,22 +494,18 @@ module Canon
|
|
|
492
494
|
end
|
|
493
495
|
|
|
494
496
|
def find_and_normalize_style_script(node)
|
|
495
|
-
return unless node.
|
|
497
|
+
return unless node.is_a?(Canon::Xml::Node)
|
|
496
498
|
|
|
497
499
|
node.children.each do |child|
|
|
498
500
|
next unless child.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
499
501
|
|
|
500
502
|
# If this is a style or script element, normalize its text content
|
|
501
503
|
if %w[style script].include?(child.name.downcase)
|
|
502
|
-
# Get text children and remove HTML comments from them
|
|
503
504
|
child.children.each do |text_child|
|
|
504
505
|
next unless text_child.is_a?(Canon::Xml::Nodes::TextNode)
|
|
505
506
|
|
|
506
|
-
# Remove HTML comments from text content without using regex
|
|
507
|
-
# to avoid ReDoS/incomplete sanitization vulnerabilities
|
|
508
507
|
normalized = remove_html_comments(text_child.value)
|
|
509
|
-
|
|
510
|
-
text_child.instance_variable_set(:@value, normalized)
|
|
508
|
+
text_child.value = normalized
|
|
511
509
|
end
|
|
512
510
|
end
|
|
513
511
|
|
|
@@ -560,6 +558,8 @@ module Canon
|
|
|
560
558
|
end
|
|
561
559
|
end
|
|
562
560
|
|
|
561
|
+
public :detect_html_version
|
|
562
|
+
|
|
563
563
|
# Detect HTML version from node
|
|
564
564
|
#
|
|
565
565
|
# @param node [Canon::Xml::Node, Nokogiri::XML::Node] HTML node
|
|
@@ -584,13 +584,10 @@ module Canon
|
|
|
584
584
|
# @param node [Canon::Xml::Node, Nokogiri::HTML::Document] Parsed node
|
|
585
585
|
# @return [String] Serialized HTML string
|
|
586
586
|
def serialize_for_display(node)
|
|
587
|
-
# Use XmlNodeComparison's serializer for Canon::Xml::Node
|
|
588
587
|
if node.is_a?(Canon::Xml::Node)
|
|
589
588
|
XmlNodeComparison.serialize_node_to_xml(node)
|
|
590
|
-
elsif
|
|
591
|
-
node.to_html
|
|
592
|
-
elsif node.respond_to?(:to_xml)
|
|
593
|
-
node.to_xml
|
|
589
|
+
elsif Canon::XmlParsing.xml_node?(node)
|
|
590
|
+
Canon::XmlBackend.nokogiri? ? node.to_html : Canon::XmlParsing.serialize(node)
|
|
594
591
|
else
|
|
595
592
|
node.to_s
|
|
596
593
|
end
|
|
@@ -605,16 +602,11 @@ module Canon
|
|
|
605
602
|
if html.is_a?(String)
|
|
606
603
|
html
|
|
607
604
|
elsif html.is_a?(Canon::Xml::Node)
|
|
608
|
-
# Serialize Canon nodes to string
|
|
609
605
|
Canon::Xml::DataModel.serialize(html)
|
|
610
|
-
elsif
|
|
611
|
-
|
|
612
|
-
html.to_html
|
|
613
|
-
elsif html.respond_to?(:to_s)
|
|
614
|
-
html.to_s
|
|
606
|
+
elsif Canon::XmlParsing.xml_node?(html)
|
|
607
|
+
Canon::XmlBackend.nokogiri? ? html.to_html : html.to_s
|
|
615
608
|
else
|
|
616
|
-
|
|
617
|
-
"Unable to extract original string from: #{html.class}"
|
|
609
|
+
html.to_s
|
|
618
610
|
end
|
|
619
611
|
end
|
|
620
612
|
|
|
@@ -727,11 +719,10 @@ compare_profile = nil)
|
|
|
727
719
|
# Check if any ancestor of the given node preserves whitespace
|
|
728
720
|
def ancestor_preserves_whitespace?(node, preserve_list)
|
|
729
721
|
current = node
|
|
730
|
-
while current.
|
|
722
|
+
while current.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(current)
|
|
731
723
|
return true if preserve_list.include?(current.name.downcase)
|
|
732
724
|
|
|
733
|
-
|
|
734
|
-
break if current.is_a?(Nokogiri::XML::Document)
|
|
725
|
+
break if Canon::XmlParsing.document?(current)
|
|
735
726
|
|
|
736
727
|
current = current.parent
|
|
737
728
|
end
|
|
@@ -811,7 +802,7 @@ compare_profile = nil)
|
|
|
811
802
|
end
|
|
812
803
|
|
|
813
804
|
# Check if it's a fragment that contains XML processing instructions
|
|
814
|
-
if node.
|
|
805
|
+
if (node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)) && node.children.any? do |child|
|
|
815
806
|
child.is_a?(Nokogiri::XML::ProcessingInstruction) &&
|
|
816
807
|
child.name == "xml"
|
|
817
808
|
end
|
|
@@ -48,9 +48,8 @@ module Canon
|
|
|
48
48
|
|
|
49
49
|
# If key exists, check if it's :strict
|
|
50
50
|
return match_options[:comments] == :strict
|
|
51
|
-
elsif match_options.
|
|
51
|
+
elsif match_options.is_a?(ResolvedMatchOptions)
|
|
52
52
|
behavior = behavior_for(dimension)
|
|
53
|
-
# In HTML, only :strict makes comments affect equivalence
|
|
54
53
|
return behavior == :strict
|
|
55
54
|
end
|
|
56
55
|
# Default: comments don't affect equivalence in HTML
|
|
@@ -106,14 +105,8 @@ module Canon
|
|
|
106
105
|
def has_explicit_option?(dimension)
|
|
107
106
|
if match_options.is_a?(Hash)
|
|
108
107
|
match_options.key?(dimension)
|
|
109
|
-
elsif match_options.
|
|
110
|
-
|
|
111
|
-
begin
|
|
112
|
-
match_options[dimension]
|
|
113
|
-
true
|
|
114
|
-
rescue StandardError
|
|
115
|
-
false
|
|
116
|
-
end
|
|
108
|
+
elsif match_options.is_a?(ResolvedMatchOptions)
|
|
109
|
+
!match_options.options[dimension].nil?
|
|
117
110
|
else
|
|
118
111
|
false
|
|
119
112
|
end
|
|
@@ -26,6 +26,14 @@ module Canon
|
|
|
26
26
|
}.freeze
|
|
27
27
|
|
|
28
28
|
class << self
|
|
29
|
+
# Parse JSON from string or return as-is
|
|
30
|
+
#
|
|
31
|
+
# @param obj [String, Hash, Array] JSON string or parsed object
|
|
32
|
+
# @return [Object] Parsed JSON object
|
|
33
|
+
def parse(obj)
|
|
34
|
+
parse_json(obj)
|
|
35
|
+
end
|
|
36
|
+
|
|
29
37
|
# Compare two JSON objects for equivalence
|
|
30
38
|
#
|
|
31
39
|
# @param json1 [String, Hash, Array] First JSON
|
|
@@ -4,94 +4,76 @@ module Canon
|
|
|
4
4
|
module Comparison
|
|
5
5
|
# Single source of truth for cross-backend node type operations.
|
|
6
6
|
#
|
|
7
|
-
# The comparison pipeline handles nodes from
|
|
7
|
+
# The comparison pipeline handles nodes from multiple sources:
|
|
8
8
|
# * Canon::Xml::Node (+ RootNode, ElementNode, TextNode, etc.) —
|
|
9
9
|
# custom DOM built by SAX builder and DataModel.
|
|
10
|
-
# *
|
|
11
|
-
#
|
|
10
|
+
# * Canon::TreeDiff::Core::TreeNode — semantic tree diff nodes.
|
|
11
|
+
# * Backend-specific nodes (Nokogiri or Moxml) — live parsed nodes.
|
|
12
12
|
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
13
|
+
# Architecture: NodeInspector handles Canon-native types (Canon::Xml::Node,
|
|
14
|
+
# TreeNode) directly, then delegates ALL backend-specific queries to
|
|
15
|
+
# XmlParsing. No Moxml/Nokogiri constants are referenced here — that
|
|
16
|
+
# knowledge lives exclusively in XmlParsing.
|
|
15
17
|
module NodeInspector
|
|
16
|
-
|
|
17
|
-
NOKOGIRI_TEXT_TYPE = defined?(Nokogiri::XML::Node::TEXT_NODE) ? Nokogiri::XML::Node::TEXT_NODE : 3
|
|
18
|
+
# --- Type predicates ---
|
|
18
19
|
|
|
19
|
-
# True when +node+ is a text node (whitespace, content, etc.).
|
|
20
20
|
def self.text_node?(node)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
node.node_type == CANON_TEXT_TYPE
|
|
24
|
-
when Nokogiri::XML::Node
|
|
25
|
-
node.node_type == NOKOGIRI_TEXT_TYPE
|
|
26
|
-
else
|
|
27
|
-
false
|
|
28
|
-
end
|
|
29
|
-
end
|
|
21
|
+
return false unless node
|
|
22
|
+
return node.node_type == :text if node.is_a?(Canon::Xml::Node)
|
|
30
23
|
|
|
31
|
-
|
|
32
|
-
def self.text_content(node)
|
|
33
|
-
case node
|
|
34
|
-
when Canon::Xml::Node
|
|
35
|
-
node.value.to_s
|
|
36
|
-
when Nokogiri::XML::Node
|
|
37
|
-
node.content.to_s
|
|
38
|
-
else
|
|
39
|
-
node.to_s
|
|
40
|
-
end
|
|
24
|
+
XmlParsing.text_node?(node)
|
|
41
25
|
end
|
|
42
26
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def self.whitespace_only_text?(node)
|
|
47
|
-
return false unless text_node?(node)
|
|
27
|
+
def self.element_node?(node)
|
|
28
|
+
return false unless node
|
|
29
|
+
return node.node_type == :element if node.is_a?(Canon::Xml::Node)
|
|
48
30
|
|
|
49
|
-
|
|
50
|
-
!text.empty? && text.strip.empty?
|
|
31
|
+
XmlParsing.element?(node)
|
|
51
32
|
end
|
|
52
33
|
|
|
53
|
-
# True when +node+ is a comment node.
|
|
54
|
-
# For HTML, also detects comments that Nokogiri parses as TEXT nodes
|
|
55
|
-
# (content like "<!-- comment -->" or escaped "<\\!-- comment -->").
|
|
56
34
|
def self.comment_node?(node)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
return true if node.comment?
|
|
35
|
+
return false unless node
|
|
36
|
+
return node.node_type == :comment if node.is_a?(Canon::Xml::Node)
|
|
37
|
+
|
|
38
|
+
if XmlBackend.nokogiri?
|
|
39
|
+
return true if node.is_a?(Nokogiri::XML::Node) && node.comment?
|
|
62
40
|
|
|
63
41
|
# HTML comments are parsed as TEXT nodes by Nokogiri
|
|
64
|
-
if node.text?
|
|
42
|
+
if node.is_a?(Nokogiri::XML::Node) && node.text?
|
|
65
43
|
text_stripped = text_content(node).to_s.strip.gsub("\\", "")
|
|
66
44
|
return true if text_stripped.start_with?("<!--") && text_stripped.end_with?("-->")
|
|
67
45
|
end
|
|
68
46
|
false
|
|
69
47
|
else
|
|
70
|
-
|
|
48
|
+
XmlParsing.comment?(node)
|
|
71
49
|
end
|
|
72
50
|
end
|
|
73
51
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
node.node_type == :element
|
|
79
|
-
when Nokogiri::XML::Node
|
|
80
|
-
node.element?
|
|
81
|
-
else
|
|
82
|
-
false
|
|
83
|
-
end
|
|
52
|
+
def self.document?(node)
|
|
53
|
+
return node.node_type == :root if node.is_a?(Canon::Xml::Node)
|
|
54
|
+
|
|
55
|
+
XmlParsing.document?(node)
|
|
84
56
|
end
|
|
85
57
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
#
|
|
94
|
-
#
|
|
58
|
+
def self.document_fragment?(node)
|
|
59
|
+
return false unless node
|
|
60
|
+
return false unless node.is_a?(Canon::Xml::Nodes::RootNode)
|
|
61
|
+
|
|
62
|
+
node.fragment?
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
# True when +node+ is a text node whose content is whitespace-only.
|
|
66
|
+
# Empty-string text nodes return false — those represent genuine
|
|
67
|
+
# empty-vs-content asymmetry, not pretty-print indentation.
|
|
68
|
+
def self.whitespace_only_text?(node)
|
|
69
|
+
return false unless text_node?(node)
|
|
70
|
+
|
|
71
|
+
text = text_content(node)
|
|
72
|
+
!text.empty? && text.strip.empty?
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
# --- Noise classification ---
|
|
76
|
+
|
|
95
77
|
def self.noise_dimension_for(node)
|
|
96
78
|
if whitespace_only_text?(node)
|
|
97
79
|
:whitespace_adjacency
|
|
@@ -100,37 +82,86 @@ module Canon
|
|
|
100
82
|
end
|
|
101
83
|
end
|
|
102
84
|
|
|
103
|
-
# True when +node+ is a noise node (whitespace-only text or comment).
|
|
104
|
-
# Convenience wrapper around +noise_dimension_for+.
|
|
105
|
-
#
|
|
106
|
-
# @param node [Object] DOM node to check
|
|
107
|
-
# @return [Boolean]
|
|
108
85
|
def self.noise_node?(node)
|
|
109
86
|
!noise_dimension_for(node).nil?
|
|
110
87
|
end
|
|
111
88
|
|
|
112
|
-
#
|
|
113
|
-
|
|
114
|
-
def self.
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
89
|
+
# --- Node queries ---
|
|
90
|
+
|
|
91
|
+
def self.name(node)
|
|
92
|
+
return nil unless node
|
|
93
|
+
return node.name if node.is_a?(Canon::Xml::Node)
|
|
94
|
+
return node.label if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
95
|
+
|
|
96
|
+
XmlParsing.name(node)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def self.parent(node)
|
|
100
|
+
return nil unless node
|
|
101
|
+
return node.parent if node.is_a?(Canon::Xml::Node)
|
|
102
|
+
return node.parent if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
103
|
+
|
|
104
|
+
XmlParsing.parent(node)
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
def self.children(node)
|
|
108
|
+
return [] unless node
|
|
109
|
+
return node.children if node.is_a?(Canon::Xml::Node)
|
|
110
|
+
return node.children || [] if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
111
|
+
|
|
112
|
+
XmlParsing.children(node)
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
def self.text_content(node)
|
|
116
|
+
return node.value.to_s if node.is_a?(Canon::Xml::Nodes::TextNode)
|
|
117
|
+
return node.text_content.to_s if node.is_a?(Canon::Xml::Node)
|
|
118
|
+
|
|
119
|
+
XmlParsing.text_content(node).to_s
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
def self.node_type(node)
|
|
123
|
+
return nil unless node
|
|
124
|
+
return node.node_type if node.is_a?(Canon::Xml::Node)
|
|
125
|
+
return node.type&.to_sym if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
126
|
+
|
|
127
|
+
XmlParsing.node_type(node)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
def self.attribute_value(node, attr_name)
|
|
131
|
+
return nil unless node
|
|
132
|
+
|
|
133
|
+
if node.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
134
|
+
attr = node.attribute_nodes.find { |a| a.name == attr_name.to_s }
|
|
135
|
+
attr&.value
|
|
136
|
+
elsif node.is_a?(Canon::Xml::Node)
|
|
137
|
+
nil
|
|
123
138
|
else
|
|
124
|
-
|
|
139
|
+
XmlParsing.attribute_value(node, attr_name)
|
|
140
|
+
end
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
def self.namespace_uri(node)
|
|
144
|
+
return nil unless node
|
|
145
|
+
|
|
146
|
+
if node.is_a?(Canon::Xml::Node)
|
|
147
|
+
node.is_a?(Canon::Xml::Nodes::ElementNode) ? node.namespace_uri : nil
|
|
148
|
+
else
|
|
149
|
+
XmlParsing.namespace_uri(node)
|
|
125
150
|
end
|
|
126
151
|
end
|
|
127
152
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
node.
|
|
153
|
+
def self.parse_errors(node)
|
|
154
|
+
return [] if node.nil?
|
|
155
|
+
return Array(node.parse_errors).map(&:to_s) if node.is_a?(Canon::Xml::Node)
|
|
156
|
+
|
|
157
|
+
if XmlBackend.nokogiri?
|
|
158
|
+
if node.is_a?(Nokogiri::XML::Document) || node.is_a?(Nokogiri::HTML5::Document)
|
|
159
|
+
Array(node.errors).map(&:to_s)
|
|
160
|
+
else
|
|
161
|
+
[]
|
|
162
|
+
end
|
|
163
|
+
else
|
|
164
|
+
[]
|
|
134
165
|
end
|
|
135
166
|
end
|
|
136
167
|
end
|
|
@@ -125,20 +125,18 @@ module Canon
|
|
|
125
125
|
# @param doc2 [Object] Second XML document
|
|
126
126
|
# @return [Array<String>] Preprocessed strings
|
|
127
127
|
def preprocess_xml(doc1, doc2)
|
|
128
|
-
# Serialize XML to string
|
|
129
|
-
# Use XmlNodeComparison's serializer for Canon::Xml::Node
|
|
130
128
|
xml1 = if doc1.is_a?(Canon::Xml::Node)
|
|
131
129
|
XmlNodeComparison.serialize_node_to_xml(doc1)
|
|
132
|
-
elsif
|
|
133
|
-
doc1
|
|
130
|
+
elsif Canon::XmlParsing.xml_node?(doc1)
|
|
131
|
+
Canon::XmlParsing.serialize(doc1)
|
|
134
132
|
else
|
|
135
133
|
doc1.to_s
|
|
136
134
|
end
|
|
137
135
|
|
|
138
136
|
xml2 = if doc2.is_a?(Canon::Xml::Node)
|
|
139
137
|
XmlNodeComparison.serialize_node_to_xml(doc2)
|
|
140
|
-
elsif
|
|
141
|
-
doc2
|
|
138
|
+
elsif Canon::XmlParsing.xml_node?(doc2)
|
|
139
|
+
Canon::XmlParsing.serialize(doc2)
|
|
142
140
|
else
|
|
143
141
|
doc2.to_s
|
|
144
142
|
end
|
|
@@ -167,7 +165,7 @@ module Canon
|
|
|
167
165
|
XmlNodeComparison.serialize_node_to_xml(doc1)
|
|
168
166
|
elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
|
|
169
167
|
doc1.to_s
|
|
170
|
-
elsif
|
|
168
|
+
elsif Canon::XmlParsing.xml_node?(doc1)
|
|
171
169
|
doc1.to_html
|
|
172
170
|
else
|
|
173
171
|
doc1.to_s
|
|
@@ -177,7 +175,7 @@ module Canon
|
|
|
177
175
|
XmlNodeComparison.serialize_node_to_xml(doc2)
|
|
178
176
|
elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
|
|
179
177
|
doc2.to_s
|
|
180
|
-
elsif
|
|
178
|
+
elsif Canon::XmlParsing.xml_node?(doc2)
|
|
181
179
|
doc2.to_html
|
|
182
180
|
else
|
|
183
181
|
doc2.to_s
|