canon 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +14 -71
- data/Rakefile +17 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +18 -29
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +146 -80
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +61 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +23 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +7 -41
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +5 -2
|
@@ -4,94 +4,88 @@ module Canon
|
|
|
4
4
|
module Comparison
|
|
5
5
|
# Single source of truth for cross-backend node type operations.
|
|
6
6
|
#
|
|
7
|
-
# The comparison pipeline handles nodes from
|
|
7
|
+
# The comparison pipeline handles nodes from multiple sources:
|
|
8
8
|
# * Canon::Xml::Node (+ RootNode, ElementNode, TextNode, etc.) —
|
|
9
9
|
# custom DOM built by SAX builder and DataModel.
|
|
10
|
-
# *
|
|
11
|
-
#
|
|
10
|
+
# * Canon::TreeDiff::Core::TreeNode — semantic tree diff nodes.
|
|
11
|
+
# * Backend-specific nodes (Nokogiri or Moxml) — live parsed nodes.
|
|
12
12
|
#
|
|
13
|
-
#
|
|
14
|
-
#
|
|
13
|
+
# All type dispatch uses backend-branching (`if XmlBackend.nokogiri?`)
|
|
14
|
+
# rather than `case/when` with constant references. This prevents
|
|
15
|
+
# NameError when Nokogiri constants are undefined under Opal.
|
|
16
|
+
#
|
|
17
|
+
# Every node query in the codebase should go through this module.
|
|
18
|
+
# Do not create private dispatch methods in consumers.
|
|
15
19
|
module NodeInspector
|
|
16
|
-
CANON_TEXT_TYPE = :text
|
|
17
20
|
NOKOGIRI_TEXT_TYPE = defined?(Nokogiri::XML::Node::TEXT_NODE) ? Nokogiri::XML::Node::TEXT_NODE : 3
|
|
18
21
|
|
|
19
|
-
#
|
|
22
|
+
# --- Type predicates ---
|
|
23
|
+
|
|
20
24
|
def self.text_node?(node)
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
node.node_type == CANON_TEXT_TYPE
|
|
24
|
-
when Nokogiri::XML::Node
|
|
25
|
-
node.node_type == NOKOGIRI_TEXT_TYPE
|
|
26
|
-
else
|
|
27
|
-
false
|
|
28
|
-
end
|
|
29
|
-
end
|
|
25
|
+
return false unless node
|
|
26
|
+
return node.node_type == :text if node.is_a?(Canon::Xml::Node)
|
|
30
27
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
case node
|
|
34
|
-
when Canon::Xml::Node
|
|
35
|
-
node.value.to_s
|
|
36
|
-
when Nokogiri::XML::Node
|
|
37
|
-
node.content.to_s
|
|
28
|
+
if XmlBackend.nokogiri?
|
|
29
|
+
node.is_a?(Nokogiri::XML::Text) || node.is_a?(Moxml::Text)
|
|
38
30
|
else
|
|
39
|
-
node.
|
|
31
|
+
node.is_a?(Moxml::Text)
|
|
40
32
|
end
|
|
41
33
|
end
|
|
42
34
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
def self.whitespace_only_text?(node)
|
|
47
|
-
return false unless text_node?(node)
|
|
35
|
+
def self.element_node?(node)
|
|
36
|
+
return false unless node
|
|
37
|
+
return node.node_type == :element if node.is_a?(Canon::Xml::Node)
|
|
48
38
|
|
|
49
|
-
|
|
50
|
-
|
|
39
|
+
if XmlBackend.nokogiri?
|
|
40
|
+
node.is_a?(Nokogiri::XML::Element) || node.is_a?(Moxml::Element)
|
|
41
|
+
else
|
|
42
|
+
node.is_a?(Moxml::Element)
|
|
43
|
+
end
|
|
51
44
|
end
|
|
52
45
|
|
|
53
|
-
# True when +node+ is a comment node.
|
|
54
|
-
# For HTML, also detects comments that Nokogiri parses as TEXT nodes
|
|
55
|
-
# (content like "<!-- comment -->" or escaped "<\\!-- comment -->").
|
|
56
46
|
def self.comment_node?(node)
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
return true if node.comment?
|
|
47
|
+
return false unless node
|
|
48
|
+
return node.node_type == :comment if node.is_a?(Canon::Xml::Node)
|
|
49
|
+
|
|
50
|
+
if XmlBackend.nokogiri?
|
|
51
|
+
return true if node.is_a?(Nokogiri::XML::Node) && node.comment?
|
|
62
52
|
|
|
63
53
|
# HTML comments are parsed as TEXT nodes by Nokogiri
|
|
64
|
-
if node.text?
|
|
54
|
+
if node.is_a?(Nokogiri::XML::Node) && node.text?
|
|
65
55
|
text_stripped = text_content(node).to_s.strip.gsub("\\", "")
|
|
66
56
|
return true if text_stripped.start_with?("<!--") && text_stripped.end_with?("-->")
|
|
67
57
|
end
|
|
68
58
|
false
|
|
69
59
|
else
|
|
70
|
-
|
|
60
|
+
node.is_a?(Moxml::Comment)
|
|
71
61
|
end
|
|
72
62
|
end
|
|
73
63
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
node.node_type == :element
|
|
79
|
-
when Nokogiri::XML::Node
|
|
80
|
-
node.element?
|
|
81
|
-
else
|
|
82
|
-
false
|
|
83
|
-
end
|
|
64
|
+
def self.document?(node)
|
|
65
|
+
return node.node_type == :root if node.is_a?(Canon::Xml::Node)
|
|
66
|
+
|
|
67
|
+
XmlParsing.document?(node)
|
|
84
68
|
end
|
|
85
69
|
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
#
|
|
94
|
-
#
|
|
70
|
+
def self.document_fragment?(node)
|
|
71
|
+
return false unless node
|
|
72
|
+
return false unless node.is_a?(Canon::Xml::Nodes::RootNode)
|
|
73
|
+
|
|
74
|
+
node.fragment?
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
# True when +node+ is a text node whose content is whitespace-only.
|
|
78
|
+
# Empty-string text nodes return false — those represent genuine
|
|
79
|
+
# empty-vs-content asymmetry, not pretty-print indentation.
|
|
80
|
+
def self.whitespace_only_text?(node)
|
|
81
|
+
return false unless text_node?(node)
|
|
82
|
+
|
|
83
|
+
text = text_content(node)
|
|
84
|
+
!text.empty? && text.strip.empty?
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
# --- Noise classification ---
|
|
88
|
+
|
|
95
89
|
def self.noise_dimension_for(node)
|
|
96
90
|
if whitespace_only_text?(node)
|
|
97
91
|
:whitespace_adjacency
|
|
@@ -100,38 +94,110 @@ module Canon
|
|
|
100
94
|
end
|
|
101
95
|
end
|
|
102
96
|
|
|
103
|
-
# True when +node+ is a noise node (whitespace-only text or comment).
|
|
104
|
-
# Convenience wrapper around +noise_dimension_for+.
|
|
105
|
-
#
|
|
106
|
-
# @param node [Object] DOM node to check
|
|
107
|
-
# @return [Boolean]
|
|
108
97
|
def self.noise_node?(node)
|
|
109
98
|
!noise_dimension_for(node).nil?
|
|
110
99
|
end
|
|
111
100
|
|
|
112
|
-
#
|
|
113
|
-
|
|
114
|
-
|
|
101
|
+
# --- Node queries ---
|
|
102
|
+
|
|
103
|
+
# Unified node name extraction across all node types.
|
|
104
|
+
def self.name(node)
|
|
105
|
+
return nil unless node
|
|
106
|
+
return node.name if node.is_a?(Canon::Xml::Node)
|
|
107
|
+
return node.label if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
108
|
+
|
|
109
|
+
XmlParsing.name(node)
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Unified parent access across all node types.
|
|
113
|
+
def self.parent(node)
|
|
114
|
+
return nil unless node
|
|
115
|
+
return node.parent if node.is_a?(Canon::Xml::Node)
|
|
116
|
+
return node.parent if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
117
|
+
|
|
118
|
+
XmlParsing.parent(node)
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
# Unified children access across all node types.
|
|
122
|
+
def self.children(node)
|
|
123
|
+
return [] unless node
|
|
124
|
+
return node.children if node.is_a?(Canon::Xml::Node)
|
|
125
|
+
return node.children || [] if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
126
|
+
|
|
127
|
+
XmlParsing.children(node)
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Extract the text content of +node+ as a String.
|
|
131
|
+
def self.text_content(node)
|
|
115
132
|
case node
|
|
116
|
-
when
|
|
117
|
-
|
|
133
|
+
when Canon::Xml::Nodes::TextNode
|
|
134
|
+
node.value.to_s
|
|
118
135
|
when Canon::Xml::Node
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
136
|
+
node.text_content.to_s
|
|
137
|
+
when Moxml::Text
|
|
138
|
+
node.content.to_s
|
|
139
|
+
else
|
|
140
|
+
XmlParsing.text_content(node).to_s
|
|
141
|
+
end
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
# Unified node type that always returns a symbol.
|
|
145
|
+
# Returns nil for unrecognised nodes.
|
|
146
|
+
def self.node_type(node)
|
|
147
|
+
return nil unless node
|
|
148
|
+
return node.node_type if node.is_a?(Canon::Xml::Node)
|
|
149
|
+
|
|
150
|
+
if node.is_a?(Canon::TreeDiff::Core::TreeNode)
|
|
151
|
+
node.type&.to_sym
|
|
152
|
+
else
|
|
153
|
+
XmlParsing.node_type(node)
|
|
154
|
+
end
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
# Unified attribute value access.
|
|
158
|
+
def self.attribute_value(node, attr_name)
|
|
159
|
+
return nil unless node
|
|
160
|
+
|
|
161
|
+
if node.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
162
|
+
attr = node.attribute_nodes.find { |a| a.name == attr_name.to_s }
|
|
163
|
+
attr&.value
|
|
164
|
+
elsif node.is_a?(Canon::Xml::Node)
|
|
165
|
+
nil
|
|
166
|
+
else
|
|
167
|
+
XmlParsing.attribute_value(node, attr_name)
|
|
168
|
+
end
|
|
169
|
+
end
|
|
170
|
+
|
|
171
|
+
# Unified namespace URI access.
|
|
172
|
+
def self.namespace_uri(node)
|
|
173
|
+
return nil unless node
|
|
174
|
+
|
|
175
|
+
if node.is_a?(Canon::Xml::Node)
|
|
176
|
+
node.is_a?(Canon::Xml::Nodes::ElementNode) ? node.namespace_uri : nil
|
|
177
|
+
else
|
|
178
|
+
XmlParsing.namespace_uri(node)
|
|
179
|
+
end
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
# Extract parse-time errors carried on a node or its owning document.
|
|
183
|
+
def self.parse_errors(node)
|
|
184
|
+
return [] if node.nil?
|
|
185
|
+
return Array(node.parse_errors).map(&:to_s) if node.is_a?(Canon::Xml::Node)
|
|
186
|
+
|
|
187
|
+
if XmlBackend.nokogiri?
|
|
188
|
+
if node.is_a?(Nokogiri::XML::Document) || node.is_a?(Nokogiri::HTML5::Document)
|
|
189
|
+
Array(node.errors).map(&:to_s)
|
|
190
|
+
else
|
|
191
|
+
[]
|
|
192
|
+
end
|
|
123
193
|
else
|
|
124
194
|
[]
|
|
125
195
|
end
|
|
126
196
|
end
|
|
127
197
|
|
|
128
|
-
#
|
|
129
|
-
# recognised DOM backend type or has no parent.
|
|
198
|
+
# Deprecated: use NodeInspector.parent instead.
|
|
130
199
|
def self.parent_of(node)
|
|
131
|
-
|
|
132
|
-
when Canon::Xml::Node, Nokogiri::XML::Node
|
|
133
|
-
node.parent
|
|
134
|
-
end
|
|
200
|
+
parent(node)
|
|
135
201
|
end
|
|
136
202
|
end
|
|
137
203
|
end
|
|
@@ -125,20 +125,18 @@ module Canon
|
|
|
125
125
|
# @param doc2 [Object] Second XML document
|
|
126
126
|
# @return [Array<String>] Preprocessed strings
|
|
127
127
|
def preprocess_xml(doc1, doc2)
|
|
128
|
-
# Serialize XML to string
|
|
129
|
-
# Use XmlNodeComparison's serializer for Canon::Xml::Node
|
|
130
128
|
xml1 = if doc1.is_a?(Canon::Xml::Node)
|
|
131
129
|
XmlNodeComparison.serialize_node_to_xml(doc1)
|
|
132
|
-
elsif
|
|
133
|
-
doc1
|
|
130
|
+
elsif Canon::XmlParsing.xml_node?(doc1)
|
|
131
|
+
Canon::XmlParsing.serialize(doc1)
|
|
134
132
|
else
|
|
135
133
|
doc1.to_s
|
|
136
134
|
end
|
|
137
135
|
|
|
138
136
|
xml2 = if doc2.is_a?(Canon::Xml::Node)
|
|
139
137
|
XmlNodeComparison.serialize_node_to_xml(doc2)
|
|
140
|
-
elsif
|
|
141
|
-
doc2
|
|
138
|
+
elsif Canon::XmlParsing.xml_node?(doc2)
|
|
139
|
+
Canon::XmlParsing.serialize(doc2)
|
|
142
140
|
else
|
|
143
141
|
doc2.to_s
|
|
144
142
|
end
|
|
@@ -167,7 +165,7 @@ module Canon
|
|
|
167
165
|
XmlNodeComparison.serialize_node_to_xml(doc1)
|
|
168
166
|
elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
|
|
169
167
|
doc1.to_s
|
|
170
|
-
elsif
|
|
168
|
+
elsif Canon::XmlParsing.xml_node?(doc1)
|
|
171
169
|
doc1.to_html
|
|
172
170
|
else
|
|
173
171
|
doc1.to_s
|
|
@@ -177,7 +175,7 @@ module Canon
|
|
|
177
175
|
XmlNodeComparison.serialize_node_to_xml(doc2)
|
|
178
176
|
elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
|
|
179
177
|
doc2.to_s
|
|
180
|
-
elsif
|
|
178
|
+
elsif Canon::XmlParsing.xml_node?(doc2)
|
|
181
179
|
doc2.to_html
|
|
182
180
|
else
|
|
183
181
|
doc2.to_s
|