canon 0.2.8 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec-opal +7 -0
- data/.rubocop_todo.yml +14 -71
- data/Rakefile +17 -0
- data/lib/canon/cli.rb +1 -1
- data/lib/canon/color_detector.rb +3 -5
- data/lib/canon/comparison/compare_profile.rb +1 -4
- data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
- data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
- data/lib/canon/comparison/format_detector.rb +29 -20
- data/lib/canon/comparison/html_comparator.rb +18 -29
- data/lib/canon/comparison/html_compare_profile.rb +3 -10
- data/lib/canon/comparison/html_parser.rb +1 -1
- data/lib/canon/comparison/json_comparator.rb +8 -0
- data/lib/canon/comparison/node_inspector.rb +146 -80
- data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
- data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
- data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
- data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
- data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
- data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
- data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
- data/lib/canon/comparison/xml_comparator.rb +61 -83
- data/lib/canon/comparison/xml_node_comparison.rb +15 -15
- data/lib/canon/comparison/yaml_comparator.rb +8 -0
- data/lib/canon/comparison.rb +23 -23
- data/lib/canon/config/profile_loader.rb +13 -13
- data/lib/canon/config.rb +29 -5
- data/lib/canon/diff/diff_classifier.rb +7 -41
- data/lib/canon/diff/diff_line.rb +1 -1
- data/lib/canon/diff/diff_node_enricher.rb +22 -24
- data/lib/canon/diff/node_serializer.rb +23 -30
- data/lib/canon/diff/path_builder.rb +24 -37
- data/lib/canon/diff/source_locator.rb +0 -3
- data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
- data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
- data/lib/canon/diff_formatter/debug_output.rb +12 -24
- data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
- data/lib/canon/diff_formatter/legend.rb +2 -2
- data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
- data/lib/canon/diff_formatter/theme.rb +4 -4
- data/lib/canon/diff_formatter.rb +2 -2
- data/lib/canon/formatters/html_formatter.rb +1 -1
- data/lib/canon/formatters/html_formatter_base.rb +1 -1
- data/lib/canon/formatters/xml_formatter.rb +7 -32
- data/lib/canon/html/data_model.rb +1 -1
- data/lib/canon/pretty_printer/html.rb +1 -1
- data/lib/canon/pretty_printer/xml.rb +16 -7
- data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
- data/lib/canon/rspec_matchers.rb +2 -2
- data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
- data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
- data/lib/canon/tree_diff/core/tree_node.rb +1 -3
- data/lib/canon/validators/html_validator.rb +1 -1
- data/lib/canon/validators/xml_validator.rb +1 -1
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/data_model.rb +131 -137
- data/lib/canon/xml/namespace_helper.rb +5 -0
- data/lib/canon/xml/node.rb +2 -1
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +6 -1
- data/lib/canon/xml/sax_builder.rb +4 -6
- data/lib/canon/xml_backend.rb +49 -0
- data/lib/canon/xml_parsing.rb +271 -0
- data/lib/canon.rb +3 -1
- data/lib/tasks/benchmark_runner.rb +1 -1
- data/lib/tasks/performance_helpers.rb +1 -1
- metadata +5 -2
|
@@ -3,41 +3,6 @@
|
|
|
3
3
|
module Canon
|
|
4
4
|
module Comparison
|
|
5
5
|
# Whitespace sensitivity utilities for element-level control
|
|
6
|
-
#
|
|
7
|
-
# This module provides three-way classification of whitespace behaviour
|
|
8
|
-
# at the element level:
|
|
9
|
-
#
|
|
10
|
-
# * **:preserve** — every whitespace character is significant. `" "` ≠ `"\n"`.
|
|
11
|
-
# Configured via +preserve_whitespace_elements+ (HTML default: pre, code,
|
|
12
|
-
# textarea, script, style; XML default: none).
|
|
13
|
-
#
|
|
14
|
-
# * **:collapse** — presence ≠ absence, but all whitespace forms are
|
|
15
|
-
# equivalent: `" "` == `"\n "`. Configured via +collapse_whitespace_elements+
|
|
16
|
-
# (HTML default: p, li, dt, dd, td, th, h1-h6, caption, figcaption, label,
|
|
17
|
-
# legend, summary, blockquote, address; XML default: none).
|
|
18
|
-
#
|
|
19
|
-
# * **:strip** — all whitespace is structural formatting noise and is
|
|
20
|
-
# dropped. Default for XML; HTML elements not in the above lists.
|
|
21
|
-
#
|
|
22
|
-
# Classification is **ancestor-based**: the closest matching ancestor
|
|
23
|
-
# determines the class. The strip blacklist (+strip_whitespace_elements+)
|
|
24
|
-
# overrides any sensitive ancestor.
|
|
25
|
-
#
|
|
26
|
-
# == Priority Order
|
|
27
|
-
#
|
|
28
|
-
# 1. respect_xml_space: false → User config only (ignore xml:space)
|
|
29
|
-
# 2. Ancestor walk (strip blacklist wins; then preserve; then collapse)
|
|
30
|
-
# 3. xml:space="preserve" → preserve
|
|
31
|
-
# 4. xml:space="default" → use configured behaviour
|
|
32
|
-
# 5. Format defaults (HTML: collapse for most elements; XML: strip)
|
|
33
|
-
#
|
|
34
|
-
# == Usage
|
|
35
|
-
#
|
|
36
|
-
# WhitespaceSensitivity.classify_element(element, match_opts)
|
|
37
|
-
# => :preserve, :collapse, or :strip
|
|
38
|
-
#
|
|
39
|
-
# WhitespaceSensitivity.element_sensitive?(node, opts)
|
|
40
|
-
# => true if whitespace should be preserved (preserve or collapse)
|
|
41
6
|
module WhitespaceSensitivity
|
|
42
7
|
# HTML mixed-content "leaf block" elements where whitespace presence
|
|
43
8
|
# matters but all forms are equivalent (CSS block whitespace collapsing).
|
|
@@ -61,80 +26,52 @@ module Canon
|
|
|
61
26
|
|
|
62
27
|
class << self
|
|
63
28
|
# Classify the whitespace behaviour for an element using ancestor walk.
|
|
64
|
-
#
|
|
65
|
-
# @param element [Object] The element node to classify
|
|
66
|
-
# @param match_opts [Hash] Resolved match options
|
|
67
|
-
# @return [Symbol] :preserve, :collapse, or :strip
|
|
68
29
|
def classify_element(element, match_opts)
|
|
69
30
|
return :strip unless element
|
|
70
|
-
return :strip unless element
|
|
31
|
+
return :strip unless node_name(element)
|
|
71
32
|
|
|
72
33
|
preserve_set = resolved_preserve_elements_set(match_opts)
|
|
73
34
|
collapse_set = resolved_collapse_elements_set(match_opts)
|
|
74
35
|
strip_set = resolved_strip_elements_set(match_opts)
|
|
75
36
|
|
|
76
|
-
# Ancestor walk: start at the element itself, walk up.
|
|
77
|
-
# Strip blacklist wins over any sensitive ancestor.
|
|
78
37
|
walk_ancestor_classification(element, preserve_set, collapse_set,
|
|
79
|
-
strip_set
|
|
38
|
+
strip_set)
|
|
80
39
|
end
|
|
81
40
|
|
|
82
41
|
# Check if an element is whitespace-sensitive based on configuration.
|
|
83
|
-
# Returns true for :preserve or :collapse classification.
|
|
84
|
-
#
|
|
85
|
-
# @param node [Object] The element node to check
|
|
86
|
-
# @param opts [Hash] Comparison options containing match_opts
|
|
87
|
-
# @return [Boolean] true if whitespace should be preserved for this element
|
|
88
42
|
def element_sensitive?(node, opts)
|
|
89
43
|
match_opts = opts[:match_opts]
|
|
90
44
|
return false unless match_opts
|
|
91
45
|
return false unless text_node_parent?(node)
|
|
92
46
|
|
|
93
|
-
parent = node
|
|
47
|
+
parent = node_parent(node)
|
|
94
48
|
|
|
95
|
-
# 1. Check if we should ignore xml:space (user override)
|
|
96
49
|
unless respect_xml_space?(match_opts)
|
|
97
50
|
return user_config_sensitive?(parent, match_opts)
|
|
98
51
|
end
|
|
99
52
|
|
|
100
|
-
# 2. Check xml:space="preserve" (document declaration)
|
|
101
53
|
return true if xml_space_preserve?(parent)
|
|
102
|
-
|
|
103
|
-
# 3. Check xml:space="default" (use configured behavior)
|
|
104
54
|
return false if xml_space_default?(parent)
|
|
105
55
|
|
|
106
|
-
# 4. Three-way classification (ancestor-based)
|
|
107
56
|
classification = classify_element(parent, match_opts)
|
|
108
57
|
%i[preserve collapse].include?(classification)
|
|
109
58
|
end
|
|
110
59
|
|
|
111
60
|
# Check if whitespace-only text node should be filtered
|
|
112
|
-
#
|
|
113
|
-
# @param node [Object] The text node to check
|
|
114
|
-
# @param opts [Hash] Comparison options
|
|
115
|
-
# @return [Boolean] true if node should be preserved (not filtered)
|
|
116
61
|
def preserve_whitespace_node?(node, opts)
|
|
117
|
-
|
|
118
|
-
return false unless
|
|
62
|
+
parent = node_parent(node)
|
|
63
|
+
return false unless parent
|
|
119
64
|
|
|
120
65
|
element_sensitive?(node, opts)
|
|
121
66
|
end
|
|
122
67
|
|
|
123
68
|
# Return the whitespace class for a text node used during comparison.
|
|
124
|
-
#
|
|
125
|
-
# :preserve → preserve all whitespace character-by-character
|
|
126
|
-
# :collapse → preserve presence (normalize to single space)
|
|
127
|
-
# :strip → drop whitespace-only text nodes
|
|
128
|
-
#
|
|
129
|
-
# @param node [Object] Text node to classify
|
|
130
|
-
# @param opts [Hash] Comparison options containing match_opts
|
|
131
|
-
# @return [Symbol] :preserve, :collapse, or :strip
|
|
132
69
|
def classify_text_node(node, opts)
|
|
133
70
|
match_opts = opts[:match_opts]
|
|
134
71
|
return :strip unless match_opts
|
|
135
72
|
return :strip unless text_node_parent?(node)
|
|
136
73
|
|
|
137
|
-
parent = node
|
|
74
|
+
parent = node_parent(node)
|
|
138
75
|
|
|
139
76
|
unless respect_xml_space?(match_opts)
|
|
140
77
|
return user_config_sensitive?(parent,
|
|
@@ -148,15 +85,6 @@ module Canon
|
|
|
148
85
|
end
|
|
149
86
|
|
|
150
87
|
# Check if structural whitespace is preserved (not stripped) for an element.
|
|
151
|
-
#
|
|
152
|
-
# Uses the same priority chain as element_sensitive? / classify_text_node:
|
|
153
|
-
# 1. xml:space="preserve" → always preserved
|
|
154
|
-
# 2. xml:space="default" → use configured behaviour
|
|
155
|
-
# 3. ancestor-walk classification (strip = dropped)
|
|
156
|
-
#
|
|
157
|
-
# @param element [Object] Element node to check
|
|
158
|
-
# @param match_opts [Hash] Resolved match options
|
|
159
|
-
# @return [Boolean] true if whitespace is preserved (not stripped)
|
|
160
88
|
def whitespace_preserved?(element, match_opts)
|
|
161
89
|
if respect_xml_space?(match_opts)
|
|
162
90
|
return true if xml_space_preserve?(element)
|
|
@@ -167,27 +95,14 @@ module Canon
|
|
|
167
95
|
%i[preserve collapse].include?(classification)
|
|
168
96
|
end
|
|
169
97
|
|
|
170
|
-
# Get resolved list of preserve whitespace element names (strings).
|
|
171
|
-
#
|
|
172
|
-
# @param match_opts [Hash] Resolved match options
|
|
173
|
-
# @return [Array<String>] Preserve element names
|
|
174
98
|
def resolved_preserve_elements(match_opts)
|
|
175
99
|
resolved_preserve_elements_set(match_opts).to_a
|
|
176
100
|
end
|
|
177
101
|
|
|
178
|
-
# Get resolved list of collapse whitespace element names (strings).
|
|
179
|
-
#
|
|
180
|
-
# @param match_opts [Hash] Resolved match options
|
|
181
|
-
# @return [Array<String>] Collapse element names
|
|
182
102
|
def resolved_collapse_elements(match_opts)
|
|
183
103
|
resolved_collapse_elements_set(match_opts).to_a
|
|
184
104
|
end
|
|
185
105
|
|
|
186
|
-
# Get format-specific default preserve (exact-whitespace) elements.
|
|
187
|
-
# This is the SINGLE SOURCE OF TRUTH for default preserve-whitespace elements.
|
|
188
|
-
#
|
|
189
|
-
# @param match_opts [Hash] Resolved match options
|
|
190
|
-
# @return [Array<Symbol>] Default preserve element names
|
|
191
106
|
def format_default_preserve_elements(match_opts)
|
|
192
107
|
format = match_opts[:format] || :xml
|
|
193
108
|
case format
|
|
@@ -198,10 +113,6 @@ module Canon
|
|
|
198
113
|
end
|
|
199
114
|
end
|
|
200
115
|
|
|
201
|
-
# Get format-specific default collapse elements.
|
|
202
|
-
#
|
|
203
|
-
# @param match_opts [Hash] Resolved match options
|
|
204
|
-
# @return [Array<Symbol>] Default collapse element names
|
|
205
116
|
def format_default_collapse_elements(match_opts)
|
|
206
117
|
format = match_opts[:format] || :xml
|
|
207
118
|
case format
|
|
@@ -212,61 +123,32 @@ module Canon
|
|
|
212
123
|
end
|
|
213
124
|
end
|
|
214
125
|
|
|
215
|
-
# Check if an element is in the default sensitive list for its format
|
|
216
|
-
#
|
|
217
|
-
# @param element_name [String, Symbol] The element name to check
|
|
218
|
-
# @param match_opts [Hash] Resolved match options
|
|
219
|
-
# @return [Boolean] true if element is in default sensitive list
|
|
220
126
|
def default_sensitive_element?(element_name, match_opts)
|
|
221
127
|
format_default_preserve_elements(match_opts)
|
|
222
128
|
.include?(element_name.to_sym)
|
|
223
129
|
end
|
|
224
130
|
|
|
225
|
-
# Check if
|
|
131
|
+
# Check if whitespace-only text node sits between two inline element
|
|
226
132
|
# siblings, making the whitespace semantically significant.
|
|
227
|
-
#
|
|
228
|
-
# In HTML rendering, a space between <span>A</span> <span>B</span>
|
|
229
|
-
# produces visible output. Stripping such nodes produces false
|
|
230
|
-
# equivalence.
|
|
231
|
-
#
|
|
232
|
-
# Works with any parent type (element, DocumentFragment, RootNode)
|
|
233
|
-
# since the check is about sibling context, not parent type.
|
|
234
|
-
#
|
|
235
|
-
# @param text_node [Object] Text node (Nokogiri or Canon::Xml::Node)
|
|
236
|
-
# @return [Boolean] true if whitespace is between inline siblings
|
|
237
133
|
def inline_whitespace_significant?(text_node)
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
parent = text_node.parent
|
|
134
|
+
parent = NodeInspector.parent(text_node)
|
|
241
135
|
return false unless parent
|
|
242
|
-
return false unless parent.respond_to?(:children)
|
|
243
136
|
|
|
244
|
-
siblings =
|
|
137
|
+
siblings = NodeInspector.children(parent)
|
|
245
138
|
idx = siblings.index(text_node)
|
|
246
139
|
return false unless idx
|
|
247
140
|
|
|
248
|
-
# Look at the IMMEDIATE non-whitespace-text neighbour on each
|
|
249
|
-
# side. Whitespace at a block boundary is collapsed per CSS,
|
|
250
|
-
# so both immediate neighbours must be inline for the
|
|
251
|
-
# whitespace to be significant. Walking all siblings (the
|
|
252
|
-
# earlier behaviour) misclassified whitespace at a block
|
|
253
|
-
# boundary as significant whenever any inline element existed
|
|
254
|
-
# elsewhere among the siblings.
|
|
255
141
|
prev_neighbour = nearest_non_whitespace_sibling(siblings, idx, -1)
|
|
256
142
|
next_neighbour = nearest_non_whitespace_sibling(siblings, idx, 1)
|
|
257
143
|
|
|
258
144
|
inline_element?(prev_neighbour) && inline_element?(next_neighbour)
|
|
259
145
|
end
|
|
260
146
|
|
|
261
|
-
# Walk outward from +idx+ in +direction+ (+1 forward, -1 back),
|
|
262
|
-
# skipping whitespace-only text nodes, and return the first
|
|
263
|
-
# non-whitespace sibling found. Returns nil if none.
|
|
264
147
|
def nearest_non_whitespace_sibling(siblings, idx, direction)
|
|
265
148
|
i = idx + direction
|
|
266
149
|
while i >= 0 && i < siblings.length
|
|
267
150
|
s = siblings[i]
|
|
268
|
-
unless
|
|
269
|
-
s.respond_to?(:content) && s.content.to_s.strip.empty?
|
|
151
|
+
unless whitespace_text_node?(s)
|
|
270
152
|
return s
|
|
271
153
|
end
|
|
272
154
|
|
|
@@ -275,19 +157,32 @@ module Canon
|
|
|
275
157
|
nil
|
|
276
158
|
end
|
|
277
159
|
|
|
278
|
-
# Check if text content contains a non-breaking space (U+00A0).
|
|
279
|
-
# NBSP is NOT collapsible whitespace in HTML — it always renders as
|
|
280
|
-
# a visible space and must never be stripped.
|
|
281
|
-
#
|
|
282
|
-
# @param text [String] Text content to check
|
|
283
|
-
# @return [Boolean] true if text contains U+00A0
|
|
284
160
|
def contains_nbsp?(text)
|
|
285
|
-
text.to_s.include?("
|
|
161
|
+
text.to_s.include?(" ")
|
|
286
162
|
end
|
|
287
163
|
|
|
288
164
|
private
|
|
289
165
|
|
|
290
|
-
|
|
166
|
+
def whitespace_text_node?(node)
|
|
167
|
+
NodeInspector.whitespace_only_text?(node)
|
|
168
|
+
end
|
|
169
|
+
|
|
170
|
+
def node_name(node)
|
|
171
|
+
NodeInspector.name(node)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
def node_parent(node)
|
|
175
|
+
NodeInspector.parent(node)
|
|
176
|
+
end
|
|
177
|
+
|
|
178
|
+
def node_children(node)
|
|
179
|
+
NodeInspector.children(node)
|
|
180
|
+
end
|
|
181
|
+
|
|
182
|
+
def element?(node)
|
|
183
|
+
NodeInspector.element_node?(node)
|
|
184
|
+
end
|
|
185
|
+
|
|
291
186
|
def resolved_preserve_elements_set(match_opts)
|
|
292
187
|
set = Set.new(format_default_preserve_elements(match_opts).map(&:to_s))
|
|
293
188
|
|
|
@@ -295,12 +190,10 @@ module Canon
|
|
|
295
190
|
set |= match_opts[:preserve_whitespace_elements].map(&:to_s)
|
|
296
191
|
end
|
|
297
192
|
|
|
298
|
-
# Remove blacklisted elements
|
|
299
193
|
strip_set = resolved_strip_elements_set(match_opts)
|
|
300
194
|
set.reject { |e| strip_set.include?(e) }.to_set
|
|
301
195
|
end
|
|
302
196
|
|
|
303
|
-
# Build the Set of collapse whitespace element names (strings).
|
|
304
197
|
def resolved_collapse_elements_set(match_opts)
|
|
305
198
|
set = Set.new(format_default_collapse_elements(match_opts).map(&:to_s))
|
|
306
199
|
|
|
@@ -308,49 +201,36 @@ module Canon
|
|
|
308
201
|
set |= match_opts[:collapse_whitespace_elements].map(&:to_s)
|
|
309
202
|
end
|
|
310
203
|
|
|
311
|
-
# Remove blacklisted elements
|
|
312
204
|
strip_set = resolved_strip_elements_set(match_opts)
|
|
313
205
|
set.reject { |e| strip_set.include?(e) }.to_set
|
|
314
206
|
end
|
|
315
207
|
|
|
316
|
-
# Build the Set of strip (blacklist) element names (strings).
|
|
317
208
|
def resolved_strip_elements_set(match_opts)
|
|
318
209
|
raw = match_opts[:strip_whitespace_elements]
|
|
319
210
|
Set.new((raw || []).map(&:to_s))
|
|
320
211
|
end
|
|
321
212
|
|
|
322
|
-
# Perform the ancestor walk classification.
|
|
323
|
-
# The element itself is checked first, then its ancestors.
|
|
324
|
-
# Strip blacklist wins over any sensitive ancestor.
|
|
325
213
|
def walk_ancestor_classification(element, preserve_set, collapse_set,
|
|
326
|
-
strip_set
|
|
214
|
+
strip_set)
|
|
327
215
|
current = element
|
|
328
|
-
while current
|
|
329
|
-
name = current
|
|
216
|
+
while current
|
|
217
|
+
name = node_name(current)
|
|
218
|
+
break unless name
|
|
330
219
|
|
|
331
|
-
return :strip if strip_set.include?(name)
|
|
332
|
-
return :preserve if preserve_set.include?(name)
|
|
333
|
-
return :collapse if collapse_set.include?(name)
|
|
220
|
+
return :strip if strip_set.include?(name.to_s)
|
|
221
|
+
return :preserve if preserve_set.include?(name.to_s)
|
|
222
|
+
return :collapse if collapse_set.include?(name.to_s)
|
|
334
223
|
|
|
335
|
-
|
|
336
|
-
break unless current.respond_to?(:parent)
|
|
337
|
-
|
|
338
|
-
parent = current.parent
|
|
224
|
+
parent = node_parent(current)
|
|
339
225
|
break if parent.nil?
|
|
340
|
-
break
|
|
341
|
-
break if parent == current # guard infinite loop
|
|
226
|
+
break if parent == current
|
|
342
227
|
|
|
343
228
|
current = parent
|
|
344
229
|
end
|
|
345
230
|
|
|
346
|
-
# No matching ancestor — whitespace sensitivity is always opt-in.
|
|
347
|
-
# Elements not in any list are strip regardless of format.
|
|
348
|
-
# (HTML_COLLAPSE_ELEMENTS are already merged into the collapse_set
|
|
349
|
-
# by resolved_collapse_elements_set, so they are found during the walk.)
|
|
350
231
|
:strip
|
|
351
232
|
end
|
|
352
233
|
|
|
353
|
-
# Check if we should respect xml:space attribute
|
|
354
234
|
def respect_xml_space?(match_opts)
|
|
355
235
|
if match_opts.key?(:respect_xml_space)
|
|
356
236
|
match_opts[:respect_xml_space]
|
|
@@ -359,7 +239,6 @@ module Canon
|
|
|
359
239
|
end
|
|
360
240
|
end
|
|
361
241
|
|
|
362
|
-
# Check if xml:space="preserve" is set
|
|
363
242
|
def xml_space_preserve?(element)
|
|
364
243
|
if element.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
365
244
|
element.attribute_nodes.any? do |attr|
|
|
@@ -367,14 +246,12 @@ module Canon
|
|
|
367
246
|
attr.namespace_uri == "http://www.w3.org/XML/1998/namespace" &&
|
|
368
247
|
attr.value == "preserve"
|
|
369
248
|
end
|
|
370
|
-
elsif element.respond_to?(:[])
|
|
371
|
-
element["xml:space"] == "preserve"
|
|
372
249
|
else
|
|
373
|
-
|
|
250
|
+
Canon::XmlParsing.attribute_value(element,
|
|
251
|
+
"xml:space") == "preserve"
|
|
374
252
|
end
|
|
375
253
|
end
|
|
376
254
|
|
|
377
|
-
# Check if xml:space="default" is set
|
|
378
255
|
def xml_space_default?(element)
|
|
379
256
|
if element.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
380
257
|
element.attribute_nodes.any? do |attr|
|
|
@@ -382,55 +259,40 @@ module Canon
|
|
|
382
259
|
attr.namespace_uri == "http://www.w3.org/XML/1998/namespace" &&
|
|
383
260
|
attr.value == "default"
|
|
384
261
|
end
|
|
385
|
-
elsif element.respond_to?(:[])
|
|
386
|
-
element["xml:space"] == "default"
|
|
387
262
|
else
|
|
388
|
-
|
|
263
|
+
Canon::XmlParsing.attribute_value(element, "xml:space") == "default"
|
|
389
264
|
end
|
|
390
265
|
end
|
|
391
266
|
|
|
392
|
-
# Check sensitivity based on user configuration (binary, no ancestor)
|
|
393
267
|
def user_config_sensitive?(element, match_opts)
|
|
394
268
|
list = match_opts[:preserve_whitespace_elements]
|
|
395
269
|
return false unless list
|
|
396
270
|
|
|
397
|
-
|
|
271
|
+
name = node_name(element)
|
|
272
|
+
return false unless name
|
|
273
|
+
|
|
274
|
+
list.map(&:to_s).include?(name.to_s)
|
|
398
275
|
end
|
|
399
276
|
|
|
400
|
-
# Check if node has a parent that's an element (not document root)
|
|
401
277
|
def text_node_parent?(node)
|
|
402
|
-
|
|
403
|
-
return false unless
|
|
404
|
-
|
|
405
|
-
parent = node.parent
|
|
406
|
-
return true if parent.respond_to?(:element?) && parent.element?
|
|
278
|
+
parent = node_parent(node)
|
|
279
|
+
return false unless parent
|
|
407
280
|
|
|
408
|
-
|
|
409
|
-
parent.respond_to?(:node_type) && parent.node_type == :element
|
|
281
|
+
element?(parent)
|
|
410
282
|
end
|
|
411
283
|
|
|
412
|
-
# Get the parent element of a text node, or nil.
|
|
413
|
-
# Works with both Nokogiri and Canon::Xml::Node types.
|
|
414
284
|
def parent_element_of(text_node)
|
|
415
|
-
|
|
416
|
-
|
|
417
|
-
parent = text_node.parent
|
|
285
|
+
parent = node_parent(text_node)
|
|
418
286
|
return nil unless parent
|
|
419
287
|
|
|
420
|
-
if
|
|
421
|
-
parent
|
|
422
|
-
elsif parent.respond_to?(:element?) && parent.element?
|
|
423
|
-
parent
|
|
424
|
-
elsif parent.respond_to?(:node_type) && parent.node_type == :element
|
|
425
|
-
parent
|
|
426
|
-
end
|
|
288
|
+
parent if element?(parent)
|
|
427
289
|
end
|
|
428
290
|
|
|
429
|
-
# Check if a node is an HTML inline element.
|
|
430
291
|
def inline_element?(node)
|
|
431
|
-
|
|
292
|
+
name = node_name(node)
|
|
293
|
+
return false unless name
|
|
432
294
|
|
|
433
|
-
INLINE_ELEMENTS.include?(
|
|
295
|
+
INLINE_ELEMENTS.include?(name.to_s.downcase)
|
|
434
296
|
end
|
|
435
297
|
end
|
|
436
298
|
end
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "../match_options"
|
|
4
|
+
require_relative "../../xml/namespace_helper"
|
|
4
5
|
|
|
5
6
|
module Canon
|
|
6
7
|
module Comparison
|
|
@@ -91,13 +92,11 @@ module Canon
|
|
|
91
92
|
# @return [Array<String, String>] Normalized [name, value] pair
|
|
92
93
|
def self.normalize_attribute_pair(key, val)
|
|
93
94
|
if key.is_a?(String)
|
|
94
|
-
# Nokogiri format: key=name (String), val=attr object
|
|
95
95
|
name = key
|
|
96
|
-
value = val.
|
|
96
|
+
value = val.is_a?(String) ? val : val.value
|
|
97
97
|
else
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
value = key.respond_to?(:value) ? key.value : key.to_s
|
|
98
|
+
name = key.name
|
|
99
|
+
value = key.value
|
|
101
100
|
end
|
|
102
101
|
|
|
103
102
|
[name, value]
|
|
@@ -123,12 +122,8 @@ module Canon
|
|
|
123
122
|
end
|
|
124
123
|
end
|
|
125
124
|
|
|
126
|
-
# Check if an attribute name is a namespace declaration
|
|
127
|
-
#
|
|
128
|
-
# @param attr_name [String] Attribute name
|
|
129
|
-
# @return [Boolean] true if it's a namespace declaration
|
|
130
125
|
def self.namespace_declaration?(attr_name)
|
|
131
|
-
|
|
126
|
+
Canon::Xml::NamespaceHelper.namespace_declaration?(attr_name)
|
|
132
127
|
end
|
|
133
128
|
end
|
|
134
129
|
end
|
|
@@ -83,10 +83,10 @@ module Canon
|
|
|
83
83
|
|
|
84
84
|
# Create temporary RootNode wrappers
|
|
85
85
|
temp_root1 = Canon::Xml::Nodes::RootNode.new
|
|
86
|
-
temp_root1.
|
|
86
|
+
temp_root1.children = children1.dup
|
|
87
87
|
|
|
88
88
|
temp_root2 = Canon::Xml::Nodes::RootNode.new
|
|
89
|
-
temp_root2.
|
|
89
|
+
temp_root2.children = children2.dup
|
|
90
90
|
|
|
91
91
|
matcher = Canon::Xml::ElementMatcher.new
|
|
92
92
|
matches = matcher.match_trees(temp_root1, temp_root2)
|
|
@@ -282,7 +282,7 @@ module Canon
|
|
|
282
282
|
end
|
|
283
283
|
|
|
284
284
|
smaller_set_names = smaller_set.filter_map do |c|
|
|
285
|
-
next nil unless c.is_a?(Canon::Xml::Node) ||
|
|
285
|
+
next nil unless c.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(c)
|
|
286
286
|
# Exclude generic node-type names (e.g. "#text") that are
|
|
287
287
|
# shared by all text nodes and cannot be used for matching.
|
|
288
288
|
next nil if c.name.start_with?("#")
|
|
@@ -298,7 +298,7 @@ module Canon
|
|
|
298
298
|
# consider it a mismatch
|
|
299
299
|
mismatch_children << larger_set[i]
|
|
300
300
|
elsif (larger_set[i].is_a?(Canon::Xml::Node) ||
|
|
301
|
-
larger_set[i]
|
|
301
|
+
Canon::XmlParsing.xml_node?(larger_set[i])) &&
|
|
302
302
|
!larger_set[i].name.start_with?("#") &&
|
|
303
303
|
!smaller_set_names.include?(larger_set[i].name)
|
|
304
304
|
# If the name of the node is not found in the smaller set,
|
|
@@ -53,7 +53,7 @@ module Canon
|
|
|
53
53
|
# For deleted/inserted nodes, include namespace information if available
|
|
54
54
|
if dimension == :text_content && (node1.nil? || node2.nil?)
|
|
55
55
|
node = node1 || node2
|
|
56
|
-
if node.is_a?(Canon::Xml::Node) ||
|
|
56
|
+
if node.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node)
|
|
57
57
|
ns = node.namespace_uri
|
|
58
58
|
ns_info = if ns.nil? || ns.empty?
|
|
59
59
|
""
|
|
@@ -100,8 +100,8 @@ module Canon
|
|
|
100
100
|
elsif dimension == :element_structure &&
|
|
101
101
|
diff1 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
102
102
|
diff2 == Canon::Comparison::UNEQUAL_ELEMENTS &&
|
|
103
|
-
(node1.is_a?(Canon::Xml::Node) ||
|
|
104
|
-
(node2.is_a?(Canon::Xml::Node) ||
|
|
103
|
+
(node1.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node1)) &&
|
|
104
|
+
(node2.is_a?(Canon::Xml::Node) || Canon::XmlParsing.xml_node?(node2)) &&
|
|
105
105
|
node1.name && node2.name && node1.name != node2.name
|
|
106
106
|
"different element name (<#{node1.name}> vs <#{node2.name}>)"
|
|
107
107
|
else
|
|
@@ -198,12 +198,14 @@ module Canon
|
|
|
198
198
|
node.value
|
|
199
199
|
when Canon::Xml::Node
|
|
200
200
|
node.text_content
|
|
201
|
-
when Nokogiri::XML::Node
|
|
202
|
-
node.content.to_s
|
|
203
|
-
when String
|
|
204
|
-
node
|
|
205
201
|
else
|
|
206
|
-
node.
|
|
202
|
+
if Canon::XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Node)
|
|
203
|
+
node.content.to_s
|
|
204
|
+
elsif Canon::XmlParsing.xml_node?(node)
|
|
205
|
+
Canon::XmlParsing.text_content(node)
|
|
206
|
+
else
|
|
207
|
+
node.to_s
|
|
208
|
+
end
|
|
207
209
|
end
|
|
208
210
|
rescue StandardError
|
|
209
211
|
nil
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require_relative "../../xml/namespace_helper"
|
|
4
|
+
|
|
3
5
|
module Canon
|
|
4
6
|
module Comparison
|
|
5
7
|
module XmlComparatorHelpers
|
|
@@ -41,20 +43,20 @@ module Canon
|
|
|
41
43
|
def self.extract_declarations(node)
|
|
42
44
|
declarations = {}
|
|
43
45
|
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
46
|
+
if node.is_a?(Canon::Xml::Node)
|
|
47
|
+
if node.namespace_nodes
|
|
48
|
+
return extract_from_namespace_nodes(node.namespace_nodes,
|
|
49
|
+
declarations)
|
|
50
|
+
end
|
|
49
51
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
+
raw_attrs = node.attribute_nodes
|
|
53
|
+
else
|
|
54
|
+
raw_attrs = node.attributes
|
|
55
|
+
end
|
|
52
56
|
|
|
53
|
-
# Handle Canon::Xml::Node attribute format (array of AttributeNode)
|
|
54
57
|
if raw_attrs.is_a?(Array)
|
|
55
58
|
extract_from_array_attributes(raw_attrs, declarations)
|
|
56
59
|
else
|
|
57
|
-
# Handle Nokogiri and Moxml attribute formats (Hash-like)
|
|
58
60
|
extract_from_hash_attributes(raw_attrs, declarations)
|
|
59
61
|
end
|
|
60
62
|
|
|
@@ -105,23 +107,11 @@ module Canon
|
|
|
105
107
|
# @return [Hash] Declarations hash
|
|
106
108
|
def self.extract_from_hash_attributes(raw_attrs, declarations)
|
|
107
109
|
raw_attrs.each do |key, val|
|
|
108
|
-
|
|
109
|
-
name = if key.is_a?(String)
|
|
110
|
-
# Nokogiri format: key=name (String), val=attr object
|
|
111
|
-
key
|
|
112
|
-
else
|
|
113
|
-
# Moxml format: key=attr object, val=nil
|
|
114
|
-
key.respond_to?(:name) ? key.name : key.to_s
|
|
115
|
-
end
|
|
110
|
+
name = key.is_a?(String) ? key : key.name
|
|
116
111
|
|
|
117
112
|
if namespace_declaration?(name)
|
|
118
|
-
value =
|
|
119
|
-
val.value
|
|
120
|
-
else
|
|
121
|
-
val.to_s
|
|
122
|
-
end
|
|
113
|
+
value = val.is_a?(String) ? val : val.value
|
|
123
114
|
|
|
124
|
-
# Extract prefix: "xmlns" -> "", "xmlns:xmi" -> "xmi"
|
|
125
115
|
prefix = name == "xmlns" ? "" : name.split(":", 2)[1]
|
|
126
116
|
declarations[prefix] = value
|
|
127
117
|
end
|
|
@@ -130,12 +120,8 @@ module Canon
|
|
|
130
120
|
declarations
|
|
131
121
|
end
|
|
132
122
|
|
|
133
|
-
# Check if an attribute name is a namespace declaration
|
|
134
|
-
#
|
|
135
|
-
# @param attr_name [String] Attribute name
|
|
136
|
-
# @return [Boolean] true if it's a namespace declaration
|
|
137
123
|
def self.namespace_declaration?(attr_name)
|
|
138
|
-
|
|
124
|
+
Canon::Xml::NamespaceHelper.namespace_declaration?(attr_name)
|
|
139
125
|
end
|
|
140
126
|
|
|
141
127
|
# Add a namespace declaration difference
|