canon 0.1.23 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +155 -30
- data/docs/INDEX.adoc +4 -0
- data/docs/advanced/diff-classification.adoc +3 -2
- data/docs/advanced/verbose-mode-architecture.adoc +23 -0
- data/docs/features/configuration-profiles.adoc +288 -0
- data/docs/features/diff-formatting/character-visualization.adoc +153 -454
- data/docs/features/diff-formatting/display-filtering.adoc +44 -0
- data/docs/features/diff-formatting/display-preprocessing.adoc +656 -0
- data/docs/features/diff-formatting/index.adoc +47 -0
- data/docs/features/diff-formatting/pretty-diff-mode.adoc +154 -0
- data/docs/features/environment-configuration/override-system.adoc +10 -3
- data/docs/features/index.adoc +9 -0
- data/docs/features/match-options/html-policies.adoc +3 -0
- data/docs/features/match-options/index.adoc +32 -42
- data/docs/features/match-options/pretty-printed-fixtures.adoc +270 -0
- data/docs/guides/choosing-configuration.adoc +22 -0
- data/docs/reference/environment-variables.adoc +121 -1
- data/docs/reference/options-across-interfaces.adoc +182 -2
- data/lib/canon/cli.rb +20 -0
- data/lib/canon/commands/diff_command.rb +7 -2
- data/lib/canon/commands/format_command.rb +1 -1
- data/lib/canon/comparison/html_comparator.rb +29 -19
- data/lib/canon/comparison/html_compare_profile.rb +4 -4
- data/lib/canon/comparison/markup_comparator.rb +12 -3
- data/lib/canon/comparison/match_options/base_resolver.rb +29 -7
- data/lib/canon/comparison/match_options/json_resolver.rb +9 -0
- data/lib/canon/comparison/match_options/xml_resolver.rb +16 -2
- data/lib/canon/comparison/match_options/yaml_resolver.rb +10 -0
- data/lib/canon/comparison/match_options.rb +4 -1
- data/lib/canon/comparison/whitespace_sensitivity.rb +189 -137
- data/lib/canon/comparison/xml_comparator/child_comparison.rb +21 -4
- data/lib/canon/comparison/xml_comparator.rb +14 -12
- data/lib/canon/comparison/xml_node_comparison.rb +51 -6
- data/lib/canon/comparison.rb +52 -9
- data/lib/canon/config/env_schema.rb +32 -4
- data/lib/canon/config/override_resolver.rb +16 -3
- data/lib/canon/config/profile_loader.rb +135 -0
- data/lib/canon/config/profiles/metanorma.yml +74 -0
- data/lib/canon/config/profiles/metanorma_debug.yml +8 -0
- data/lib/canon/config/type_converter.rb +8 -0
- data/lib/canon/config.rb +469 -5
- data/lib/canon/diff/diff_classifier.rb +41 -11
- data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +48 -17
- data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +58 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +73 -17
- data/lib/canon/diff_formatter.rb +493 -36
- data/lib/canon/pretty_printer/xml_normalized.rb +395 -0
- data/lib/canon/rspec_matchers.rb +36 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/nodes/namespace_node.rb +4 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +4 -0
- data/lib/canon/xml/nodes/root_node.rb +4 -0
- data/lib/canon/xml/nodes/text_node.rb +4 -0
- data/lib/tasks/performance_helpers.rb +2 -2
- metadata +24 -2
|
@@ -4,30 +4,74 @@ module Canon
|
|
|
4
4
|
module Comparison
|
|
5
5
|
# Whitespace sensitivity utilities for element-level control
|
|
6
6
|
#
|
|
7
|
-
# This module provides
|
|
8
|
-
#
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
#
|
|
12
|
-
#
|
|
13
|
-
#
|
|
7
|
+
# This module provides three-way classification of whitespace behaviour
|
|
8
|
+
# at the element level:
|
|
9
|
+
#
|
|
10
|
+
# * **:preserve** — every whitespace character is significant. `" "` ≠ `"\n"`.
|
|
11
|
+
# Configured via +preserve_whitespace_elements+ (HTML default: pre, code,
|
|
12
|
+
# textarea, script, style; XML default: none).
|
|
13
|
+
#
|
|
14
|
+
# * **:collapse** — presence ≠ absence, but all whitespace forms are
|
|
15
|
+
# equivalent: `" "` == `"\n "`. Configured via +collapse_whitespace_elements+
|
|
16
|
+
# (HTML default: p, li, dt, dd, td, th, h1-h6, caption, figcaption, label,
|
|
17
|
+
# legend, summary, blockquote, address, button; XML default: none).
|
|
18
|
+
#
|
|
19
|
+
# * **:strip** — all whitespace is structural formatting noise and is
|
|
20
|
+
# dropped. Default for XML; HTML elements not in the above lists.
|
|
21
|
+
#
|
|
22
|
+
# Classification is **ancestor-based**: the closest matching ancestor
|
|
23
|
+
# determines the class. The strip blacklist (+strip_whitespace_elements+)
|
|
24
|
+
# overrides any sensitive ancestor.
|
|
14
25
|
#
|
|
15
26
|
# == Priority Order
|
|
16
27
|
#
|
|
17
28
|
# 1. respect_xml_space: false → User config only (ignore xml:space)
|
|
18
|
-
# 2.
|
|
19
|
-
# 3.
|
|
20
|
-
# 4.
|
|
21
|
-
# 5.
|
|
22
|
-
# 6. xml:space="default" → Use steps 1-4
|
|
29
|
+
# 2. xml:space="preserve" → preserve
|
|
30
|
+
# 3. xml:space="default" → strip (not treated as sensitive)
|
|
31
|
+
# 4. Ancestor walk (strip blacklist wins; then preserve; then collapse)
|
|
32
|
+
# 5. No matching ancestor → strip (HTML format defaults are already merged into the preserve/collapse lists; XML has none)
|
|
23
33
|
#
|
|
24
34
|
# == Usage
|
|
25
35
|
#
|
|
36
|
+
# WhitespaceSensitivity.classify_element(element, match_opts)
|
|
37
|
+
# => :preserve, :collapse, or :strip
|
|
38
|
+
#
|
|
26
39
|
# WhitespaceSensitivity.element_sensitive?(node, opts)
|
|
27
|
-
# => true if whitespace should be preserved
|
|
40
|
+
# => true if whitespace should be preserved (preserve or collapse)
|
|
28
41
|
module WhitespaceSensitivity
|
|
42
|
+
# HTML mixed-content "leaf block" elements where whitespace presence
|
|
43
|
+
# matters but all forms are equivalent (CSS block whitespace collapsing).
|
|
44
|
+
HTML_COLLAPSE_ELEMENTS = %w[
|
|
45
|
+
p li dt dd td th caption figcaption label legend summary
|
|
46
|
+
h1 h2 h3 h4 h5 h6
|
|
47
|
+
blockquote address button
|
|
48
|
+
].freeze
|
|
49
|
+
|
|
50
|
+
# HTML elements where every whitespace character is significant.
|
|
51
|
+
HTML_PRESERVE_ELEMENTS = %w[pre code textarea script style].freeze
|
|
52
|
+
|
|
29
53
|
class << self
|
|
30
|
-
#
|
|
54
|
+
# Classify the whitespace behaviour for an element using ancestor walk.
|
|
55
|
+
#
|
|
56
|
+
# @param element [Object] The element node to classify
|
|
57
|
+
# @param match_opts [Hash] Resolved match options
|
|
58
|
+
# @return [Symbol] :preserve, :collapse, or :strip
|
|
59
|
+
def classify_element(element, match_opts)
|
|
60
|
+
return :strip unless element
|
|
61
|
+
return :strip unless element.respond_to?(:name)
|
|
62
|
+
|
|
63
|
+
preserve_set = resolved_preserve_elements_set(match_opts)
|
|
64
|
+
collapse_set = resolved_collapse_elements_set(match_opts)
|
|
65
|
+
strip_set = resolved_strip_elements_set(match_opts)
|
|
66
|
+
|
|
67
|
+
# Ancestor walk: start at the element itself, walk up.
|
|
68
|
+
# Strip blacklist wins over any sensitive ancestor.
|
|
69
|
+
walk_ancestor_classification(element, preserve_set, collapse_set,
|
|
70
|
+
strip_set, match_opts)
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
# Check if an element is whitespace-sensitive based on configuration.
|
|
74
|
+
# Returns true for :preserve or :collapse classification.
|
|
31
75
|
#
|
|
32
76
|
# @param node [Object] The element node to check
|
|
33
77
|
# @param opts [Hash] Comparison options containing match_opts
|
|
@@ -40,7 +84,7 @@ module Canon
|
|
|
40
84
|
parent = node.parent
|
|
41
85
|
|
|
42
86
|
# 1. Check if we should ignore xml:space (user override)
|
|
43
|
-
|
|
87
|
+
unless respect_xml_space?(match_opts)
|
|
44
88
|
return user_config_sensitive?(parent, match_opts)
|
|
45
89
|
end
|
|
46
90
|
|
|
@@ -50,8 +94,9 @@ module Canon
|
|
|
50
94
|
# 3. Check xml:space="default" (use configured behavior)
|
|
51
95
|
return false if xml_space_default?(parent)
|
|
52
96
|
|
|
53
|
-
# 4.
|
|
54
|
-
|
|
97
|
+
# 4. Three-way classification (ancestor-based)
|
|
98
|
+
classification = classify_element(parent, match_opts)
|
|
99
|
+
%i[preserve collapse].include?(classification)
|
|
55
100
|
end
|
|
56
101
|
|
|
57
102
|
# Check if whitespace-only text node should be filtered
|
|
@@ -66,105 +111,93 @@ module Canon
|
|
|
66
111
|
element_sensitive?(node, opts)
|
|
67
112
|
end
|
|
68
113
|
|
|
69
|
-
#
|
|
114
|
+
# Return the whitespace class for a text node used during comparison.
|
|
115
|
+
#
|
|
116
|
+
# :preserve → preserve all whitespace character-by-character
|
|
117
|
+
# :collapse → preserve presence (normalize to single space)
|
|
118
|
+
# :strip → drop whitespace-only text nodes
|
|
70
119
|
#
|
|
71
|
-
#
|
|
72
|
-
#
|
|
73
|
-
#
|
|
120
|
+
# @param node [Object] Text node to classify
|
|
121
|
+
# @param opts [Hash] Comparison options containing match_opts
|
|
122
|
+
# @return [Symbol] :preserve, :collapse, or :strip
|
|
123
|
+
def classify_text_node(node, opts)
|
|
124
|
+
match_opts = opts[:match_opts]
|
|
125
|
+
return :strip unless match_opts
|
|
126
|
+
return :strip unless text_node_parent?(node)
|
|
127
|
+
|
|
128
|
+
parent = node.parent
|
|
129
|
+
|
|
130
|
+
unless respect_xml_space?(match_opts)
|
|
131
|
+
return user_config_sensitive?(parent,
|
|
132
|
+
match_opts) ? :preserve : :strip
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
return :preserve if xml_space_preserve?(parent)
|
|
136
|
+
return :strip if xml_space_default?(parent)
|
|
137
|
+
|
|
138
|
+
classify_element(parent, match_opts)
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
# Check if structural whitespace is preserved (not stripped) for an element.
|
|
74
142
|
#
|
|
75
|
-
#
|
|
143
|
+
# Uses the same priority chain as element_sensitive? / classify_text_node:
|
|
144
|
+
# 1. xml:space="preserve" → always preserved
|
|
145
|
+
# 2. xml:space="default" → use configured behaviour
|
|
146
|
+
# 3. ancestor-walk classification (strip = dropped)
|
|
76
147
|
#
|
|
77
148
|
# @param element [Object] Element node to check
|
|
78
149
|
# @param match_opts [Hash] Resolved match options
|
|
79
150
|
# @return [Boolean] true if whitespace is preserved (not stripped)
|
|
80
151
|
def whitespace_preserved?(element, match_opts)
|
|
81
|
-
return false unless element
|
|
82
|
-
return false unless element.respond_to?(:name)
|
|
83
|
-
|
|
84
|
-
elem_name = element.name.to_s
|
|
85
|
-
|
|
86
|
-
# Blacklist: always strip (highest priority)
|
|
87
|
-
insensitive_raw = match_opts[:insensitive_elements]
|
|
88
|
-
insensitive_raw ||= match_opts[:whitespace_insensitive_elements]
|
|
89
|
-
insensitive = (insensitive_raw || []).map(&:to_s)
|
|
90
|
-
return false if insensitive.include?(elem_name)
|
|
91
|
-
|
|
92
|
-
# Check if we should ignore xml:space (user override)
|
|
93
152
|
if respect_xml_space?(match_opts)
|
|
94
|
-
|
|
95
|
-
return true if xml_space_preserve?(element)
|
|
96
|
-
|
|
97
|
-
# Check xml:space="default" (use configured behavior)
|
|
153
|
+
return true if xml_space_preserve?(element)
|
|
98
154
|
return false if xml_space_default?(element)
|
|
99
155
|
end
|
|
100
156
|
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
return true if sensitive.include?(elem_name)
|
|
104
|
-
|
|
105
|
-
# Default: preserve for HTML, strip for XML
|
|
106
|
-
format = match_opts[:format] || :xml
|
|
107
|
-
case format
|
|
108
|
-
when :html, :html4, :html5
|
|
109
|
-
true
|
|
110
|
-
else
|
|
111
|
-
false
|
|
112
|
-
end
|
|
157
|
+
classification = classify_element(element, match_opts)
|
|
158
|
+
%i[preserve collapse].include?(classification)
|
|
113
159
|
end
|
|
114
160
|
|
|
115
|
-
# Get resolved list of whitespace
|
|
161
|
+
# Get resolved list of preserve whitespace element names (strings).
|
|
116
162
|
#
|
|
117
|
-
#
|
|
118
|
-
#
|
|
119
|
-
|
|
163
|
+
# @param match_opts [Hash] Resolved match options
|
|
164
|
+
# @return [Array<String>] Preserve element names
|
|
165
|
+
def resolved_preserve_elements(match_opts)
|
|
166
|
+
resolved_preserve_elements_set(match_opts).to_a
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
# Get resolved list of collapse whitespace element names (strings).
|
|
120
170
|
#
|
|
121
171
|
# @param match_opts [Hash] Resolved match options
|
|
122
|
-
# @return [Array<String>]
|
|
123
|
-
def
|
|
124
|
-
|
|
172
|
+
# @return [Array<String>] Collapse element names
|
|
173
|
+
def resolved_collapse_elements(match_opts)
|
|
174
|
+
resolved_collapse_elements_set(match_opts).to_a
|
|
175
|
+
end
|
|
125
176
|
|
|
126
|
-
|
|
177
|
+
# Get format-specific default preserve (exact-whitespace) elements.
|
|
178
|
+
# This is the SINGLE SOURCE OF TRUTH for default preserve-whitespace elements.
|
|
179
|
+
#
|
|
180
|
+
# @param match_opts [Hash] Resolved match options
|
|
181
|
+
# @return [Array<Symbol>] Default preserve element names
|
|
182
|
+
def format_default_preserve_elements(match_opts)
|
|
127
183
|
format = match_opts[:format] || :xml
|
|
128
184
|
case format
|
|
129
185
|
when :html, :html4, :html5
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
# 2. User whitelist (additive to format defaults)
|
|
134
|
-
whitelist = match_opts[:sensitive_elements]
|
|
135
|
-
whitelist ||= match_opts[:whitespace_sensitive_elements]
|
|
136
|
-
if whitelist
|
|
137
|
-
sensitive += whitelist.map(&:to_s)
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
# 3. User blacklist removes from combined set
|
|
141
|
-
blacklist_raw = match_opts[:insensitive_elements]
|
|
142
|
-
blacklist_raw ||= match_opts[:whitespace_insensitive_elements]
|
|
143
|
-
if blacklist_raw
|
|
144
|
-
blacklist = blacklist_raw.to_set(&:to_s)
|
|
145
|
-
sensitive.reject! { |e| blacklist.include?(e) }
|
|
186
|
+
HTML_PRESERVE_ELEMENTS.map(&:to_sym).freeze
|
|
187
|
+
else
|
|
188
|
+
[].freeze
|
|
146
189
|
end
|
|
147
|
-
|
|
148
|
-
sensitive.uniq
|
|
149
190
|
end
|
|
150
191
|
|
|
151
|
-
# Get format-specific default
|
|
152
|
-
#
|
|
153
|
-
# This is the SINGLE SOURCE OF TRUTH for default whitespace-sensitive
|
|
154
|
-
# elements. All other code should use this method to get the list.
|
|
192
|
+
# Get format-specific default collapse elements.
|
|
155
193
|
#
|
|
156
194
|
# @param match_opts [Hash] Resolved match options
|
|
157
|
-
# @return [Array<Symbol>] Default
|
|
158
|
-
def
|
|
195
|
+
# @return [Array<Symbol>] Default collapse element names
|
|
196
|
+
def format_default_collapse_elements(match_opts)
|
|
159
197
|
format = match_opts[:format] || :xml
|
|
160
|
-
|
|
161
198
|
case format
|
|
162
199
|
when :html, :html4, :html5
|
|
163
|
-
|
|
164
|
-
%i[pre code textarea script style].freeze
|
|
165
|
-
when :xml
|
|
166
|
-
# XML has no default sensitive elements - purely user-controlled
|
|
167
|
-
[].freeze
|
|
200
|
+
HTML_COLLAPSE_ELEMENTS.map(&:to_sym).freeze
|
|
168
201
|
else
|
|
169
202
|
[].freeze
|
|
170
203
|
end
|
|
@@ -172,23 +205,80 @@ module Canon
|
|
|
172
205
|
|
|
173
206
|
# Check if an element is in the default sensitive list for its format
|
|
174
207
|
#
|
|
175
|
-
# Convenience method for checking element sensitivity without building
|
|
176
|
-
# the full list first.
|
|
177
|
-
#
|
|
178
208
|
# @param element_name [String, Symbol] The element name to check
|
|
179
209
|
# @param match_opts [Hash] Resolved match options
|
|
180
210
|
# @return [Boolean] true if element is in default sensitive list
|
|
181
211
|
def default_sensitive_element?(element_name, match_opts)
|
|
182
|
-
|
|
212
|
+
format_default_preserve_elements(match_opts)
|
|
183
213
|
.include?(element_name.to_sym)
|
|
184
214
|
end
|
|
185
215
|
|
|
186
216
|
private
|
|
187
217
|
|
|
218
|
+
# Build the Set of preserve whitespace element names (strings).
|
|
219
|
+
def resolved_preserve_elements_set(match_opts)
|
|
220
|
+
set = Set.new(format_default_preserve_elements(match_opts).map(&:to_s))
|
|
221
|
+
|
|
222
|
+
if match_opts[:preserve_whitespace_elements]
|
|
223
|
+
set |= match_opts[:preserve_whitespace_elements].map(&:to_s)
|
|
224
|
+
end
|
|
225
|
+
|
|
226
|
+
# Remove blacklisted elements
|
|
227
|
+
strip_set = resolved_strip_elements_set(match_opts)
|
|
228
|
+
set.reject { |e| strip_set.include?(e) }.to_set
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
# Build the Set of collapse whitespace element names (strings).
|
|
232
|
+
def resolved_collapse_elements_set(match_opts)
|
|
233
|
+
set = Set.new(format_default_collapse_elements(match_opts).map(&:to_s))
|
|
234
|
+
|
|
235
|
+
if match_opts[:collapse_whitespace_elements]
|
|
236
|
+
set |= match_opts[:collapse_whitespace_elements].map(&:to_s)
|
|
237
|
+
end
|
|
238
|
+
|
|
239
|
+
# Remove blacklisted elements
|
|
240
|
+
strip_set = resolved_strip_elements_set(match_opts)
|
|
241
|
+
set.reject { |e| strip_set.include?(e) }.to_set
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
# Build the Set of strip (blacklist) element names (strings).
|
|
245
|
+
def resolved_strip_elements_set(match_opts)
|
|
246
|
+
raw = match_opts[:strip_whitespace_elements]
|
|
247
|
+
Set.new((raw || []).map(&:to_s))
|
|
248
|
+
end
|
|
249
|
+
|
|
250
|
+
# Perform the ancestor walk classification.
|
|
251
|
+
# The element itself is checked first, then its ancestors.
|
|
252
|
+
# Strip blacklist wins over any sensitive ancestor.
|
|
253
|
+
def walk_ancestor_classification(element, preserve_set, collapse_set,
|
|
254
|
+
strip_set, _match_opts)
|
|
255
|
+
current = element
|
|
256
|
+
while current.respond_to?(:name)
|
|
257
|
+
name = current.name.to_s
|
|
258
|
+
|
|
259
|
+
return :strip if strip_set.include?(name)
|
|
260
|
+
return :preserve if preserve_set.include?(name)
|
|
261
|
+
return :collapse if collapse_set.include?(name)
|
|
262
|
+
|
|
263
|
+
# Walk up
|
|
264
|
+
break unless current.respond_to?(:parent)
|
|
265
|
+
|
|
266
|
+
parent = current.parent
|
|
267
|
+
break if parent.nil?
|
|
268
|
+
break unless parent.respond_to?(:name)
|
|
269
|
+
break if parent == current # guard infinite loop
|
|
270
|
+
|
|
271
|
+
current = parent
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
# No matching ancestor — whitespace sensitivity is always opt-in.
|
|
275
|
+
# Elements not in any list are strip regardless of format.
|
|
276
|
+
# (HTML_COLLAPSE_ELEMENTS are already merged into the collapse_set
|
|
277
|
+
# by resolved_collapse_elements_set, so they are found during the walk.)
|
|
278
|
+
:strip
|
|
279
|
+
end
|
|
280
|
+
|
|
188
281
|
# Check if we should respect xml:space attribute
|
|
189
|
-
#
|
|
190
|
-
# @param match_opts [Hash] Resolved match options
|
|
191
|
-
# @return [Boolean] true if xml:space should be respected
|
|
192
282
|
def respect_xml_space?(match_opts)
|
|
193
283
|
if match_opts.key?(:respect_xml_space)
|
|
194
284
|
match_opts[:respect_xml_space]
|
|
@@ -198,13 +288,8 @@ module Canon
|
|
|
198
288
|
end
|
|
199
289
|
|
|
200
290
|
# Check if xml:space="preserve" is set
|
|
201
|
-
#
|
|
202
|
-
# @param element [Object] The element to check
|
|
203
|
-
# @return [Boolean] true if xml:space="preserve"
|
|
204
291
|
def xml_space_preserve?(element)
|
|
205
292
|
if element.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
206
|
-
# Check attribute_nodes for xml:space attribute
|
|
207
|
-
# xml:space is stored with name="space" and namespace_uri="http://www.w3.org/XML/1998/namespace"
|
|
208
293
|
element.attribute_nodes.any? do |attr|
|
|
209
294
|
attr.name == "space" &&
|
|
210
295
|
attr.namespace_uri == "http://www.w3.org/XML/1998/namespace" &&
|
|
@@ -218,13 +303,8 @@ module Canon
|
|
|
218
303
|
end
|
|
219
304
|
|
|
220
305
|
# Check if xml:space="default" is set
|
|
221
|
-
#
|
|
222
|
-
# @param element [Object] The element to check
|
|
223
|
-
# @return [Boolean] true if xml:space="default"
|
|
224
306
|
def xml_space_default?(element)
|
|
225
307
|
if element.is_a?(Canon::Xml::Nodes::ElementNode)
|
|
226
|
-
# Check attribute_nodes for xml:space attribute
|
|
227
|
-
# xml:space is stored with name="space" and namespace_uri="http://www.w3.org/XML/1998/namespace"
|
|
228
308
|
element.attribute_nodes.any? do |attr|
|
|
229
309
|
attr.name == "space" &&
|
|
230
310
|
attr.namespace_uri == "http://www.w3.org/XML/1998/namespace" &&
|
|
@@ -237,43 +317,15 @@ module Canon
|
|
|
237
317
|
end
|
|
238
318
|
end
|
|
239
319
|
|
|
240
|
-
# Check sensitivity based on user configuration
|
|
241
|
-
#
|
|
242
|
-
# @param element [Object] The element to check
|
|
243
|
-
# @param match_opts [Hash] Resolved match options
|
|
244
|
-
# @return [Boolean] true if element is in whitelist
|
|
320
|
+
# Check sensitivity based on user configuration (binary, no ancestor)
|
|
245
321
|
def user_config_sensitive?(element, match_opts)
|
|
246
|
-
|
|
322
|
+
list = match_opts[:preserve_whitespace_elements]
|
|
323
|
+
return false unless list
|
|
247
324
|
|
|
248
|
-
|
|
249
|
-
end
|
|
250
|
-
|
|
251
|
-
# Check sensitivity based on user config + format defaults
|
|
252
|
-
#
|
|
253
|
-
# @param element [Object] The element to check
|
|
254
|
-
# @param match_opts [Hash] Resolved match options
|
|
255
|
-
# @return [Boolean] true if element should be sensitive
|
|
256
|
-
def configured_sensitive?(element, match_opts)
|
|
257
|
-
# Start with format defaults
|
|
258
|
-
sensitive = format_default_sensitive_elements(match_opts).to_set
|
|
259
|
-
|
|
260
|
-
# Apply whitelist (adds to defaults)
|
|
261
|
-
if match_opts[:whitespace_sensitive_elements]
|
|
262
|
-
sensitive |= match_opts[:whitespace_sensitive_elements]
|
|
263
|
-
end
|
|
264
|
-
|
|
265
|
-
# Apply blacklist (removes from everything)
|
|
266
|
-
if match_opts[:whitespace_insensitive_elements]
|
|
267
|
-
sensitive -= match_opts[:whitespace_insensitive_elements]
|
|
268
|
-
end
|
|
269
|
-
|
|
270
|
-
sensitive.include?(element.name.to_sym)
|
|
325
|
+
list.map(&:to_s).include?(element.name.to_s)
|
|
271
326
|
end
|
|
272
327
|
|
|
273
328
|
# Check if node has a parent that's an element (not document root)
|
|
274
|
-
#
|
|
275
|
-
# @param node [Object] The node to check
|
|
276
|
-
# @return [Boolean] true if node has an element parent
|
|
277
329
|
def text_node_parent?(node)
|
|
278
330
|
return false unless node.respond_to?(:parent)
|
|
279
331
|
return false unless node.parent
|
|
@@ -28,8 +28,17 @@ module Canon
|
|
|
28
28
|
# @return [Integer] Comparison result code
|
|
29
29
|
def compare(node1, node2, comparator, opts, child_opts,
|
|
30
30
|
diff_children, differences)
|
|
31
|
-
|
|
32
|
-
|
|
31
|
+
# Apply side-specific pretty-print heuristic when either flag is set:
|
|
32
|
+
# pretty_printed_expected → drop \n-starting whitespace nodes from node1
|
|
33
|
+
# pretty_printed_received → drop \n-starting whitespace nodes from node2
|
|
34
|
+
# The ephemeral _pretty_print_side_active flag is consumed by node_excluded?
|
|
35
|
+
# and must NOT be forwarded into recursive compare_nodes calls.
|
|
36
|
+
require_relative "../xml_node_comparison"
|
|
37
|
+
opts1 = XmlNodeComparison.opts_for_side(opts, :expected)
|
|
38
|
+
opts2 = XmlNodeComparison.opts_for_side(opts, :received)
|
|
39
|
+
|
|
40
|
+
children1 = comparator.send(:filter_children, node1.children, opts1)
|
|
41
|
+
children2 = comparator.send(:filter_children, node2.children, opts2)
|
|
33
42
|
|
|
34
43
|
# Quick check: if both have no children, they're equivalent
|
|
35
44
|
return Comparison::EQUIVALENT if children1.empty? && children2.empty?
|
|
@@ -241,7 +250,12 @@ diff_children, differences)
|
|
|
241
250
|
end
|
|
242
251
|
|
|
243
252
|
smaller_set_names = smaller_set.filter_map do |c|
|
|
244
|
-
c.respond_to?(:name)
|
|
253
|
+
next nil unless c.respond_to?(:name)
|
|
254
|
+
# Exclude generic node-type names (e.g. "#text") that are
|
|
255
|
+
# shared by all text nodes and cannot be used for matching.
|
|
256
|
+
next nil if c.name.start_with?("#")
|
|
257
|
+
|
|
258
|
+
c.name
|
|
245
259
|
end
|
|
246
260
|
|
|
247
261
|
new_larger_set = []
|
|
@@ -252,9 +266,12 @@ diff_children, differences)
|
|
|
252
266
|
# consider it a mismatch
|
|
253
267
|
mismatch_children << larger_set[i]
|
|
254
268
|
elsif larger_set[i].respond_to?(:name) &&
|
|
269
|
+
!larger_set[i].name.start_with?("#") &&
|
|
255
270
|
!smaller_set_names.include?(larger_set[i].name)
|
|
256
271
|
# If the name of the node is not found in the smaller set,
|
|
257
|
-
# consider it a mismatch
|
|
272
|
+
# consider it a mismatch. Skip nodes with generic names
|
|
273
|
+
# starting with "#" (e.g. "#text") since those names are
|
|
274
|
+
# shared by all nodes of that type and not useful for matching.
|
|
258
275
|
mismatch_children << larger_set[i]
|
|
259
276
|
else
|
|
260
277
|
new_larger_set << larger_set[i]
|
|
@@ -417,20 +417,22 @@ module Canon
|
|
|
417
417
|
|
|
418
418
|
# Check if whitespace should be preserved strictly for these text nodes
|
|
419
419
|
# This applies to HTML elements like pre, code, textarea, script, style
|
|
420
|
-
# and elements with xml:space="preserve" or in user-configured
|
|
420
|
+
# and elements with xml:space="preserve" or in user-configured preserve list.
|
|
421
|
+
#
|
|
422
|
+
# IMPORTANT: This returns true ONLY for :preserve classification.
|
|
423
|
+
# For :collapse classification, whitespace differences ARE acceptable
|
|
424
|
+
# (they are detected as formatting-only by DiffClassifier).
|
|
421
425
|
def should_preserve_whitespace_strictly?(n1, n2, opts)
|
|
422
|
-
#
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
end
|
|
426
|
+
# Check both n1 and n2 - if either is in a preserve whitespace element, preserve strictly
|
|
427
|
+
[n1, n2].each do |node|
|
|
428
|
+
next unless node.respond_to?(:parent)
|
|
429
|
+
|
|
430
|
+
parent = node.parent
|
|
431
|
+
next unless parent
|
|
429
432
|
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
return true if
|
|
433
|
-
sensitivity_opts)
|
|
433
|
+
classification = WhitespaceSensitivity.classify_element(parent,
|
|
434
|
+
opts[:match_opts])
|
|
435
|
+
return true if classification == :preserve
|
|
434
436
|
end
|
|
435
437
|
|
|
436
438
|
false
|
|
@@ -90,6 +90,39 @@ differences)
|
|
|
90
90
|
end
|
|
91
91
|
end
|
|
92
92
|
|
|
93
|
+
# Build a side-specific opts copy that activates the pretty-print
|
|
94
|
+
# structural-whitespace heuristic for the given side.
|
|
95
|
+
#
|
|
96
|
+
# When +pretty_printed_expected+ (side :expected) or
|
|
97
|
+
# +pretty_printed_received+ (side :received) is truthy in match_opts,
|
|
98
|
+
# returns a shallow copy of +opts+ with an ephemeral
|
|
99
|
+
# +_pretty_print_side_active: true+ flag merged into +:match_opts+.
|
|
100
|
+
# Otherwise returns +opts+ unchanged (no allocation overhead).
|
|
101
|
+
#
|
|
102
|
+
# The flag is consumed by +node_excluded?+ to drop whitespace-only text
|
|
103
|
+
# nodes that start with "\n" in +:collapse+ whitespace elements.
|
|
104
|
+
# It is intentionally NOT propagated to recursive +compare_nodes+ calls —
|
|
105
|
+
# each level of +ChildComparison.compare+ re-evaluates it from the
|
|
106
|
+
# original +pretty_printed_*+ flags.
|
|
107
|
+
#
|
|
108
|
+
# @param opts [Hash] Full comparison options hash
|
|
109
|
+
# @param side [Symbol] :expected or :received
|
|
110
|
+
# @return [Hash] opts copy with ephemeral flag, or opts itself
|
|
111
|
+
def self.opts_for_side(opts, side)
|
|
112
|
+
match_opts = opts[:match_opts]
|
|
113
|
+
return opts unless match_opts
|
|
114
|
+
|
|
115
|
+
active = case side
|
|
116
|
+
when :expected then match_opts[:pretty_printed_expected]
|
|
117
|
+
when :received then match_opts[:pretty_printed_received]
|
|
118
|
+
else false
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
return opts unless active
|
|
122
|
+
|
|
123
|
+
opts.merge(match_opts: match_opts.merge(_pretty_print_side_active: true))
|
|
124
|
+
end
|
|
125
|
+
|
|
93
126
|
# Compare document fragments by comparing their children
|
|
94
127
|
#
|
|
95
128
|
# @param node1 [Nokogiri::XML::DocumentFragment] First fragment
|
|
@@ -104,9 +137,12 @@ differences)
|
|
|
104
137
|
childrenode1 = node1.children.to_a
|
|
105
138
|
childrenode2 = node2.children.to_a
|
|
106
139
|
|
|
107
|
-
# Filter children before comparison to handle ignored nodes (like comments with :ignore)
|
|
108
|
-
|
|
109
|
-
|
|
140
|
+
# Filter children before comparison to handle ignored nodes (like comments with :ignore).
|
|
141
|
+
# Apply side-specific pretty-print heuristic when the relevant flag is active.
|
|
142
|
+
children1 = filter_children(childrenode1,
|
|
143
|
+
opts_for_side(opts, :expected))
|
|
144
|
+
children2 = filter_children(childrenode2,
|
|
145
|
+
opts_for_side(opts, :received))
|
|
110
146
|
|
|
111
147
|
if children1.length != children2.length
|
|
112
148
|
add_difference(node1, node2, Comparison::UNEQUAL_ELEMENTS,
|
|
@@ -191,8 +227,8 @@ diff_children, differences)
|
|
|
191
227
|
end
|
|
192
228
|
|
|
193
229
|
# Strip whitespace-only text nodes based on parent element configuration.
|
|
194
|
-
# Use
|
|
195
|
-
# Blacklist (
|
|
230
|
+
# Use preserve_whitespace_elements / strip_whitespace_elements to control.
|
|
231
|
+
# Blacklist (strip) > preserve > collapse > format defaults.
|
|
196
232
|
return false unless text_node?(node) && node.parent
|
|
197
233
|
return false unless MatchOptions.normalize_text(node_text(node)).empty?
|
|
198
234
|
|
|
@@ -200,7 +236,16 @@ diff_children, differences)
|
|
|
200
236
|
node.parent, match_opts
|
|
201
237
|
)
|
|
202
238
|
|
|
203
|
-
|
|
239
|
+
# When the pretty-print-side flag is active (set by opts_for_side in
|
|
240
|
+
# ChildComparison.compare), drop whitespace-only text nodes that start
|
|
241
|
+
# with "\n" inside :collapse elements — they are structural indentation
|
|
242
|
+
# from the pretty-printer, not content. Space-only nodes (no "\n") are
|
|
243
|
+
# real inline content and are kept for normalised comparison.
|
|
244
|
+
# :preserve elements are always left unchanged.
|
|
245
|
+
if match_opts[:_pretty_print_side_active]
|
|
246
|
+
ws_class = WhitespaceSensitivity.classify_text_node(node, opts)
|
|
247
|
+
return true if ws_class == :collapse && node_text(node).start_with?("\n")
|
|
248
|
+
end
|
|
204
249
|
|
|
205
250
|
false
|
|
206
251
|
end
|