canon 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -1
- data/.rubocop_todo.yml +276 -7
- data/README.adoc +203 -138
- data/_config.yml +116 -0
- data/docs/ADVANCED_TOPICS.adoc +20 -0
- data/docs/BASIC_USAGE.adoc +16 -0
- data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
- data/docs/CLI.adoc +493 -0
- data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
- data/docs/DIFF_ARCHITECTURE.adoc +435 -0
- data/docs/DIFF_FORMATTING.adoc +540 -0
- data/docs/FORMATS.adoc +447 -0
- data/docs/INDEX.adoc +222 -0
- data/docs/INPUT_VALIDATION.adoc +477 -0
- data/docs/MATCH_ARCHITECTURE.adoc +463 -0
- data/docs/MATCH_OPTIONS.adoc +719 -0
- data/docs/MODES.adoc +432 -0
- data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
- data/docs/OPTIONS.adoc +1387 -0
- data/docs/PREPROCESSING.adoc +491 -0
- data/docs/RSPEC.adoc +605 -0
- data/docs/RUBY_API.adoc +478 -0
- data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
- data/docs/UNDERSTANDING_CANON.adoc +17 -0
- data/docs/VERBOSE.adoc +482 -0
- data/exe/canon +7 -0
- data/lib/canon/cli.rb +179 -0
- data/lib/canon/commands/diff_command.rb +195 -0
- data/lib/canon/commands/format_command.rb +113 -0
- data/lib/canon/comparison/base_comparator.rb +39 -0
- data/lib/canon/comparison/comparison_result.rb +79 -0
- data/lib/canon/comparison/html_comparator.rb +410 -0
- data/lib/canon/comparison/json_comparator.rb +212 -0
- data/lib/canon/comparison/match_options.rb +616 -0
- data/lib/canon/comparison/xml_comparator.rb +566 -0
- data/lib/canon/comparison/yaml_comparator.rb +93 -0
- data/lib/canon/comparison.rb +239 -0
- data/lib/canon/config.rb +172 -0
- data/lib/canon/diff/diff_block.rb +71 -0
- data/lib/canon/diff/diff_block_builder.rb +105 -0
- data/lib/canon/diff/diff_classifier.rb +46 -0
- data/lib/canon/diff/diff_context.rb +85 -0
- data/lib/canon/diff/diff_context_builder.rb +107 -0
- data/lib/canon/diff/diff_line.rb +77 -0
- data/lib/canon/diff/diff_node.rb +56 -0
- data/lib/canon/diff/diff_node_mapper.rb +148 -0
- data/lib/canon/diff/diff_report.rb +133 -0
- data/lib/canon/diff/diff_report_builder.rb +62 -0
- data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
- data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
- data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
- data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
- data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
- data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
- data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
- data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
- data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
- data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
- data/lib/canon/diff_formatter/character_map.yml +197 -0
- data/lib/canon/diff_formatter/debug_output.rb +431 -0
- data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
- data/lib/canon/diff_formatter/legend.rb +141 -0
- data/lib/canon/diff_formatter.rb +520 -0
- data/lib/canon/errors.rb +56 -0
- data/lib/canon/formatters/html4_formatter.rb +17 -0
- data/lib/canon/formatters/html5_formatter.rb +17 -0
- data/lib/canon/formatters/html_formatter.rb +37 -0
- data/lib/canon/formatters/html_formatter_base.rb +163 -0
- data/lib/canon/formatters/json_formatter.rb +3 -0
- data/lib/canon/formatters/xml_formatter.rb +20 -55
- data/lib/canon/formatters/yaml_formatter.rb +4 -1
- data/lib/canon/pretty_printer/html.rb +57 -0
- data/lib/canon/pretty_printer/json.rb +25 -0
- data/lib/canon/pretty_printer/xml.rb +29 -0
- data/lib/canon/rspec_matchers.rb +222 -80
- data/lib/canon/validators/base_validator.rb +49 -0
- data/lib/canon/validators/html_validator.rb +138 -0
- data/lib/canon/validators/json_validator.rb +89 -0
- data/lib/canon/validators/xml_validator.rb +53 -0
- data/lib/canon/validators/yaml_validator.rb +73 -0
- data/lib/canon/version.rb +1 -1
- data/lib/canon/xml/attribute_handler.rb +80 -0
- data/lib/canon/xml/c14n.rb +36 -0
- data/lib/canon/xml/character_encoder.rb +38 -0
- data/lib/canon/xml/data_model.rb +225 -0
- data/lib/canon/xml/element_matcher.rb +196 -0
- data/lib/canon/xml/line_range_mapper.rb +158 -0
- data/lib/canon/xml/namespace_handler.rb +86 -0
- data/lib/canon/xml/node.rb +32 -0
- data/lib/canon/xml/nodes/attribute_node.rb +54 -0
- data/lib/canon/xml/nodes/comment_node.rb +23 -0
- data/lib/canon/xml/nodes/element_node.rb +56 -0
- data/lib/canon/xml/nodes/namespace_node.rb +38 -0
- data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
- data/lib/canon/xml/nodes/root_node.rb +16 -0
- data/lib/canon/xml/nodes/text_node.rb +23 -0
- data/lib/canon/xml/processor.rb +151 -0
- data/lib/canon/xml/whitespace_normalizer.rb +72 -0
- data/lib/canon/xml/xml_base_handler.rb +188 -0
- data/lib/canon.rb +14 -3
- metadata +116 -21
|
@@ -0,0 +1,566 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../xml/c14n"
|
|
4
|
+
require_relative "match_options"
|
|
5
|
+
require_relative "../diff/diff_node"
|
|
6
|
+
require_relative "../diff/diff_classifier"
|
|
7
|
+
require_relative "comparison_result"
|
|
8
|
+
|
|
9
|
+
module Canon
|
|
10
|
+
module Comparison
|
|
11
|
+
# XML comparison class
|
|
12
|
+
# Handles comparison of XML nodes with various options
|
|
13
|
+
class XmlComparator
|
|
14
|
+
# Default comparison options for XML
|
|
15
|
+
# Baseline option set for XML comparison. Caller-supplied options are merged
# over these in equivalent?. Frozen so the shared defaults cannot be mutated.
#
# NOTE(review): :ignore_attrs, :ignore_nodes and :diff are declared here but
# no method visible in this comparator reads them - confirm they are consumed
# elsewhere before relying on them.
DEFAULT_OPTS = {
  # -- Structural filtering ------------------------------------------------
  ignore_children: false,   # truthy => element children are not compared
  ignore_text_nodes: false, # truthy => text nodes are skipped
  ignore_attr_content: [],  # substrings matched against attribute values
  ignore_attrs: [],
  ignore_attrs_by_name: [], # substrings matched against attribute names
  ignore_nodes: [],

  # -- Output ----------------------------------------------------------------
  verbose: false,           # truthy => equivalent? returns a ComparisonResult
  diff_children: false,

  # -- Match system (forwarded to MatchOptions::Xml.resolve) ----------------
  match_profile: nil,
  match: nil,
  preprocessing: nil,
  global_profile: nil,
  global_options: nil,

  # -- Diff display ----------------------------------------------------------
  diff: nil,
}.freeze
|
|
38
|
+
|
|
39
|
+
class << self
|
|
40
|
+
# Compare two XML nodes for equivalence
|
|
41
|
+
#
|
|
42
|
+
# @param n1 [String, Moxml::Node] First node
|
|
43
|
+
# @param n2 [String, Moxml::Node] Second node
|
|
44
|
+
# @param opts [Hash] Comparison options
|
|
45
|
+
# @param child_opts [Hash] Options for child comparison
|
|
46
|
+
# @return [Boolean, ComparisonResult] Boolean when not verbose, or a
|
|
47
|
+
# ComparisonResult (carrying the differences) when verbose
|
|
48
|
+
# Compare two XML inputs and report whether they are equivalent.
#
# String inputs are parsed (after the resolved :preprocessing step); any
# other input is used as an already-parsed node.
#
# @param n1 [String, Moxml::Node] First node
# @param n2 [String, Moxml::Node] Second node
# @param opts [Hash] Comparison options, merged over DEFAULT_OPTS
# @param child_opts [Hash] Overrides layered on top of opts for children
# @return [Boolean, ComparisonResult] Boolean unless opts[:verbose] is set;
#   otherwise a ComparisonResult carrying the collected differences
def equivalent?(n1, n2, opts = {}, child_opts = {})
  opts = DEFAULT_OPTS.merge(opts)

  # Resolve match options with format-specific defaults.
  resolved = MatchOptions::Xml.resolve(
    format: :xml,
    **opts.slice(:match_profile, :match, :preprocessing,
                 :global_profile, :global_options),
  )

  # DiffClassifier takes the wrapped form; the comparison helpers read the
  # raw hash through opts[:match_opts].
  wrapped = Canon::Comparison::ResolvedMatchOptions.new(resolved, format: :xml)
  opts[:match_opts] = resolved
  child_opts = opts.merge(child_opts)

  doc1, doc2 = [n1, n2].map do |input|
    parse_node(input, resolved[:preprocessing])
  end

  differences = []
  outcome = compare_nodes(doc1, doc2, opts, child_opts,
                          opts[:diff_children] || false, differences)

  # Non-verbose callers only get the yes/no answer.
  return outcome == Comparison::EQUIVALENT unless opts[:verbose]

  # Classify the collected DiffNodes before handing them to the result.
  unless differences.empty?
    classifier = Canon::Diff::DiffClassifier.new(wrapped)
    classifier.classify_all(differences.grep(Canon::Diff::DiffNode))
  end

  # Format XML for line-by-line display by adding line breaks between
  # adjacent elements.
  serialized = [doc1, doc2].map do |doc|
    markup = doc.respond_to?(:to_xml) ? doc.to_xml : doc.to_s
    markup.gsub(/></, ">\n<")
  end

  ComparisonResult.new(
    differences: differences,
    preprocessed_strings: serialized,
    format: :xml,
    match_options: resolved,
  )
end
|
|
112
|
+
|
|
113
|
+
private
|
|
114
|
+
|
|
115
|
+
# Parse a node from string or return as-is
|
|
116
|
+
# Applies preprocessing transformation before parsing if specified
|
|
117
|
+
# Turn a String into a parsed document; non-String input is returned
# untouched.
#
# @param node [String, Object] XML source text or an already-parsed node
# @param preprocessing [Symbol, nil] :normalize, :c14n or :format; any other
#   value (including :none and nil) parses the text unchanged
# @return [Object] the document produced by Moxml, or the input itself
def parse_node(node, preprocessing = :none)
  return node unless node.is_a?(String)

  xml_string =
    if preprocessing == :normalize
      # Strip every line and drop the ones left empty.
      node.each_line.map(&:strip).reject(&:empty?).join("\n")
    elsif preprocessing == :c14n
      # Canonical form, comments excluded.
      Canon::Xml::C14n.canonicalize(node, with_comments: false)
    elsif preprocessing == :format
      # Pretty-printed form.
      Canon.format(node, :xml)
    else
      node
    end

  Moxml.new.parse(xml_string)
end
|
|
140
|
+
|
|
141
|
+
# Main comparison dispatcher
|
|
142
|
+
# Main comparison dispatcher: handles fragments and excluded nodes, checks
# that both nodes are of the same kind, then delegates to the comparer for
# that kind.
#
# @param n1 [Object] First node
# @param n2 [Object] Second node
# @param opts [Hash] Options for this level (must carry :match_opts)
# @param child_opts [Hash] Options used when descending into children
# @param diff_children [Boolean] Passed through unchanged to nested calls
# @param differences [Array] Collector for DiffNode entries (verbose only)
# @return [Object] a Comparison result constant
def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
  # DocumentFragment pairs are compared child by child.
  # NOTE(review): unlike compare_children, the fragment children are not
  # passed through filter_children before the length check - confirm this
  # is intended.
  fragment_class = Nokogiri::XML::DocumentFragment
  if n1.is_a?(fragment_class) && n2.is_a?(fragment_class)
    kids1 = n1.children.to_a
    kids2 = n2.children.to_a

    unless kids1.length == kids2.length
      add_difference(n1, n2, Comparison::UNEQUAL_ELEMENTS,
                     Comparison::UNEQUAL_ELEMENTS, :text_content, opts,
                     differences)
      return Comparison::UNEQUAL_ELEMENTS
    end

    # Stop at the first pair that is not equivalent.
    kids1.zip(kids2).each do |kid1, kid2|
      verdict = compare_nodes(kid1, kid2, opts, child_opts,
                              diff_children, differences)
      return verdict unless verdict == Comparison::EQUIVALENT
    end
    return Comparison::EQUIVALENT
  end

  # Two excluded nodes are equivalent; one excluded node is a missing node.
  skip1 = node_excluded?(n1, opts)
  skip2 = node_excluded?(n2, opts)
  return Comparison::EQUIVALENT if skip1 && skip2

  if skip1 || skip2
    add_difference(n1, n2, Comparison::MISSING_NODE,
                   Comparison::MISSING_NODE, :text_content, opts, differences)
    return Comparison::MISSING_NODE
  end

  unless same_node_type?(n1, n2)
    add_difference(n1, n2, Comparison::UNEQUAL_NODES_TYPES,
                   Comparison::UNEQUAL_NODES_TYPES, :text_content, opts,
                   differences)
    return Comparison::UNEQUAL_NODES_TYPES
  end

  # Dispatch on the kind of n1 (n2 is known to be of the same kind).
  kind = ->(predicate) { n1.respond_to?(predicate) && n1.public_send(predicate) }

  if kind.call(:element?)
    compare_element_nodes(n1, n2, opts, child_opts, diff_children,
                          differences)
  elsif kind.call(:text?)
    compare_text_nodes(n1, n2, opts, differences)
  elsif kind.call(:comment?)
    compare_comment_nodes(n1, n2, opts, differences)
  elsif kind.call(:cdata?)
    # CDATA sections are compared with the text-node rules.
    compare_text_nodes(n1, n2, opts, differences)
  elsif kind.call(:processing_instruction?)
    compare_processing_instruction_nodes(n1, n2, opts, differences)
  elsif n1.respond_to?(:root)
    # Document node
    compare_document_nodes(n1, n2, opts, child_opts, diff_children,
                           differences)
  else
    Comparison::EQUIVALENT
  end
end
|
|
210
|
+
|
|
211
|
+
# Compare two element nodes
|
|
212
|
+
# Compare two element nodes: name first, then attributes, then (unless
# opts[:ignore_children] is set) their children.
#
# @return [Object] a Comparison result constant; the first mismatch wins
def compare_element_nodes(n1, n2, opts, child_opts, diff_children,
                          differences)
  if n1.name != n2.name
    add_difference(n1, n2, Comparison::UNEQUAL_ELEMENTS,
                   Comparison::UNEQUAL_ELEMENTS, :text_content, opts,
                   differences)
    return Comparison::UNEQUAL_ELEMENTS
  end

  attr_verdict = compare_attribute_sets(n1, n2, opts, differences)
  return attr_verdict if attr_verdict != Comparison::EQUIVALENT

  if opts[:ignore_children]
    Comparison::EQUIVALENT
  else
    compare_children(n1, n2, opts, child_opts, diff_children, differences)
  end
end
|
|
231
|
+
|
|
232
|
+
# Compare attribute sets
|
|
233
|
+
# Compare the attributes of two elements after filtering and value
# processing. Attribute order is irrelevant: names are compared as sorted
# lists and values are looked up by name.
#
# @return [Object] Comparison::MISSING_ATTRIBUTE when the name sets differ,
#   Comparison::UNEQUAL_ATTRIBUTES when a value differs, otherwise
#   Comparison::EQUIVALENT
def compare_attribute_sets(n1, n2, opts, differences)
  left, right = [n1, n2].map do |node|
    filter_attributes(node.attributes, opts)
      .sort_by { |attr_name, _attr_value| attr_name.to_s }
      .to_h
  end

  if left.keys.map(&:to_s).sort != right.keys.map(&:to_s).sort
    add_difference(n1, n2, Comparison::MISSING_ATTRIBUTE,
                   Comparison::MISSING_ATTRIBUTE,
                   :attribute_presence, opts, differences)
    return Comparison::MISSING_ATTRIBUTE
  end

  value_mismatch = left.any? do |attr_name, attr_value|
    right[attr_name] != attr_value
  end

  if value_mismatch
    add_difference(n1, n2, Comparison::UNEQUAL_ATTRIBUTES,
                   Comparison::UNEQUAL_ATTRIBUTES,
                   :attribute_values, opts, differences)
    return Comparison::UNEQUAL_ATTRIBUTES
  end

  Comparison::EQUIVALENT
end
|
|
259
|
+
|
|
260
|
+
# Filter attributes based on options
|
|
261
|
+
# Build a name => processed-value hash from a node's attributes, leaving out
# attributes ignored by name or by content.
#
# Two attribute shapes are accepted:
# - String key with an attribute object as value (Nokogiri style)
# - attribute object as key with no value (Moxml style)
#
# @param attributes [Enumerable] attributes of one element
# @param opts [Hash] options; opts[:match_opts] supplies :attribute_values
# @return [Hash] surviving attribute names mapped to their processed values
def filter_attributes(attributes, opts)
  match_opts = opts[:match_opts]

  attributes.each_with_object({}) do |(key, val), kept|
    name, value =
      if key.is_a?(String)
        # Nokogiri format: key=name (String), val=attr object
        [key, val.respond_to?(:value) ? val.value : val.to_s]
      else
        # Moxml format: key=attr object, val=nil
        [
          key.respond_to?(:name) ? key.name : key.to_s,
          key.respond_to?(:value) ? key.value : key.to_s,
        ]
      end

    next if should_ignore_attr_by_name?(name, opts)
    next if should_ignore_attr_content?(value, opts)

    # Apply the configured attribute-value behavior (:strict when unset).
    behavior = match_opts[:attribute_values] || :strict
    kept[name] = MatchOptions.process_attribute_value(value, behavior)
  end
end
|
|
295
|
+
|
|
296
|
+
# Check if attribute should be ignored by name
|
|
297
|
+
# Decide whether an attribute is ignored because of its name.
#
# An attribute is ignored when its name contains any entry of
# opts[:ignore_attrs_by_name] as a substring. A nil or missing option means
# nothing is ignored; nil entries are skipped; names and entries are
# converted with #to_s so Symbols are accepted.
#
# @param name [String, Symbol] attribute name
# @param opts [Hash] comparison options
# @return [Boolean] true when the attribute should be skipped
def should_ignore_attr_by_name?(name, opts)
  attr_name = name.to_s

  # Array() tolerates nil and a bare single pattern.
  Array(opts[:ignore_attrs_by_name]).compact.any? do |pattern|
    attr_name.include?(pattern.to_s)
  end
end
|
|
302
|
+
|
|
303
|
+
# Check if attribute should be ignored by content
|
|
304
|
+
# Decide whether an attribute is ignored because of its value.
#
# An attribute is ignored when its value (converted with #to_s) contains any
# entry of opts[:ignore_attr_content] as a substring. A nil or missing
# option means nothing is ignored; nil entries are skipped; entries are
# converted with #to_s so non-String patterns do not raise.
#
# @param value [Object] attribute value
# @param opts [Hash] comparison options
# @return [Boolean] true when the attribute should be skipped
def should_ignore_attr_content?(value, opts)
  content = value.to_s

  # Array() tolerates nil and a bare single pattern.
  Array(opts[:ignore_attr_content]).compact.any? do |pattern|
    content.include?(pattern.to_s)
  end
end
|
|
309
|
+
|
|
310
|
+
# Compare text nodes
|
|
311
|
+
# Compare two text (or CDATA) nodes using the configured :text_content
# behavior. Text inside a whitespace-preserving element is always compared
# with :strict.
#
# @return [Object] Comparison::EQUIVALENT or
#   Comparison::UNEQUAL_TEXT_CONTENTS
def compare_text_nodes(n1, n2, opts, differences)
  return Comparison::EQUIVALENT if opts[:ignore_text_nodes]

  text1 = node_text(n1)
  text2 = node_text(n2)

  configured = opts[:match_opts][:text_content]
  behavior = should_preserve_whitespace_strictly?(n1, n2) ? :strict : configured

  return Comparison::EQUIVALENT if MatchOptions.match_text?(text1, text2, behavior)

  # Pick the dimension the difference is reported under:
  # - under :normalize, a whitespace-only difference is reported as
  #   :structural_whitespace
  # - everything else is reported as :text_content
  whitespace_only = behavior == :normalize &&
    whitespace_only_difference?(text1, text2)
  dimension = whitespace_only ? :structural_whitespace : :text_content

  add_difference(n1, n2, Comparison::UNEQUAL_TEXT_CONTENTS,
                 Comparison::UNEQUAL_TEXT_CONTENTS, dimension,
                 opts, differences)
  Comparison::UNEQUAL_TEXT_CONTENTS
end
|
|
348
|
+
|
|
349
|
+
# Check if the difference between two texts is only whitespace-related
|
|
350
|
+
# @param text1 [String] First text
|
|
351
|
+
# @param text2 [String] Second text
|
|
352
|
+
# @return [Boolean] true if difference is only in whitespace
|
|
353
|
+
# Report whether two texts become equal once both are passed through
# MatchOptions.normalize_text, i.e. whether they differ only in what that
# normalization removes.
#
# @param text1 [String] First text
# @param text2 [String] Second text
# @return [Boolean] result of comparing the two normalized texts with ==
def whitespace_only_difference?(text1, text2)
  MatchOptions.normalize_text(text1) == MatchOptions.normalize_text(text2)
end
|
|
361
|
+
|
|
362
|
+
# Check if whitespace should be preserved strictly for these text nodes
|
|
363
|
+
# This applies to HTML elements like pre, code, textarea, script, style
|
|
364
|
+
# Report whether either node sits inside an element whose whitespace is
# significant (pre, code, textarea, script, style).
#
# @param n1 [Object] First text node
# @param n2 [Object] Second text node
# @return [Boolean] false when either node has no usable parent
def should_preserve_whitespace_strictly?(n1, n2)
  pair = [n1, n2]

  # Both nodes need a parent that exposes a name, otherwise there is no
  # ancestor chain to inspect.
  return false unless pair.all? { |node| node.respond_to?(:parent) }
  return false unless pair.all? { |node| node.parent.respond_to?(:name) }

  whitespace_sensitive = %w[pre code textarea script style]
  pair.any? { |node| in_preserve_element?(node, whitespace_sensitive) }
end
|
|
376
|
+
|
|
377
|
+
# Check if a node is inside a whitespace-preserving element
|
|
378
|
+
# Walk up from node's parent and report whether any ancestor's name
# (compared lower-cased) is in preserve_list.
#
# The walk ends when an ancestor does not respond to #name, is a Nokogiri
# document, or has no #parent to continue with. The last condition prevents
# an endless loop on an ancestor that exposes #name but not #parent.
#
# @param node [Object] node responding to #parent
# @param preserve_list [Array<String>] lower-case element names
# @return [Boolean] true when a matching ancestor is found
def in_preserve_element?(node, preserve_list)
  current = node.parent

  while current.respond_to?(:name)
    # to_s guards against ancestors whose name is nil.
    return true if preserve_list.include?(current.name.to_s.downcase)

    # Stop at the document root. HTML4 and HTML5 documents are subclasses of
    # Nokogiri::XML::Document, so one check covers all three; defined?
    # keeps this safe when Nokogiri is not loaded.
    break if defined?(Nokogiri::XML::Document) &&
      current.is_a?(Nokogiri::XML::Document)

    # Without a parent there is nowhere further up to go.
    break unless current.respond_to?(:parent)

    current = current.parent
  end

  false
end
|
|
393
|
+
|
|
394
|
+
# Compare comment nodes
|
|
395
|
+
# Compare two comment nodes according to the :comments match option.
#
# @return [Object] Comparison::EQUIVALENT when comments are ignored or their
#   contents match, otherwise Comparison::UNEQUAL_COMMENTS
def compare_comment_nodes(n1, n2, opts, differences)
  behavior = opts[:match_opts][:comments]

  # Ignored comments never produce a difference.
  return Comparison::EQUIVALENT if behavior == :ignore

  left = n1.content.to_s
  right = n2.content.to_s
  return Comparison::EQUIVALENT if MatchOptions.match_text?(left, right, behavior)

  add_difference(n1, n2, Comparison::UNEQUAL_COMMENTS,
                 Comparison::UNEQUAL_COMMENTS, :comments, opts,
                 differences)
  Comparison::UNEQUAL_COMMENTS
end
|
|
414
|
+
|
|
415
|
+
# Compare processing instruction nodes
|
|
416
|
+
# Compare two processing instructions: targets must be equal, then the
# contents are compared with surrounding whitespace stripped.
#
# @return [Object] Comparison::UNEQUAL_NODES_TYPES on a target mismatch,
#   Comparison::UNEQUAL_TEXT_CONTENTS on a content mismatch, otherwise
#   Comparison::EQUIVALENT
def compare_processing_instruction_nodes(n1, n2, opts, differences)
  if n1.target != n2.target
    add_difference(n1, n2, Comparison::UNEQUAL_NODES_TYPES,
                   Comparison::UNEQUAL_NODES_TYPES, :text_content, opts,
                   differences)
    return Comparison::UNEQUAL_NODES_TYPES
  end

  left, right = [n1, n2].map { |instruction| instruction.content.to_s.strip }
  return Comparison::EQUIVALENT if left == right

  add_difference(n1, n2, Comparison::UNEQUAL_TEXT_CONTENTS,
                 Comparison::UNEQUAL_TEXT_CONTENTS, :text_content,
                 opts, differences)
  Comparison::UNEQUAL_TEXT_CONTENTS
end
|
|
436
|
+
|
|
437
|
+
# Compare document nodes
|
|
438
|
+
# Compare two documents by comparing their root elements.
#
# NOTE(review): a missing root on either side - including both sides -
# yields Comparison::MISSING_NODE; confirm that two rootless documents are
# meant to be unequal.
#
# @return [Object] a Comparison result constant
def compare_document_nodes(n1, n2, opts, child_opts, diff_children,
                           differences)
  roots = [n1.root, n2.root]

  if roots.any?(&:nil?)
    add_difference(n1, n2, Comparison::MISSING_NODE,
                   Comparison::MISSING_NODE, :text_content, opts, differences)
    return Comparison::MISSING_NODE
  end

  compare_nodes(roots.first, roots.last, opts, child_opts, diff_children,
                differences)
end
|
|
453
|
+
|
|
454
|
+
# Compare children of two nodes
|
|
455
|
+
# Compare the (filtered) children of two nodes pairwise, in order.
#
# Children are compared with child_opts as both the current-level and the
# child-level options.
#
# @return [Object] Comparison::MISSING_NODE when the child counts differ,
#   the first non-equivalent child result, or Comparison::EQUIVALENT
def compare_children(n1, n2, opts, child_opts, diff_children,
                     differences)
  kids1 = filter_children(n1.children, opts)
  kids2 = filter_children(n2.children, opts)

  if kids1.length != kids2.length
    add_difference(n1, n2, Comparison::MISSING_NODE,
                   Comparison::MISSING_NODE, :text_content, opts, differences)
    return Comparison::MISSING_NODE
  end

  # Stop at the first pair that is not equivalent.
  kids1.zip(kids2).each do |(kid1, kid2)|
    verdict = compare_nodes(kid1, kid2, child_opts, child_opts,
                            diff_children, differences)
    return verdict if verdict != Comparison::EQUIVALENT
  end

  Comparison::EQUIVALENT
end
|
|
474
|
+
|
|
475
|
+
# Filter children based on options
|
|
476
|
+
# Drop every child that node_excluded? says should not take part in the
# comparison.
#
# @param children [Enumerable] child nodes
# @param opts [Hash] comparison options
# @return [Array] the children that remain
def filter_children(children, opts)
  children.reject { |candidate| node_excluded?(candidate, opts) }
end
|
|
481
|
+
|
|
482
|
+
# Check if node should be excluded
|
|
483
|
+
# Decide whether a node is left out of the comparison.
#
# A node is excluded when it is:
# - a comment and match_opts[:comments] is :ignore
# - a text node and opts[:ignore_text_nodes] is set
# - a text node that normalizes to an empty string while
#   match_opts[:structural_whitespace] is :ignore or :normalize
#
# @param node [Object] node to test
# @param opts [Hash] comparison options (must carry :match_opts)
# @return [Boolean]
def node_excluded?(node, opts)
  match_opts = opts[:match_opts]

  is_comment = node.respond_to?(:comment?) && node.comment?
  return true if is_comment && match_opts[:comments] == :ignore

  is_text = node.respond_to?(:text?) && node.text?
  return true if opts[:ignore_text_nodes] && is_text

  # Both :ignore and :normalize filter out whitespace-only text nodes.
  drops_blank_text = %i[ignore normalize].include?(match_opts[:structural_whitespace])
  return false unless drops_blank_text && is_text

  MatchOptions.normalize_text(node_text(node)).empty?
end
|
|
506
|
+
|
|
507
|
+
# Check if two nodes are the same type
|
|
508
|
+
# Report whether two nodes are of the same kind.
#
# Kinds are tried in order: element, text, comment, CDATA, processing
# instruction. Two nodes that both respond to #root (documents) also count
# as the same kind.
#
# @param n1 [Object] First node
# @param n2 [Object] Second node
# @return [Boolean]
def same_node_type?(n1, n2)
  kind_predicates = %i[element? text? comment? cdata? processing_instruction?]

  kind_predicates.each do |predicate|
    both_match = [n1, n2].all? do |node|
      node.respond_to?(predicate) && node.public_send(predicate)
    end
    return true if both_match
  end

  n1.respond_to?(:root) && n2.respond_to?(:root)
end
|
|
525
|
+
|
|
526
|
+
# Get text content from a node
|
|
527
|
+
# Read a node's text as a String: #content is preferred, #text is the
# fallback, and a node offering neither yields an empty string.
#
# @param node [Object] node to read
# @return [String]
def node_text(node)
  return node.content.to_s if node.respond_to?(:content)
  return node.text.to_s if node.respond_to?(:text)

  ""
end
|
|
536
|
+
|
|
537
|
+
# Add a difference to the differences array
|
|
538
|
+
# @param node1 [Object] First node
|
|
539
|
+
# @param node2 [Object] Second node
|
|
540
|
+
# @param diff1 [String] Difference type for node1
|
|
541
|
+
# @param diff2 [String] Difference type for node2
|
|
542
|
+
# @param dimension [Symbol] The match dimension causing this difference
|
|
543
|
+
# @param opts [Hash] Options
|
|
544
|
+
# @param differences [Array] Array to append difference to
|
|
545
|
+
# Record one difference as a Canon::Diff::DiffNode. Does nothing unless
# opts[:verbose] is set.
#
# @param node1 [Object] First node
# @param node2 [Object] Second node
# @param diff1 [Object] Difference type for node1 (interpolated into reason)
# @param diff2 [Object] Difference type for node2 (interpolated into reason)
# @param dimension [Symbol] The match dimension causing this difference
# @param opts [Hash] Options
# @param differences [Array] Array to append the difference to
# @return [Array, nil] differences after the append, or nil when not verbose
# @raise [ArgumentError] when verbose and dimension is nil
def add_difference(node1, node2, diff1, diff2, dimension, opts,
                   differences)
  return unless opts[:verbose]

  # All differences must be DiffNode objects, which require a dimension.
  raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

  differences << Canon::Diff::DiffNode.new(
    node1: node1,
    node2: node2,
    dimension: dimension,
    reason: "#{diff1} vs #{diff2}",
  )
end
|
|
563
|
+
end
|
|
564
|
+
end
|
|
565
|
+
end
|
|
566
|
+
end
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "date"
require "yaml"
|
|
4
|
+
require_relative "json_comparator"
|
|
5
|
+
require_relative "match_options"
|
|
6
|
+
require_relative "comparison_result"
|
|
7
|
+
|
|
8
|
+
module Canon
|
|
9
|
+
module Comparison
|
|
10
|
+
# YAML comparison class
|
|
11
|
+
# Handles comparison of YAML objects with various options
|
|
12
|
+
class YamlComparator
|
|
13
|
+
# Default comparison options for YAML
|
|
14
|
+
# Baseline option set for YAML comparison. Caller-supplied options are
# merged over these in equivalent?. Frozen so the shared defaults cannot be
# mutated.
#
# NOTE(review): :diff is declared here but is not read by any method visible
# in this comparator - confirm it is consumed elsewhere.
DEFAULT_OPTS = {
  # -- Output ----------------------------------------------------------------
  verbose: false, # truthy => equivalent? returns a ComparisonResult

  # -- Match system (forwarded to MatchOptions::Yaml.resolve) ---------------
  match_profile: nil,
  match: nil,
  preprocessing: nil,
  global_profile: nil,
  global_options: nil,

  # -- Diff display ----------------------------------------------------------
  diff: nil,
}.freeze
|
|
28
|
+
|
|
29
|
+
class << self
|
|
30
|
+
# Compare two YAML objects for equivalence
|
|
31
|
+
#
|
|
32
|
+
# @param yaml1 [String, Hash, Array] First YAML
|
|
33
|
+
# @param yaml2 [String, Hash, Array] Second YAML
|
|
34
|
+
# @param opts [Hash] Comparison options
|
|
35
|
+
# @return [Boolean, ComparisonResult] true if equivalent, or ComparisonResult if verbose
|
|
36
|
+
# Compare two YAML inputs and report whether they are equivalent.
#
# String inputs are parsed with parse_yaml; other inputs are compared as
# given. The comparison itself is delegated to JsonComparator's
# compare_ruby_objects.
#
# @param yaml1 [String, Hash, Array] First YAML
# @param yaml2 [String, Hash, Array] Second YAML
# @param opts [Hash] Comparison options, merged over DEFAULT_OPTS
# @return [Boolean, ComparisonResult] Boolean unless opts[:verbose] is set;
#   otherwise a ComparisonResult carrying the collected differences
def equivalent?(yaml1, yaml2, opts = {})
  opts = DEFAULT_OPTS.merge(opts)

  # Resolve match options with format-specific defaults.
  resolved = MatchOptions::Yaml.resolve(
    format: :yaml,
    **opts.slice(:match_profile, :match, :preprocessing,
                 :global_profile, :global_options),
  )

  # NOTE(review): the wrapper is built but its value is never used here;
  # kept in case construction performs validation - confirm.
  Canon::Comparison::ResolvedMatchOptions.new(resolved, format: :yaml)

  # The comparison logic reads the raw hash through opts[:match_opts].
  opts[:match_opts] = resolved

  left = parse_yaml(yaml1)
  right = parse_yaml(yaml2)

  differences = []
  outcome = JsonComparator.send(:compare_ruby_objects, left, right, opts,
                                differences, "")

  # Non-verbose callers only get the yes/no answer.
  return outcome == Comparison::EQUIVALENT unless opts[:verbose]

  # Render both sides as YAML text for display.
  rendered = [left, right].map do |object|
    object.is_a?(String) ? object : YAML.dump(object)
  end

  ComparisonResult.new(
    differences: differences,
    preprocessed_strings: rendered,
    format: :yaml,
    match_options: resolved,
  )
end
|
|
81
|
+
|
|
82
|
+
private
|
|
83
|
+
|
|
84
|
+
# Parse YAML from string or return as-is
|
|
85
|
+
# Parse YAML text into Ruby objects; non-String input is returned untouched.
#
# Parsing uses YAML.safe_load with aliases enabled. Date and Time are
# permitted so documents containing unquoted dates or timestamps can be
# compared instead of failing with Psych::DisallowedClass.
#
# @param obj [String, Object] YAML text or an already-parsed object
# @return [Object] the parsed structure, or obj itself when not a String
# @raise [Psych::SyntaxError] when the text is not valid YAML
# @raise [Psych::DisallowedClass] when the text needs any other class
def parse_yaml(obj)
  return obj unless obj.is_a?(String)

  YAML.safe_load(obj, permitted_classes: [Date, Time], aliases: true)
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|