canon 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (102) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -1
  3. data/.rubocop_todo.yml +276 -7
  4. data/README.adoc +203 -138
  5. data/_config.yml +116 -0
  6. data/docs/ADVANCED_TOPICS.adoc +20 -0
  7. data/docs/BASIC_USAGE.adoc +16 -0
  8. data/docs/CHARACTER_VISUALIZATION.adoc +567 -0
  9. data/docs/CLI.adoc +493 -0
  10. data/docs/CUSTOMIZING_BEHAVIOR.adoc +19 -0
  11. data/docs/DIFF_ARCHITECTURE.adoc +435 -0
  12. data/docs/DIFF_FORMATTING.adoc +540 -0
  13. data/docs/FORMATS.adoc +447 -0
  14. data/docs/INDEX.adoc +222 -0
  15. data/docs/INPUT_VALIDATION.adoc +477 -0
  16. data/docs/MATCH_ARCHITECTURE.adoc +463 -0
  17. data/docs/MATCH_OPTIONS.adoc +719 -0
  18. data/docs/MODES.adoc +432 -0
  19. data/docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +219 -0
  20. data/docs/OPTIONS.adoc +1387 -0
  21. data/docs/PREPROCESSING.adoc +491 -0
  22. data/docs/RSPEC.adoc +605 -0
  23. data/docs/RUBY_API.adoc +478 -0
  24. data/docs/SEMANTIC_DIFF_REPORT.adoc +528 -0
  25. data/docs/UNDERSTANDING_CANON.adoc +17 -0
  26. data/docs/VERBOSE.adoc +482 -0
  27. data/exe/canon +7 -0
  28. data/lib/canon/cli.rb +179 -0
  29. data/lib/canon/commands/diff_command.rb +195 -0
  30. data/lib/canon/commands/format_command.rb +113 -0
  31. data/lib/canon/comparison/base_comparator.rb +39 -0
  32. data/lib/canon/comparison/comparison_result.rb +79 -0
  33. data/lib/canon/comparison/html_comparator.rb +410 -0
  34. data/lib/canon/comparison/json_comparator.rb +212 -0
  35. data/lib/canon/comparison/match_options.rb +616 -0
  36. data/lib/canon/comparison/xml_comparator.rb +566 -0
  37. data/lib/canon/comparison/yaml_comparator.rb +93 -0
  38. data/lib/canon/comparison.rb +239 -0
  39. data/lib/canon/config.rb +172 -0
  40. data/lib/canon/diff/diff_block.rb +71 -0
  41. data/lib/canon/diff/diff_block_builder.rb +105 -0
  42. data/lib/canon/diff/diff_classifier.rb +46 -0
  43. data/lib/canon/diff/diff_context.rb +85 -0
  44. data/lib/canon/diff/diff_context_builder.rb +107 -0
  45. data/lib/canon/diff/diff_line.rb +77 -0
  46. data/lib/canon/diff/diff_node.rb +56 -0
  47. data/lib/canon/diff/diff_node_mapper.rb +148 -0
  48. data/lib/canon/diff/diff_report.rb +133 -0
  49. data/lib/canon/diff/diff_report_builder.rb +62 -0
  50. data/lib/canon/diff_formatter/by_line/base_formatter.rb +407 -0
  51. data/lib/canon/diff_formatter/by_line/html_formatter.rb +672 -0
  52. data/lib/canon/diff_formatter/by_line/json_formatter.rb +284 -0
  53. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +190 -0
  54. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +860 -0
  55. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +292 -0
  56. data/lib/canon/diff_formatter/by_object/base_formatter.rb +199 -0
  57. data/lib/canon/diff_formatter/by_object/json_formatter.rb +305 -0
  58. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +248 -0
  59. data/lib/canon/diff_formatter/by_object/yaml_formatter.rb +17 -0
  60. data/lib/canon/diff_formatter/character_map.yml +197 -0
  61. data/lib/canon/diff_formatter/debug_output.rb +431 -0
  62. data/lib/canon/diff_formatter/diff_detail_formatter.rb +551 -0
  63. data/lib/canon/diff_formatter/legend.rb +141 -0
  64. data/lib/canon/diff_formatter.rb +520 -0
  65. data/lib/canon/errors.rb +56 -0
  66. data/lib/canon/formatters/html4_formatter.rb +17 -0
  67. data/lib/canon/formatters/html5_formatter.rb +17 -0
  68. data/lib/canon/formatters/html_formatter.rb +37 -0
  69. data/lib/canon/formatters/html_formatter_base.rb +163 -0
  70. data/lib/canon/formatters/json_formatter.rb +3 -0
  71. data/lib/canon/formatters/xml_formatter.rb +20 -55
  72. data/lib/canon/formatters/yaml_formatter.rb +4 -1
  73. data/lib/canon/pretty_printer/html.rb +57 -0
  74. data/lib/canon/pretty_printer/json.rb +25 -0
  75. data/lib/canon/pretty_printer/xml.rb +29 -0
  76. data/lib/canon/rspec_matchers.rb +222 -80
  77. data/lib/canon/validators/base_validator.rb +49 -0
  78. data/lib/canon/validators/html_validator.rb +138 -0
  79. data/lib/canon/validators/json_validator.rb +89 -0
  80. data/lib/canon/validators/xml_validator.rb +53 -0
  81. data/lib/canon/validators/yaml_validator.rb +73 -0
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/attribute_handler.rb +80 -0
  84. data/lib/canon/xml/c14n.rb +36 -0
  85. data/lib/canon/xml/character_encoder.rb +38 -0
  86. data/lib/canon/xml/data_model.rb +225 -0
  87. data/lib/canon/xml/element_matcher.rb +196 -0
  88. data/lib/canon/xml/line_range_mapper.rb +158 -0
  89. data/lib/canon/xml/namespace_handler.rb +86 -0
  90. data/lib/canon/xml/node.rb +32 -0
  91. data/lib/canon/xml/nodes/attribute_node.rb +54 -0
  92. data/lib/canon/xml/nodes/comment_node.rb +23 -0
  93. data/lib/canon/xml/nodes/element_node.rb +56 -0
  94. data/lib/canon/xml/nodes/namespace_node.rb +38 -0
  95. data/lib/canon/xml/nodes/processing_instruction_node.rb +24 -0
  96. data/lib/canon/xml/nodes/root_node.rb +16 -0
  97. data/lib/canon/xml/nodes/text_node.rb +23 -0
  98. data/lib/canon/xml/processor.rb +151 -0
  99. data/lib/canon/xml/whitespace_normalizer.rb +72 -0
  100. data/lib/canon/xml/xml_base_handler.rb +188 -0
  101. data/lib/canon.rb +14 -3
  102. metadata +116 -21
@@ -0,0 +1,566 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../xml/c14n"
4
+ require_relative "match_options"
5
+ require_relative "../diff/diff_node"
6
+ require_relative "../diff/diff_classifier"
7
+ require_relative "comparison_result"
8
+
9
+ module Canon
10
+ module Comparison
11
+ # XML comparison class
12
+ # Handles comparison of XML nodes with various options
13
+ class XmlComparator
14
# Baseline options for XML comparison. Options passed to equivalent? are
# merged on top of this frozen hash.
#
# NOTE(review): :ignore_attrs, :ignore_nodes and :diff are declared here but
# are not read by any method in this class - confirm they are consumed
# elsewhere or are reserved.
DEFAULT_OPTS = {
  # --- Structural filtering ---
  # When truthy, element children are not compared.
  ignore_children: false,
  # When truthy, text nodes are excluded from comparison.
  ignore_text_nodes: false,
  # Attributes whose value contains any of these substrings are skipped.
  ignore_attr_content: [],
  ignore_attrs: [],
  # Attributes whose name contains any of these substrings are skipped.
  ignore_attrs_by_name: [],
  ignore_nodes: [],

  # --- Output ---
  # When truthy, differences are collected and a ComparisonResult is returned.
  verbose: false,
  diff_children: false,

  # --- Match system (forwarded to MatchOptions::Xml.resolve) ---
  match_profile: nil,
  match: nil,
  preprocessing: nil,
  global_profile: nil,
  global_options: nil,

  # --- Diff display ---
  diff: nil,
}.freeze
38
+
39
+ class << self
40
# Decide whether two XML inputs are equivalent under the given options.
#
# String inputs are parsed (after optional preprocessing); any other input
# is compared as given.
#
# @param n1 [String, Moxml::Node] First node
# @param n2 [String, Moxml::Node] Second node
# @param opts [Hash] Comparison options, merged over DEFAULT_OPTS
# @param child_opts [Hash] Options for child comparison, merged over the
#   resolved top-level options
# @return [Boolean, ComparisonResult] true/false when not verbose; a
#   ComparisonResult holding the collected differences when verbose
def equivalent?(n1, n2, opts = {}, child_opts = {})
  opts = DEFAULT_OPTS.merge(opts)

  # Resolve match options with format-specific defaults
  resolved = MatchOptions::Xml.resolve(
    format: :xml,
    match_profile: opts[:match_profile],
    match: opts[:match],
    preprocessing: opts[:preprocessing],
    global_profile: opts[:global_profile],
    global_options: opts[:global_options],
  )

  # The classifier takes the wrapped form; comparison logic reads the hash
  wrapped = Canon::Comparison::ResolvedMatchOptions.new(resolved,
                                                        format: :xml)
  opts[:match_opts] = resolved
  child_opts = opts.merge(child_opts)

  # Parse nodes if they are strings, applying preprocessing if needed
  node1 = parse_node(n1, resolved[:preprocessing])
  node2 = parse_node(n2, resolved[:preprocessing])

  differences = []
  outcome = compare_nodes(node1, node2, opts, child_opts,
                          opts[:diff_children] || false, differences)

  return outcome == Comparison::EQUIVALENT unless opts[:verbose]

  # Classify DiffNodes as normative/informative for verbose output
  unless differences.empty?
    diff_nodes = differences.select { |d| d.is_a?(Canon::Diff::DiffNode) }
    Canon::Diff::DiffClassifier.new(wrapped).classify_all(diff_nodes)
  end

  # Format XML for line-by-line display by adding line breaks between
  # adjacent elements
  rendered = [node1, node2].map do |node|
    xml = node.respond_to?(:to_xml) ? node.to_xml : node.to_s
    xml.gsub(/></, ">\n<")
  end

  ComparisonResult.new(
    differences: differences,
    preprocessed_strings: rendered,
    format: :xml,
    match_options: resolved,
  )
end
112
+
113
+ private
114
+
115
# Parse a String into a Moxml document; any non-String input is returned
# untouched.
#
# @param node [String, Object] XML source or an already-parsed node
# @param preprocessing [Symbol, nil] :normalize, :c14n or :format select a
#   transformation applied to the string before parsing; any other value
#   leaves the string as-is
# @return [Object] the parsed document, or +node+ itself when not a String
def parse_node(node, preprocessing = :none)
  return node unless node.is_a?(String)

  preprocessors = {
    # Strip every line and drop the ones that end up empty
    normalize: ->(xml) { xml.lines.map(&:strip).reject(&:empty?).join("\n") },
    # Canonicalize the XML (comments removed)
    c14n: ->(xml) { Canon::Xml::C14n.canonicalize(xml, with_comments: false) },
    # Pretty format the XML
    format: ->(xml) { Canon.format(xml, :xml) },
  }

  transform = preprocessors[preprocessing]
  xml_string = transform ? transform.call(node) : node

  # Use Moxml for XML parsing
  Moxml.new.parse(xml_string)
end
140
+
141
# Main comparison dispatcher: applies exclusion and node-type checks, then
# hands the pair to the comparison method matching the type of n1.
#
# @return [Object] one of the Comparison result codes
def compare_nodes(n1, n2, opts, child_opts, diff_children, differences)
  # Two DocumentFragments are compared through their children, pairwise
  if n1.is_a?(Nokogiri::XML::DocumentFragment) &&
      n2.is_a?(Nokogiri::XML::DocumentFragment)
    kids1 = n1.children.to_a
    kids2 = n2.children.to_a

    unless kids1.length == kids2.length
      add_difference(n1, n2, Comparison::UNEQUAL_ELEMENTS,
                     Comparison::UNEQUAL_ELEMENTS, :text_content, opts,
                     differences)
      return Comparison::UNEQUAL_ELEMENTS
    end

    # The first non-equivalent pair decides; no children means equivalent
    kids1.zip(kids2).each do |left, right|
      verdict = compare_nodes(left, right, opts, child_opts,
                              diff_children, differences)
      return verdict unless verdict == Comparison::EQUIVALENT
    end
    return Comparison::EQUIVALENT
  end

  # Excluded on both sides: nothing to compare. Excluded on one side only:
  # the other side has a node without a counterpart.
  skip1 = node_excluded?(n1, opts)
  skip2 = node_excluded?(n2, opts)
  return Comparison::EQUIVALENT if skip1 && skip2

  if skip1 || skip2
    add_difference(n1, n2, Comparison::MISSING_NODE,
                   Comparison::MISSING_NODE, :text_content, opts, differences)
    return Comparison::MISSING_NODE
  end

  unless same_node_type?(n1, n2)
    add_difference(n1, n2, Comparison::UNEQUAL_NODES_TYPES,
                   Comparison::UNEQUAL_NODES_TYPES, :text_content, opts,
                   differences)
    return Comparison::UNEQUAL_NODES_TYPES
  end

  # Dispatch on the type of n1 (n2 is known to have the same type here)
  kind = lambda do |predicate|
    n1.respond_to?(predicate) && n1.public_send(predicate)
  end

  if kind.call(:element?)
    compare_element_nodes(n1, n2, opts, child_opts, diff_children,
                          differences)
  elsif kind.call(:text?)
    compare_text_nodes(n1, n2, opts, differences)
  elsif kind.call(:comment?)
    compare_comment_nodes(n1, n2, opts, differences)
  elsif kind.call(:cdata?)
    # CDATA sections are compared like text
    compare_text_nodes(n1, n2, opts, differences)
  elsif kind.call(:processing_instruction?)
    compare_processing_instruction_nodes(n1, n2, opts, differences)
  elsif n1.respond_to?(:root)
    # Document node
    compare_document_nodes(n1, n2, opts, child_opts, diff_children,
                           differences)
  else
    # Unrecognized node kinds are treated as equivalent
    Comparison::EQUIVALENT
  end
end
210
+
211
# Compare two element nodes: name first, then attributes, then children
# (unless opts[:ignore_children] is set). Stops at the first mismatch.
#
# @return [Object] one of the Comparison result codes
def compare_element_nodes(n1, n2, opts, child_opts, diff_children,
                          differences)
  if n1.name != n2.name
    add_difference(n1, n2, Comparison::UNEQUAL_ELEMENTS,
                   Comparison::UNEQUAL_ELEMENTS, :text_content, opts,
                   differences)
    return Comparison::UNEQUAL_ELEMENTS
  end

  attribute_verdict = compare_attribute_sets(n1, n2, opts, differences)

  if attribute_verdict != Comparison::EQUIVALENT
    attribute_verdict
  elsif opts[:ignore_children]
    Comparison::EQUIVALENT
  else
    compare_children(n1, n2, opts, child_opts, diff_children, differences)
  end
end
231
+
232
# Compare the filtered attributes of two nodes, ignoring attribute order.
# A differing set of names is reported as MISSING_ATTRIBUTE; a differing
# value for a shared name as UNEQUAL_ATTRIBUTES.
#
# @return [Object] one of the Comparison result codes
def compare_attribute_sets(n1, n2, opts, differences)
  # Attribute order doesn't matter in XML/HTML, so both sides are sorted
  left = filter_attributes(n1.attributes, opts)
  right = filter_attributes(n2.attributes, opts)
  left = left.sort_by { |name, _| name.to_s }.to_h
  right = right.sort_by { |name, _| name.to_s }.to_h

  if left.keys.map(&:to_s).sort != right.keys.map(&:to_s).sort
    add_difference(n1, n2, Comparison::MISSING_ATTRIBUTE,
                   Comparison::MISSING_ATTRIBUTE,
                   :attribute_presence, opts, differences)
    return Comparison::MISSING_ATTRIBUTE
  end

  values_agree = left.all? { |name, value| right[name] == value }
  return Comparison::EQUIVALENT if values_agree

  add_difference(n1, n2, Comparison::UNEQUAL_ATTRIBUTES,
                 Comparison::UNEQUAL_ATTRIBUTES,
                 :attribute_values, opts, differences)
  Comparison::UNEQUAL_ATTRIBUTES
end
259
+
260
# Build a name => value hash of the attributes that take part in the
# comparison. Attributes ignored by name or by content are dropped, and the
# remaining values are passed through MatchOptions.process_attribute_value.
#
# Two input shapes are handled:
# - Nokogiri: key is the String name, val is the attribute object
# - Moxml: key is the attribute object, val is nil
#
# @param attributes [#each] attribute collection of a node
# @param opts [Hash] comparison options (reads :match_opts)
# @return [Hash] processed attribute values keyed by attribute name
def filter_attributes(attributes, opts)
  match_opts = opts[:match_opts]
  kept = {}

  attributes.each do |key, val|
    name, value =
      if key.is_a?(String)
        # Nokogiri format
        [key, val.respond_to?(:value) ? val.value : val.to_s]
      else
        # Moxml format
        [
          key.respond_to?(:name) ? key.name : key.to_s,
          key.respond_to?(:value) ? key.value : key.to_s,
        ]
      end

    next if should_ignore_attr_by_name?(name, opts)
    next if should_ignore_attr_content?(value, opts)

    # Fall back to strict matching when no attribute_values behavior is set
    behavior = match_opts[:attribute_values] || :strict
    kept[name] = MatchOptions.process_attribute_value(value, behavior)
  end

  kept
end
295
+
296
# Check if an attribute should be ignored because of its name.
#
# The attribute is ignored when its name contains any entry of
# opts[:ignore_attrs_by_name] as a substring. Names and patterns are
# coerced with to_s so Symbols work on either side, and a missing option is
# treated as an empty list.
#
# @param name [String, Symbol] attribute name
# @param opts [Hash] comparison options (reads :ignore_attrs_by_name)
# @return [Boolean] true if the attribute should be skipped
def should_ignore_attr_by_name?(name, opts)
  attr_name = name.to_s
  Array(opts[:ignore_attrs_by_name]).any? do |pattern|
    attr_name.include?(pattern.to_s)
  end
end
302
+
303
# Check if an attribute should be ignored because of its value.
#
# The attribute is ignored when its value contains any entry of
# opts[:ignore_attr_content] as a substring. Values and patterns are coerced
# with to_s so Symbol patterns work, and a missing option is treated as an
# empty list.
#
# @param value [Object] attribute value
# @param opts [Hash] comparison options (reads :ignore_attr_content)
# @return [Boolean] true if the attribute should be skipped
def should_ignore_attr_content?(value, opts)
  content = value.to_s
  Array(opts[:ignore_attr_content]).any? do |pattern|
    content.include?(pattern.to_s)
  end
end
309
+
310
# Compare the text of two text (or CDATA) nodes according to the
# :text_content match behavior.
#
# Text inside whitespace-preserving elements is always compared strictly.
# A mismatch is recorded under :structural_whitespace when the behavior is
# :normalize and the texts differ only in whitespace, otherwise under
# :text_content.
#
# @return [Object] Comparison::EQUIVALENT or
#   Comparison::UNEQUAL_TEXT_CONTENTS
def compare_text_nodes(n1, n2, opts, differences)
  return Comparison::EQUIVALENT if opts[:ignore_text_nodes]

  left = node_text(n1)
  right = node_text(n2)

  behavior = opts[:match_opts][:text_content]
  # Whitespace-preserving context overrides the configured behavior
  behavior = :strict if should_preserve_whitespace_strictly?(n1, n2)

  if MatchOptions.match_text?(left, right, behavior)
    return Comparison::EQUIVALENT
  end

  whitespace_only = behavior == :normalize &&
    whitespace_only_difference?(left, right)
  dimension = whitespace_only ? :structural_whitespace : :text_content

  add_difference(n1, n2, Comparison::UNEQUAL_TEXT_CONTENTS,
                 Comparison::UNEQUAL_TEXT_CONTENTS, dimension,
                 opts, differences)
  Comparison::UNEQUAL_TEXT_CONTENTS
end
348
+
349
# Tell whether two texts become identical once both have been passed
# through MatchOptions.normalize_text, i.e. whether they differ only in
# whitespace.
#
# @param text1 [String] First text
# @param text2 [String] Second text
# @return [Boolean] true if the normalized texts are equal
def whitespace_only_difference?(text1, text2)
  MatchOptions.normalize_text(text1) == MatchOptions.normalize_text(text2)
end
361
+
362
# Decide whether two text nodes must be compared strictly because at least
# one of them sits inside a whitespace-preserving HTML element
# (pre, code, textarea, script, style).
#
# Returns false when either node has no #parent, or when either parent has
# no #name.
#
# @return [Boolean]
def should_preserve_whitespace_strictly?(n1, n2)
  nodes = [n1, n2]
  return false unless nodes.all? { |node| node.respond_to?(:parent) }
  return false unless nodes.all? { |node| node.parent.respond_to?(:name) }

  # Elements where whitespace must be preserved in HTML
  whitespace_sensitive = %w[pre code textarea script style]

  nodes.any? { |node| in_preserve_element?(node, whitespace_sensitive) }
end
376
+
377
# Check if a node is inside a whitespace-preserving element by walking up
# its ancestors.
#
# The walk ends when an ancestor's lower-cased name is in +preserve_list+
# (true), or when it reaches a Nokogiri document, an ancestor without
# #name, or an ancestor without #parent (false).
#
# @param node [#parent] node whose ancestors are inspected
# @param preserve_list [Array<String>] lower-case element names
# @return [Boolean] true if any ancestor is in +preserve_list+
def in_preserve_element?(node, preserve_list)
  current = node.parent
  while current.respond_to?(:name)
    # to_s keeps a nil name from raising
    return true if preserve_list.include?(current.name.to_s.downcase)

    # Stop at document root. HTML4 and HTML5 documents are subclasses of
    # Nokogiri::XML::Document, so one check covers all three; the defined?
    # guard avoids a NameError when Nokogiri is not loaded.
    break if defined?(Nokogiri::XML::Document) &&
      current.is_a?(Nokogiri::XML::Document)

    # An ancestor that cannot report a parent ends the walk; without this
    # the loop would spin on the same node forever.
    break unless current.respond_to?(:parent)

    current = current.parent
  end
  false
end
393
+
394
# Compare the content of two comment nodes according to the :comments match
# behavior. With behavior :ignore the comments are always equivalent.
#
# @return [Object] Comparison::EQUIVALENT or Comparison::UNEQUAL_COMMENTS
def compare_comment_nodes(n1, n2, opts, differences)
  behavior = opts[:match_opts][:comments]
  return Comparison::EQUIVALENT if behavior == :ignore

  left, right = [n1, n2].map { |node| node.content.to_s }
  if MatchOptions.match_text?(left, right, behavior)
    return Comparison::EQUIVALENT
  end

  add_difference(n1, n2, Comparison::UNEQUAL_COMMENTS,
                 Comparison::UNEQUAL_COMMENTS, :comments, opts,
                 differences)
  Comparison::UNEQUAL_COMMENTS
end
414
+
415
# Compare two processing instructions: targets must be equal, and contents
# must be equal after stripping leading/trailing whitespace.
#
# @return [Object] Comparison::EQUIVALENT, Comparison::UNEQUAL_NODES_TYPES
#   (different targets) or Comparison::UNEQUAL_TEXT_CONTENTS
def compare_processing_instruction_nodes(n1, n2, opts, differences)
  if n1.target != n2.target
    add_difference(n1, n2, Comparison::UNEQUAL_NODES_TYPES,
                   Comparison::UNEQUAL_NODES_TYPES, :text_content, opts,
                   differences)
    return Comparison::UNEQUAL_NODES_TYPES
  end

  left = n1.content.to_s.strip
  right = n2.content.to_s.strip
  return Comparison::EQUIVALENT if left == right

  add_difference(n1, n2, Comparison::UNEQUAL_TEXT_CONTENTS,
                 Comparison::UNEQUAL_TEXT_CONTENTS, :text_content,
                 opts, differences)
  Comparison::UNEQUAL_TEXT_CONTENTS
end
436
+
437
# Compare two document nodes by comparing their root elements. A document
# without a root (on either side) is reported as MISSING_NODE.
#
# @return [Object] one of the Comparison result codes
def compare_document_nodes(n1, n2, opts, child_opts, diff_children,
                           differences)
  roots = [n1.root, n2.root]

  if roots.any?(&:nil?)
    add_difference(n1, n2, Comparison::MISSING_NODE,
                   Comparison::MISSING_NODE, :text_content, opts, differences)
    return Comparison::MISSING_NODE
  end

  compare_nodes(*roots, opts, child_opts, diff_children, differences)
end
453
+
454
# Compare the non-excluded children of two nodes pairwise, in order.
# A differing child count is reported as MISSING_NODE; otherwise the first
# non-equivalent pair decides the result. Children are compared with
# child_opts as their options.
#
# @return [Object] one of the Comparison result codes
def compare_children(n1, n2, opts, child_opts, diff_children,
                     differences)
  kept1 = filter_children(n1.children, opts)
  kept2 = filter_children(n2.children, opts)

  if kept1.length != kept2.length
    add_difference(n1, n2, Comparison::MISSING_NODE,
                   Comparison::MISSING_NODE, :text_content, opts, differences)
    return Comparison::MISSING_NODE
  end

  kept1.zip(kept2).each do |pair|
    verdict = compare_nodes(*pair, child_opts, child_opts,
                            diff_children, differences)
    return verdict if verdict != Comparison::EQUIVALENT
  end

  Comparison::EQUIVALENT
end
474
+
475
# Drop every child that node_excluded? rejects under the given options.
#
# @param children [#reject] child nodes of an element
# @param opts [Hash] comparison options
# @return [Array] the children that take part in the comparison
def filter_children(children, opts)
  children.reject { |child| node_excluded?(child, opts) }
end
481
+
482
# Check if a node should be left out of the comparison.
#
# A node is excluded when it is:
# - a comment and the :comments match option is :ignore
# - a text node and opts[:ignore_text_nodes] is set
# - a text node whose normalized text is empty, while the
#   :structural_whitespace match option is :ignore or :normalize
#
# @param node [Object] node to test
# @param opts [Hash] comparison options (reads :match_opts and
#   :ignore_text_nodes)
# @return [Boolean]
def node_excluded?(node, opts)
  settings = opts[:match_opts]

  comment = node.respond_to?(:comment?) && node.comment?
  return true if comment && settings[:comments] == :ignore

  text_node = -> { node.respond_to?(:text?) && node.text? }
  return true if opts[:ignore_text_nodes] && text_node.call

  # Both :ignore and :normalize filter out whitespace-only text nodes
  strips_whitespace =
    %i[ignore normalize].include?(settings[:structural_whitespace])
  return false unless strips_whitespace && text_node.call

  MatchOptions.normalize_text(node_text(node)).empty?
end
506
+
507
# Check if two nodes are of the same kind: both elements, both text, both
# comments, both CDATA, both processing instructions, or both documents
# (objects responding to #root).
#
# @return [Boolean]
def same_node_type?(n1, n2)
  type_predicates = %i[element? text? comment? cdata? processing_instruction?]

  shared_kind = type_predicates.any? do |predicate|
    n1.respond_to?(predicate) && n1.public_send(predicate) &&
      n2.respond_to?(predicate) && n2.public_send(predicate)
  end
  return true if shared_kind

  n1.respond_to?(:root) && n2.respond_to?(:root)
end
525
+
526
# Get the text of a node as a String: #content is preferred, #text is the
# fallback, and a node offering neither yields an empty string.
#
# @param node [Object]
# @return [String]
def node_text(node)
  return node.content.to_s if node.respond_to?(:content)
  return node.text.to_s if node.respond_to?(:text)

  ""
end
536
+
537
# Record a difference as a Canon::Diff::DiffNode. Does nothing unless
# opts[:verbose] is set.
#
# @param node1 [Object] First node
# @param node2 [Object] Second node
# @param diff1 [String] Difference type for node1
# @param diff2 [String] Difference type for node2
# @param dimension [Symbol] The match dimension causing this difference
# @param opts [Hash] Options (reads :verbose)
# @param differences [Array] Array to append difference to
# @raise [ArgumentError] if verbose and +dimension+ is nil
def add_difference(node1, node2, diff1, diff2, dimension, opts,
                   differences)
  return unless opts[:verbose]

  # Every recorded difference is a DiffNode, which needs a dimension
  raise ArgumentError, "dimension required for DiffNode" if dimension.nil?

  differences << Canon::Diff::DiffNode.new(
    node1: node1,
    node2: node2,
    dimension: dimension,
    reason: "#{diff1} vs #{diff2}",
  )
end
563
+ end
564
+ end
565
+ end
566
+ end
@@ -0,0 +1,93 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+ require_relative "json_comparator"
5
+ require_relative "match_options"
6
+ require_relative "comparison_result"
7
+
8
+ module Canon
9
+ module Comparison
10
+ # YAML comparison class
11
+ # Handles comparison of YAML objects with various options
12
+ class YamlComparator
13
# Baseline options for YAML comparison. Options passed to equivalent? are
# merged on top of this frozen hash.
#
# NOTE(review): :diff is declared here but is not read by any method in
# this class - confirm it is consumed elsewhere.
DEFAULT_OPTS = {
  # --- Output ---
  # When truthy, a ComparisonResult is returned instead of a Boolean.
  verbose: false,

  # --- Match system (forwarded to MatchOptions::Yaml.resolve) ---
  match_profile: nil,
  match: nil,
  preprocessing: nil,
  global_profile: nil,
  global_options: nil,

  # --- Diff display ---
  diff: nil,
}.freeze
28
+
29
+ class << self
30
# Decide whether two YAML inputs are equivalent under the given options.
#
# String inputs are parsed as YAML; any other input is compared as given.
# The comparison itself is delegated to JsonComparator's
# compare_ruby_objects.
#
# @param yaml1 [String, Hash, Array] First YAML
# @param yaml2 [String, Hash, Array] Second YAML
# @param opts [Hash] Comparison options, merged over DEFAULT_OPTS
# @return [Boolean, ComparisonResult] true if equivalent, or
#   ComparisonResult if verbose
def equivalent?(yaml1, yaml2, opts = {})
  opts = DEFAULT_OPTS.merge(opts)

  # Resolve match options with format-specific defaults
  resolved = MatchOptions::Yaml.resolve(
    format: :yaml,
    match_profile: opts[:match_profile],
    match: opts[:match],
    preprocessing: opts[:preprocessing],
    global_profile: opts[:global_profile],
    global_options: opts[:global_options],
  )

  # Wrapped for consistency with XML/HTML/JSON.
  # NOTE(review): the wrapper instance is discarded here - confirm whether
  # the constructor is relied on for a side effect such as validation.
  Canon::Comparison::ResolvedMatchOptions.new(resolved, format: :yaml)

  # Comparison logic reads the resolved hash from opts
  opts[:match_opts] = resolved

  obj1 = parse_yaml(yaml1)
  obj2 = parse_yaml(yaml2)

  differences = []
  outcome = JsonComparator.send(:compare_ruby_objects, obj1, obj2, opts,
                                differences, "")

  return outcome == Comparison::EQUIVALENT unless opts[:verbose]

  # Format YAML for display: strings are shown as-is, anything else dumped
  rendered = [obj1, obj2].map do |obj|
    obj.is_a?(String) ? obj : YAML.dump(obj)
  end

  ComparisonResult.new(
    differences: differences,
    preprocessed_strings: rendered,
    format: :yaml,
    match_options: resolved,
  )
end
81
+
82
+ private
83
+
84
# Parse a String as YAML (safe load, aliases allowed); any non-String input
# is returned as-is.
#
# @param obj [String, Object] YAML source or an already-parsed object
# @return [Object] the loaded data, or +obj+ itself when not a String
def parse_yaml(obj)
  obj.is_a?(String) ? YAML.safe_load(obj, aliases: true) : obj
end
90
+ end
91
+ end
92
+ end
93
+ end