canon 0.1.8 → 0.1.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop_todo.yml +83 -22
  3. data/docs/Gemfile +1 -0
  4. data/docs/_config.yml +90 -1
  5. data/docs/advanced/diff-classification.adoc +196 -24
  6. data/docs/features/match-options/index.adoc +239 -1
  7. data/lib/canon/comparison/format_detector.rb +2 -1
  8. data/lib/canon/comparison/html_comparator.rb +19 -8
  9. data/lib/canon/comparison/html_compare_profile.rb +8 -2
  10. data/lib/canon/comparison/markup_comparator.rb +109 -2
  11. data/lib/canon/comparison/match_options/base_resolver.rb +7 -0
  12. data/lib/canon/comparison/whitespace_sensitivity.rb +208 -0
  13. data/lib/canon/comparison/xml_comparator/child_comparison.rb +15 -7
  14. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +108 -0
  15. data/lib/canon/comparison/xml_comparator/node_parser.rb +10 -5
  16. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +14 -7
  17. data/lib/canon/comparison/xml_comparator.rb +240 -23
  18. data/lib/canon/comparison/xml_node_comparison.rb +25 -3
  19. data/lib/canon/diff/diff_classifier.rb +119 -5
  20. data/lib/canon/diff/formatting_detector.rb +1 -1
  21. data/lib/canon/diff/xml_serialization_formatter.rb +153 -0
  22. data/lib/canon/rspec_matchers.rb +37 -8
  23. data/lib/canon/version.rb +1 -1
  24. data/lib/canon/xml/data_model.rb +24 -13
  25. metadata +4 -78
  26. data/docs/plans/2025-01-17-html-parser-selection-fix.adoc +0 -250
  27. data/false_positive_analysis.txt +0 -0
  28. data/file1.html +0 -1
  29. data/file2.html +0 -1
  30. data/old-docs/ADVANCED_TOPICS.adoc +0 -20
  31. data/old-docs/BASIC_USAGE.adoc +0 -16
  32. data/old-docs/CHARACTER_VISUALIZATION.adoc +0 -567
  33. data/old-docs/CLI.adoc +0 -497
  34. data/old-docs/CUSTOMIZING_BEHAVIOR.adoc +0 -19
  35. data/old-docs/DIFF_ARCHITECTURE.adoc +0 -435
  36. data/old-docs/DIFF_FORMATTING.adoc +0 -540
  37. data/old-docs/DIFF_PARAMETERS.adoc +0 -261
  38. data/old-docs/DOM_DIFF.adoc +0 -1017
  39. data/old-docs/ENV_CONFIG.adoc +0 -876
  40. data/old-docs/FORMATS.adoc +0 -867
  41. data/old-docs/INPUT_VALIDATION.adoc +0 -477
  42. data/old-docs/MATCHER_BEHAVIOR.adoc +0 -90
  43. data/old-docs/MATCH_ARCHITECTURE.adoc +0 -463
  44. data/old-docs/MATCH_OPTIONS.adoc +0 -912
  45. data/old-docs/MODES.adoc +0 -432
  46. data/old-docs/NORMATIVE_INFORMATIVE_DIFFS.adoc +0 -219
  47. data/old-docs/OPTIONS.adoc +0 -1387
  48. data/old-docs/PREPROCESSING.adoc +0 -491
  49. data/old-docs/README.old.adoc +0 -2831
  50. data/old-docs/RSPEC.adoc +0 -814
  51. data/old-docs/RUBY_API.adoc +0 -485
  52. data/old-docs/SEMANTIC_DIFF_REPORT.adoc +0 -646
  53. data/old-docs/SEMANTIC_TREE_DIFF.adoc +0 -765
  54. data/old-docs/STRING_COMPARE.adoc +0 -345
  55. data/old-docs/TMP.adoc +0 -3384
  56. data/old-docs/TREE_DIFF.adoc +0 -1080
  57. data/old-docs/UNDERSTANDING_CANON.adoc +0 -17
  58. data/old-docs/VERBOSE.adoc +0 -482
  59. data/old-docs/VISUALIZATION_MAP.adoc +0 -625
  60. data/old-docs/WHITESPACE_TREATMENT.adoc +0 -1155
  61. data/scripts/analyze_current_state.rb +0 -85
  62. data/scripts/analyze_false_positives.rb +0 -114
  63. data/scripts/analyze_remaining_failures.rb +0 -105
  64. data/scripts/compare_current_failures.rb +0 -95
  65. data/scripts/compare_dom_tree_diff.rb +0 -158
  66. data/scripts/compare_failures.rb +0 -151
  67. data/scripts/debug_attribute_extraction.rb +0 -66
  68. data/scripts/debug_blocks_839.rb +0 -115
  69. data/scripts/debug_meta_matching.rb +0 -52
  70. data/scripts/debug_p_matching.rb +0 -192
  71. data/scripts/debug_signature_matching.rb +0 -118
  72. data/scripts/debug_sourcecode_124.rb +0 -32
  73. data/scripts/debug_whitespace_sensitive.rb +0 -192
  74. data/scripts/extract_false_positives.rb +0 -138
  75. data/scripts/find_actual_false_positives.rb +0 -125
  76. data/scripts/investigate_all_false_positives.rb +0 -161
  77. data/scripts/investigate_batch1.rb +0 -127
  78. data/scripts/investigate_classification.rb +0 -150
  79. data/scripts/investigate_classification_detailed.rb +0 -190
  80. data/scripts/investigate_common_failures.rb +0 -342
  81. data/scripts/investigate_false_negative.rb +0 -80
  82. data/scripts/investigate_false_positive.rb +0 -83
  83. data/scripts/investigate_false_positives.rb +0 -227
  84. data/scripts/investigate_false_positives_batch.rb +0 -163
  85. data/scripts/investigate_mixed_content.rb +0 -125
  86. data/scripts/investigate_remaining_16.rb +0 -214
  87. data/scripts/run_single_test.rb +0 -29
  88. data/scripts/test_all_false_positives.rb +0 -95
  89. data/scripts/test_attribute_details.rb +0 -61
  90. data/scripts/test_both_algorithms.rb +0 -49
  91. data/scripts/test_both_simple.rb +0 -49
  92. data/scripts/test_enhanced_semantic_output.rb +0 -125
  93. data/scripts/test_readme_examples.rb +0 -131
  94. data/scripts/test_semantic_tree_diff.rb +0 -99
  95. data/scripts/test_semantic_ux_improvements.rb +0 -135
  96. data/scripts/test_single_false_positive.rb +0 -119
  97. data/scripts/test_size_limits.rb +0 -99
  98. data/test_html_1.html +0 -21
  99. data/test_html_2.html +0 -21
  100. data/test_nokogiri.rb +0 -33
  101. data/test_normalize.rb +0 -45
@@ -0,0 +1,208 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ module Comparison
5
+ # Whitespace sensitivity utilities for element-level control
6
+ #
7
+ # This module provides logic to determine whether whitespace should be
8
+ # preserved during comparison based on:
9
+ # - Format-specific defaults (HTML has built-in sensitive elements)
10
+ # - User-configured whitelist (elements that care about whitespace)
11
+ # - User-configured blacklist (elements that don't care about whitespace)
12
+ # - xml:space attribute in the document itself
13
+ # - respect_xml_space flag (whether to honor or override xml:space)
14
+ #
15
+ # == Priority Order
16
+ #
17
+ # 1. respect_xml_space: false → User whitelist only (xml:space, format defaults and blacklist are not consulted)
18
+ # 2. User whitelist → Use whitelist (user explicitly declared)
19
+ # 3. Format defaults → HTML: [:pre, :code, :textarea, :script, :style], XML: []
20
+ # 4. User blacklist → Remove from defaults/whitelist
21
+ # 5. xml:space="preserve" → Element is sensitive
22
+ # 6. xml:space="default" → Element is not sensitive (configured lists are not consulted)
23
+ #
24
+ # == Usage
25
+ #
26
+ # WhitespaceSensitivity.element_sensitive?(node, opts)
27
+ # => true if whitespace should be preserved for this element
28
+ module WhitespaceSensitivity
29
+ class << self
30
+ # Check if an element is whitespace-sensitive based on configuration
31
+ #
32
+ # @param node [Object] The node (typically a text node) whose parent element is checked
33
+ # @param opts [Hash] Comparison options containing match_opts
34
+ # @return [Boolean] true if whitespace should be preserved for this element
35
+ def element_sensitive?(node, opts)
36
+ match_opts = opts[:match_opts]
37
+ return false unless match_opts
38
+ return false unless text_node_parent?(node)
39
+
40
+ parent = node.parent
41
+
42
+ # 1. Check if we should ignore xml:space (user override)
43
+ if !respect_xml_space?(match_opts)
44
+ return user_config_sensitive?(parent, match_opts)
45
+ end
46
+
47
+ # 2. Check xml:space="preserve" (document declaration)
48
+ return true if xml_space_preserve?(parent)
49
+
50
+ # 3. Check xml:space="default" (element is treated as not sensitive)
51
+ return false if xml_space_default?(parent)
52
+
53
+ # 4. Use user configuration + format defaults
54
+ configured_sensitive?(parent, match_opts)
55
+ end
56
+
57
+ # Check if whitespace-only text node should be preserved (i.e. not filtered)
58
+ #
59
+ # @param node [Object] The text node to check
60
+ # @param opts [Hash] Comparison options
61
+ # @return [Boolean] true if node should be preserved (not filtered)
62
+ def preserve_whitespace_node?(node, opts)
63
+ return false unless node.respond_to?(:parent)
64
+ return false unless node.parent
65
+
66
+ element_sensitive?(node, opts)
67
+ end
68
+
69
+ # Get format-specific default sensitive elements
70
+ #
71
+ # This is the SINGLE SOURCE OF TRUTH for default whitespace-sensitive
72
+ # elements. All other code should use this method to get the list.
73
+ #
74
+ # @param match_opts [Hash] Resolved match options
75
+ # @return [Array<Symbol>] Default sensitive element names
76
+ def format_default_sensitive_elements(match_opts)
77
+ format = match_opts[:format] || :xml
78
+
79
+ case format
80
+ when :html, :html4, :html5
81
+ # HTML specification: these elements preserve whitespace
82
+ %i[pre code textarea script style].freeze
83
+ when :xml
84
+ # XML has no default sensitive elements - purely user-controlled
85
+ [].freeze
86
+ else
87
+ [].freeze
88
+ end
89
+ end
90
+
91
+ # Check if an element is in the default sensitive list for its format
92
+ #
93
+ # Convenience method for checking element sensitivity without building
94
+ # the full list first.
95
+ #
96
+ # @param element_name [String, Symbol] The element name to check
97
+ # @param match_opts [Hash] Resolved match options
98
+ # @return [Boolean] true if element is in default sensitive list
99
+ def default_sensitive_element?(element_name, match_opts)
100
+ format_default_sensitive_elements(match_opts)
101
+ .include?(element_name.to_sym)
102
+ end
103
+
104
+ private
105
+
106
+ # Check if we should respect xml:space attribute
107
+ #
108
+ # @param match_opts [Hash] Resolved match options
109
+ # @return [Boolean] true if xml:space should be respected
110
+ def respect_xml_space?(match_opts)
111
+ if match_opts.key?(:respect_xml_space)
112
+ match_opts[:respect_xml_space]
113
+ else
114
+ true
115
+ end
116
+ end
117
+
118
+ # Check if xml:space="preserve" is set
119
+ #
120
+ # @param element [Object] The element to check
121
+ # @return [Boolean] true if xml:space="preserve"
122
+ def xml_space_preserve?(element)
123
+ if element.is_a?(Canon::Xml::Nodes::ElementNode)
124
+ # Check attribute_nodes for xml:space attribute
125
+ # xml:space is stored with name="space" and namespace_uri="http://www.w3.org/XML/1998/namespace"
126
+ element.attribute_nodes.any? do |attr|
127
+ attr.name == "space" &&
128
+ attr.namespace_uri == "http://www.w3.org/XML/1998/namespace" &&
129
+ attr.value == "preserve"
130
+ end
131
+ elsif element.respond_to?(:[])
132
+ element["xml:space"] == "preserve"
133
+ else
134
+ false
135
+ end
136
+ end
137
+
138
+ # Check if xml:space="default" is set
139
+ #
140
+ # @param element [Object] The element to check
141
+ # @return [Boolean] true if xml:space="default"
142
+ def xml_space_default?(element)
143
+ if element.is_a?(Canon::Xml::Nodes::ElementNode)
144
+ # Check attribute_nodes for xml:space attribute
145
+ # xml:space is stored with name="space" and namespace_uri="http://www.w3.org/XML/1998/namespace"
146
+ element.attribute_nodes.any? do |attr|
147
+ attr.name == "space" &&
148
+ attr.namespace_uri == "http://www.w3.org/XML/1998/namespace" &&
149
+ attr.value == "default"
150
+ end
151
+ elsif element.respond_to?(:[])
152
+ element["xml:space"] == "default"
153
+ else
154
+ false
155
+ end
156
+ end
157
+
158
+ # Check sensitivity based on user configuration
159
+ #
160
+ # @param element [Object] The element to check
161
+ # @param match_opts [Hash] Resolved match options
162
+ # @return [Boolean] true if element is in whitelist
163
+ def user_config_sensitive?(element, match_opts)
164
+ return false unless match_opts[:whitespace_sensitive_elements]
165
+
166
+ match_opts[:whitespace_sensitive_elements].include?(element.name.to_sym)
167
+ end
168
+
169
+ # Check sensitivity based on user config + format defaults
170
+ #
171
+ # @param element [Object] The element to check
172
+ # @param match_opts [Hash] Resolved match options
173
+ # @return [Boolean] true if element should be sensitive
174
+ def configured_sensitive?(element, match_opts)
175
+ # Start with format defaults
176
+ sensitive = format_default_sensitive_elements(match_opts).to_set
177
+
178
+ # Apply whitelist (adds to defaults)
179
+ if match_opts[:whitespace_sensitive_elements]
180
+ sensitive |= match_opts[:whitespace_sensitive_elements]
181
+ end
182
+
183
+ # Apply blacklist (removes from everything)
184
+ if match_opts[:whitespace_insensitive_elements]
185
+ sensitive -= match_opts[:whitespace_insensitive_elements]
186
+ end
187
+
188
+ sensitive.include?(element.name.to_sym)
189
+ end
190
+
191
+ # Check if node has a parent that's an element (not document root)
192
+ #
193
+ # @param node [Object] The node to check
194
+ # @return [Boolean] true if node has an element parent
195
+ def text_node_parent?(node)
196
+ return false unless node.respond_to?(:parent)
197
+ return false unless node.parent
198
+
199
+ parent = node.parent
200
+ return true if parent.respond_to?(:element?) && parent.element?
201
+
202
+ # Nokogiri compatibility
203
+ parent.respond_to?(:node_type) && parent.node_type == :element
204
+ end
205
+ end
206
+ end
207
+ end
208
+ end
@@ -26,7 +26,8 @@ module Canon
26
26
  # @param diff_children [Boolean] Whether to diff children
27
27
  # @param differences [Array] Array to collect differences
28
28
  # @return [Integer] Comparison result code
29
- def compare(node1, node2, comparator, opts, child_opts, diff_children, differences)
29
+ def compare(node1, node2, comparator, opts, child_opts,
30
+ diff_children, differences)
30
31
  children1 = comparator.send(:filter_children, node1.children, opts)
31
32
  children2 = comparator.send(:filter_children, node2.children, opts)
32
33
 
@@ -51,7 +52,9 @@ module Canon
51
52
  # method that returns symbols, and only works with element nodes.
52
53
  def can_use_element_matcher?(children1, children2)
53
54
  !children1.empty? && !children2.empty? &&
54
- children1.all? { |c| c.is_a?(Canon::Xml::Node) && c.node_type == :element } &&
55
+ children1.all? do |c|
56
+ c.is_a?(Canon::Xml::Node) && c.node_type == :element
57
+ end &&
55
58
  children2.all? { |c| c.is_a?(Canon::Xml::Node) && c.node_type == :element }
56
59
  end
57
60
 
@@ -140,7 +143,8 @@ module Canon
140
143
  opts, child_opts, diff_children, differences)
141
144
  # Length check
142
145
  unless children1.length == children2.length
143
- dimension = determine_dimension_for_mismatch(children1, children2, comparator)
146
+ dimension = determine_dimension_for_mismatch(children1,
147
+ children2, comparator)
144
148
  comparator.send(:add_difference, parent_node, parent_node,
145
149
  Comparison::MISSING_NODE, Comparison::MISSING_NODE,
146
150
  dimension, opts, differences)
@@ -167,15 +171,19 @@ module Canon
167
171
  (0...max_len).each do |i|
168
172
  if i >= children1.length
169
173
  # Extra child in children2
170
- dimension = comparator.send(:determine_node_dimension, children2[i])
174
+ dimension = comparator.send(:determine_node_dimension,
175
+ children2[i])
171
176
  break
172
177
  elsif i >= children2.length
173
178
  # Extra child in children1
174
- dimension = comparator.send(:determine_node_dimension, children1[i])
179
+ dimension = comparator.send(:determine_node_dimension,
180
+ children1[i])
175
181
  break
176
- elsif !comparator.send(:same_node_type?, children1[i], children2[i])
182
+ elsif !comparator.send(:same_node_type?, children1[i],
183
+ children2[i])
177
184
  # Different node types at same position
178
- dimension = comparator.send(:determine_node_dimension, children1[i])
185
+ dimension = comparator.send(:determine_node_dimension,
186
+ children1[i])
179
187
  break
180
188
  end
181
189
  end
@@ -1,5 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ require "set"
3
4
  require_relative "../../diff/diff_node"
4
5
  require_relative "../../diff/path_builder"
5
6
  require_relative "../../diff/node_serializer"
@@ -62,6 +63,21 @@ module Canon
62
63
  end
63
64
  end
64
65
 
66
+ # For attribute presence differences, show what attributes differ
67
+ if dimension == :attribute_presence
68
+ attrs1 = extract_attributes(node1)
69
+ attrs2 = extract_attributes(node2)
70
+ return build_attribute_difference_reason(attrs1, attrs2)
71
+ end
72
+
73
+ # For text content differences, show the actual text (truncated if needed)
74
+ if dimension == :text_content
75
+ text1 = extract_text_content(node1)
76
+ text2 = extract_text_content(node2)
77
+ return build_text_difference_reason(text1, text2)
78
+ end
79
+
80
+ # Default reason
65
81
  "#{diff1} vs #{diff2}"
66
82
  end
67
83
 
@@ -110,6 +126,98 @@ module Canon
110
126
 
111
127
  Canon::Diff::NodeSerializer.extract_attributes(node)
112
128
  end
129
+
130
+ # Build a clear reason message for attribute presence differences
131
+ # Shows which attributes are only in node1, only in node2, or different values
132
+ #
133
+ # @param attrs1 [Hash, nil] First node's attributes
134
+ # @param attrs2 [Hash, nil] Second node's attributes
135
+ # @return [String] Clear explanation of the attribute difference
136
+ def self.build_attribute_difference_reason(attrs1, attrs2)
137
+ return "#{attrs1&.keys&.size || 0} vs #{attrs2&.keys&.size || 0} attributes" unless attrs1 && attrs2
138
+
139
+ keys1 = attrs1.keys.to_set
140
+ keys2 = attrs2.keys.to_set
141
+
142
+ only_in_1 = keys1 - keys2
143
+ only_in_2 = keys2 - keys1
144
+ common = keys1 & keys2
145
+
146
+ # Check if values differ for common keys
147
+ different_values = common.reject { |k| attrs1[k] == attrs2[k] }
148
+
149
+ parts = []
150
+ parts << "only in first: #{only_in_1.to_a.sort.join(', ')}" if only_in_1.any?
151
+ parts << "only in second: #{only_in_2.to_a.sort.join(', ')}" if only_in_2.any?
152
+ parts << "different values: #{different_values.sort.join(', ')}" if different_values.any?
153
+
154
+ if parts.empty?
155
+ "#{keys1.size} vs #{keys2.size} attributes (same names)"
156
+ else
157
+ parts.join("; ")
158
+ end
159
+ end
160
+
161
+ # Extract text content from a node
162
+ #
163
+ # @param node [Object, nil] Node to extract text from
164
+ # @return [String, nil] Text content or nil
165
+ def self.extract_text_content(node)
166
+ return nil if node.nil?
167
+
168
+ # For Canon::Xml::Nodes::TextNode
169
+ return node.value if node.respond_to?(:value) && node.is_a?(Canon::Xml::Nodes::TextNode)
170
+
171
+ # For XML/HTML nodes with text_content method
172
+ return node.text_content if node.respond_to?(:text_content)
173
+
174
+ # For nodes with text method
175
+ return node.text if node.respond_to?(:text)
176
+
177
+ # For nodes with content method (Moxml::Text)
178
+ return node.content if node.respond_to?(:content)
179
+
180
+ # For nodes with value method (other types)
181
+ return node.value if node.respond_to?(:value)
182
+
183
+ # For simple text nodes or strings
184
+ return node.to_s if node.is_a?(String)
185
+
186
+ # For other node types, try to_s
187
+ node.to_s
188
+ rescue StandardError
189
+ nil
190
+ end
191
+
192
+ # Build a clear reason message for text content differences
193
+ # Shows the actual text content (truncated if too long)
194
+ #
195
+ # @param text1 [String, nil] First text content
196
+ # @param text2 [String, nil] Second text content
197
+ # @return [String] Clear explanation of the text difference
198
+ def self.build_text_difference_reason(text1, text2)
199
+ # Handle nil cases
200
+ return "missing vs '#{truncate(text2)}'" if text1.nil? && text2
201
+ return "'#{truncate(text1)}' vs missing" if text1 && text2.nil?
202
+ return "both missing" if text1.nil? && text2.nil?
203
+
204
+ # Both have content - show truncated versions
205
+ "'#{truncate(text1)}' vs '#{truncate(text2)}'"
206
+ end
207
+
208
+ # Truncate text for display in reason messages
209
+ #
210
+ # @param text [String, nil] Text to truncate (nil yields an empty string)
211
+ # @param max_length [Integer] Maximum length
212
+ # @return [String] Truncated text
213
+ def self.truncate(text, max_length = 40)
214
+ return "" if text.nil?
215
+
216
+ text = text.to_s
217
+ return text if text.length <= max_length
218
+
219
+ "#{text[0...max_length]}..."
220
+ end
113
221
  end
114
222
  end
115
223
  end
@@ -13,21 +13,24 @@ module Canon
13
13
  #
14
14
  # @param node [String, Object] Node to parse
15
15
  # @param preprocessing [Symbol] Preprocessing mode (:none, :normalize, :c14n, :format)
16
+ # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
16
17
  # @return [Canon::Xml::Node] Parsed node
17
- def self.parse(node, preprocessing = :none)
18
+ def self.parse(node, preprocessing = :none, preserve_whitespace: false)
18
19
  # If already a Canon::Xml::Node, return as-is
19
20
  return node if node.is_a?(Canon::Xml::Node)
20
21
 
21
22
  # If it's a Nokogiri or Moxml node, convert to DataModel
22
23
  unless node.is_a?(String)
23
- return convert_from_node(node)
24
+ return convert_from_node(node,
25
+ preserve_whitespace: preserve_whitespace)
24
26
  end
25
27
 
26
28
  # Apply preprocessing to XML string before parsing
27
29
  xml_string = apply_preprocessing(node, preprocessing)
28
30
 
29
31
  # Use Canon::Xml::DataModel for parsing to get Canon::Xml::Node instances
30
- Canon::Xml::DataModel.from_xml(xml_string)
32
+ Canon::Xml::DataModel.from_xml(xml_string,
33
+ preserve_whitespace: preserve_whitespace)
31
34
  end
32
35
 
33
36
  # Apply preprocessing transformation to XML string
@@ -55,8 +58,9 @@ module Canon
55
58
  # Convert from Nokogiri/Moxml node to Canon::Xml::Node
56
59
  #
57
60
  # @param node [Object] Nokogiri or Moxml node
61
+ # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
58
62
  # @return [Canon::Xml::Node] Converted node
59
- def self.convert_from_node(node)
63
+ def self.convert_from_node(node, preserve_whitespace: false)
60
64
  # Convert to XML string then parse through DataModel
61
65
  xml_str = if node.respond_to?(:to_xml)
62
66
  node.to_xml
@@ -66,7 +70,8 @@ module Canon
66
70
  raise Canon::Error,
67
71
  "Unable to convert node to string: #{node.class}"
68
72
  end
69
- Canon::Xml::DataModel.from_xml(xml_str)
73
+ Canon::Xml::DataModel.from_xml(xml_str,
74
+ preserve_whitespace: preserve_whitespace)
70
75
  end
71
76
  end
72
77
  end
@@ -23,7 +23,8 @@ module Canon
23
23
  # @param diff_children [Boolean] Whether to diff children
24
24
  # @param differences [Array] Array to collect differences
25
25
  # @return [Integer] Comparison result code
26
- def compare(node1, node2, comparator, opts, child_opts, diff_children, differences)
26
+ def compare(node1, node2, comparator, opts, child_opts,
27
+ diff_children, differences)
27
28
  # Dispatch based on node type
28
29
  # Canon::Xml::Node types use .node_type method that returns symbols
29
30
  # Nokogiri also has .node_type but returns integers, so check for Symbol
@@ -51,11 +52,14 @@ module Canon
51
52
  comparator.send(:compare_element_nodes, node1, node2, opts, child_opts,
52
53
  diff_children, differences)
53
54
  when :text
54
- comparator.send(:compare_text_nodes, node1, node2, opts, differences)
55
+ comparator.send(:compare_text_nodes, node1, node2, opts,
56
+ differences)
55
57
  when :comment
56
- comparator.send(:compare_comment_nodes, node1, node2, opts, differences)
58
+ comparator.send(:compare_comment_nodes, node1, node2, opts,
59
+ differences)
57
60
  when :cdata
58
- comparator.send(:compare_text_nodes, node1, node2, opts, differences)
61
+ comparator.send(:compare_text_nodes, node1, node2, opts,
62
+ differences)
59
63
  when :processing_instruction
60
64
  comparator.send(:compare_processing_instruction_nodes, node1, node2, opts,
61
65
  differences)
@@ -71,11 +75,14 @@ module Canon
71
75
  comparator.send(:compare_element_nodes, node1, node2, opts, child_opts,
72
76
  diff_children, differences)
73
77
  elsif node1.respond_to?(:text?) && node1.text?
74
- comparator.send(:compare_text_nodes, node1, node2, opts, differences)
78
+ comparator.send(:compare_text_nodes, node1, node2, opts,
79
+ differences)
75
80
  elsif node1.respond_to?(:comment?) && node1.comment?
76
- comparator.send(:compare_comment_nodes, node1, node2, opts, differences)
81
+ comparator.send(:compare_comment_nodes, node1, node2, opts,
82
+ differences)
77
83
  elsif node1.respond_to?(:cdata?) && node1.cdata?
78
- comparator.send(:compare_text_nodes, node1, node2, opts, differences)
84
+ comparator.send(:compare_text_nodes, node1, node2, opts,
85
+ differences)
79
86
  elsif node1.respond_to?(:processing_instruction?) &&
80
87
  node1.processing_instruction?
81
88
  comparator.send(:compare_processing_instruction_nodes, node1, node2, opts,