canon 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +14 -71
  4. data/Rakefile +17 -0
  5. data/lib/canon/cli.rb +1 -1
  6. data/lib/canon/color_detector.rb +3 -5
  7. data/lib/canon/comparison/compare_profile.rb +1 -4
  8. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  9. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  10. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  11. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  15. data/lib/canon/comparison/format_detector.rb +29 -20
  16. data/lib/canon/comparison/html_comparator.rb +18 -29
  17. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  18. data/lib/canon/comparison/html_parser.rb +1 -1
  19. data/lib/canon/comparison/json_comparator.rb +8 -0
  20. data/lib/canon/comparison/node_inspector.rb +146 -80
  21. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  22. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  23. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  24. data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
  25. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
  26. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  27. data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
  28. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  29. data/lib/canon/comparison/xml_comparator.rb +61 -83
  30. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  31. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  32. data/lib/canon/comparison.rb +23 -23
  33. data/lib/canon/config/profile_loader.rb +13 -13
  34. data/lib/canon/config.rb +29 -5
  35. data/lib/canon/diff/diff_classifier.rb +7 -41
  36. data/lib/canon/diff/diff_line.rb +1 -1
  37. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  38. data/lib/canon/diff/node_serializer.rb +23 -30
  39. data/lib/canon/diff/path_builder.rb +24 -37
  40. data/lib/canon/diff/source_locator.rb +0 -3
  41. data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
  42. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  43. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  44. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  45. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  46. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  47. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  48. data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
  49. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
  50. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
  51. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  52. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  53. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  54. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  55. data/lib/canon/diff_formatter/legend.rb +2 -2
  56. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  57. data/lib/canon/diff_formatter/theme.rb +4 -4
  58. data/lib/canon/diff_formatter.rb +2 -2
  59. data/lib/canon/formatters/html_formatter.rb +1 -1
  60. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  61. data/lib/canon/formatters/xml_formatter.rb +7 -32
  62. data/lib/canon/html/data_model.rb +1 -1
  63. data/lib/canon/pretty_printer/html.rb +1 -1
  64. data/lib/canon/pretty_printer/xml.rb +16 -7
  65. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  66. data/lib/canon/rspec_matchers.rb +2 -2
  67. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  68. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  69. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  70. data/lib/canon/validators/html_validator.rb +1 -1
  71. data/lib/canon/validators/xml_validator.rb +1 -1
  72. data/lib/canon/version.rb +1 -1
  73. data/lib/canon/xml/data_model.rb +131 -137
  74. data/lib/canon/xml/namespace_helper.rb +5 -0
  75. data/lib/canon/xml/node.rb +2 -1
  76. data/lib/canon/xml/nodes/root_node.rb +4 -0
  77. data/lib/canon/xml/nodes/text_node.rb +6 -1
  78. data/lib/canon/xml/sax_builder.rb +4 -6
  79. data/lib/canon/xml_backend.rb +49 -0
  80. data/lib/canon/xml_parsing.rb +271 -0
  81. data/lib/canon.rb +3 -1
  82. data/lib/tasks/benchmark_runner.rb +1 -1
  83. data/lib/tasks/performance_helpers.rb +1 -1
  84. metadata +5 -2
@@ -4,94 +4,88 @@ module Canon
4
4
  module Comparison
5
5
  # Single source of truth for cross-backend node type operations.
6
6
  #
7
- # The comparison pipeline handles nodes from two backends:
7
+ # The comparison pipeline handles nodes from multiple sources:
8
8
  # * Canon::Xml::Node (+ RootNode, ElementNode, TextNode, etc.) —
9
9
  # custom DOM built by SAX builder and DataModel.
10
- # * Nokogiri::XML::Node (+ subclasses) — native Nokogiri nodes used
11
- # by the HTML comparator and some legacy paths.
10
+ # * Canon::TreeDiff::Core::TreeNode — semantic tree diff nodes.
11
+ # * Backend-specific nodes (Nokogiri or Moxml) — live parsed nodes.
12
12
  #
13
- # Every method here dispatches on type via +case/when+ (+is_a?+).
14
- # No +respond_to?+ the types are known at every call site.
13
+ # All type dispatch uses backend-branching (`if XmlBackend.nokogiri?`)
14
+ # rather than `case/when` with constant references. This prevents
15
+ # NameError when Nokogiri constants are undefined under Opal.
16
+ #
17
+ # Every node query in the codebase should go through this module.
18
+ # Do not create private dispatch methods in consumers.
15
19
  module NodeInspector
16
- CANON_TEXT_TYPE = :text
17
20
  NOKOGIRI_TEXT_TYPE = defined?(Nokogiri::XML::Node::TEXT_NODE) ? Nokogiri::XML::Node::TEXT_NODE : 3
18
21
 
19
- # True when +node+ is a text node (whitespace, content, etc.).
22
+ # --- Type predicates ---
23
+
20
24
  def self.text_node?(node)
21
- case node
22
- when Canon::Xml::Node
23
- node.node_type == CANON_TEXT_TYPE
24
- when Nokogiri::XML::Node
25
- node.node_type == NOKOGIRI_TEXT_TYPE
26
- else
27
- false
28
- end
29
- end
25
+ return false unless node
26
+ return node.node_type == :text if node.is_a?(Canon::Xml::Node)
30
27
 
31
- # Extract the text content of +node+ as a String.
32
- def self.text_content(node)
33
- case node
34
- when Canon::Xml::Node
35
- node.value.to_s
36
- when Nokogiri::XML::Node
37
- node.content.to_s
28
+ if XmlBackend.nokogiri?
29
+ node.is_a?(Nokogiri::XML::Text) || node.is_a?(Moxml::Text)
38
30
  else
39
- node.to_s
31
+ node.is_a?(Moxml::Text)
40
32
  end
41
33
  end
42
34
 
43
- # True when +node+ is a text node whose content is whitespace-only.
44
- # Empty-string text nodes return false — those represent genuine
45
- # empty-vs-content asymmetry, not pretty-print indentation.
46
- def self.whitespace_only_text?(node)
47
- return false unless text_node?(node)
35
+ def self.element_node?(node)
36
+ return false unless node
37
+ return node.node_type == :element if node.is_a?(Canon::Xml::Node)
48
38
 
49
- text = text_content(node)
50
- !text.empty? && text.strip.empty?
39
+ if XmlBackend.nokogiri?
40
+ node.is_a?(Nokogiri::XML::Element) || node.is_a?(Moxml::Element)
41
+ else
42
+ node.is_a?(Moxml::Element)
43
+ end
51
44
  end
52
45
 
53
- # True when +node+ is a comment node.
54
- # For HTML, also detects comments that Nokogiri parses as TEXT nodes
55
- # (content like "<!-- comment -->" or escaped "<\\!-- comment -->").
56
46
  def self.comment_node?(node)
57
- case node
58
- when Canon::Xml::Node
59
- node.node_type == :comment
60
- when Nokogiri::XML::Node
61
- return true if node.comment?
47
+ return false unless node
48
+ return node.node_type == :comment if node.is_a?(Canon::Xml::Node)
49
+
50
+ if XmlBackend.nokogiri?
51
+ return true if node.is_a?(Nokogiri::XML::Node) && node.comment?
62
52
 
63
53
  # HTML comments are parsed as TEXT nodes by Nokogiri
64
- if node.text?
54
+ if node.is_a?(Nokogiri::XML::Node) && node.text?
65
55
  text_stripped = text_content(node).to_s.strip.gsub("\\", "")
66
56
  return true if text_stripped.start_with?("<!--") && text_stripped.end_with?("-->")
67
57
  end
68
58
  false
69
59
  else
70
- false
60
+ node.is_a?(Moxml::Comment)
71
61
  end
72
62
  end
73
63
 
74
- # True when +node+ is an element node.
75
- def self.element_node?(node)
76
- case node
77
- when Canon::Xml::Node
78
- node.node_type == :element
79
- when Nokogiri::XML::Node
80
- node.element?
81
- else
82
- false
83
- end
64
+ def self.document?(node)
65
+ return node.node_type == :root if node.is_a?(Canon::Xml::Node)
66
+
67
+ XmlParsing.document?(node)
84
68
  end
85
69
 
86
- # Classify +node+ as a noise node and return the diff dimension
87
- # it should be reported under, or +nil+ if it is structural content.
88
- #
89
- # Noise nodes (whitespace-only text, comments) are realigned past
90
- # during child comparison so that content nodes line up correctly
91
- # across sides.
92
- #
93
- # @param node [Object] DOM node to classify
94
- # @return [Symbol, nil] +:whitespace_adjacency+, +:comments+, or +nil+
70
+ def self.document_fragment?(node)
71
+ return false unless node
72
+ return false unless node.is_a?(Canon::Xml::Nodes::RootNode)
73
+
74
+ node.fragment?
75
+ end
76
+
77
+ # True when +node+ is a text node whose content is whitespace-only.
78
+ # Empty-string text nodes return false — those represent genuine
79
+ # empty-vs-content asymmetry, not pretty-print indentation.
80
+ def self.whitespace_only_text?(node)
81
+ return false unless text_node?(node)
82
+
83
+ text = text_content(node)
84
+ !text.empty? && text.strip.empty?
85
+ end
86
+
87
+ # --- Noise classification ---
88
+
95
89
  def self.noise_dimension_for(node)
96
90
  if whitespace_only_text?(node)
97
91
  :whitespace_adjacency
@@ -100,38 +94,110 @@ module Canon
100
94
  end
101
95
  end
102
96
 
103
- # True when +node+ is a noise node (whitespace-only text or comment).
104
- # Convenience wrapper around +noise_dimension_for+.
105
- #
106
- # @param node [Object] DOM node to check
107
- # @return [Boolean]
108
97
  def self.noise_node?(node)
109
98
  !noise_dimension_for(node).nil?
110
99
  end
111
100
 
112
- # Extract parse-time errors carried on a node or its owning document.
113
- # Returns an Array of Strings.
114
- def self.parse_errors(node)
101
+ # --- Node queries ---
102
+
103
+ # Unified node name extraction across all node types.
104
+ def self.name(node)
105
+ return nil unless node
106
+ return node.name if node.is_a?(Canon::Xml::Node)
107
+ return node.label if node.is_a?(Canon::TreeDiff::Core::TreeNode)
108
+
109
+ XmlParsing.name(node)
110
+ end
111
+
112
+ # Unified parent access across all node types.
113
+ def self.parent(node)
114
+ return nil unless node
115
+ return node.parent if node.is_a?(Canon::Xml::Node)
116
+ return node.parent if node.is_a?(Canon::TreeDiff::Core::TreeNode)
117
+
118
+ XmlParsing.parent(node)
119
+ end
120
+
121
+ # Unified children access across all node types.
122
+ def self.children(node)
123
+ return [] unless node
124
+ return node.children if node.is_a?(Canon::Xml::Node)
125
+ return node.children || [] if node.is_a?(Canon::TreeDiff::Core::TreeNode)
126
+
127
+ XmlParsing.children(node)
128
+ end
129
+
130
+ # Extract the text content of +node+ as a String.
131
+ def self.text_content(node)
115
132
  case node
116
- when nil
117
- []
133
+ when Canon::Xml::Nodes::TextNode
134
+ node.value.to_s
118
135
  when Canon::Xml::Node
119
- errors = node.parse_errors
120
- Array(errors).map(&:to_s)
121
- when Nokogiri::XML::Document, Nokogiri::HTML5::Document
122
- Array(node.errors).map(&:to_s)
136
+ node.text_content.to_s
137
+ when Moxml::Text
138
+ node.content.to_s
139
+ else
140
+ XmlParsing.text_content(node).to_s
141
+ end
142
+ end
143
+
144
+ # Unified node type that always returns a symbol.
145
+ # Returns nil for unrecognised nodes.
146
+ def self.node_type(node)
147
+ return nil unless node
148
+ return node.node_type if node.is_a?(Canon::Xml::Node)
149
+
150
+ if node.is_a?(Canon::TreeDiff::Core::TreeNode)
151
+ node.type&.to_sym
152
+ else
153
+ XmlParsing.node_type(node)
154
+ end
155
+ end
156
+
157
+ # Unified attribute value access.
158
+ def self.attribute_value(node, attr_name)
159
+ return nil unless node
160
+
161
+ if node.is_a?(Canon::Xml::Nodes::ElementNode)
162
+ attr = node.attribute_nodes.find { |a| a.name == attr_name.to_s }
163
+ attr&.value
164
+ elsif node.is_a?(Canon::Xml::Node)
165
+ nil
166
+ else
167
+ XmlParsing.attribute_value(node, attr_name)
168
+ end
169
+ end
170
+
171
+ # Unified namespace URI access.
172
+ def self.namespace_uri(node)
173
+ return nil unless node
174
+
175
+ if node.is_a?(Canon::Xml::Node)
176
+ node.is_a?(Canon::Xml::Nodes::ElementNode) ? node.namespace_uri : nil
177
+ else
178
+ XmlParsing.namespace_uri(node)
179
+ end
180
+ end
181
+
182
+ # Extract parse-time errors carried on a node or its owning document.
183
+ def self.parse_errors(node)
184
+ return [] if node.nil?
185
+ return Array(node.parse_errors).map(&:to_s) if node.is_a?(Canon::Xml::Node)
186
+
187
+ if XmlBackend.nokogiri?
188
+ if node.is_a?(Nokogiri::XML::Document) || node.is_a?(Nokogiri::HTML5::Document)
189
+ Array(node.errors).map(&:to_s)
190
+ else
191
+ []
192
+ end
123
193
  else
124
194
  []
125
195
  end
126
196
  end
127
197
 
128
- # Return the parent node of +node+, or nil when +node+ is not a
129
- # recognised DOM backend type or has no parent.
198
+ # Deprecated: use NodeInspector.parent instead.
130
199
  def self.parent_of(node)
131
- case node
132
- when Canon::Xml::Node, Nokogiri::XML::Node
133
- node.parent
134
- end
200
+ parent(node)
135
201
  end
136
202
  end
137
203
  end
@@ -125,20 +125,18 @@ module Canon
125
125
  # @param doc2 [Object] Second XML document
126
126
  # @return [Array<String>] Preprocessed strings
127
127
  def preprocess_xml(doc1, doc2)
128
- # Serialize XML to string
129
- # Use XmlNodeComparison's serializer for Canon::Xml::Node
130
128
  xml1 = if doc1.is_a?(Canon::Xml::Node)
131
129
  XmlNodeComparison.serialize_node_to_xml(doc1)
132
- elsif doc1.respond_to?(:to_xml)
133
- doc1.to_xml
130
+ elsif Canon::XmlParsing.xml_node?(doc1)
131
+ Canon::XmlParsing.serialize(doc1)
134
132
  else
135
133
  doc1.to_s
136
134
  end
137
135
 
138
136
  xml2 = if doc2.is_a?(Canon::Xml::Node)
139
137
  XmlNodeComparison.serialize_node_to_xml(doc2)
140
- elsif doc2.respond_to?(:to_xml)
141
- doc2.to_xml
138
+ elsif Canon::XmlParsing.xml_node?(doc2)
139
+ Canon::XmlParsing.serialize(doc2)
142
140
  else
143
141
  doc2.to_s
144
142
  end
@@ -167,7 +165,7 @@ module Canon
167
165
  XmlNodeComparison.serialize_node_to_xml(doc1)
168
166
  elsif doc1.is_a?(Nokogiri::XML::DocumentFragment)
169
167
  doc1.to_s
170
- elsif doc1.respond_to?(:to_html)
168
+ elsif Canon::XmlParsing.xml_node?(doc1)
171
169
  doc1.to_html
172
170
  else
173
171
  doc1.to_s
@@ -177,7 +175,7 @@ module Canon
177
175
  XmlNodeComparison.serialize_node_to_xml(doc2)
178
176
  elsif doc2.is_a?(Nokogiri::XML::DocumentFragment)
179
177
  doc2.to_s
180
- elsif doc2.respond_to?(:to_html)
178
+ elsif Canon::XmlParsing.xml_node?(doc2)
181
179
  doc2.to_html
182
180
  else
183
181
  doc2.to_s