canon 0.2.8 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +25 -73
  4. data/Rakefile +37 -0
  5. data/lib/canon/cache.rb +16 -27
  6. data/lib/canon/cli.rb +1 -1
  7. data/lib/canon/color_detector.rb +3 -5
  8. data/lib/canon/comparison/compare_profile.rb +1 -4
  9. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  10. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  11. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  15. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  16. data/lib/canon/comparison/format_detector.rb +29 -20
  17. data/lib/canon/comparison/html_comparator.rb +20 -29
  18. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  19. data/lib/canon/comparison/html_parser.rb +1 -1
  20. data/lib/canon/comparison/json_comparator.rb +8 -0
  21. data/lib/canon/comparison/node_inspector.rb +117 -86
  22. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  23. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  24. data/lib/canon/comparison/xml_comparator/attribute_comparator.rb +19 -2
  25. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  26. data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
  27. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +40 -8
  28. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  29. data/lib/canon/comparison/xml_comparator/node_parser.rb +14 -13
  30. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  31. data/lib/canon/comparison/xml_comparator.rb +63 -85
  32. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  33. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  34. data/lib/canon/comparison.rb +24 -24
  35. data/lib/canon/config/profile_loader.rb +13 -13
  36. data/lib/canon/config.rb +29 -5
  37. data/lib/canon/diff/diff_classifier.rb +7 -41
  38. data/lib/canon/diff/diff_line.rb +1 -1
  39. data/lib/canon/diff/diff_line_builder.rb +2 -0
  40. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  41. data/lib/canon/diff/diff_node_mapper.rb +10 -8
  42. data/lib/canon/diff/formatting_detector.rb +3 -2
  43. data/lib/canon/diff/node_serializer.rb +23 -30
  44. data/lib/canon/diff/path_builder.rb +24 -37
  45. data/lib/canon/diff/source_locator.rb +0 -3
  46. data/lib/canon/diff/xml_serialization_formatter.rb +8 -84
  47. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  48. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  49. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  50. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  51. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  52. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  53. data/lib/canon/diff_formatter/by_object/base_formatter.rb +23 -17
  54. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +127 -11
  55. data/lib/canon/diff_formatter/by_object_formatter.rb +2 -6
  56. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  57. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  58. data/lib/canon/diff_formatter/diff_detail_formatter/dimension_formatter.rb +3 -3
  59. data/lib/canon/diff_formatter/diff_detail_formatter/location_extractor.rb +26 -27
  60. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  61. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  62. data/lib/canon/diff_formatter/legend.rb +2 -2
  63. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  64. data/lib/canon/diff_formatter/theme.rb +4 -4
  65. data/lib/canon/diff_formatter.rb +17 -13
  66. data/lib/canon/formatters/html_formatter.rb +1 -1
  67. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  68. data/lib/canon/formatters/xml_formatter.rb +7 -32
  69. data/lib/canon/html/data_model.rb +2 -2
  70. data/lib/canon/pretty_printer/html.rb +1 -1
  71. data/lib/canon/pretty_printer/xml.rb +16 -7
  72. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  73. data/lib/canon/rspec_matchers.rb +2 -2
  74. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  75. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  76. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  77. data/lib/canon/tree_diff/operation_converter.rb +7 -7
  78. data/lib/canon/tree_diff/operations/operation_detector.rb +4 -0
  79. data/lib/canon/validators/base_validator.rb +5 -8
  80. data/lib/canon/validators/html_validator.rb +3 -8
  81. data/lib/canon/validators/xml_validator.rb +3 -8
  82. data/lib/canon/version.rb +1 -1
  83. data/lib/canon/xml/data_model.rb +132 -138
  84. data/lib/canon/xml/namespace_helper.rb +5 -0
  85. data/lib/canon/xml/node.rb +2 -1
  86. data/lib/canon/xml/nodes/root_node.rb +4 -0
  87. data/lib/canon/xml/nodes/text_node.rb +6 -1
  88. data/lib/canon/xml/sax_builder.rb +5 -7
  89. data/lib/canon/xml/whitespace_normalizer.rb +2 -2
  90. data/lib/canon/xml_backend.rb +49 -0
  91. data/lib/canon/xml_parsing.rb +283 -0
  92. data/lib/canon.rb +3 -1
  93. data/lib/tasks/benchmark_runner.rb +1 -1
  94. data/lib/tasks/performance_helpers.rb +1 -1
  95. metadata +9 -6
@@ -0,0 +1,283 @@
1
+ # frozen_string_literal: true
2
+
3
module Canon
  # Backend-agnostic XML parsing, serialization, and type dispatch.
  #
  # Provides a unified API that delegates to the active backend
  # (Nokogiri or moxml). Dispatch is done with runtime branching on
  # `XmlBackend.nokogiri?` rather than `case/when` with constant
  # references, so Nokogiri constants are only resolved when the Nokogiri
  # backend is active (they must never be touched under Opal, where they
  # would raise NameError).
  #
  # OCP: adding a new backend only requires updating this module.
  # DRY: all backend dispatch is centralized here, not scattered across
  # comparator/formatter files.
  #
  # Moxml nodes are accepted by every helper regardless of the active
  # backend; Nokogiri nodes are recognised by class only when the Nokogiri
  # backend is active.
  module XmlParsing
    # Ordered predicate => type pairs used for duck-typed classification.
    # The first predicate the node answers truthy to decides the type.
    MOXML_TYPE_PREDICATES = {
      element?: :element,
      text?: :text,
      comment?: :comment,
      cdata?: :cdata,
      document?: :document,
      processing_instruction?: :processing_instruction,
    }.freeze
    private_constant :MOXML_TYPE_PREDICATES

    class << self
      # Memoized Moxml context. Uses the :rexml adapter when running under
      # Opal and the :oga adapter on every other Ruby engine.
      #
      # @return [Object] the context returned by `Moxml.new`
      def moxml_context
        @moxml_context ||= Moxml.new(RUBY_ENGINE == "opal" ? :rexml : :oga)
      end

      # --- Parsing ---

      # Parses an XML string into a document using the active backend.
      #
      # @param xml_string [String] XML source
      # @param options [Hash] `:remove_namespaces` is honoured by the
      #   Nokogiri backend only
      # @return [Object] backend document
      def parse(xml_string, options = {})
        return nokogiri_parse(xml_string, options) if XmlBackend.nokogiri?

        moxml_parse(xml_string, options)
      end

      # Parses an XML fragment and returns its top-level nodes.
      #
      # The moxml path has no fragment parser in use here, so the input is
      # wrapped in a synthetic `<__frag__>` root and that root's children
      # are returned.
      #
      # @param xml_string [String] XML fragment source
      # @return [Array] top-level nodes of the fragment
      def parse_fragment(xml_string)
        if XmlBackend.nokogiri?
          Nokogiri::XML.fragment(xml_string).children.to_a
        else
          wrapped = moxml_context.parse("<__frag__>#{xml_string}</__frag__>")
          wrapped.root.children.to_a
        end
      end

      # --- Serialization ---

      # Serializes a node or document to an XML string.
      #
      # @param node [Object] backend node or document
      # @return [String]
      def serialize(node)
        XmlBackend.nokogiri? ? nokogiri_serialize(node) : moxml_serialize(node)
      end

      # --- Type checks (backend-safe) ---
      #
      # Both Nokogiri and Moxml are loaded as dependencies. XmlBackend
      # determines which is used for *parsing*, but nodes from either
      # library may flow through comparison code (e.g. tests, format
      # detection). Under the Nokogiri backend, both types are checked.

      # @return [Boolean] true when obj is an XML document
      def document?(obj)
        (XmlBackend.nokogiri? && obj.is_a?(Nokogiri::XML::Document)) ||
          obj.is_a?(Moxml::Document)
      end

      # @return [Boolean] true when obj is any XML node
      def xml_node?(obj)
        (XmlBackend.nokogiri? && obj.is_a?(Nokogiri::XML::Node)) ||
          obj.is_a?(Moxml::Node)
      end

      # @return [Boolean] true when node is an element
      def element?(node)
        (XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Element)) ||
          node.is_a?(Moxml::Element)
      end

      # @return [Boolean] true when node is a text node
      def text_node?(node)
        (XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Text)) ||
          node.is_a?(Moxml::Text)
      end

      # @return [Boolean] true when node is a comment
      def comment?(node)
        (XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::Comment)) ||
          node.is_a?(Moxml::Comment)
      end

      # @return [Boolean] true when node is a CDATA section
      def cdata?(node)
        (XmlBackend.nokogiri? && node.is_a?(Nokogiri::XML::CDATA)) ||
          node.is_a?(Moxml::Cdata)
      end

      # @return [Boolean] true when node is a processing instruction
      def processing_instruction?(node)
        (XmlBackend.nokogiri? &&
          node.is_a?(Nokogiri::XML::ProcessingInstruction)) ||
          node.is_a?(Moxml::ProcessingInstruction)
      end

      # Only the Nokogiri backend is checked for fragments; always false
      # otherwise.
      #
      # @return [Boolean]
      def document_fragment?(obj)
        XmlBackend.nokogiri? ? obj.is_a?(Nokogiri::XML::DocumentFragment) : false
      end

      # Only the Nokogiri backend is checked for DTD nodes; always false
      # otherwise.
      #
      # @return [Boolean]
      def dtd?(node)
        XmlBackend.nokogiri? ? node.is_a?(Nokogiri::XML::DTD) : false
      end

      # --- Node traversal ---
      #
      # These helpers accept the same set of nodes as the type checks
      # above, so a node that passes `element?`/`xml_node?` is never
      # silently treated as "not a node" here.

      # @param node [Object]
      # @return [Array] child nodes, or [] when node is not an XML node
      def children(node)
        xml_node?(node) ? node.children.to_a : []
      end

      # @param node [Object]
      # @return [String, nil] node name, or nil when node is not an XML node
      def name(node)
        xml_node?(node) ? node.name : nil
      end

      # Returns the textual content of a node.
      #
      # @param node [Object]
      # @return [String] node content; `node.to_s` for non-node objects
      def text_content(node)
        return node.content if nokogiri_kind?(node) { Nokogiri::XML::Node }

        case node
        when Moxml::Text, Moxml::Cdata, Moxml::Comment
          node.content.to_s
        when Moxml::Node
          node.text.to_s
        else
          node.to_s
        end
      end

      # @param node [Object]
      # @return [Array] attribute nodes of an element, [] for anything else
      def attributes(node)
        # Nokogiri exposes attributes as a name => node hash; only the
        # attribute nodes are returned.
        if nokogiri_kind?(node) { Nokogiri::XML::Element }
          return node.attributes.values
        end

        node.is_a?(Moxml::Element) ? node.attributes : []
      end

      # @param node [Object]
      # @param attr_name [String, Symbol]
      # @return [String, nil] attribute value, nil when node is not an element
      def attribute_value(node, attr_name)
        element?(node) ? node[attr_name.to_s] : nil
      end

      # @param node [Object]
      # @return [Array] namespace declarations on an element, [] otherwise
      def namespace_definitions(node)
        element?(node) ? node.namespace_definitions : []
      end

      # @param node [Object]
      # @return [Object, nil] parent node; nil for documents and non-nodes
      def parent(node)
        return nil unless xml_node?(node)
        # Document nodes have no parent
        return nil if document?(node)

        node.parent
      end

      # @param node [Object]
      # @return [String, nil] namespace URI of an element, nil otherwise
      def namespace_uri(node)
        if nokogiri_kind?(node) { Nokogiri::XML::Element }
          node.namespace&.href
        elsif node.is_a?(Moxml::Element)
          node.namespace_uri
        end
      end

      # Returns a symbol for all backends (:element, :text, :comment, etc.)
      # or nil for unrecognised nodes and non-node objects.
      #
      # @param node [Object]
      # @return [Symbol, nil]
      def node_type(node)
        if XmlBackend.nokogiri?
          # Fall back to predicate-based detection so Moxml nodes are
          # classified under the Nokogiri backend as well.
          nokogiri_node_type(node) || moxml_node_type(node)
        else
          moxml_node_type(node)
        end
      end

      # Canonicalizes a node.
      #
      # NOTE(review): on the Nokogiri backend the options Hash is passed
      # straight through as the first positional argument of
      # `Nokogiri::XML::Node#canonicalize`, whose first parameter is the
      # C14N mode — confirm this is what callers intend. On the moxml
      # backend the options are ignored and plain `to_xml` output is
      # returned.
      #
      # @param node [Object]
      # @param options [Hash]
      # @return [String]
      def canonicalize(node, options = {})
        if XmlBackend.nokogiri?
          node.canonicalize(options)
        else
          moxml_canonicalize(node, options)
        end
      end

      private

      # True when the Nokogiri backend is active and node is an instance of
      # the class produced by the block. The class is supplied lazily so
      # the Nokogiri constant is never resolved under another backend.
      def nokogiri_kind?(node)
        XmlBackend.nokogiri? && node.is_a?(yield)
      end

      # --- Nokogiri backend ---

      # Memoized lookup from Nokogiri node-type constants to symbols.
      def nokogiri_type_map
        @nokogiri_type_map ||= {
          Nokogiri::XML::Node::ELEMENT_NODE => :element,
          Nokogiri::XML::Node::TEXT_NODE => :text,
          Nokogiri::XML::Node::CDATA_SECTION_NODE => :cdata,
          Nokogiri::XML::Node::COMMENT_NODE => :comment,
          Nokogiri::XML::Node::PI_NODE => :processing_instruction,
          Nokogiri::XML::Node::DOCUMENT_NODE => :document,
          Nokogiri::XML::Node::DOCUMENT_FRAG_NODE => :document_fragment,
          Nokogiri::XML::Node::DTD_NODE => :dtd,
          Nokogiri::XML::Node::ATTRIBUTE_NODE => :attribute,
        }.freeze
      end

      # @return [Symbol, nil] nil for non-Nokogiri objects or unmapped types
      def nokogiri_node_type(node)
        return nil unless node.is_a?(Nokogiri::XML::Node)

        nokogiri_type_map[node.node_type]
      end

      def nokogiri_parse(xml_string, options)
        doc = Nokogiri::XML.parse(xml_string)
        doc = doc.remove_namespaces! if options[:remove_namespaces]
        doc
      end

      # Documents are serialized with an explicit UTF-8 encoding; other
      # nodes use the default `to_xml` output.
      def nokogiri_serialize(node)
        if node.is_a?(Nokogiri::XML::Document)
          node.to_xml(encoding: "UTF-8")
        else
          node.to_xml
        end
      end

      # --- Moxml backend ---

      # NOTE(review): options (including :remove_namespaces) are ignored
      # on this backend — confirm callers do not rely on them.
      def moxml_parse(xml_string, _options)
        moxml_context.parse(xml_string)
      end

      def moxml_serialize(node)
        node.to_xml
      end

      def moxml_canonicalize(node, _options)
        node.to_xml
      end

      # Classifies a node via its type predicates. Objects that do not
      # respond to a predicate (nil, strings, ...) are skipped rather than
      # raising, so unrecognised input yields nil.
      #
      # @return [Symbol, nil]
      def moxml_node_type(node)
        MOXML_TYPE_PREDICATES.each do |predicate, type|
          return type if node.respond_to?(predicate) && node.public_send(predicate)
        end

        nil
      end
    end
  end
end
data/lib/canon.rb CHANGED
@@ -2,6 +2,8 @@
2
2
 
3
3
  require_relative "canon/version"
4
4
  require_relative "canon/errors"
5
+ require_relative "canon/xml_backend"
6
+ require_relative "canon/xml_parsing"
5
7
  require_relative "canon/config"
6
8
  require_relative "canon/data_model"
7
9
  require_relative "canon/html"
@@ -13,7 +15,7 @@ require_relative "canon/formatters/html4_formatter"
13
15
  require_relative "canon/formatters/html5_formatter"
14
16
  require_relative "canon/comparison"
15
17
 
16
- require_relative "canon/rspec_matchers" if defined?(RSpec)
18
+ require_relative "canon/rspec_matchers" if defined?(RSpec.configure)
17
19
 
18
20
  module Canon
19
21
  SUPPORTED_FORMATS = %i[xml yaml json html html4 html5 string].freeze
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "benchmark/ips"
4
- require "table_tennis"
4
+ require "table_tennis" unless RUBY_ENGINE == "opal"
5
5
 
6
6
  # Ensure lib/ is on the load path regardless of tmp location
7
7
  lib_path = File.expand_path(File.join(__dir__, "..", "..", "lib"))
@@ -4,7 +4,7 @@ require "json"
4
4
  require "open3"
5
5
  require "tmpdir"
6
6
  require "fileutils"
7
- require "table_tennis"
7
+ require "table_tennis" unless RUBY_ENGINE == "opal"
8
8
 
9
9
  module PerformanceHelpers
10
10
  # ANSI color codes for terminal output
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: canon
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.8
4
+ version: 0.2.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-05 00:00:00.000000000 Z
11
+ date: 2026-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: diff-lcs
@@ -42,16 +42,16 @@ dependencies:
42
42
  name: moxml
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: 0.1.22
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
54
+ version: 0.1.22
55
55
  - !ruby/object:Gem::Dependency
56
56
  name: nokogiri
57
57
  requirement: !ruby/object:Gem::Requirement
@@ -148,6 +148,7 @@ extensions: []
148
148
  extra_rdoc_files: []
149
149
  files:
150
150
  - ".rspec"
151
+ - ".rspec-opal"
151
152
  - ".rubocop.yml"
152
153
  - ".rubocop_todo.yml"
153
154
  - CODE_OF_CONDUCT.md
@@ -385,6 +386,8 @@ files:
385
386
  - lib/canon/xml/whitespace_normalizer.rb
386
387
  - lib/canon/xml/xml_base_handler.rb
387
388
  - lib/canon/xml/xpath_engine.rb
389
+ - lib/canon/xml_backend.rb
390
+ - lib/canon/xml_parsing.rb
388
391
  - lib/tasks/benchmark_runner.rb
389
392
  - lib/tasks/performance.rake
390
393
  - lib/tasks/performance_comparator.rb