canon 0.2.8 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (84)
  1. checksums.yaml +4 -4
  2. data/.rspec-opal +7 -0
  3. data/.rubocop_todo.yml +14 -71
  4. data/Rakefile +17 -0
  5. data/lib/canon/cli.rb +1 -1
  6. data/lib/canon/color_detector.rb +3 -5
  7. data/lib/canon/comparison/compare_profile.rb +1 -4
  8. data/lib/canon/comparison/dimensions/attribute_order_dimension.rb +2 -6
  9. data/lib/canon/comparison/dimensions/attribute_presence_dimension.rb +2 -6
  10. data/lib/canon/comparison/dimensions/attribute_values_dimension.rb +2 -6
  11. data/lib/canon/comparison/dimensions/comments_dimension.rb +2 -6
  12. data/lib/canon/comparison/dimensions/element_position_dimension.rb +2 -6
  13. data/lib/canon/comparison/dimensions/structural_whitespace_dimension.rb +2 -6
  14. data/lib/canon/comparison/dimensions/text_content_dimension.rb +3 -5
  15. data/lib/canon/comparison/format_detector.rb +29 -20
  16. data/lib/canon/comparison/html_comparator.rb +18 -29
  17. data/lib/canon/comparison/html_compare_profile.rb +3 -10
  18. data/lib/canon/comparison/html_parser.rb +1 -1
  19. data/lib/canon/comparison/json_comparator.rb +8 -0
  20. data/lib/canon/comparison/node_inspector.rb +146 -80
  21. data/lib/canon/comparison/strategies/semantic_tree_match_strategy.rb +6 -8
  22. data/lib/canon/comparison/whitespace_sensitivity.rb +55 -193
  23. data/lib/canon/comparison/xml_comparator/attribute_filter.rb +5 -10
  24. data/lib/canon/comparison/xml_comparator/child_comparison.rb +4 -4
  25. data/lib/canon/comparison/xml_comparator/diff_node_builder.rb +10 -8
  26. data/lib/canon/comparison/xml_comparator/namespace_comparator.rb +14 -28
  27. data/lib/canon/comparison/xml_comparator/node_parser.rb +12 -11
  28. data/lib/canon/comparison/xml_comparator/node_type_comparator.rb +30 -58
  29. data/lib/canon/comparison/xml_comparator.rb +61 -83
  30. data/lib/canon/comparison/xml_node_comparison.rb +15 -15
  31. data/lib/canon/comparison/yaml_comparator.rb +8 -0
  32. data/lib/canon/comparison.rb +23 -23
  33. data/lib/canon/config/profile_loader.rb +13 -13
  34. data/lib/canon/config.rb +29 -5
  35. data/lib/canon/diff/diff_classifier.rb +7 -41
  36. data/lib/canon/diff/diff_line.rb +1 -1
  37. data/lib/canon/diff/diff_node_enricher.rb +22 -24
  38. data/lib/canon/diff/node_serializer.rb +23 -30
  39. data/lib/canon/diff/path_builder.rb +24 -37
  40. data/lib/canon/diff/source_locator.rb +0 -3
  41. data/lib/canon/diff/xml_serialization_formatter.rb +8 -81
  42. data/lib/canon/diff_formatter/by_line/base_formatter.rb +7 -7
  43. data/lib/canon/diff_formatter/by_line/json_formatter.rb +1 -1
  44. data/lib/canon/diff_formatter/by_line/simple_formatter.rb +1 -1
  45. data/lib/canon/diff_formatter/by_line/xml_formatter.rb +2 -2
  46. data/lib/canon/diff_formatter/by_line/yaml_formatter.rb +1 -1
  47. data/lib/canon/diff_formatter/by_line_formatter.rb +1 -1
  48. data/lib/canon/diff_formatter/by_object/base_formatter.rb +11 -15
  49. data/lib/canon/diff_formatter/by_object/xml_formatter.rb +8 -10
  50. data/lib/canon/diff_formatter/by_object_formatter.rb +1 -1
  51. data/lib/canon/diff_formatter/debug_output.rb +12 -24
  52. data/lib/canon/diff_formatter/diff_detail_formatter/color_helper.rb +2 -2
  53. data/lib/canon/diff_formatter/diff_detail_formatter/node_utils.rb +146 -318
  54. data/lib/canon/diff_formatter/diff_detail_formatter.rb +28 -20
  55. data/lib/canon/diff_formatter/legend.rb +2 -2
  56. data/lib/canon/diff_formatter/pretty_diff_formatter.rb +2 -2
  57. data/lib/canon/diff_formatter/theme.rb +4 -4
  58. data/lib/canon/diff_formatter.rb +2 -2
  59. data/lib/canon/formatters/html_formatter.rb +1 -1
  60. data/lib/canon/formatters/html_formatter_base.rb +1 -1
  61. data/lib/canon/formatters/xml_formatter.rb +7 -32
  62. data/lib/canon/html/data_model.rb +1 -1
  63. data/lib/canon/pretty_printer/html.rb +1 -1
  64. data/lib/canon/pretty_printer/xml.rb +16 -7
  65. data/lib/canon/pretty_printer/xml_normalized.rb +9 -3
  66. data/lib/canon/rspec_matchers.rb +2 -2
  67. data/lib/canon/tree_diff/adapters/html_adapter.rb +1 -1
  68. data/lib/canon/tree_diff/adapters/xml_adapter.rb +1 -1
  69. data/lib/canon/tree_diff/core/tree_node.rb +1 -3
  70. data/lib/canon/validators/html_validator.rb +1 -1
  71. data/lib/canon/validators/xml_validator.rb +1 -1
  72. data/lib/canon/version.rb +1 -1
  73. data/lib/canon/xml/data_model.rb +131 -137
  74. data/lib/canon/xml/namespace_helper.rb +5 -0
  75. data/lib/canon/xml/node.rb +2 -1
  76. data/lib/canon/xml/nodes/root_node.rb +4 -0
  77. data/lib/canon/xml/nodes/text_node.rb +6 -1
  78. data/lib/canon/xml/sax_builder.rb +4 -6
  79. data/lib/canon/xml_backend.rb +49 -0
  80. data/lib/canon/xml_parsing.rb +271 -0
  81. data/lib/canon.rb +3 -1
  82. data/lib/tasks/benchmark_runner.rb +1 -1
  83. data/lib/tasks/performance_helpers.rb +1 -1
  84. metadata +5 -2
@@ -1,8 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "nokogiri"
3
+ require "nokogiri" unless RUBY_ENGINE == "opal"
4
4
  require "set"
5
5
  require_relative "../data_model"
6
+ require_relative "../xml_backend"
7
+ require_relative "../xml_parsing"
6
8
  require_relative "nodes/root_node"
7
9
  require_relative "nodes/element_node"
8
10
  require_relative "nodes/namespace_node"
@@ -13,115 +15,59 @@ require_relative "nodes/processing_instruction_node"
13
15
 
14
16
  module Canon
15
17
  module Xml
16
- # Builds XPath data model from XML
17
18
  class DataModel < Canon::DataModel
18
- # Build XPath data model from XML string
19
- #
20
- # @param xml_string [String] XML content to parse
21
- # @param preserve_whitespace [Boolean] Whether to preserve whitespace-only text nodes
22
- # @return [Nodes::RootNode] Root of the data model tree
23
19
  def self.from_xml(xml_string, preserve_whitespace: false)
24
- # Normalize encoding before parsing
25
20
  normalized_xml = normalize_encoding(xml_string)
26
21
 
27
- # Parse with Nokogiri
28
- doc = Nokogiri::XML(normalized_xml, &:nonet)
29
-
30
- # Check for relative namespace URIs (prohibited by C14N 1.1)
31
- check_for_relative_namespace_uris(doc)
32
-
33
- # Convert to XPath data model
34
- result = build_from_nokogiri(doc,
35
- preserve_whitespace: preserve_whitespace)
36
-
37
- # Carry libxml's parse errors on the resulting tree so the diff
38
- # report can surface them (see lutaml/canon#130). libxml's
39
- # FATAL conditions (e.g. duplicate attributes) silently drop
40
- # content from the parse tree; without surfacing the error
41
- # list, downstream diffs describe the partial tree, not the
42
- # input.
43
- errors = Array(doc.errors).map(&:to_s)
44
- result.parse_errors = errors if errors.any?
45
-
46
- result
22
+ if Canon::XmlBackend.nokogiri?
23
+ from_nokogiri_xml(normalized_xml,
24
+ preserve_whitespace: preserve_whitespace)
25
+ else
26
+ from_moxml_xml(normalized_xml,
27
+ preserve_whitespace: preserve_whitespace)
28
+ end
47
29
  end
48
30
 
49
- # Normalize XML string encoding to UTF-8
50
- #
51
- # Handles cases where:
52
- # 1. The XML declaration specifies an encoding that doesn't match the actual encoding
53
- # 2. The string's internal encoding is non-UTF-8 (without a declaration)
54
- #
55
- # For case 1, we check if the declared encoding matches the actual bytes.
56
- # If bytes are valid UTF-8 despite the declaration, we update the declaration to UTF-8.
57
- #
58
- # @param xml_string [String] XML string to normalize
59
- # @return [String] Normalized XML string with UTF-8 encoding
60
31
  def self.normalize_encoding(xml_string)
61
32
  return xml_string unless xml_string.is_a?(String)
62
33
 
63
- # Extract declared encoding from XML declaration
64
34
  declared_encoding = extract_xml_encoding(xml_string)
65
35
 
66
36
  if declared_encoding
67
- # Case 1: XML has a declaration
68
37
  if declared_encoding.upcase != "UTF-8"
69
- # Check if bytes are actually valid UTF-8 despite the declaration
70
38
  utf8_reinterpreted = try_utf8_reinterpretation(xml_string)
71
39
  if utf8_reinterpreted
72
- # Bytes are valid UTF-8 - update declaration to UTF-8
73
- return update_xml_declaration(xml_string, "UTF-8")
40
+ return update_xml_declaration(xml_string,
41
+ "UTF-8")
74
42
  end
75
43
 
76
- # Bytes aren't valid UTF-8 - must really be in declared encoding
77
44
  return transcode_to_utf8(xml_string, declared_encoding)
78
45
  end
79
46
  elsif xml_string.encoding.name != "UTF-8"
80
- # Case 2: No declaration but string encoding is non-UTF-8
81
- # First, try to re-interpret bytes as UTF-8 (handles mislabeled strings)
82
47
  reinterpreted = try_utf8_reinterpretation(xml_string)
83
48
  return reinterpreted if reinterpreted
84
49
 
85
- # If re-interpretation fails, try transcoding with the labeled encoding
86
50
  return transcode_to_utf8(xml_string, xml_string.encoding.name)
87
51
  end
88
52
 
89
53
  xml_string
90
54
  end
91
55
 
92
- # Update the encoding declaration in an XML string
93
- #
94
- # @param xml_string [String] XML string
95
- # @param new_encoding [String] New encoding to declare
96
- # @return [String] XML string with updated declaration
97
56
  def self.update_xml_declaration(xml_string, new_encoding)
98
57
  xml_string.sub(/\bencoding\s*=\s*["'][^"']+["']/i) do |_match|
99
58
  %(encoding="#{new_encoding}")
100
59
  end
101
60
  end
102
61
 
103
- # Transcode string to UTF-8
104
- #
105
- # @param xml_string [String] String to transcode
106
- # @param source_encoding [String] Source encoding to interpret bytes as
107
- # @return [String] UTF-8 transcoded string
108
62
  def self.transcode_to_utf8(xml_string, source_encoding)
109
- # First, check if the bytes are actually valid UTF-8 despite the declared encoding
110
- # If so, just re-interpret as UTF-8 (common case: declaration is wrong)
111
63
  if source_encoding != "UTF-8"
112
- # Force the bytes to be interpreted as the declared encoding, then check validity
113
64
  forced = xml_string.dup.force_encoding(source_encoding)
114
65
  if forced.valid_encoding?
115
- # Now check if the same bytes are valid UTF-8
116
66
  utf8_check = xml_string.dup.force_encoding("UTF-8")
117
67
  if utf8_check.valid_encoding?
118
- # Bytes are valid UTF-8 - the declaration is likely wrong
119
- # Return the string as UTF-8 (already is)
120
68
  return xml_string.dup.force_encoding("UTF-8")
121
69
  end
122
70
 
123
- # Bytes aren't valid UTF-8, so they must really be in source_encoding
124
- # Proceed with transcoding
125
71
  return forced.encode("UTF-8", source_encoding,
126
72
  invalid: :replace,
127
73
  undef: :replace,
@@ -129,41 +75,21 @@ module Canon
129
75
  end
130
76
  end
131
77
 
132
- # Already UTF-8 or transcoding failed, return as-is
133
78
  xml_string.dup.force_encoding("UTF-8")
134
79
  rescue EncodingError
135
80
  xml_string
136
81
  end
137
82
 
138
- # Attempt to re-interpret string as UTF-8 if bytes are valid UTF-8
139
- #
140
- # This handles the case where a string was incorrectly labeled with a different
141
- # encoding (e.g., `.encode("Shift_JIS")` on a UTF-8 string) but the actual
142
- # bytes are valid UTF-8.
143
- #
144
- # @param xml_string [String] XML string to check
145
- # @return [String, nil] UTF-8 re-interpreted string, or nil if not possible
146
83
  def self.try_utf8_reinterpretation(xml_string)
147
84
  return xml_string if xml_string.encoding.name == "UTF-8"
148
85
 
149
- # Try forcing to UTF-8 and see if it's valid
150
86
  forced = xml_string.dup.force_encoding("UTF-8")
151
87
  return forced if forced.valid_encoding?
152
88
 
153
89
  nil
154
90
  end
155
91
 
156
- # Extract encoding from XML declaration
157
- #
158
- # @param xml_string [String] XML string
159
- # @return [String, nil] Declared encoding or nil if not found
160
92
  def self.extract_xml_encoding(xml_string)
161
- # Match XML declaration with encoding attribute
162
- # Handles: <?xml version="1.0" encoding="UTF-8"?>
163
- # and: <?xml version='1.0' encoding='UTF-8'?>
164
- #
165
- # Use binary encoding to avoid encoding compatibility issues
166
- # when the string has non-ASCII compatible encoding (e.g., UTF-16)
167
93
  binary_string = xml_string.dup.force_encoding("BINARY")
168
94
  if binary_string =~ /\A\s*<\?xml[^>]*\bencoding\s*=\s*["']([^"']+)["'][^>]*\?>/i
169
95
  return Regexp.last_match(1)
@@ -172,31 +98,36 @@ module Canon
172
98
  nil
173
99
  end
174
100
 
175
- # Alias for compatibility with base class interface
176
101
  def self.parse(xml_string)
177
102
  from_xml(xml_string)
178
103
  end
179
104
 
180
- # Serialize XML node to string
181
- #
182
- # @param node [Nodes::RootNode, Nodes::ElementNode] Node to serialize
183
- # @return [String] Serialized XML string
184
105
  def self.serialize(node)
185
- # Implementation will delegate to existing XML serialization
186
- # This is a placeholder for the base class interface
187
106
  node.to_s
188
107
  end
189
108
 
190
- # Check for relative namespace URIs (prohibited by C14N 1.1)
191
- # rubocop:disable Metrics/MethodLength
109
+ def self.relative_uri?(uri)
110
+ uri !~ %r{^[a-zA-Z][a-zA-Z0-9+.-]*:}
111
+ end
112
+
113
+ # --- Nokogiri path ---
114
+
115
+ def self.from_nokogiri_xml(xml_string, preserve_whitespace:)
116
+ doc = Nokogiri::XML(xml_string, &:nonet)
117
+ check_for_relative_namespace_uris(doc)
118
+ result = build_from_nokogiri(doc,
119
+ preserve_whitespace: preserve_whitespace)
120
+ errors = Array(doc.errors).map(&:to_s)
121
+ result.parse_errors = errors if errors.any?
122
+ result
123
+ end
124
+
192
125
  def self.check_for_relative_namespace_uris(doc)
193
126
  doc.traverse do |node|
194
127
  next unless node.is_a?(Nokogiri::XML::Element)
195
128
 
196
129
  node.namespace_definitions.each do |ns|
197
130
  next if ns.href.nil? || ns.href.empty?
198
-
199
- # Check if URI is relative
200
131
  if relative_uri?(ns.href)
201
132
  raise Canon::Error,
202
133
  "Relative namespace URI not allowed: #{ns.href}"
@@ -205,23 +136,12 @@ module Canon
205
136
  end
206
137
  end
207
138
 
208
- # Check if a URI is relative
209
- def self.relative_uri?(uri)
210
- # A URI is relative if it doesn't have a scheme
211
- uri !~ %r{^[a-zA-Z][a-zA-Z0-9+.-]*:}
212
- end
213
-
214
- # Build XPath data model from Nokogiri document or fragment
215
- # rubocop:disable Metrics/MethodLength
216
139
  def self.build_from_nokogiri(nokogiri_doc, preserve_whitespace: false)
217
140
  root = Nodes::RootNode.new
218
141
 
219
142
  if nokogiri_doc.respond_to?(:root) && nokogiri_doc.root
220
- # For Documents (XML, HTML4, HTML5, Moxml): process the root element
221
143
  root.add_child(build_element_node(nokogiri_doc.root,
222
144
  preserve_whitespace: preserve_whitespace))
223
-
224
- # Process PIs and comments outside doc element
225
145
  nokogiri_doc.children.each do |child|
226
146
  next if child == nokogiri_doc.root
227
147
  next if child.is_a?(Nokogiri::XML::DTD)
@@ -231,8 +151,6 @@ module Canon
231
151
  root.add_child(node) if node
232
152
  end
233
153
  else
234
- # For DocumentFragments: process all children directly
235
- # Fragments don't have a single .root, they contain multiple top-level nodes
236
154
  nokogiri_doc.children.each do |child|
237
155
  next if child.is_a?(Nokogiri::XML::DTD)
238
156
 
@@ -245,7 +163,6 @@ module Canon
245
163
  root
246
164
  end
247
165
 
248
- # Build node from Nokogiri node
249
166
  def self.build_node_from_nokogiri(nokogiri_node,
250
167
  preserve_whitespace: false)
251
168
  case nokogiri_node
@@ -262,8 +179,6 @@ preserve_whitespace: false)
262
179
  end
263
180
  end
264
181
 
265
- # Build element node from Nokogiri element
266
- # rubocop:disable Metrics/MethodLength
267
182
  def self.build_element_node(nokogiri_element, preserve_whitespace: false)
268
183
  element = Nodes::ElementNode.new(
269
184
  name: nokogiri_element.name,
@@ -271,13 +186,9 @@ preserve_whitespace: false)
271
186
  prefix: nokogiri_element.namespace&.prefix,
272
187
  )
273
188
 
274
- # Build namespace nodes (includes inherited namespaces)
275
189
  build_namespace_nodes(nokogiri_element, element)
276
-
277
- # Build attribute nodes
278
190
  build_attribute_nodes(nokogiri_element, element)
279
191
 
280
- # Build child nodes
281
192
  nokogiri_element.children.each do |child|
282
193
  node = build_node_from_nokogiri(child,
283
194
  preserve_whitespace: preserve_whitespace)
@@ -287,9 +198,7 @@ preserve_whitespace: false)
287
198
  element
288
199
  end
289
200
 
290
- # Build namespace nodes for an element
291
201
  def self.build_namespace_nodes(nokogiri_element, element)
292
- # Collect all in-scope namespaces
293
202
  namespaces = collect_in_scope_namespaces(nokogiri_element)
294
203
 
295
204
  namespaces.each do |prefix, uri|
@@ -301,18 +210,14 @@ preserve_whitespace: false)
301
210
  end
302
211
  end
303
212
 
304
- # Collect all in-scope namespaces for an element
305
- # rubocop:disable Metrics/MethodLength
306
213
  def self.collect_in_scope_namespaces(nokogiri_element)
307
214
  namespaces = {}
308
215
 
309
- # Walk up the tree to collect all namespace declarations
310
216
  current = nokogiri_element
311
217
  while current && !current.is_a?(Nokogiri::XML::Document)
312
218
  if current.is_a?(Nokogiri::XML::Element)
313
219
  current.namespace_definitions.each do |ns|
314
220
  prefix = ns.prefix || ""
315
- # Only add if not already defined (child overrides parent)
316
221
  unless namespaces.key?(prefix)
317
222
  namespaces[prefix] = ns.href
318
223
  end
@@ -321,13 +226,11 @@ preserve_whitespace: false)
321
226
  current = current.parent
322
227
  end
323
228
 
324
- # Always include xml namespace
325
229
  namespaces["xml"] ||= "http://www.w3.org/XML/1998/namespace"
326
230
 
327
231
  namespaces
328
232
  end
329
233
 
330
- # Build attribute nodes for an element
331
234
  def self.build_attribute_nodes(nokogiri_element, element)
332
235
  nokogiri_element.attributes.each_value do |attr|
333
236
  attr_node = Nodes::AttributeNode.new(
@@ -340,39 +243,130 @@ preserve_whitespace: false)
340
243
  end
341
244
  end
342
245
 
343
- # Build text node from Nokogiri text node
344
246
  def self.build_text_node(nokogiri_text, preserve_whitespace: false)
345
- # XML text nodes: preserve all content including whitespace
346
- # Unlike HTML, XML treats all whitespace as significant
347
247
  content = nokogiri_text.content
348
248
 
349
- # Skip empty text nodes between elements (common formatting whitespace)
350
- # UNLESS preserve_whitespace is true (for structural_whitespace: :strict)
351
249
  if !preserve_whitespace && content.strip.empty? && nokogiri_text.parent.is_a?(Nokogiri::XML::Element)
352
250
  return nil
353
251
  end
354
252
 
355
- # Capture original text with entity references preserved.
356
- # nokogiri_text.to_xml returns the serialized text node which preserves
357
- # entity forms like &#x201C; instead of the decoded character U+201C.
358
253
  original = nokogiri_text.to_xml
359
-
360
- # Nokogiri already handles CDATA conversion and entity resolution
361
254
  Nodes::TextNode.new(value: content, original: original)
362
255
  end
363
256
 
364
- # Build comment node from Nokogiri comment
365
257
  def self.build_comment_node(nokogiri_comment)
366
258
  Nodes::CommentNode.new(value: nokogiri_comment.content)
367
259
  end
368
260
 
369
- # Build PI node from Nokogiri PI
370
261
  def self.build_pi_node(nokogiri_pi)
371
262
  Nodes::ProcessingInstructionNode.new(
372
263
  target: nokogiri_pi.name,
373
264
  data: nokogiri_pi.content,
374
265
  )
375
266
  end
267
+
268
+ # --- Moxml path ---
269
+
270
+ def self.from_moxml_xml(xml_string, preserve_whitespace:)
271
+ doc = Canon::XmlParsing.parse(xml_string)
272
+ build_from_moxml(doc, preserve_whitespace: preserve_whitespace)
273
+ end
274
+
275
+ def self.build_from_moxml(moxml_doc, preserve_whitespace: false)
276
+ root = Nodes::RootNode.new
277
+
278
+ if moxml_doc.respond_to?(:root) && moxml_doc.root
279
+ root.add_child(build_moxml_element_node(moxml_doc.root,
280
+ preserve_whitespace: preserve_whitespace))
281
+ end
282
+
283
+ root
284
+ end
285
+
286
+ def self.build_moxml_node(node, preserve_whitespace: false)
287
+ case node
288
+ when Moxml::Element
289
+ build_moxml_element_node(node,
290
+ preserve_whitespace: preserve_whitespace)
291
+ when Moxml::Text
292
+ build_moxml_text_node(node, preserve_whitespace: preserve_whitespace)
293
+ when Moxml::Comment
294
+ build_moxml_comment_node(node)
295
+ when Moxml::ProcessingInstruction
296
+ build_moxml_pi_node(node)
297
+ end
298
+ end
299
+
300
+ def self.build_moxml_element_node(moxml_element,
301
+ preserve_whitespace: false)
302
+ ns = moxml_element.namespace
303
+ element = Nodes::ElementNode.new(
304
+ name: moxml_element.name,
305
+ namespace_uri: ns&.uri,
306
+ prefix: ns&.prefix,
307
+ )
308
+
309
+ build_moxml_namespace_nodes(moxml_element, element)
310
+ build_moxml_attribute_nodes(moxml_element, element)
311
+
312
+ moxml_element.children.each do |child|
313
+ node = build_moxml_node(child,
314
+ preserve_whitespace: preserve_whitespace)
315
+ element.add_child(node) if node
316
+ end
317
+
318
+ element
319
+ end
320
+
321
+ def self.build_moxml_namespace_nodes(moxml_element, element)
322
+ moxml_element.namespace_definitions.each do |ns|
323
+ ns_node = Nodes::NamespaceNode.new(
324
+ prefix: ns.prefix || "",
325
+ uri: ns.uri,
326
+ )
327
+ element.add_namespace(ns_node)
328
+ end
329
+
330
+ unless element.namespaces.any? do |n|
331
+ n.prefix == "xml"
332
+ end
333
+ element.add_namespace(Nodes::NamespaceNode.new(
334
+ prefix: "xml",
335
+ uri: "http://www.w3.org/XML/1998/namespace",
336
+ ))
337
+ end
338
+ end
339
+
340
+ def self.build_moxml_attribute_nodes(moxml_element, element)
341
+ moxml_element.attributes.each do |attr|
342
+ attr_node = Nodes::AttributeNode.new(
343
+ name: attr.name,
344
+ value: attr.value,
345
+ )
346
+ element.add_attribute(attr_node)
347
+ end
348
+ end
349
+
350
+ def self.build_moxml_text_node(moxml_text, preserve_whitespace: false)
351
+ content = moxml_text.text
352
+
353
+ if !preserve_whitespace && content.strip.empty? && moxml_text.parent.is_a?(Moxml::Element)
354
+ return nil
355
+ end
356
+
357
+ Nodes::TextNode.new(value: content, original: content)
358
+ end
359
+
360
+ def self.build_moxml_comment_node(moxml_comment)
361
+ Nodes::CommentNode.new(value: moxml_comment.text)
362
+ end
363
+
364
+ def self.build_moxml_pi_node(moxml_pi)
365
+ Nodes::ProcessingInstructionNode.new(
366
+ target: moxml_pi.target,
367
+ data: moxml_pi.data,
368
+ )
369
+ end
376
370
  end
377
371
  end
378
372
  end
@@ -104,6 +104,11 @@ module Canon
104
104
  namespace_uri.to_s
105
105
  end
106
106
 
107
+ # Check if an attribute name is a namespace declaration (xmlns or xmlns:*)
108
+ def self.namespace_declaration?(attr_name)
109
+ attr_name == "xmlns" || attr_name.start_with?("xmlns:")
110
+ end
111
+
107
112
  private_class_method :normalize_namespace
108
113
  end
109
114
  end
@@ -9,6 +9,7 @@ module Canon
9
9
  def initialize
10
10
  @parent = nil
11
11
  @children = []
12
+ @in_node_set = true
12
13
  end
13
14
 
14
15
  def add_child(child)
@@ -17,7 +18,7 @@ module Canon
17
18
  end
18
19
 
19
20
  def in_node_set?
20
- instance_variable_defined?(:@in_node_set) ? @in_node_set : true
21
+ @in_node_set
21
22
  end
22
23
 
23
24
  def in_node_set=(value)
@@ -14,6 +14,10 @@ module Canon
14
14
  def node_type
15
15
  :root
16
16
  end
17
+
18
+ def children=(new_children)
19
+ @children = new_children
20
+ end
17
21
  end
18
22
  end
19
23
  end
@@ -10,7 +10,12 @@ module Canon
10
10
  # Stores both the decoded text value and the original text (with entity
11
11
  # references preserved) to enable accurate round-trip serialization.
12
12
  class TextNode < Node
13
- attr_reader :value, :original
13
+ attr_accessor :value
14
+ attr_reader :original
15
+
16
+ def original=(value)
17
+ @original = value
18
+ end
14
19
 
15
20
  # @param value [String] Decoded text content (entity references resolved)
16
21
  # @param original [String, nil] Original text as it appeared in source XML,
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "nokogiri"
3
+ require "nokogiri" unless RUBY_ENGINE == "opal"
4
4
  require_relative "nodes/root_node"
5
5
  require_relative "nodes/element_node"
6
6
  require_relative "nodes/namespace_node"
@@ -190,10 +190,8 @@ strip_doctype: false)
190
190
  last_child = parent.children.last
191
191
  if last_child&.node_type == :text
192
192
  # Combine both raw and decoded forms
193
- last_child.instance_variable_set(:@value,
194
- last_child.value + decoded_string)
195
- last_child.instance_variable_set(:@original,
196
- (last_child.original || "") + raw_string)
193
+ last_child.value = last_child.value + decoded_string
194
+ last_child.original = (last_child.original || "") + raw_string
197
195
  return
198
196
  end
199
197
 
@@ -257,7 +255,7 @@ strip_doctype: false)
257
255
  return unless doc_element
258
256
 
259
257
  other_children = root.children.reject { |c| c.node_type == :element }
260
- root.instance_variable_set(:@children, [doc_element] + other_children)
258
+ root.children = [doc_element] + other_children
261
259
  end
262
260
 
263
261
  private
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Canon
4
+ # Centralized XML backend detection for Canon.
5
+ #
6
+ # Canon supports two XML backends:
7
+ # - :nokogiri — MRI with Nokogiri installed (default, existing code path)
8
+ # - :moxml — Opal runtime or MRI without Nokogiri (uses Oga via moxml)
9
+ #
10
+ # The active backend is determined once at load time and cached.
11
+ # All XML-related code should check `Canon::XmlBackend.moxml?` or
12
+ # `Canon::XmlBackend.nokogiri?` to select the appropriate code path.
13
+ #
14
+ # This module intentionally does NOT wrap Nokogiri through moxml.
15
+ # Each backend path is independent — the Nokogiri path is the existing
16
+ # battle-tested code; the moxml path is a parallel implementation for
17
+ # environments where Nokogiri is unavailable.
18
+ module XmlBackend
19
+ class << self
20
+ def active
21
+ @active ||= detect
22
+ end
23
+
24
+ def nokogiri?
25
+ active == :nokogiri
26
+ end
27
+
28
+ def moxml?
29
+ active == :moxml
30
+ end
31
+
32
+ def reset!
33
+ @active = nil
34
+ end
35
+
36
+ private
37
+
38
+ def detect
39
+ if RUBY_ENGINE == "opal"
40
+ :moxml
41
+ elsif defined?(Nokogiri)
42
+ :nokogiri
43
+ else
44
+ :moxml
45
+ end
46
+ end
47
+ end
48
+ end
49
+ end