moxml 0.1.16 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -0
  3. data/.rubocop_todo.yml +49 -133
  4. data/README.adoc +18 -0
  5. data/lib/moxml/adapter/base.rb +65 -8
  6. data/lib/moxml/adapter/headed_ox.rb +2 -1
  7. data/lib/moxml/adapter/libxml.rb +16 -3
  8. data/lib/moxml/adapter/nokogiri.rb +14 -4
  9. data/lib/moxml/adapter/oga.rb +26 -87
  10. data/lib/moxml/adapter/ox.rb +69 -19
  11. data/lib/moxml/adapter/rexml.rb +24 -3
  12. data/lib/moxml/attribute.rb +6 -0
  13. data/lib/moxml/element.rb +12 -8
  14. data/lib/moxml/node.rb +4 -1
  15. data/lib/moxml/text.rb +6 -0
  16. data/lib/moxml/version.rb +1 -1
  17. data/lib/moxml/xpath/compiler.rb +40 -21
  18. data/lib/moxml/xpath/parser.rb +12 -7
  19. data/spec/integration/all_adapters_spec.rb +1 -0
  20. data/spec/integration/shared_examples/edge_cases.rb +0 -6
  21. data/spec/integration/shared_examples/entity_reference_whitespace.rb +122 -0
  22. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
  23. data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
  24. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
  25. data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
  26. data/spec/moxml/builder_spec.rb +16 -1
  27. data/spec/moxml/entity_preservation_spec.rb +130 -0
  28. data/spec/moxml/entity_reference_spec.rb +114 -0
  29. data/spec/moxml/entity_registry_spec.rb +68 -0
  30. data/spec/moxml/xpath/axes_spec.rb +0 -1
  31. data/spec/moxml/xpath/compiler_spec.rb +0 -2
  32. metadata +6 -12
  33. data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
  34. data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
  35. data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
  36. data/TODO.remaining/4-lenient-entities-mode.md +0 -106
  37. data/TODO.remaining/5-fixture-integrity.md +0 -65
  38. data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
  39. data/TODO.remaining/7-headed-ox-limitations.md +0 -95
  40. data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
  41. data/TODO.remaining/9-cleanup-hygiene.md +0 -42
  42. data/TODO.remaining/README.md +0 -54
@@ -8,9 +8,6 @@ module Moxml
8
8
  module Adapter
9
9
  class Oga < Base
10
10
  class << self
11
- # Standard XML entities handled natively by parsers
12
- STANDARD_XML_ENTITIES = %w[amp lt gt quot apos].freeze
13
-
14
11
  def attachments
15
12
  @attachments ||= Moxml::NativeAttachment.new
16
13
  end
@@ -24,9 +21,7 @@ module Moxml
24
21
  end
25
22
 
26
23
  def parse(xml, options = {}, _context = nil)
27
- # Pre-process XML to convert named entities to marker form (\x01name;).
28
- # Oga drops named entity references like &nbsp; during parsing.
29
- processed_xml = preprocess_named_entities(xml)
24
+ processed_xml = preprocess_entities(xml)
30
25
 
31
26
  native_doc = begin
32
27
  ::Oga.parse_xml(processed_xml, strict: options[:strict])
@@ -72,12 +67,12 @@ module Moxml
72
67
  end
73
68
 
74
69
  def create_native_text(content, _owner_doc = nil)
75
- ::Oga::XML::Text.new(text: encode_entity_markers(content))
70
+ ::Oga::XML::Text.new(text: preprocess_entities(content))
76
71
  end
77
72
 
78
73
  def create_native_entity_reference(name)
79
74
  text = ::Oga::XML::Text.new
80
- text.text = "#{ENTITY_MARKER}#{name};"
75
+ text.text = "#{self::ENTITY_MARKER}#{name};"
81
76
  attachments.set(text, :entity_name, name)
82
77
  text
83
78
  end
@@ -201,10 +196,20 @@ module Moxml
201
196
  all_children + node.children.reject do |child|
202
197
  child.is_a?(::Oga::XML::Text) &&
203
198
  child.text.strip.empty? &&
204
- !(child.previous.nil? && child.next.nil?)
199
+ !(child.previous.nil? && child.next.nil?) &&
200
+ !adjacent_to_entity_reference?(child)
205
201
  end
206
202
  end
207
203
 
204
+ def adjacent_to_entity_reference?(node)
205
+ entity_ref?(node.previous) || entity_ref?(node.next)
206
+ end
207
+
208
+ def entity_ref?(node)
209
+ node.is_a?(::Oga::XML::Text) &&
210
+ attachments.get(node, :entity_name)
211
+ end
212
+
208
213
  def parent(node)
209
214
  node.parent if node.is_a?(::Oga::XML::Node)
210
215
  end
@@ -251,7 +256,7 @@ module Moxml
251
256
  attr = ::Oga::XML::Attribute.new(
252
257
  name: name.to_s,
253
258
  namespace_name: namespace_name,
254
- value: encode_entity_markers(value.to_s),
259
+ value: preprocess_entities(value.to_s),
255
260
  )
256
261
  element.add_attribute(attr)
257
262
  end
@@ -261,7 +266,7 @@ module Moxml
261
266
  end
262
267
 
263
268
  def get_attribute_value(element, name)
264
- restore_entity_markers(element[name.to_s])
269
+ element[name.to_s]
265
270
  end
266
271
 
267
272
  def remove_attribute(element, name)
@@ -330,24 +335,23 @@ module Moxml
330
335
  end
331
336
 
332
337
  def text_content(node)
333
- restore_entity_markers(node.text)
338
+ node.text
334
339
  end
335
340
 
336
341
  def inner_text(node)
337
- text = if node.is_a?(::Oga::XML::Element)
338
- node.inner_text
339
- else
340
- node.text
341
- end
342
- restore_entity_markers(text)
342
+ if node.is_a?(::Oga::XML::Element)
343
+ node.inner_text
344
+ else
345
+ node.text
346
+ end
343
347
  end
344
348
 
345
349
  def set_text_content(node, content)
346
- encoded = encode_entity_markers(content)
350
+ processed = preprocess_entities(content)
347
351
  if node.is_a?(::Oga::XML::Element)
348
- node.inner_text = encoded
352
+ node.inner_text = processed
349
353
  else
350
- node.text = encoded
354
+ node.text = processed
351
355
  end
352
356
  end
353
357
 
@@ -439,24 +443,9 @@ module Moxml
439
443
  end
440
444
 
441
445
  def serialize(node, options = {})
442
- output = serialize_without_entity_processing(node, options)
443
- # Post-process: convert entity markers back to entity references
444
- output.gsub(ENTITY_MARKER_REGEX, '&\1;')
446
+ serialize_without_entity_processing(node, options)
445
447
  end
446
448
 
447
- # Shared entity name pattern (W3C: 2-31 chars, starts with alpha)
448
- ENTITY_PATTERN = "([a-zA-Z][a-zA-Z0-9]{1,30})"
449
-
450
- # Marker character for entity preservation through Oga's parser.
451
- # U+0001 is preserved literally by Oga through parse/serialize cycle.
452
- ENTITY_MARKER = "\x01"
453
-
454
- # Regular expression for entity marker post-processing
455
- ENTITY_MARKER_REGEX = /#{ENTITY_MARKER}#{ENTITY_PATTERN};/
456
-
457
- # Simple entity-only regex with no nested quantifiers
458
- ENTITY_REF_REGEX = /&#{ENTITY_PATTERN};/
459
-
460
449
  def has_declaration?(native_doc, _wrapper)
461
450
  decl = attachments.get(native_doc, :xml_declaration)
462
451
  if decl.nil? && !attachments.key?(native_doc, :xml_declaration)
@@ -469,32 +458,6 @@ module Moxml
469
458
 
470
459
  private
471
460
 
472
- # Convert &entity; back to \x01entity; for Oga text storage.
473
- # Used when setting text content programmatically (not from parsing).
474
- def encode_entity_markers(text)
475
- return text unless text&.include?("&")
476
-
477
- text.gsub(ENTITY_REF_REGEX) do
478
- name = ::Regexp.last_match(1)
479
-
480
- next ::Regexp.last_match(0) if STANDARD_XML_ENTITIES.include?(name)
481
-
482
- codepoint = Moxml::EntityRegistry.default.codepoint_for_name(name)
483
- if codepoint
484
- "#{ENTITY_MARKER}#{name};"
485
- else
486
- ::Regexp.last_match(0)
487
- end
488
- end
489
- end
490
-
491
- # Convert \x01entity; back to &entity; for text accessors.
492
- def restore_entity_markers(text)
493
- return text unless text
494
-
495
- text.gsub(ENTITY_MARKER_REGEX, '&\1;')
496
- end
497
-
498
461
  def serialize_without_entity_processing(node, options = {})
499
462
  # Oga's XmlGenerator doesn't support options directly
500
463
  # We need to handle declaration options ourselves for Document nodes
@@ -572,30 +535,6 @@ module Moxml
572
535
  ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
573
536
  end
574
537
  end
575
-
576
- # Pre-process XML to convert named entities to marker format.
577
- # Oga drops named entity references like &nbsp; but preserves control chars.
578
- # By converting known named entities to marker form (\x01name;), we can
579
- # reconstruct them during serialization.
580
- #
581
- # @param xml [String, #to_s] The XML string to process
582
- # @return [String] The XML with known named entities converted to marker form
583
- def preprocess_named_entities(xml)
584
- return xml unless xml.is_a?(String)
585
-
586
- xml.gsub(ENTITY_REF_REGEX) do
587
- name = Regexp.last_match(1)
588
-
589
- next Regexp.last_match(0) if STANDARD_XML_ENTITIES.include?(name)
590
-
591
- codepoint = Moxml::EntityRegistry.default.codepoint_for_name(name)
592
- if codepoint
593
- "#{ENTITY_MARKER}#{name};"
594
- else
595
- Regexp.last_match(0)
596
- end
597
- end
598
- end
599
538
  end
600
539
  end
601
540
 
@@ -20,8 +20,9 @@ module Moxml
20
20
  end
21
21
 
22
22
  def parse(xml, options = {}, _context = nil)
23
+ processed_xml = preprocess_entities(xml)
23
24
  native_doc = begin
24
- result = ::Ox.parse(xml)
25
+ result = ::Ox.parse(processed_xml)
25
26
 
26
27
  # result can be either Document or Element
27
28
  if result.is_a?(::Ox::Document)
@@ -543,17 +544,18 @@ module Moxml
543
544
  end
544
545
 
545
546
  def namespace_definitions(node)
546
- ([node] + ancestors(node)).reverse.each_with_object({}) do |n, namespaces|
547
- next unless n.is_a?(::Ox::Element) && n.attributes
547
+ return [] unless node.is_a?(::Ox::Element) && node.attributes
548
548
 
549
- n.attributes.each do |name, value|
550
- next unless name.to_s.start_with?("xmlns")
549
+ namespaces = {}
550
+ node.attributes.each do |name, value|
551
+ name_s = name.to_s
552
+ next unless name_s == "xmlns" || name_s.start_with?("xmlns:")
551
553
 
552
- namespaces[name] = ::Moxml::Adapter::CustomizedOx::Namespace.new(
553
- name, value, n
554
- )
555
- end
556
- end.values
554
+ namespaces[name] = ::Moxml::Adapter::CustomizedOx::Namespace.new(
555
+ name, value, node
556
+ )
557
+ end
558
+ namespaces.values
557
559
  end
558
560
 
559
561
  # Doctype accessor methods
@@ -620,17 +622,44 @@ module Moxml
620
622
  end
621
623
 
622
624
  def serialize(node, options = {})
623
- # Fast path: skip EntityReference scan for documents (most common case)
624
- if node.is_a?(::Ox::Document) &&
625
- !attachments.get(node, :has_entity_refs)
625
+ needs_custom = needs_custom_serialize?(node)
626
+
627
+ unless needs_custom
626
628
  return serialize_standard(node, options)
627
629
  end
628
630
 
629
- if tree_has_entity_references?(node)
630
- serialize_custom(node, options)
631
- else
632
- serialize_standard(node, options)
631
+ serialize_custom(node, options)
632
+ end
633
+
634
+ def needs_custom_serialize?(node)
635
+ # Fast path: single CData with ]]>
636
+ return true if node.is_a?(::Ox::CData) && node.value&.include?("]]>")
637
+
638
+ # Only documents/elements can contain entity refs or CDATA issues
639
+ return false unless node.is_a?(::Ox::Document) || node.is_a?(::Ox::Element)
640
+
641
+ # Check cached flags on documents (most common case)
642
+ if node.is_a?(::Ox::Document)
643
+ return true if attachments.get(node, :has_entity_refs)
644
+ return true if attachments.get(node, :has_cdata_end_markers)
645
+ return false if attachments.key?(node, :has_entity_refs) &&
646
+ attachments.key?(node, :has_cdata_end_markers)
633
647
  end
648
+
649
+ # Only scan tree on first call — short-circuit on first hit
650
+ has_er = tree_has_entity_references?(node)
651
+ if has_er
652
+ attachments.set(node, :has_entity_refs, true) if node.is_a?(::Ox::Document)
653
+ return true
654
+ end
655
+
656
+ has_cdata = tree_has_cdata_end_markers?(node)
657
+ if node.is_a?(::Ox::Document)
658
+ attachments.set(node, :has_entity_refs, false)
659
+ attachments.set(node, :has_cdata_end_markers, has_cdata)
660
+ end
661
+
662
+ has_cdata
634
663
  end
635
664
 
636
665
  def has_declaration?(native_doc, _wrapper)
@@ -665,7 +694,9 @@ module Moxml
665
694
  encoding: options[:encoding],
666
695
  no_empty: options[:expand_empty],
667
696
  }
668
- output + ::Ox.dump(node, ox_options)
697
+ result = output + ::Ox.dump(node, ox_options)
698
+ # Fix CDATA ]]> end markers that Ox doesn't escape
699
+ result
669
700
  end
670
701
 
671
702
  def tree_has_entity_references?(node)
@@ -685,6 +716,19 @@ module Moxml
685
716
  end
686
717
  end
687
718
 
719
+ def tree_has_cdata_end_markers?(node)
720
+ case node
721
+ when ::Ox::CData
722
+ node.value&.include?("]]>") || false
723
+ when ::Ox::Element
724
+ node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
725
+ when ::Ox::Document
726
+ node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
727
+ else
728
+ false
729
+ end
730
+ end
731
+
688
732
  def serialize_custom(node, options = {})
689
733
  output = +""
690
734
  if node.is_a?(::Ox::Document)
@@ -717,7 +761,7 @@ module Moxml
717
761
  when String then escape_xml_text(node)
718
762
  when ::Moxml::Adapter::CustomizedOx::Text then escape_xml_text(node.value)
719
763
  when ::Moxml::Adapter::CustomizedOx::EntityReference then "&#{node.name};"
720
- when ::Ox::CData then "<![CDATA[#{node.value}]]>"
764
+ when ::Ox::CData then serialize_cdata(node.value)
721
765
  when ::Ox::Comment then "<!--#{node.value}-->"
722
766
  when ::Ox::Instruct then "<?#{node.target} #{node.value || ''}?>"
723
767
  when ::Ox::DocType then "<!DOCTYPE #{node.value}>"
@@ -744,6 +788,11 @@ module Moxml
744
788
  output
745
789
  end
746
790
 
791
+ def serialize_cdata(content)
792
+ escaped = content.gsub("]]>", "]]]]><![CDATA[>")
793
+ "<![CDATA[#{escaped}]]>"
794
+ end
795
+
747
796
  def escape_xml_text(text)
748
797
  text.to_s.gsub(/[<>&]/) do |match|
749
798
  case match
@@ -765,6 +814,7 @@ module Moxml
765
814
  end
766
815
  end
767
816
 
817
+
768
818
  # Translate a subset of XPath to Ox locate() syntax
769
819
  # Supports: //element, /path/to/element, .//element, element[@attr]
770
820
  # Note: Ox locate() doesn't support namespace prefixes in the path
@@ -15,6 +15,8 @@ module Moxml
15
15
  end
16
16
 
17
17
  def parse(xml, options = {}, _context = nil)
18
+ xml = "" if xml.nil?
19
+
18
20
  # Handle frozen strings by creating a mutable copy
19
21
  processed_xml = if xml.frozen?
20
22
  xml.dup.force_encoding("UTF-8").encode("UTF-8")
@@ -22,6 +24,9 @@ module Moxml
22
24
  xml.force_encoding("UTF-8").encode("UTF-8")
23
25
  end
24
26
 
27
+ # Preprocess entities to avoid double-escaping on output
28
+ processed_xml = preprocess_entities(processed_xml)
29
+
25
30
  native_doc = begin
26
31
  ::REXML::Document.new(processed_xml)
27
32
  rescue ::REXML::ParseException => e
@@ -412,7 +417,7 @@ module Moxml
412
417
  when ::REXML::Element
413
418
  # Extract text recursively from all children to match other adapters
414
419
  extract_text_recursively(node)
415
- end
420
+ end.to_s
416
421
  end
417
422
 
418
423
  def extract_text_recursively(element)
@@ -491,9 +496,25 @@ module Moxml
491
496
  end
492
497
 
493
498
  def namespace_definitions(node)
494
- node.namespaces.map do |prefix, uri|
495
- ::REXML::Attribute.new(prefix.to_s, uri, node)
499
+ return [] unless node.is_a?(::REXML::Element)
500
+
501
+ result = []
502
+ node.attributes.each_attribute do |attr|
503
+ next unless attr.prefix == "xmlns" || (attr.name == "xmlns" && attr.prefix.to_s.empty?)
504
+
505
+ result << attr
506
+ end
507
+ result
508
+ end
509
+
510
+ def in_scope_namespaces(element)
511
+ namespaces = {}
512
+ element.namespaces.each do |prefix, uri|
513
+ key = prefix.to_s.empty? ? "xmlns" : prefix.to_s
514
+ ns = ::REXML::Attribute.new(key, uri, element)
515
+ namespaces[prefix] = ns
496
516
  end
517
+ namespaces.values
497
518
  end
498
519
 
499
520
  # Doctype accessor methods
@@ -17,6 +17,12 @@ module Moxml
17
17
  end
18
18
 
19
19
  def value
20
+ val = @native.value.to_s
21
+ adapter.restore_entities(val)
22
+ end
23
+
24
+ # Returns raw native value without entity marker restoration.
25
+ def raw_value
20
26
  @native.value
21
27
  end
22
28
 
data/lib/moxml/element.rb CHANGED
@@ -46,7 +46,8 @@ module Moxml
46
46
  end
47
47
 
48
48
  def [](name)
49
- adapter.get_attribute_value(@native, name)
49
+ val = adapter.get_attribute_value(@native, name)
50
+ val ? adapter.restore_entities(val) : val
50
51
  end
51
52
 
52
53
  def attribute(name)
@@ -54,12 +55,7 @@ module Moxml
54
55
  native_attr && Attribute.new(native_attr, context)
55
56
  end
56
57
 
57
- # Alias for attribute access
58
- def get(attr_name)
59
- attribute(attr_name)
60
- end
61
-
62
- # Alias for getting attribute value (used by XPath engine)
58
+ # Returns attribute value by name (used by XPath engine)
63
59
  def get(attr_name)
64
60
  self[attr_name]
65
61
  end
@@ -137,7 +133,8 @@ module Moxml
137
133
  end
138
134
 
139
135
  def text
140
- adapter.text_content(@native)
136
+ val = adapter.text_content(@native)
137
+ adapter.restore_entities(val)
141
138
  end
142
139
 
143
140
  def text=(content)
@@ -146,6 +143,13 @@ module Moxml
146
143
  end
147
144
 
148
145
  def inner_text
146
+ text = raw_inner_text
147
+ adapter.restore_entities(text)
148
+ end
149
+
150
+ # Returns inner text without entity marker restoration.
151
+ # Used internally when raw content with markers is needed (e.g., for DOM construction).
152
+ def raw_inner_text
149
153
  adapter.inner_text(@native)
150
154
  end
151
155
 
data/lib/moxml/node.rb CHANGED
@@ -97,7 +97,10 @@ module Moxml
97
97
  serialize_options = default_options.merge(options)
98
98
  serialize_options[:no_declaration] = !should_include_declaration?(options)
99
99
 
100
- adapter.serialize(@native, serialize_options)
100
+ result = adapter.serialize(@native, serialize_options)
101
+
102
+ # Restore entity markers to named entity references
103
+ adapter.restore_entities(result)
101
104
  end
102
105
 
103
106
  def xpath(expression, namespaces = {})
data/lib/moxml/text.rb CHANGED
@@ -3,6 +3,12 @@
3
3
  module Moxml
4
4
  class Text < Node
5
5
  def content
6
+ text = raw_content
7
+ adapter.restore_entities(text)
8
+ end
9
+
10
+ # Returns raw content without entity marker restoration.
11
+ def raw_content
6
12
  adapter.text_content(@native)
7
13
  end
8
14
 
data/lib/moxml/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- VERSION = "0.1.16"
4
+ VERSION = "0.1.17"
5
5
  end
@@ -388,30 +388,38 @@ module Moxml
388
388
  document_or_node(input).if_true do
389
389
  # Create a proper if-else structure that prevents double traversal
390
390
  input.is_a?(doc_class).if_true do
391
- # DOCUMENT PATH: test root, then traverse from root
392
- root = unique_literal(:root)
393
- root.assign(input.root).followed_by do
394
- root.if_true do
395
- # Test root first
396
- condition = process(ast, root)
397
- (if block_given?
398
- condition.if_true { yield root }
399
- else
400
- condition.if_true { root }
401
- end)
402
- .followed_by do
403
- # Traverse descendants FROM root only (not document.each_node)
404
- root.each_node.add_block(node) do
405
- desc_condition = process(ast, node)
406
- if block_given?
407
- desc_condition.if_true { yield node }
408
- else
409
- desc_condition.if_true { node }
391
+ # DOCUMENT PATH: test document (self), then root, then traverse
392
+ doc_condition = process(ast, input)
393
+ (if block_given?
394
+ doc_condition.if_true { yield input }
395
+ else
396
+ doc_condition.if_true { input }
397
+ end)
398
+ .followed_by do
399
+ root = unique_literal(:root)
400
+ root.assign(input.root).followed_by do
401
+ root.if_true do
402
+ # Test root
403
+ condition = process(ast, root)
404
+ (if block_given?
405
+ condition.if_true { yield root }
406
+ else
407
+ condition.if_true { root }
408
+ end)
409
+ .followed_by do
410
+ # Traverse descendants FROM root only (not document.each_node)
411
+ root.each_node.add_block(node) do
412
+ desc_condition = process(ast, node)
413
+ if block_given?
414
+ desc_condition.if_true { yield node }
415
+ else
416
+ desc_condition.if_true { node }
417
+ end
418
+ end
410
419
  end
411
- end
412
420
  end
421
+ end
413
422
  end
414
- end
415
423
  end.else do
416
424
  # NON-DOCUMENT PATH: test self, then traverse from self
417
425
  condition = process(ast, input)
@@ -497,6 +505,17 @@ module Moxml
497
505
  element_or_attribute(input)
498
506
  end
499
507
 
508
+ # Handle node type test (node(), text(), comment(), etc.)
509
+ # node() matches any node — always returns truthy
510
+ def on_node_type(ast, input)
511
+ case ast.value
512
+ when "node"
513
+ # node() matches everything — use a truthy literal
514
+ Ruby::Node.new(:lit, ["true"])
515
+ else element_or_attribute(input)
516
+ end
517
+ end
518
+
500
519
  # Match element/attribute names and namespaces
501
520
  def match_name_and_namespace(ast, input)
502
521
  ns = ast.value[:namespace]
@@ -311,10 +311,10 @@ module Moxml
311
311
  return AST::Node.absolute_path(*steps.children)
312
312
  elsif match?(:dslash)
313
313
  advance
314
- # Descendant-or-self: //
314
+ # Descendant-or-self: // (expands to /descendant-or-self::node()/)
315
315
  steps = parse_relative_path
316
316
  return AST::Node.absolute_path(
317
- AST::Node.axis("descendant-or-self", AST::Node.wildcard),
317
+ AST::Node.axis("descendant-or-self", AST::Node.node_type("node")),
318
318
  *steps.children,
319
319
  )
320
320
  end
@@ -330,9 +330,9 @@ module Moxml
330
330
  while match?(:slash) && !at_end?
331
331
  advance
332
332
  if match?(:slash)
333
- # Double slash within path
333
+ # Double slash within path: expands to descendant-or-self::node()
334
334
  advance
335
- steps << AST::Node.axis("descendant-or-self", AST::Node.wildcard)
335
+ steps << AST::Node.axis("descendant-or-self", AST::Node.node_type("node"))
336
336
  end
337
337
  steps << parse_step unless at_end? || match?(:pipe, :rbracket,
338
338
  :rparen, :comma)
@@ -352,9 +352,14 @@ module Moxml
352
352
  return AST::Node.parent
353
353
  elsif match?(:at)
354
354
  advance
355
- # Attribute: @name
356
- name = consume(:name, "Expected attribute name after @")
357
- node_test = AST::Node.test(nil, name[1])
355
+ # Attribute: @name or @*
356
+ if match?(:star)
357
+ advance
358
+ node_test = AST::Node.wildcard
359
+ else
360
+ name = consume(:name, "Expected attribute name after @")
361
+ node_test = AST::Node.test(nil, name[1])
362
+ end
358
363
  step = AST::Node.axis("attribute", node_test)
359
364
  return parse_predicates(step)
360
365
  end
@@ -28,6 +28,7 @@ RSpec.describe "Cross-adapter integration" do
28
28
  "XPath Examples",
29
29
  "Memory Usage Examples",
30
30
  "Thread Safety Examples",
31
+ "Entity Reference Whitespace Preservation",
31
32
  "Performance Examples",
32
33
  ]
33
34
 
@@ -32,12 +32,6 @@ RSpec.shared_examples "Moxml Edge Cases" do
32
32
 
33
33
  describe "malformed content handling" do
34
34
  it "handles CDATA with nested markers" do
35
- if context.config.adapter_name == :ox
36
- pending "Ox doesn't escape the end token"
37
- end
38
- if context.config.adapter_name == :headed_ox
39
- skip "HeadedOx limitation: Ox doesn't escape CDATA end markers. See docs/_pages/headed-ox-limitations.adoc"
40
- end
41
35
  cdata_text = "]]>]]>]]>"
42
36
  doc = context.create_document
43
37
  cdata = doc.create_cdata(cdata_text)