moxml 0.1.16 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.gitignore +6 -0
  3. data/.rubocop_todo.yml +49 -133
  4. data/README.adoc +18 -0
  5. data/lib/moxml/adapter/base.rb +65 -8
  6. data/lib/moxml/adapter/headed_ox.rb +2 -1
  7. data/lib/moxml/adapter/libxml.rb +16 -6
  8. data/lib/moxml/adapter/nokogiri.rb +13 -7
  9. data/lib/moxml/adapter/oga.rb +35 -90
  10. data/lib/moxml/adapter/ox.rb +69 -19
  11. data/lib/moxml/adapter/rexml.rb +26 -9
  12. data/lib/moxml/attribute.rb +6 -0
  13. data/lib/moxml/config.rb +17 -2
  14. data/lib/moxml/element.rb +12 -8
  15. data/lib/moxml/node.rb +4 -1
  16. data/lib/moxml/text.rb +6 -0
  17. data/lib/moxml/version.rb +1 -1
  18. data/lib/moxml/xpath/compiler.rb +40 -21
  19. data/lib/moxml/xpath/parser.rb +12 -7
  20. data/spec/integration/all_adapters_spec.rb +1 -0
  21. data/spec/integration/shared_examples/edge_cases.rb +85 -6
  22. data/spec/integration/shared_examples/entity_reference_whitespace.rb +124 -0
  23. data/spec/integration/shared_examples/high_level/document_builder_behavior.rb +8 -6
  24. data/spec/integration/shared_examples/integration_workflows.rb +1 -1
  25. data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
  26. data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
  27. data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
  28. data/spec/integration/shared_examples/node_wrappers/node_set_behavior.rb +3 -1
  29. data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
  30. data/spec/moxml/builder_spec.rb +16 -1
  31. data/spec/moxml/entity_preservation_spec.rb +130 -0
  32. data/spec/moxml/entity_reference_spec.rb +114 -0
  33. data/spec/moxml/entity_registry_spec.rb +68 -0
  34. data/spec/moxml/moxml_spec.rb +39 -0
  35. data/spec/moxml/xpath/axes_spec.rb +0 -1
  36. data/spec/moxml/xpath/compiler_spec.rb +0 -2
  37. data/spec/performance/benchmark_spec.rb +1 -1
  38. metadata +6 -12
  39. data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
  40. data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
  41. data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
  42. data/TODO.remaining/4-lenient-entities-mode.md +0 -106
  43. data/TODO.remaining/5-fixture-integrity.md +0 -65
  44. data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
  45. data/TODO.remaining/7-headed-ox-limitations.md +0 -95
  46. data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
  47. data/TODO.remaining/9-cleanup-hygiene.md +0 -42
  48. data/TODO.remaining/README.md +0 -54
data/lib/moxml/adapter/oga.rb CHANGED
@@ -8,9 +8,6 @@ module Moxml
8
8
  module Adapter
9
9
  class Oga < Base
10
10
  class << self
11
- # Standard XML entities handled natively by parsers
12
- STANDARD_XML_ENTITIES = %w[amp lt gt quot apos].freeze
13
-
14
11
  def attachments
15
12
  @attachments ||= Moxml::NativeAttachment.new
16
13
  end
@@ -24,9 +21,7 @@ module Moxml
24
21
  end
25
22
 
26
23
  def parse(xml, options = {}, _context = nil)
27
- # Pre-process XML to convert named entities to marker form (\x01name;).
28
- # Oga drops named entity references like &nbsp; during parsing.
29
- processed_xml = preprocess_named_entities(xml)
24
+ processed_xml = preprocess_entities(xml)
30
25
 
31
26
  native_doc = begin
32
27
  ::Oga.parse_xml(processed_xml, strict: options[:strict])
@@ -72,12 +67,12 @@ module Moxml
72
67
  end
73
68
 
74
69
  def create_native_text(content, _owner_doc = nil)
75
- ::Oga::XML::Text.new(text: encode_entity_markers(content))
70
+ ::Oga::XML::Text.new(text: preprocess_entities(content))
76
71
  end
77
72
 
78
73
  def create_native_entity_reference(name)
79
74
  text = ::Oga::XML::Text.new
80
- text.text = "#{ENTITY_MARKER}#{name};"
75
+ text.text = "#{self::ENTITY_MARKER}#{name};"
81
76
  attachments.set(text, :entity_name, name)
82
77
  text
83
78
  end
@@ -198,11 +193,27 @@ module Moxml
198
193
 
199
194
  return all_children unless node.is_a?(::Oga::XML::Node) || node.is_a?(::Oga::XML::Document)
200
195
 
201
- all_children + node.children.reject do |child|
202
- child.is_a?(::Oga::XML::Text) &&
203
- child.text.strip.empty? &&
204
- !(child.previous.nil? && child.next.nil?)
196
+ child_nodes = node.children.to_a
197
+ # Filter out whitespace-only text nodes at document level only.
198
+ # Document-level whitespace (between <?xml?> and <root>) is
199
+ # formatting, not content, and differs across adapters.
200
+ # Whitespace inside elements (e.g. "FigureA.1" spacing) is
201
+ # meaningful and must be preserved.
202
+ if node.is_a?(::Oga::XML::Document)
203
+ child_nodes = child_nodes.reject do |child|
204
+ child.is_a?(::Oga::XML::Text) && child.text.strip.empty?
205
+ end
205
206
  end
207
+ all_children + child_nodes
208
+ end
209
+
210
+ def adjacent_to_entity_reference?(node)
211
+ entity_ref?(node.previous) || entity_ref?(node.next)
212
+ end
213
+
214
+ def entity_ref?(node)
215
+ node.is_a?(::Oga::XML::Text) &&
216
+ attachments.get(node, :entity_name)
206
217
  end
207
218
 
208
219
  def parent(node)
@@ -251,7 +262,7 @@ module Moxml
251
262
  attr = ::Oga::XML::Attribute.new(
252
263
  name: name.to_s,
253
264
  namespace_name: namespace_name,
254
- value: encode_entity_markers(value.to_s),
265
+ value: preprocess_entities(value.to_s),
255
266
  )
256
267
  element.add_attribute(attr)
257
268
  end
@@ -261,7 +272,7 @@ module Moxml
261
272
  end
262
273
 
263
274
  def get_attribute_value(element, name)
264
- restore_entity_markers(element[name.to_s])
275
+ element[name.to_s]
265
276
  end
266
277
 
267
278
  def remove_attribute(element, name)
@@ -330,24 +341,23 @@ module Moxml
330
341
  end
331
342
 
332
343
  def text_content(node)
333
- restore_entity_markers(node.text)
344
+ node.text
334
345
  end
335
346
 
336
347
  def inner_text(node)
337
- text = if node.is_a?(::Oga::XML::Element)
338
- node.inner_text
339
- else
340
- node.text
341
- end
342
- restore_entity_markers(text)
348
+ if node.is_a?(::Oga::XML::Element)
349
+ node.inner_text
350
+ else
351
+ node.text
352
+ end
343
353
  end
344
354
 
345
355
  def set_text_content(node, content)
346
- encoded = encode_entity_markers(content)
356
+ processed = preprocess_entities(content)
347
357
  if node.is_a?(::Oga::XML::Element)
348
- node.inner_text = encoded
358
+ node.inner_text = processed
349
359
  else
350
- node.text = encoded
360
+ node.text = processed
351
361
  end
352
362
  end
353
363
 
@@ -439,24 +449,9 @@ module Moxml
439
449
  end
440
450
 
441
451
  def serialize(node, options = {})
442
- output = serialize_without_entity_processing(node, options)
443
- # Post-process: convert entity markers back to entity references
444
- output.gsub(ENTITY_MARKER_REGEX, '&\1;')
452
+ serialize_without_entity_processing(node, options)
445
453
  end
446
454
 
447
- # Shared entity name pattern (W3C: 2-31 chars, starts with alpha)
448
- ENTITY_PATTERN = "([a-zA-Z][a-zA-Z0-9]{1,30})"
449
-
450
- # Marker character for entity preservation through Oga's parser.
451
- # U+0001 is preserved literally by Oga through parse/serialize cycle.
452
- ENTITY_MARKER = "\x01"
453
-
454
- # Regular expression for entity marker post-processing
455
- ENTITY_MARKER_REGEX = /#{ENTITY_MARKER}#{ENTITY_PATTERN};/
456
-
457
- # Simple entity-only regex with no nested quantifiers
458
- ENTITY_REF_REGEX = /&#{ENTITY_PATTERN};/
459
-
460
455
  def has_declaration?(native_doc, _wrapper)
461
456
  decl = attachments.get(native_doc, :xml_declaration)
462
457
  if decl.nil? && !attachments.key?(native_doc, :xml_declaration)
@@ -469,32 +464,6 @@ module Moxml
469
464
 
470
465
  private
471
466
 
472
- # Convert &entity; back to \x01entity; for Oga text storage.
473
- # Used when setting text content programmatically (not from parsing).
474
- def encode_entity_markers(text)
475
- return text unless text&.include?("&")
476
-
477
- text.gsub(ENTITY_REF_REGEX) do
478
- name = ::Regexp.last_match(1)
479
-
480
- next ::Regexp.last_match(0) if STANDARD_XML_ENTITIES.include?(name)
481
-
482
- codepoint = Moxml::EntityRegistry.default.codepoint_for_name(name)
483
- if codepoint
484
- "#{ENTITY_MARKER}#{name};"
485
- else
486
- ::Regexp.last_match(0)
487
- end
488
- end
489
- end
490
-
491
- # Convert \x01entity; back to &entity; for text accessors.
492
- def restore_entity_markers(text)
493
- return text unless text
494
-
495
- text.gsub(ENTITY_MARKER_REGEX, '&\1;')
496
- end
497
-
498
467
  def serialize_without_entity_processing(node, options = {})
499
468
  # Oga's XmlGenerator doesn't support options directly
500
469
  # We need to handle declaration options ourselves for Document nodes
@@ -572,30 +541,6 @@ module Moxml
572
541
  ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
573
542
  end
574
543
  end
575
-
576
- # Pre-process XML to convert named entities to marker format.
577
- # Oga drops named entity references like &nbsp; but preserves control chars.
578
- # By converting known named entities to marker form (\x01name;), we can
579
- # reconstruct them during serialization.
580
- #
581
- # @param xml [String, #to_s] The XML string to process
582
- # @return [String] The XML with known named entities converted to marker form
583
- def preprocess_named_entities(xml)
584
- return xml unless xml.is_a?(String)
585
-
586
- xml.gsub(ENTITY_REF_REGEX) do
587
- name = Regexp.last_match(1)
588
-
589
- next Regexp.last_match(0) if STANDARD_XML_ENTITIES.include?(name)
590
-
591
- codepoint = Moxml::EntityRegistry.default.codepoint_for_name(name)
592
- if codepoint
593
- "#{ENTITY_MARKER}#{name};"
594
- else
595
- Regexp.last_match(0)
596
- end
597
- end
598
- end
599
544
  end
600
545
  end
601
546
 
data/lib/moxml/adapter/ox.rb CHANGED
@@ -20,8 +20,9 @@ module Moxml
20
20
  end
21
21
 
22
22
  def parse(xml, options = {}, _context = nil)
23
+ processed_xml = preprocess_entities(xml)
23
24
  native_doc = begin
24
- result = ::Ox.parse(xml)
25
+ result = ::Ox.parse(processed_xml)
25
26
 
26
27
  # result can be either Document or Element
27
28
  if result.is_a?(::Ox::Document)
@@ -543,17 +544,18 @@ module Moxml
543
544
  end
544
545
 
545
546
  def namespace_definitions(node)
546
- ([node] + ancestors(node)).reverse.each_with_object({}) do |n, namespaces|
547
- next unless n.is_a?(::Ox::Element) && n.attributes
547
+ return [] unless node.is_a?(::Ox::Element) && node.attributes
548
548
 
549
- n.attributes.each do |name, value|
550
- next unless name.to_s.start_with?("xmlns")
549
+ namespaces = {}
550
+ node.attributes.each do |name, value|
551
+ name_s = name.to_s
552
+ next unless name_s == "xmlns" || name_s.start_with?("xmlns:")
551
553
 
552
- namespaces[name] = ::Moxml::Adapter::CustomizedOx::Namespace.new(
553
- name, value, n
554
- )
555
- end
556
- end.values
554
+ namespaces[name] = ::Moxml::Adapter::CustomizedOx::Namespace.new(
555
+ name, value, node
556
+ )
557
+ end
558
+ namespaces.values
557
559
  end
558
560
 
559
561
  # Doctype accessor methods
@@ -620,17 +622,44 @@ module Moxml
620
622
  end
621
623
 
622
624
  def serialize(node, options = {})
623
- # Fast path: skip EntityReference scan for documents (most common case)
624
- if node.is_a?(::Ox::Document) &&
625
- !attachments.get(node, :has_entity_refs)
625
+ needs_custom = needs_custom_serialize?(node)
626
+
627
+ unless needs_custom
626
628
  return serialize_standard(node, options)
627
629
  end
628
630
 
629
- if tree_has_entity_references?(node)
630
- serialize_custom(node, options)
631
- else
632
- serialize_standard(node, options)
631
+ serialize_custom(node, options)
632
+ end
633
+
634
+ def needs_custom_serialize?(node)
635
+ # Fast path: single CData with ]]>
636
+ return true if node.is_a?(::Ox::CData) && node.value&.include?("]]>")
637
+
638
+ # Only documents/elements can contain entity refs or CDATA issues
639
+ return false unless node.is_a?(::Ox::Document) || node.is_a?(::Ox::Element)
640
+
641
+ # Check cached flags on documents (most common case)
642
+ if node.is_a?(::Ox::Document)
643
+ return true if attachments.get(node, :has_entity_refs)
644
+ return true if attachments.get(node, :has_cdata_end_markers)
645
+ return false if attachments.key?(node, :has_entity_refs) &&
646
+ attachments.key?(node, :has_cdata_end_markers)
633
647
  end
648
+
649
+ # Only scan tree on first call — short-circuit on first hit
650
+ has_er = tree_has_entity_references?(node)
651
+ if has_er
652
+ attachments.set(node, :has_entity_refs, true) if node.is_a?(::Ox::Document)
653
+ return true
654
+ end
655
+
656
+ has_cdata = tree_has_cdata_end_markers?(node)
657
+ if node.is_a?(::Ox::Document)
658
+ attachments.set(node, :has_entity_refs, false)
659
+ attachments.set(node, :has_cdata_end_markers, has_cdata)
660
+ end
661
+
662
+ has_cdata
634
663
  end
635
664
 
636
665
  def has_declaration?(native_doc, _wrapper)
@@ -665,7 +694,9 @@ module Moxml
665
694
  encoding: options[:encoding],
666
695
  no_empty: options[:expand_empty],
667
696
  }
668
- output + ::Ox.dump(node, ox_options)
697
+ result = output + ::Ox.dump(node, ox_options)
698
+ # Fix CDATA ]]> end markers that Ox doesn't escape
699
+ result
669
700
  end
670
701
 
671
702
  def tree_has_entity_references?(node)
@@ -685,6 +716,19 @@ module Moxml
685
716
  end
686
717
  end
687
718
 
719
+ def tree_has_cdata_end_markers?(node)
720
+ case node
721
+ when ::Ox::CData
722
+ node.value&.include?("]]>") || false
723
+ when ::Ox::Element
724
+ node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
725
+ when ::Ox::Document
726
+ node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
727
+ else
728
+ false
729
+ end
730
+ end
731
+
688
732
  def serialize_custom(node, options = {})
689
733
  output = +""
690
734
  if node.is_a?(::Ox::Document)
@@ -717,7 +761,7 @@ module Moxml
717
761
  when String then escape_xml_text(node)
718
762
  when ::Moxml::Adapter::CustomizedOx::Text then escape_xml_text(node.value)
719
763
  when ::Moxml::Adapter::CustomizedOx::EntityReference then "&#{node.name};"
720
- when ::Ox::CData then "<![CDATA[#{node.value}]]>"
764
+ when ::Ox::CData then serialize_cdata(node.value)
721
765
  when ::Ox::Comment then "<!--#{node.value}-->"
722
766
  when ::Ox::Instruct then "<?#{node.target} #{node.value || ''}?>"
723
767
  when ::Ox::DocType then "<!DOCTYPE #{node.value}>"
@@ -744,6 +788,11 @@ module Moxml
744
788
  output
745
789
  end
746
790
 
791
+ def serialize_cdata(content)
792
+ escaped = content.gsub("]]>", "]]]]><![CDATA[>")
793
+ "<![CDATA[#{escaped}]]>"
794
+ end
795
+
747
796
  def escape_xml_text(text)
748
797
  text.to_s.gsub(/[<>&]/) do |match|
749
798
  case match
@@ -765,6 +814,7 @@ module Moxml
765
814
  end
766
815
  end
767
816
 
817
+
768
818
  # Translate a subset of XPath to Ox locate() syntax
769
819
  # Supports: //element, /path/to/element, .//element, element[@attr]
770
820
  # Note: Ox locate() doesn't support namespace prefixes in the path
data/lib/moxml/adapter/rexml.rb CHANGED
@@ -15,6 +15,8 @@ module Moxml
15
15
  end
16
16
 
17
17
  def parse(xml, options = {}, _context = nil)
18
+ xml = "" if xml.nil?
19
+
18
20
  # Handle frozen strings by creating a mutable copy
19
21
  processed_xml = if xml.frozen?
20
22
  xml.dup.force_encoding("UTF-8").encode("UTF-8")
@@ -22,6 +24,9 @@ module Moxml
22
24
  xml.force_encoding("UTF-8").encode("UTF-8")
23
25
  end
24
26
 
27
+ # Preprocess entities to avoid double-escaping on output
28
+ processed_xml = preprocess_entities(processed_xml)
29
+
25
30
  native_doc = begin
26
31
  ::REXML::Document.new(processed_xml)
27
32
  rescue ::REXML::ParseException => e
@@ -172,12 +177,8 @@ module Moxml
172
177
  def children(node)
173
178
  return [] unless node.is_a?(::REXML::Parent)
174
179
 
175
- # Get all children and filter out empty text nodes between elements
176
- result = node.children.reject do |child|
177
- child.is_a?(::REXML::Text) &&
178
- child.to_s.strip.empty? &&
179
- !(child.next_sibling.nil? && child.previous_sibling.nil?)
180
- end
180
+ # Return all children preserving whitespace text nodes
181
+ result = node.children.dup
181
182
 
182
183
  # Include any EntityReference wrappers stored alongside native children
183
184
  entity_refs = attachments.get(node, :entity_refs)
@@ -412,7 +413,7 @@ module Moxml
412
413
  when ::REXML::Element
413
414
  # Extract text recursively from all children to match other adapters
414
415
  extract_text_recursively(node)
415
- end
416
+ end.to_s
416
417
  end
417
418
 
418
419
  def extract_text_recursively(element)
@@ -491,9 +492,25 @@ module Moxml
491
492
  end
492
493
 
493
494
  def namespace_definitions(node)
494
- node.namespaces.map do |prefix, uri|
495
- ::REXML::Attribute.new(prefix.to_s, uri, node)
495
+ return [] unless node.is_a?(::REXML::Element)
496
+
497
+ result = []
498
+ node.attributes.each_attribute do |attr|
499
+ next unless attr.prefix == "xmlns" || (attr.name == "xmlns" && attr.prefix.to_s.empty?)
500
+
501
+ result << attr
502
+ end
503
+ result
504
+ end
505
+
506
+ def in_scope_namespaces(element)
507
+ namespaces = {}
508
+ element.namespaces.each do |prefix, uri|
509
+ key = prefix.to_s.empty? ? "xmlns" : prefix.to_s
510
+ ns = ::REXML::Attribute.new(key, uri, element)
511
+ namespaces[prefix] = ns
496
512
  end
513
+ namespaces.values
497
514
  end
498
515
 
499
516
  # Doctype accessor methods
data/lib/moxml/attribute.rb CHANGED
@@ -17,6 +17,12 @@ module Moxml
17
17
  end
18
18
 
19
19
  def value
20
+ val = @native.value.to_s
21
+ adapter.restore_entities(val)
22
+ end
23
+
24
+ # Returns raw native value without entity marker restoration.
25
+ def raw_value
20
26
  @native.value
21
27
  end
22
28
 
data/lib/moxml/config.rb CHANGED
@@ -3,7 +3,8 @@
3
3
  module Moxml
4
4
  class Config
5
5
  VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
6
- DEFAULT_ADAPTER = VALID_ADAPTERS.first
6
+ DEFAULT_ADAPTER = :nokogiri
7
+ OPAL_DEFAULT_ADAPTER = :oga
7
8
 
8
9
  # Entity loading modes:
9
10
  # - :required - Must load entities, raise error if unavailable (default)
@@ -20,7 +21,21 @@ module Moxml
20
21
  end
21
22
 
22
23
  def default_adapter
23
- @default_adapter ||= DEFAULT_ADAPTER
24
+ @default_adapter ||= runtime_default_adapter
25
+ end
26
+
27
+ def runtime_default_adapter
28
+ return OPAL_DEFAULT_ADAPTER if RUBY_ENGINE == "opal"
29
+
30
+ detect_loaded_adapter || DEFAULT_ADAPTER
31
+ end
32
+
33
+ def detect_loaded_adapter
34
+ return :nokogiri if Object.const_defined?(:Nokogiri)
35
+ return :ox if Object.const_defined?(:Ox)
36
+ return :oga if Object.const_defined?(:Oga)
37
+
38
+ nil
24
39
  end
25
40
  end
26
41
 
data/lib/moxml/element.rb CHANGED
@@ -46,7 +46,8 @@ module Moxml
46
46
  end
47
47
 
48
48
  def [](name)
49
- adapter.get_attribute_value(@native, name)
49
+ val = adapter.get_attribute_value(@native, name)
50
+ val ? adapter.restore_entities(val) : val
50
51
  end
51
52
 
52
53
  def attribute(name)
@@ -54,12 +55,7 @@ module Moxml
54
55
  native_attr && Attribute.new(native_attr, context)
55
56
  end
56
57
 
57
- # Alias for attribute access
58
- def get(attr_name)
59
- attribute(attr_name)
60
- end
61
-
62
- # Alias for getting attribute value (used by XPath engine)
58
+ # Returns attribute value by name (used by XPath engine)
63
59
  def get(attr_name)
64
60
  self[attr_name]
65
61
  end
@@ -137,7 +133,8 @@ module Moxml
137
133
  end
138
134
 
139
135
  def text
140
- adapter.text_content(@native)
136
+ val = adapter.text_content(@native)
137
+ adapter.restore_entities(val)
141
138
  end
142
139
 
143
140
  def text=(content)
@@ -146,6 +143,13 @@ module Moxml
146
143
  end
147
144
 
148
145
  def inner_text
146
+ text = raw_inner_text
147
+ adapter.restore_entities(text)
148
+ end
149
+
150
+ # Returns inner text without entity marker restoration.
151
+ # Used internally when raw content with markers is needed (e.g., for DOM construction).
152
+ def raw_inner_text
149
153
  adapter.inner_text(@native)
150
154
  end
151
155
 
data/lib/moxml/node.rb CHANGED
@@ -97,7 +97,10 @@ module Moxml
97
97
  serialize_options = default_options.merge(options)
98
98
  serialize_options[:no_declaration] = !should_include_declaration?(options)
99
99
 
100
- adapter.serialize(@native, serialize_options)
100
+ result = adapter.serialize(@native, serialize_options)
101
+
102
+ # Restore entity markers to named entity references
103
+ adapter.restore_entities(result)
101
104
  end
102
105
 
103
106
  def xpath(expression, namespaces = {})
data/lib/moxml/text.rb CHANGED
@@ -3,6 +3,12 @@
3
3
  module Moxml
4
4
  class Text < Node
5
5
  def content
6
+ text = raw_content
7
+ adapter.restore_entities(text)
8
+ end
9
+
10
+ # Returns raw content without entity marker restoration.
11
+ def raw_content
6
12
  adapter.text_content(@native)
7
13
  end
8
14
 
data/lib/moxml/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- VERSION = "0.1.16"
4
+ VERSION = "0.1.18"
5
5
  end
data/lib/moxml/xpath/compiler.rb CHANGED
@@ -388,30 +388,38 @@ module Moxml
388
388
  document_or_node(input).if_true do
389
389
  # Create a proper if-else structure that prevents double traversal
390
390
  input.is_a?(doc_class).if_true do
391
- # DOCUMENT PATH: test root, then traverse from root
392
- root = unique_literal(:root)
393
- root.assign(input.root).followed_by do
394
- root.if_true do
395
- # Test root first
396
- condition = process(ast, root)
397
- (if block_given?
398
- condition.if_true { yield root }
399
- else
400
- condition.if_true { root }
401
- end)
402
- .followed_by do
403
- # Traverse descendants FROM root only (not document.each_node)
404
- root.each_node.add_block(node) do
405
- desc_condition = process(ast, node)
406
- if block_given?
407
- desc_condition.if_true { yield node }
408
- else
409
- desc_condition.if_true { node }
391
+ # DOCUMENT PATH: test document (self), then root, then traverse
392
+ doc_condition = process(ast, input)
393
+ (if block_given?
394
+ doc_condition.if_true { yield input }
395
+ else
396
+ doc_condition.if_true { input }
397
+ end)
398
+ .followed_by do
399
+ root = unique_literal(:root)
400
+ root.assign(input.root).followed_by do
401
+ root.if_true do
402
+ # Test root
403
+ condition = process(ast, root)
404
+ (if block_given?
405
+ condition.if_true { yield root }
406
+ else
407
+ condition.if_true { root }
408
+ end)
409
+ .followed_by do
410
+ # Traverse descendants FROM root only (not document.each_node)
411
+ root.each_node.add_block(node) do
412
+ desc_condition = process(ast, node)
413
+ if block_given?
414
+ desc_condition.if_true { yield node }
415
+ else
416
+ desc_condition.if_true { node }
417
+ end
418
+ end
410
419
  end
411
- end
412
420
  end
421
+ end
413
422
  end
414
- end
415
423
  end.else do
416
424
  # NON-DOCUMENT PATH: test self, then traverse from self
417
425
  condition = process(ast, input)
@@ -497,6 +505,17 @@ module Moxml
497
505
  element_or_attribute(input)
498
506
  end
499
507
 
508
+ # Handle node type test (node(), text(), comment(), etc.)
509
+ # node() matches any node — always returns truthy
510
+ def on_node_type(ast, input)
511
+ case ast.value
512
+ when "node"
513
+ # node() matches everything — use a truthy literal
514
+ Ruby::Node.new(:lit, ["true"])
515
+ else element_or_attribute(input)
516
+ end
517
+ end
518
+
500
519
  # Match element/attribute names and namespaces
501
520
  def match_name_and_namespace(ast, input)
502
521
  ns = ast.value[:namespace]
data/lib/moxml/xpath/parser.rb CHANGED
@@ -311,10 +311,10 @@ module Moxml
311
311
  return AST::Node.absolute_path(*steps.children)
312
312
  elsif match?(:dslash)
313
313
  advance
314
- # Descendant-or-self: //
314
+ # Descendant-or-self: // (expands to /descendant-or-self::node()/)
315
315
  steps = parse_relative_path
316
316
  return AST::Node.absolute_path(
317
- AST::Node.axis("descendant-or-self", AST::Node.wildcard),
317
+ AST::Node.axis("descendant-or-self", AST::Node.node_type("node")),
318
318
  *steps.children,
319
319
  )
320
320
  end
@@ -330,9 +330,9 @@ module Moxml
330
330
  while match?(:slash) && !at_end?
331
331
  advance
332
332
  if match?(:slash)
333
- # Double slash within path
333
+ # Double slash within path: expands to descendant-or-self::node()
334
334
  advance
335
- steps << AST::Node.axis("descendant-or-self", AST::Node.wildcard)
335
+ steps << AST::Node.axis("descendant-or-self", AST::Node.node_type("node"))
336
336
  end
337
337
  steps << parse_step unless at_end? || match?(:pipe, :rbracket,
338
338
  :rparen, :comma)
@@ -352,9 +352,14 @@ module Moxml
352
352
  return AST::Node.parent
353
353
  elsif match?(:at)
354
354
  advance
355
- # Attribute: @name
356
- name = consume(:name, "Expected attribute name after @")
357
- node_test = AST::Node.test(nil, name[1])
355
+ # Attribute: @name or @*
356
+ if match?(:star)
357
+ advance
358
+ node_test = AST::Node.wildcard
359
+ else
360
+ name = consume(:name, "Expected attribute name after @")
361
+ node_test = AST::Node.test(nil, name[1])
362
+ end
358
363
  step = AST::Node.axis("attribute", node_test)
359
364
  return parse_predicates(step)
360
365
  end