moxml 0.1.16 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.rubocop_todo.yml +49 -133
- data/README.adoc +18 -0
- data/lib/moxml/adapter/base.rb +65 -8
- data/lib/moxml/adapter/headed_ox.rb +2 -1
- data/lib/moxml/adapter/libxml.rb +16 -3
- data/lib/moxml/adapter/nokogiri.rb +14 -4
- data/lib/moxml/adapter/oga.rb +26 -87
- data/lib/moxml/adapter/ox.rb +69 -19
- data/lib/moxml/adapter/rexml.rb +24 -3
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/element.rb +12 -8
- data/lib/moxml/node.rb +4 -1
- data/lib/moxml/text.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +40 -21
- data/lib/moxml/xpath/parser.rb +12 -7
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/shared_examples/edge_cases.rb +0 -6
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +122 -0
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
- data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
- data/spec/moxml/builder_spec.rb +16 -1
- data/spec/moxml/entity_preservation_spec.rb +130 -0
- data/spec/moxml/entity_reference_spec.rb +114 -0
- data/spec/moxml/entity_registry_spec.rb +68 -0
- data/spec/moxml/xpath/axes_spec.rb +0 -1
- data/spec/moxml/xpath/compiler_spec.rb +0 -2
- metadata +6 -12
- data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
- data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
- data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
- data/TODO.remaining/4-lenient-entities-mode.md +0 -106
- data/TODO.remaining/5-fixture-integrity.md +0 -65
- data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
- data/TODO.remaining/7-headed-ox-limitations.md +0 -95
- data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
- data/TODO.remaining/9-cleanup-hygiene.md +0 -42
- data/TODO.remaining/README.md +0 -54
data/lib/moxml/adapter/oga.rb
CHANGED
|
@@ -8,9 +8,6 @@ module Moxml
|
|
|
8
8
|
module Adapter
|
|
9
9
|
class Oga < Base
|
|
10
10
|
class << self
|
|
11
|
-
# Standard XML entities handled natively by parsers
|
|
12
|
-
STANDARD_XML_ENTITIES = %w[amp lt gt quot apos].freeze
|
|
13
|
-
|
|
14
11
|
def attachments
|
|
15
12
|
@attachments ||= Moxml::NativeAttachment.new
|
|
16
13
|
end
|
|
@@ -24,9 +21,7 @@ module Moxml
|
|
|
24
21
|
end
|
|
25
22
|
|
|
26
23
|
def parse(xml, options = {}, _context = nil)
|
|
27
|
-
|
|
28
|
-
# Oga drops named entity references like &nbsp; during parsing.
|
|
29
|
-
processed_xml = preprocess_named_entities(xml)
|
|
24
|
+
processed_xml = preprocess_entities(xml)
|
|
30
25
|
|
|
31
26
|
native_doc = begin
|
|
32
27
|
::Oga.parse_xml(processed_xml, strict: options[:strict])
|
|
@@ -72,12 +67,12 @@ module Moxml
|
|
|
72
67
|
end
|
|
73
68
|
|
|
74
69
|
def create_native_text(content, _owner_doc = nil)
|
|
75
|
-
::Oga::XML::Text.new(text:
|
|
70
|
+
::Oga::XML::Text.new(text: preprocess_entities(content))
|
|
76
71
|
end
|
|
77
72
|
|
|
78
73
|
def create_native_entity_reference(name)
|
|
79
74
|
text = ::Oga::XML::Text.new
|
|
80
|
-
text.text = "#{ENTITY_MARKER}#{name};"
|
|
75
|
+
text.text = "#{self::ENTITY_MARKER}#{name};"
|
|
81
76
|
attachments.set(text, :entity_name, name)
|
|
82
77
|
text
|
|
83
78
|
end
|
|
@@ -201,10 +196,20 @@ module Moxml
|
|
|
201
196
|
all_children + node.children.reject do |child|
|
|
202
197
|
child.is_a?(::Oga::XML::Text) &&
|
|
203
198
|
child.text.strip.empty? &&
|
|
204
|
-
!(child.previous.nil? && child.next.nil?)
|
|
199
|
+
!(child.previous.nil? && child.next.nil?) &&
|
|
200
|
+
!adjacent_to_entity_reference?(child)
|
|
205
201
|
end
|
|
206
202
|
end
|
|
207
203
|
|
|
204
|
+
def adjacent_to_entity_reference?(node)
|
|
205
|
+
entity_ref?(node.previous) || entity_ref?(node.next)
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
def entity_ref?(node)
|
|
209
|
+
node.is_a?(::Oga::XML::Text) &&
|
|
210
|
+
attachments.get(node, :entity_name)
|
|
211
|
+
end
|
|
212
|
+
|
|
208
213
|
def parent(node)
|
|
209
214
|
node.parent if node.is_a?(::Oga::XML::Node)
|
|
210
215
|
end
|
|
@@ -251,7 +256,7 @@ module Moxml
|
|
|
251
256
|
attr = ::Oga::XML::Attribute.new(
|
|
252
257
|
name: name.to_s,
|
|
253
258
|
namespace_name: namespace_name,
|
|
254
|
-
value:
|
|
259
|
+
value: preprocess_entities(value.to_s),
|
|
255
260
|
)
|
|
256
261
|
element.add_attribute(attr)
|
|
257
262
|
end
|
|
@@ -261,7 +266,7 @@ module Moxml
|
|
|
261
266
|
end
|
|
262
267
|
|
|
263
268
|
def get_attribute_value(element, name)
|
|
264
|
-
|
|
269
|
+
element[name.to_s]
|
|
265
270
|
end
|
|
266
271
|
|
|
267
272
|
def remove_attribute(element, name)
|
|
@@ -330,24 +335,23 @@ module Moxml
|
|
|
330
335
|
end
|
|
331
336
|
|
|
332
337
|
def text_content(node)
|
|
333
|
-
|
|
338
|
+
node.text
|
|
334
339
|
end
|
|
335
340
|
|
|
336
341
|
def inner_text(node)
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
restore_entity_markers(text)
|
|
342
|
+
if node.is_a?(::Oga::XML::Element)
|
|
343
|
+
node.inner_text
|
|
344
|
+
else
|
|
345
|
+
node.text
|
|
346
|
+
end
|
|
343
347
|
end
|
|
344
348
|
|
|
345
349
|
def set_text_content(node, content)
|
|
346
|
-
|
|
350
|
+
processed = preprocess_entities(content)
|
|
347
351
|
if node.is_a?(::Oga::XML::Element)
|
|
348
|
-
node.inner_text =
|
|
352
|
+
node.inner_text = processed
|
|
349
353
|
else
|
|
350
|
-
node.text =
|
|
354
|
+
node.text = processed
|
|
351
355
|
end
|
|
352
356
|
end
|
|
353
357
|
|
|
@@ -439,24 +443,9 @@ module Moxml
|
|
|
439
443
|
end
|
|
440
444
|
|
|
441
445
|
def serialize(node, options = {})
|
|
442
|
-
|
|
443
|
-
# Post-process: convert entity markers back to entity references
|
|
444
|
-
output.gsub(ENTITY_MARKER_REGEX, '&\1;')
|
|
446
|
+
serialize_without_entity_processing(node, options)
|
|
445
447
|
end
|
|
446
448
|
|
|
447
|
-
# Shared entity name pattern (W3C: 2-31 chars, starts with alpha)
|
|
448
|
-
ENTITY_PATTERN = "([a-zA-Z][a-zA-Z0-9]{1,30})"
|
|
449
|
-
|
|
450
|
-
# Marker character for entity preservation through Oga's parser.
|
|
451
|
-
# U+0001 is preserved literally by Oga through parse/serialize cycle.
|
|
452
|
-
ENTITY_MARKER = "\x01"
|
|
453
|
-
|
|
454
|
-
# Regular expression for entity marker post-processing
|
|
455
|
-
ENTITY_MARKER_REGEX = /#{ENTITY_MARKER}#{ENTITY_PATTERN};/
|
|
456
|
-
|
|
457
|
-
# Simple entity-only regex with no nested quantifiers
|
|
458
|
-
ENTITY_REF_REGEX = /&#{ENTITY_PATTERN};/
|
|
459
|
-
|
|
460
449
|
def has_declaration?(native_doc, _wrapper)
|
|
461
450
|
decl = attachments.get(native_doc, :xml_declaration)
|
|
462
451
|
if decl.nil? && !attachments.key?(native_doc, :xml_declaration)
|
|
@@ -469,32 +458,6 @@ module Moxml
|
|
|
469
458
|
|
|
470
459
|
private
|
|
471
460
|
|
|
472
|
-
# Convert &entity; back to \x01entity; for Oga text storage.
|
|
473
|
-
# Used when setting text content programmatically (not from parsing).
|
|
474
|
-
def encode_entity_markers(text)
|
|
475
|
-
return text unless text&.include?("&")
|
|
476
|
-
|
|
477
|
-
text.gsub(ENTITY_REF_REGEX) do
|
|
478
|
-
name = ::Regexp.last_match(1)
|
|
479
|
-
|
|
480
|
-
next ::Regexp.last_match(0) if STANDARD_XML_ENTITIES.include?(name)
|
|
481
|
-
|
|
482
|
-
codepoint = Moxml::EntityRegistry.default.codepoint_for_name(name)
|
|
483
|
-
if codepoint
|
|
484
|
-
"#{ENTITY_MARKER}#{name};"
|
|
485
|
-
else
|
|
486
|
-
::Regexp.last_match(0)
|
|
487
|
-
end
|
|
488
|
-
end
|
|
489
|
-
end
|
|
490
|
-
|
|
491
|
-
# Convert \x01entity; back to &entity; for text accessors.
|
|
492
|
-
def restore_entity_markers(text)
|
|
493
|
-
return text unless text
|
|
494
|
-
|
|
495
|
-
text.gsub(ENTITY_MARKER_REGEX, '&\1;')
|
|
496
|
-
end
|
|
497
|
-
|
|
498
461
|
def serialize_without_entity_processing(node, options = {})
|
|
499
462
|
# Oga's XmlGenerator doesn't support options directly
|
|
500
463
|
# We need to handle declaration options ourselves for Document nodes
|
|
@@ -572,30 +535,6 @@ module Moxml
|
|
|
572
535
|
::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
|
|
573
536
|
end
|
|
574
537
|
end
|
|
575
|
-
|
|
576
|
-
# Pre-process XML to convert named entities to marker format.
|
|
577
|
-
# Oga drops named entity references like &nbsp; but preserves control chars.
|
|
578
|
-
# By converting known named entities to marker form (\x01name;), we can
|
|
579
|
-
# reconstruct them during serialization.
|
|
580
|
-
#
|
|
581
|
-
# @param xml [String, #to_s] The XML string to process
|
|
582
|
-
# @return [String] The XML with known named entities converted to marker form
|
|
583
|
-
def preprocess_named_entities(xml)
|
|
584
|
-
return xml unless xml.is_a?(String)
|
|
585
|
-
|
|
586
|
-
xml.gsub(ENTITY_REF_REGEX) do
|
|
587
|
-
name = Regexp.last_match(1)
|
|
588
|
-
|
|
589
|
-
next Regexp.last_match(0) if STANDARD_XML_ENTITIES.include?(name)
|
|
590
|
-
|
|
591
|
-
codepoint = Moxml::EntityRegistry.default.codepoint_for_name(name)
|
|
592
|
-
if codepoint
|
|
593
|
-
"#{ENTITY_MARKER}#{name};"
|
|
594
|
-
else
|
|
595
|
-
Regexp.last_match(0)
|
|
596
|
-
end
|
|
597
|
-
end
|
|
598
|
-
end
|
|
599
538
|
end
|
|
600
539
|
end
|
|
601
540
|
|
data/lib/moxml/adapter/ox.rb
CHANGED
|
@@ -20,8 +20,9 @@ module Moxml
|
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
def parse(xml, options = {}, _context = nil)
|
|
23
|
+
processed_xml = preprocess_entities(xml)
|
|
23
24
|
native_doc = begin
|
|
24
|
-
result = ::Ox.parse(
|
|
25
|
+
result = ::Ox.parse(processed_xml)
|
|
25
26
|
|
|
26
27
|
# result can be either Document or Element
|
|
27
28
|
if result.is_a?(::Ox::Document)
|
|
@@ -543,17 +544,18 @@ module Moxml
|
|
|
543
544
|
end
|
|
544
545
|
|
|
545
546
|
def namespace_definitions(node)
|
|
546
|
-
|
|
547
|
-
next unless n.is_a?(::Ox::Element) && n.attributes
|
|
547
|
+
return [] unless node.is_a?(::Ox::Element) && node.attributes
|
|
548
548
|
|
|
549
|
-
|
|
550
|
-
|
|
549
|
+
namespaces = {}
|
|
550
|
+
node.attributes.each do |name, value|
|
|
551
|
+
name_s = name.to_s
|
|
552
|
+
next unless name_s == "xmlns" || name_s.start_with?("xmlns:")
|
|
551
553
|
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
554
|
+
namespaces[name] = ::Moxml::Adapter::CustomizedOx::Namespace.new(
|
|
555
|
+
name, value, node
|
|
556
|
+
)
|
|
557
|
+
end
|
|
558
|
+
namespaces.values
|
|
557
559
|
end
|
|
558
560
|
|
|
559
561
|
# Doctype accessor methods
|
|
@@ -620,17 +622,44 @@ module Moxml
|
|
|
620
622
|
end
|
|
621
623
|
|
|
622
624
|
def serialize(node, options = {})
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
625
|
+
needs_custom = needs_custom_serialize?(node)
|
|
626
|
+
|
|
627
|
+
unless needs_custom
|
|
626
628
|
return serialize_standard(node, options)
|
|
627
629
|
end
|
|
628
630
|
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
631
|
+
serialize_custom(node, options)
|
|
632
|
+
end
|
|
633
|
+
|
|
634
|
+
def needs_custom_serialize?(node)
|
|
635
|
+
# Fast path: single CData with ]]>
|
|
636
|
+
return true if node.is_a?(::Ox::CData) && node.value&.include?("]]>")
|
|
637
|
+
|
|
638
|
+
# Only documents/elements can contain entity refs or CDATA issues
|
|
639
|
+
return false unless node.is_a?(::Ox::Document) || node.is_a?(::Ox::Element)
|
|
640
|
+
|
|
641
|
+
# Check cached flags on documents (most common case)
|
|
642
|
+
if node.is_a?(::Ox::Document)
|
|
643
|
+
return true if attachments.get(node, :has_entity_refs)
|
|
644
|
+
return true if attachments.get(node, :has_cdata_end_markers)
|
|
645
|
+
return false if attachments.key?(node, :has_entity_refs) &&
|
|
646
|
+
attachments.key?(node, :has_cdata_end_markers)
|
|
633
647
|
end
|
|
648
|
+
|
|
649
|
+
# Only scan tree on first call — short-circuit on first hit
|
|
650
|
+
has_er = tree_has_entity_references?(node)
|
|
651
|
+
if has_er
|
|
652
|
+
attachments.set(node, :has_entity_refs, true) if node.is_a?(::Ox::Document)
|
|
653
|
+
return true
|
|
654
|
+
end
|
|
655
|
+
|
|
656
|
+
has_cdata = tree_has_cdata_end_markers?(node)
|
|
657
|
+
if node.is_a?(::Ox::Document)
|
|
658
|
+
attachments.set(node, :has_entity_refs, false)
|
|
659
|
+
attachments.set(node, :has_cdata_end_markers, has_cdata)
|
|
660
|
+
end
|
|
661
|
+
|
|
662
|
+
has_cdata
|
|
634
663
|
end
|
|
635
664
|
|
|
636
665
|
def has_declaration?(native_doc, _wrapper)
|
|
@@ -665,7 +694,9 @@ module Moxml
|
|
|
665
694
|
encoding: options[:encoding],
|
|
666
695
|
no_empty: options[:expand_empty],
|
|
667
696
|
}
|
|
668
|
-
output + ::Ox.dump(node, ox_options)
|
|
697
|
+
result = output + ::Ox.dump(node, ox_options)
|
|
698
|
+
# Fix CDATA ]]> end markers that Ox doesn't escape
|
|
699
|
+
result
|
|
669
700
|
end
|
|
670
701
|
|
|
671
702
|
def tree_has_entity_references?(node)
|
|
@@ -685,6 +716,19 @@ module Moxml
|
|
|
685
716
|
end
|
|
686
717
|
end
|
|
687
718
|
|
|
719
|
+
def tree_has_cdata_end_markers?(node)
|
|
720
|
+
case node
|
|
721
|
+
when ::Ox::CData
|
|
722
|
+
node.value&.include?("]]>") || false
|
|
723
|
+
when ::Ox::Element
|
|
724
|
+
node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
|
|
725
|
+
when ::Ox::Document
|
|
726
|
+
node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
|
|
727
|
+
else
|
|
728
|
+
false
|
|
729
|
+
end
|
|
730
|
+
end
|
|
731
|
+
|
|
688
732
|
def serialize_custom(node, options = {})
|
|
689
733
|
output = +""
|
|
690
734
|
if node.is_a?(::Ox::Document)
|
|
@@ -717,7 +761,7 @@ module Moxml
|
|
|
717
761
|
when String then escape_xml_text(node)
|
|
718
762
|
when ::Moxml::Adapter::CustomizedOx::Text then escape_xml_text(node.value)
|
|
719
763
|
when ::Moxml::Adapter::CustomizedOx::EntityReference then "&#{node.name};"
|
|
720
|
-
when ::Ox::CData then
|
|
764
|
+
when ::Ox::CData then serialize_cdata(node.value)
|
|
721
765
|
when ::Ox::Comment then "<!--#{node.value}-->"
|
|
722
766
|
when ::Ox::Instruct then "<?#{node.target} #{node.value || ''}?>"
|
|
723
767
|
when ::Ox::DocType then "<!DOCTYPE #{node.value}>"
|
|
@@ -744,6 +788,11 @@ module Moxml
|
|
|
744
788
|
output
|
|
745
789
|
end
|
|
746
790
|
|
|
791
|
+
def serialize_cdata(content)
|
|
792
|
+
escaped = content.gsub("]]>", "]]]]><![CDATA[>")
|
|
793
|
+
"<![CDATA[#{escaped}]]>"
|
|
794
|
+
end
|
|
795
|
+
|
|
747
796
|
def escape_xml_text(text)
|
|
748
797
|
text.to_s.gsub(/[<>&]/) do |match|
|
|
749
798
|
case match
|
|
@@ -765,6 +814,7 @@ module Moxml
|
|
|
765
814
|
end
|
|
766
815
|
end
|
|
767
816
|
|
|
817
|
+
|
|
768
818
|
# Translate a subset of XPath to Ox locate() syntax
|
|
769
819
|
# Supports: //element, /path/to/element, .//element, element[@attr]
|
|
770
820
|
# Note: Ox locate() doesn't support namespace prefixes in the path
|
data/lib/moxml/adapter/rexml.rb
CHANGED
|
@@ -15,6 +15,8 @@ module Moxml
|
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def parse(xml, options = {}, _context = nil)
|
|
18
|
+
xml = "" if xml.nil?
|
|
19
|
+
|
|
18
20
|
# Handle frozen strings by creating a mutable copy
|
|
19
21
|
processed_xml = if xml.frozen?
|
|
20
22
|
xml.dup.force_encoding("UTF-8").encode("UTF-8")
|
|
@@ -22,6 +24,9 @@ module Moxml
|
|
|
22
24
|
xml.force_encoding("UTF-8").encode("UTF-8")
|
|
23
25
|
end
|
|
24
26
|
|
|
27
|
+
# Preprocess entities to avoid double-escaping on output
|
|
28
|
+
processed_xml = preprocess_entities(processed_xml)
|
|
29
|
+
|
|
25
30
|
native_doc = begin
|
|
26
31
|
::REXML::Document.new(processed_xml)
|
|
27
32
|
rescue ::REXML::ParseException => e
|
|
@@ -412,7 +417,7 @@ module Moxml
|
|
|
412
417
|
when ::REXML::Element
|
|
413
418
|
# Extract text recursively from all children to match other adapters
|
|
414
419
|
extract_text_recursively(node)
|
|
415
|
-
end
|
|
420
|
+
end.to_s
|
|
416
421
|
end
|
|
417
422
|
|
|
418
423
|
def extract_text_recursively(element)
|
|
@@ -491,9 +496,25 @@ module Moxml
|
|
|
491
496
|
end
|
|
492
497
|
|
|
493
498
|
def namespace_definitions(node)
|
|
494
|
-
|
|
495
|
-
|
|
499
|
+
return [] unless node.is_a?(::REXML::Element)
|
|
500
|
+
|
|
501
|
+
result = []
|
|
502
|
+
node.attributes.each_attribute do |attr|
|
|
503
|
+
next unless attr.prefix == "xmlns" || (attr.name == "xmlns" && attr.prefix.to_s.empty?)
|
|
504
|
+
|
|
505
|
+
result << attr
|
|
506
|
+
end
|
|
507
|
+
result
|
|
508
|
+
end
|
|
509
|
+
|
|
510
|
+
def in_scope_namespaces(element)
|
|
511
|
+
namespaces = {}
|
|
512
|
+
element.namespaces.each do |prefix, uri|
|
|
513
|
+
key = prefix.to_s.empty? ? "xmlns" : prefix.to_s
|
|
514
|
+
ns = ::REXML::Attribute.new(key, uri, element)
|
|
515
|
+
namespaces[prefix] = ns
|
|
496
516
|
end
|
|
517
|
+
namespaces.values
|
|
497
518
|
end
|
|
498
519
|
|
|
499
520
|
# Doctype accessor methods
|
data/lib/moxml/attribute.rb
CHANGED
data/lib/moxml/element.rb
CHANGED
|
@@ -46,7 +46,8 @@ module Moxml
|
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
def [](name)
|
|
49
|
-
adapter.get_attribute_value(@native, name)
|
|
49
|
+
val = adapter.get_attribute_value(@native, name)
|
|
50
|
+
val ? adapter.restore_entities(val) : val
|
|
50
51
|
end
|
|
51
52
|
|
|
52
53
|
def attribute(name)
|
|
@@ -54,12 +55,7 @@ module Moxml
|
|
|
54
55
|
native_attr && Attribute.new(native_attr, context)
|
|
55
56
|
end
|
|
56
57
|
|
|
57
|
-
#
|
|
58
|
-
def get(attr_name)
|
|
59
|
-
attribute(attr_name)
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Alias for getting attribute value (used by XPath engine)
|
|
58
|
+
# Returns attribute value by name (used by XPath engine)
|
|
63
59
|
def get(attr_name)
|
|
64
60
|
self[attr_name]
|
|
65
61
|
end
|
|
@@ -137,7 +133,8 @@ module Moxml
|
|
|
137
133
|
end
|
|
138
134
|
|
|
139
135
|
def text
|
|
140
|
-
adapter.text_content(@native)
|
|
136
|
+
val = adapter.text_content(@native)
|
|
137
|
+
adapter.restore_entities(val)
|
|
141
138
|
end
|
|
142
139
|
|
|
143
140
|
def text=(content)
|
|
@@ -146,6 +143,13 @@ module Moxml
|
|
|
146
143
|
end
|
|
147
144
|
|
|
148
145
|
def inner_text
|
|
146
|
+
text = raw_inner_text
|
|
147
|
+
adapter.restore_entities(text)
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Returns inner text without entity marker restoration.
|
|
151
|
+
# Used internally when raw content with markers is needed (e.g., for DOM construction).
|
|
152
|
+
def raw_inner_text
|
|
149
153
|
adapter.inner_text(@native)
|
|
150
154
|
end
|
|
151
155
|
|
data/lib/moxml/node.rb
CHANGED
|
@@ -97,7 +97,10 @@ module Moxml
|
|
|
97
97
|
serialize_options = default_options.merge(options)
|
|
98
98
|
serialize_options[:no_declaration] = !should_include_declaration?(options)
|
|
99
99
|
|
|
100
|
-
adapter.serialize(@native, serialize_options)
|
|
100
|
+
result = adapter.serialize(@native, serialize_options)
|
|
101
|
+
|
|
102
|
+
# Restore entity markers to named entity references
|
|
103
|
+
adapter.restore_entities(result)
|
|
101
104
|
end
|
|
102
105
|
|
|
103
106
|
def xpath(expression, namespaces = {})
|
data/lib/moxml/text.rb
CHANGED
data/lib/moxml/version.rb
CHANGED
data/lib/moxml/xpath/compiler.rb
CHANGED
|
@@ -388,30 +388,38 @@ module Moxml
|
|
|
388
388
|
document_or_node(input).if_true do
|
|
389
389
|
# Create a proper if-else structure that prevents double traversal
|
|
390
390
|
input.is_a?(doc_class).if_true do
|
|
391
|
-
# DOCUMENT PATH: test root, then traverse
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
391
|
+
# DOCUMENT PATH: test document (self), then root, then traverse
|
|
392
|
+
doc_condition = process(ast, input)
|
|
393
|
+
(if block_given?
|
|
394
|
+
doc_condition.if_true { yield input }
|
|
395
|
+
else
|
|
396
|
+
doc_condition.if_true { input }
|
|
397
|
+
end)
|
|
398
|
+
.followed_by do
|
|
399
|
+
root = unique_literal(:root)
|
|
400
|
+
root.assign(input.root).followed_by do
|
|
401
|
+
root.if_true do
|
|
402
|
+
# Test root
|
|
403
|
+
condition = process(ast, root)
|
|
404
|
+
(if block_given?
|
|
405
|
+
condition.if_true { yield root }
|
|
406
|
+
else
|
|
407
|
+
condition.if_true { root }
|
|
408
|
+
end)
|
|
409
|
+
.followed_by do
|
|
410
|
+
# Traverse descendants FROM root only (not document.each_node)
|
|
411
|
+
root.each_node.add_block(node) do
|
|
412
|
+
desc_condition = process(ast, node)
|
|
413
|
+
if block_given?
|
|
414
|
+
desc_condition.if_true { yield node }
|
|
415
|
+
else
|
|
416
|
+
desc_condition.if_true { node }
|
|
417
|
+
end
|
|
418
|
+
end
|
|
410
419
|
end
|
|
411
|
-
end
|
|
412
420
|
end
|
|
421
|
+
end
|
|
413
422
|
end
|
|
414
|
-
end
|
|
415
423
|
end.else do
|
|
416
424
|
# NON-DOCUMENT PATH: test self, then traverse from self
|
|
417
425
|
condition = process(ast, input)
|
|
@@ -497,6 +505,17 @@ module Moxml
|
|
|
497
505
|
element_or_attribute(input)
|
|
498
506
|
end
|
|
499
507
|
|
|
508
|
+
# Handle node type test (node(), text(), comment(), etc.)
|
|
509
|
+
# node() matches any node — always returns truthy
|
|
510
|
+
def on_node_type(ast, input)
|
|
511
|
+
case ast.value
|
|
512
|
+
when "node"
|
|
513
|
+
# node() matches everything — use a truthy literal
|
|
514
|
+
Ruby::Node.new(:lit, ["true"])
|
|
515
|
+
else element_or_attribute(input)
|
|
516
|
+
end
|
|
517
|
+
end
|
|
518
|
+
|
|
500
519
|
# Match element/attribute names and namespaces
|
|
501
520
|
def match_name_and_namespace(ast, input)
|
|
502
521
|
ns = ast.value[:namespace]
|
data/lib/moxml/xpath/parser.rb
CHANGED
|
@@ -311,10 +311,10 @@ module Moxml
|
|
|
311
311
|
return AST::Node.absolute_path(*steps.children)
|
|
312
312
|
elsif match?(:dslash)
|
|
313
313
|
advance
|
|
314
|
-
# Descendant-or-self: //
|
|
314
|
+
# Descendant-or-self: // (expands to /descendant-or-self::node()/)
|
|
315
315
|
steps = parse_relative_path
|
|
316
316
|
return AST::Node.absolute_path(
|
|
317
|
-
AST::Node.axis("descendant-or-self", AST::Node.
|
|
317
|
+
AST::Node.axis("descendant-or-self", AST::Node.node_type("node")),
|
|
318
318
|
*steps.children,
|
|
319
319
|
)
|
|
320
320
|
end
|
|
@@ -330,9 +330,9 @@ module Moxml
|
|
|
330
330
|
while match?(:slash) && !at_end?
|
|
331
331
|
advance
|
|
332
332
|
if match?(:slash)
|
|
333
|
-
# Double slash within path
|
|
333
|
+
# Double slash within path: expands to descendant-or-self::node()
|
|
334
334
|
advance
|
|
335
|
-
steps << AST::Node.axis("descendant-or-self", AST::Node.
|
|
335
|
+
steps << AST::Node.axis("descendant-or-self", AST::Node.node_type("node"))
|
|
336
336
|
end
|
|
337
337
|
steps << parse_step unless at_end? || match?(:pipe, :rbracket,
|
|
338
338
|
:rparen, :comma)
|
|
@@ -352,9 +352,14 @@ module Moxml
|
|
|
352
352
|
return AST::Node.parent
|
|
353
353
|
elsif match?(:at)
|
|
354
354
|
advance
|
|
355
|
-
# Attribute: @name
|
|
356
|
-
|
|
357
|
-
|
|
355
|
+
# Attribute: @name or @*
|
|
356
|
+
if match?(:star)
|
|
357
|
+
advance
|
|
358
|
+
node_test = AST::Node.wildcard
|
|
359
|
+
else
|
|
360
|
+
name = consume(:name, "Expected attribute name after @")
|
|
361
|
+
node_test = AST::Node.test(nil, name[1])
|
|
362
|
+
end
|
|
358
363
|
step = AST::Node.axis("attribute", node_test)
|
|
359
364
|
return parse_predicates(step)
|
|
360
365
|
end
|
|
@@ -32,12 +32,6 @@ RSpec.shared_examples "Moxml Edge Cases" do
|
|
|
32
32
|
|
|
33
33
|
describe "malformed content handling" do
|
|
34
34
|
it "handles CDATA with nested markers" do
|
|
35
|
-
if context.config.adapter_name == :ox
|
|
36
|
-
pending "Ox doesn't escape the end token"
|
|
37
|
-
end
|
|
38
|
-
if context.config.adapter_name == :headed_ox
|
|
39
|
-
skip "HeadedOx limitation: Ox doesn't escape CDATA end markers. See docs/_pages/headed-ox-limitations.adoc"
|
|
40
|
-
end
|
|
41
35
|
cdata_text = "]]>]]>]]>"
|
|
42
36
|
doc = context.create_document
|
|
43
37
|
cdata = doc.create_cdata(cdata_text)
|