moxml 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.rubocop_todo.yml +49 -133
- data/README.adoc +18 -0
- data/lib/moxml/adapter/base.rb +65 -8
- data/lib/moxml/adapter/headed_ox.rb +2 -1
- data/lib/moxml/adapter/libxml.rb +16 -6
- data/lib/moxml/adapter/nokogiri.rb +13 -7
- data/lib/moxml/adapter/oga.rb +35 -90
- data/lib/moxml/adapter/ox.rb +69 -19
- data/lib/moxml/adapter/rexml.rb +26 -9
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/config.rb +17 -2
- data/lib/moxml/element.rb +12 -8
- data/lib/moxml/node.rb +4 -1
- data/lib/moxml/text.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +40 -21
- data/lib/moxml/xpath/parser.rb +12 -7
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/shared_examples/edge_cases.rb +85 -6
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +124 -0
- data/spec/integration/shared_examples/high_level/document_builder_behavior.rb +8 -6
- data/spec/integration/shared_examples/integration_workflows.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
- data/spec/integration/shared_examples/node_wrappers/node_set_behavior.rb +3 -1
- data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
- data/spec/moxml/builder_spec.rb +16 -1
- data/spec/moxml/entity_preservation_spec.rb +130 -0
- data/spec/moxml/entity_reference_spec.rb +114 -0
- data/spec/moxml/entity_registry_spec.rb +68 -0
- data/spec/moxml/moxml_spec.rb +39 -0
- data/spec/moxml/xpath/axes_spec.rb +0 -1
- data/spec/moxml/xpath/compiler_spec.rb +0 -2
- data/spec/performance/benchmark_spec.rb +1 -1
- metadata +6 -12
- data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
- data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
- data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
- data/TODO.remaining/4-lenient-entities-mode.md +0 -106
- data/TODO.remaining/5-fixture-integrity.md +0 -65
- data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
- data/TODO.remaining/7-headed-ox-limitations.md +0 -95
- data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
- data/TODO.remaining/9-cleanup-hygiene.md +0 -42
- data/TODO.remaining/README.md +0 -54
data/lib/moxml/adapter/oga.rb
CHANGED
|
@@ -8,9 +8,6 @@ module Moxml
|
|
|
8
8
|
module Adapter
|
|
9
9
|
class Oga < Base
|
|
10
10
|
class << self
|
|
11
|
-
# Standard XML entities handled natively by parsers
|
|
12
|
-
STANDARD_XML_ENTITIES = %w[amp lt gt quot apos].freeze
|
|
13
|
-
|
|
14
11
|
def attachments
|
|
15
12
|
@attachments ||= Moxml::NativeAttachment.new
|
|
16
13
|
end
|
|
@@ -24,9 +21,7 @@ module Moxml
|
|
|
24
21
|
end
|
|
25
22
|
|
|
26
23
|
def parse(xml, options = {}, _context = nil)
|
|
27
|
-
|
|
28
|
-
# Oga drops named entity references like &nbsp; during parsing.
|
|
29
|
-
processed_xml = preprocess_named_entities(xml)
|
|
24
|
+
processed_xml = preprocess_entities(xml)
|
|
30
25
|
|
|
31
26
|
native_doc = begin
|
|
32
27
|
::Oga.parse_xml(processed_xml, strict: options[:strict])
|
|
@@ -72,12 +67,12 @@ module Moxml
|
|
|
72
67
|
end
|
|
73
68
|
|
|
74
69
|
def create_native_text(content, _owner_doc = nil)
|
|
75
|
-
::Oga::XML::Text.new(text:
|
|
70
|
+
::Oga::XML::Text.new(text: preprocess_entities(content))
|
|
76
71
|
end
|
|
77
72
|
|
|
78
73
|
def create_native_entity_reference(name)
|
|
79
74
|
text = ::Oga::XML::Text.new
|
|
80
|
-
text.text = "#{ENTITY_MARKER}#{name};"
|
|
75
|
+
text.text = "#{self::ENTITY_MARKER}#{name};"
|
|
81
76
|
attachments.set(text, :entity_name, name)
|
|
82
77
|
text
|
|
83
78
|
end
|
|
@@ -198,11 +193,27 @@ module Moxml
|
|
|
198
193
|
|
|
199
194
|
return all_children unless node.is_a?(::Oga::XML::Node) || node.is_a?(::Oga::XML::Document)
|
|
200
195
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
196
|
+
child_nodes = node.children.to_a
|
|
197
|
+
# Filter out whitespace-only text nodes at document level only.
|
|
198
|
+
# Document-level whitespace (between <?xml?> and <root>) is
|
|
199
|
+
# formatting, not content, and differs across adapters.
|
|
200
|
+
# Whitespace inside elements (e.g. "FigureA.1" spacing) is
|
|
201
|
+
# meaningful and must be preserved.
|
|
202
|
+
if node.is_a?(::Oga::XML::Document)
|
|
203
|
+
child_nodes = child_nodes.reject do |child|
|
|
204
|
+
child.is_a?(::Oga::XML::Text) && child.text.strip.empty?
|
|
205
|
+
end
|
|
205
206
|
end
|
|
207
|
+
all_children + child_nodes
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
def adjacent_to_entity_reference?(node)
|
|
211
|
+
entity_ref?(node.previous) || entity_ref?(node.next)
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
def entity_ref?(node)
|
|
215
|
+
node.is_a?(::Oga::XML::Text) &&
|
|
216
|
+
attachments.get(node, :entity_name)
|
|
206
217
|
end
|
|
207
218
|
|
|
208
219
|
def parent(node)
|
|
@@ -251,7 +262,7 @@ module Moxml
|
|
|
251
262
|
attr = ::Oga::XML::Attribute.new(
|
|
252
263
|
name: name.to_s,
|
|
253
264
|
namespace_name: namespace_name,
|
|
254
|
-
value:
|
|
265
|
+
value: preprocess_entities(value.to_s),
|
|
255
266
|
)
|
|
256
267
|
element.add_attribute(attr)
|
|
257
268
|
end
|
|
@@ -261,7 +272,7 @@ module Moxml
|
|
|
261
272
|
end
|
|
262
273
|
|
|
263
274
|
def get_attribute_value(element, name)
|
|
264
|
-
|
|
275
|
+
element[name.to_s]
|
|
265
276
|
end
|
|
266
277
|
|
|
267
278
|
def remove_attribute(element, name)
|
|
@@ -330,24 +341,23 @@ module Moxml
|
|
|
330
341
|
end
|
|
331
342
|
|
|
332
343
|
def text_content(node)
|
|
333
|
-
|
|
344
|
+
node.text
|
|
334
345
|
end
|
|
335
346
|
|
|
336
347
|
def inner_text(node)
|
|
337
|
-
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
restore_entity_markers(text)
|
|
348
|
+
if node.is_a?(::Oga::XML::Element)
|
|
349
|
+
node.inner_text
|
|
350
|
+
else
|
|
351
|
+
node.text
|
|
352
|
+
end
|
|
343
353
|
end
|
|
344
354
|
|
|
345
355
|
def set_text_content(node, content)
|
|
346
|
-
|
|
356
|
+
processed = preprocess_entities(content)
|
|
347
357
|
if node.is_a?(::Oga::XML::Element)
|
|
348
|
-
node.inner_text =
|
|
358
|
+
node.inner_text = processed
|
|
349
359
|
else
|
|
350
|
-
node.text =
|
|
360
|
+
node.text = processed
|
|
351
361
|
end
|
|
352
362
|
end
|
|
353
363
|
|
|
@@ -439,24 +449,9 @@ module Moxml
|
|
|
439
449
|
end
|
|
440
450
|
|
|
441
451
|
def serialize(node, options = {})
|
|
442
|
-
|
|
443
|
-
# Post-process: convert entity markers back to entity references
|
|
444
|
-
output.gsub(ENTITY_MARKER_REGEX, '&\1;')
|
|
452
|
+
serialize_without_entity_processing(node, options)
|
|
445
453
|
end
|
|
446
454
|
|
|
447
|
-
# Shared entity name pattern (W3C: 2-31 chars, starts with alpha)
|
|
448
|
-
ENTITY_PATTERN = "([a-zA-Z][a-zA-Z0-9]{1,30})"
|
|
449
|
-
|
|
450
|
-
# Marker character for entity preservation through Oga's parser.
|
|
451
|
-
# U+0001 is preserved literally by Oga through parse/serialize cycle.
|
|
452
|
-
ENTITY_MARKER = "\x01"
|
|
453
|
-
|
|
454
|
-
# Regular expression for entity marker post-processing
|
|
455
|
-
ENTITY_MARKER_REGEX = /#{ENTITY_MARKER}#{ENTITY_PATTERN};/
|
|
456
|
-
|
|
457
|
-
# Simple entity-only regex with no nested quantifiers
|
|
458
|
-
ENTITY_REF_REGEX = /&#{ENTITY_PATTERN};/
|
|
459
|
-
|
|
460
455
|
def has_declaration?(native_doc, _wrapper)
|
|
461
456
|
decl = attachments.get(native_doc, :xml_declaration)
|
|
462
457
|
if decl.nil? && !attachments.key?(native_doc, :xml_declaration)
|
|
@@ -469,32 +464,6 @@ module Moxml
|
|
|
469
464
|
|
|
470
465
|
private
|
|
471
466
|
|
|
472
|
-
# Convert &entity; back to \x01entity; for Oga text storage.
|
|
473
|
-
# Used when setting text content programmatically (not from parsing).
|
|
474
|
-
def encode_entity_markers(text)
|
|
475
|
-
return text unless text&.include?("&")
|
|
476
|
-
|
|
477
|
-
text.gsub(ENTITY_REF_REGEX) do
|
|
478
|
-
name = ::Regexp.last_match(1)
|
|
479
|
-
|
|
480
|
-
next ::Regexp.last_match(0) if STANDARD_XML_ENTITIES.include?(name)
|
|
481
|
-
|
|
482
|
-
codepoint = Moxml::EntityRegistry.default.codepoint_for_name(name)
|
|
483
|
-
if codepoint
|
|
484
|
-
"#{ENTITY_MARKER}#{name};"
|
|
485
|
-
else
|
|
486
|
-
::Regexp.last_match(0)
|
|
487
|
-
end
|
|
488
|
-
end
|
|
489
|
-
end
|
|
490
|
-
|
|
491
|
-
# Convert \x01entity; back to &entity; for text accessors.
|
|
492
|
-
def restore_entity_markers(text)
|
|
493
|
-
return text unless text
|
|
494
|
-
|
|
495
|
-
text.gsub(ENTITY_MARKER_REGEX, '&\1;')
|
|
496
|
-
end
|
|
497
|
-
|
|
498
467
|
def serialize_without_entity_processing(node, options = {})
|
|
499
468
|
# Oga's XmlGenerator doesn't support options directly
|
|
500
469
|
# We need to handle declaration options ourselves for Document nodes
|
|
@@ -572,30 +541,6 @@ module Moxml
|
|
|
572
541
|
::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
|
|
573
542
|
end
|
|
574
543
|
end
|
|
575
|
-
|
|
576
|
-
# Pre-process XML to convert named entities to marker format.
|
|
577
|
-
# Oga drops named entity references like &nbsp; but preserves control chars.
|
|
578
|
-
# By converting known named entities to marker form (\x01name;), we can
|
|
579
|
-
# reconstruct them during serialization.
|
|
580
|
-
#
|
|
581
|
-
# @param xml [String, #to_s] The XML string to process
|
|
582
|
-
# @return [String] The XML with known named entities converted to marker form
|
|
583
|
-
def preprocess_named_entities(xml)
|
|
584
|
-
return xml unless xml.is_a?(String)
|
|
585
|
-
|
|
586
|
-
xml.gsub(ENTITY_REF_REGEX) do
|
|
587
|
-
name = Regexp.last_match(1)
|
|
588
|
-
|
|
589
|
-
next Regexp.last_match(0) if STANDARD_XML_ENTITIES.include?(name)
|
|
590
|
-
|
|
591
|
-
codepoint = Moxml::EntityRegistry.default.codepoint_for_name(name)
|
|
592
|
-
if codepoint
|
|
593
|
-
"#{ENTITY_MARKER}#{name};"
|
|
594
|
-
else
|
|
595
|
-
Regexp.last_match(0)
|
|
596
|
-
end
|
|
597
|
-
end
|
|
598
|
-
end
|
|
599
544
|
end
|
|
600
545
|
end
|
|
601
546
|
|
data/lib/moxml/adapter/ox.rb
CHANGED
|
@@ -20,8 +20,9 @@ module Moxml
|
|
|
20
20
|
end
|
|
21
21
|
|
|
22
22
|
def parse(xml, options = {}, _context = nil)
|
|
23
|
+
processed_xml = preprocess_entities(xml)
|
|
23
24
|
native_doc = begin
|
|
24
|
-
result = ::Ox.parse(
|
|
25
|
+
result = ::Ox.parse(processed_xml)
|
|
25
26
|
|
|
26
27
|
# result can be either Document or Element
|
|
27
28
|
if result.is_a?(::Ox::Document)
|
|
@@ -543,17 +544,18 @@ module Moxml
|
|
|
543
544
|
end
|
|
544
545
|
|
|
545
546
|
def namespace_definitions(node)
|
|
546
|
-
|
|
547
|
-
next unless n.is_a?(::Ox::Element) && n.attributes
|
|
547
|
+
return [] unless node.is_a?(::Ox::Element) && node.attributes
|
|
548
548
|
|
|
549
|
-
|
|
550
|
-
|
|
549
|
+
namespaces = {}
|
|
550
|
+
node.attributes.each do |name, value|
|
|
551
|
+
name_s = name.to_s
|
|
552
|
+
next unless name_s == "xmlns" || name_s.start_with?("xmlns:")
|
|
551
553
|
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
554
|
+
namespaces[name] = ::Moxml::Adapter::CustomizedOx::Namespace.new(
|
|
555
|
+
name, value, node
|
|
556
|
+
)
|
|
557
|
+
end
|
|
558
|
+
namespaces.values
|
|
557
559
|
end
|
|
558
560
|
|
|
559
561
|
# Doctype accessor methods
|
|
@@ -620,17 +622,44 @@ module Moxml
|
|
|
620
622
|
end
|
|
621
623
|
|
|
622
624
|
def serialize(node, options = {})
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
625
|
+
needs_custom = needs_custom_serialize?(node)
|
|
626
|
+
|
|
627
|
+
unless needs_custom
|
|
626
628
|
return serialize_standard(node, options)
|
|
627
629
|
end
|
|
628
630
|
|
|
629
|
-
|
|
630
|
-
|
|
631
|
-
|
|
632
|
-
|
|
631
|
+
serialize_custom(node, options)
|
|
632
|
+
end
|
|
633
|
+
|
|
634
|
+
def needs_custom_serialize?(node)
|
|
635
|
+
# Fast path: single CData with ]]>
|
|
636
|
+
return true if node.is_a?(::Ox::CData) && node.value&.include?("]]>")
|
|
637
|
+
|
|
638
|
+
# Only documents/elements can contain entity refs or CDATA issues
|
|
639
|
+
return false unless node.is_a?(::Ox::Document) || node.is_a?(::Ox::Element)
|
|
640
|
+
|
|
641
|
+
# Check cached flags on documents (most common case)
|
|
642
|
+
if node.is_a?(::Ox::Document)
|
|
643
|
+
return true if attachments.get(node, :has_entity_refs)
|
|
644
|
+
return true if attachments.get(node, :has_cdata_end_markers)
|
|
645
|
+
return false if attachments.key?(node, :has_entity_refs) &&
|
|
646
|
+
attachments.key?(node, :has_cdata_end_markers)
|
|
633
647
|
end
|
|
648
|
+
|
|
649
|
+
# Only scan tree on first call — short-circuit on first hit
|
|
650
|
+
has_er = tree_has_entity_references?(node)
|
|
651
|
+
if has_er
|
|
652
|
+
attachments.set(node, :has_entity_refs, true) if node.is_a?(::Ox::Document)
|
|
653
|
+
return true
|
|
654
|
+
end
|
|
655
|
+
|
|
656
|
+
has_cdata = tree_has_cdata_end_markers?(node)
|
|
657
|
+
if node.is_a?(::Ox::Document)
|
|
658
|
+
attachments.set(node, :has_entity_refs, false)
|
|
659
|
+
attachments.set(node, :has_cdata_end_markers, has_cdata)
|
|
660
|
+
end
|
|
661
|
+
|
|
662
|
+
has_cdata
|
|
634
663
|
end
|
|
635
664
|
|
|
636
665
|
def has_declaration?(native_doc, _wrapper)
|
|
@@ -665,7 +694,9 @@ module Moxml
|
|
|
665
694
|
encoding: options[:encoding],
|
|
666
695
|
no_empty: options[:expand_empty],
|
|
667
696
|
}
|
|
668
|
-
output + ::Ox.dump(node, ox_options)
|
|
697
|
+
result = output + ::Ox.dump(node, ox_options)
|
|
698
|
+
# Fix CDATA ]]> end markers that Ox doesn't escape
|
|
699
|
+
result
|
|
669
700
|
end
|
|
670
701
|
|
|
671
702
|
def tree_has_entity_references?(node)
|
|
@@ -685,6 +716,19 @@ module Moxml
|
|
|
685
716
|
end
|
|
686
717
|
end
|
|
687
718
|
|
|
719
|
+
def tree_has_cdata_end_markers?(node)
|
|
720
|
+
case node
|
|
721
|
+
when ::Ox::CData
|
|
722
|
+
node.value&.include?("]]>") || false
|
|
723
|
+
when ::Ox::Element
|
|
724
|
+
node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
|
|
725
|
+
when ::Ox::Document
|
|
726
|
+
node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
|
|
727
|
+
else
|
|
728
|
+
false
|
|
729
|
+
end
|
|
730
|
+
end
|
|
731
|
+
|
|
688
732
|
def serialize_custom(node, options = {})
|
|
689
733
|
output = +""
|
|
690
734
|
if node.is_a?(::Ox::Document)
|
|
@@ -717,7 +761,7 @@ module Moxml
|
|
|
717
761
|
when String then escape_xml_text(node)
|
|
718
762
|
when ::Moxml::Adapter::CustomizedOx::Text then escape_xml_text(node.value)
|
|
719
763
|
when ::Moxml::Adapter::CustomizedOx::EntityReference then "&#{node.name};"
|
|
720
|
-
when ::Ox::CData then
|
|
764
|
+
when ::Ox::CData then serialize_cdata(node.value)
|
|
721
765
|
when ::Ox::Comment then "<!--#{node.value}-->"
|
|
722
766
|
when ::Ox::Instruct then "<?#{node.target} #{node.value || ''}?>"
|
|
723
767
|
when ::Ox::DocType then "<!DOCTYPE #{node.value}>"
|
|
@@ -744,6 +788,11 @@ module Moxml
|
|
|
744
788
|
output
|
|
745
789
|
end
|
|
746
790
|
|
|
791
|
+
def serialize_cdata(content)
|
|
792
|
+
escaped = content.gsub("]]>", "]]]]><![CDATA[>")
|
|
793
|
+
"<![CDATA[#{escaped}]]>"
|
|
794
|
+
end
|
|
795
|
+
|
|
747
796
|
def escape_xml_text(text)
|
|
748
797
|
text.to_s.gsub(/[<>&]/) do |match|
|
|
749
798
|
case match
|
|
@@ -765,6 +814,7 @@ module Moxml
|
|
|
765
814
|
end
|
|
766
815
|
end
|
|
767
816
|
|
|
817
|
+
|
|
768
818
|
# Translate a subset of XPath to Ox locate() syntax
|
|
769
819
|
# Supports: //element, /path/to/element, .//element, element[@attr]
|
|
770
820
|
# Note: Ox locate() doesn't support namespace prefixes in the path
|
data/lib/moxml/adapter/rexml.rb
CHANGED
|
@@ -15,6 +15,8 @@ module Moxml
|
|
|
15
15
|
end
|
|
16
16
|
|
|
17
17
|
def parse(xml, options = {}, _context = nil)
|
|
18
|
+
xml = "" if xml.nil?
|
|
19
|
+
|
|
18
20
|
# Handle frozen strings by creating a mutable copy
|
|
19
21
|
processed_xml = if xml.frozen?
|
|
20
22
|
xml.dup.force_encoding("UTF-8").encode("UTF-8")
|
|
@@ -22,6 +24,9 @@ module Moxml
|
|
|
22
24
|
xml.force_encoding("UTF-8").encode("UTF-8")
|
|
23
25
|
end
|
|
24
26
|
|
|
27
|
+
# Preprocess entities to avoid double-escaping on output
|
|
28
|
+
processed_xml = preprocess_entities(processed_xml)
|
|
29
|
+
|
|
25
30
|
native_doc = begin
|
|
26
31
|
::REXML::Document.new(processed_xml)
|
|
27
32
|
rescue ::REXML::ParseException => e
|
|
@@ -172,12 +177,8 @@ module Moxml
|
|
|
172
177
|
def children(node)
|
|
173
178
|
return [] unless node.is_a?(::REXML::Parent)
|
|
174
179
|
|
|
175
|
-
#
|
|
176
|
-
result = node.children.
|
|
177
|
-
child.is_a?(::REXML::Text) &&
|
|
178
|
-
child.to_s.strip.empty? &&
|
|
179
|
-
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
180
|
-
end
|
|
180
|
+
# Return all children preserving whitespace text nodes
|
|
181
|
+
result = node.children.dup
|
|
181
182
|
|
|
182
183
|
# Include any EntityReference wrappers stored alongside native children
|
|
183
184
|
entity_refs = attachments.get(node, :entity_refs)
|
|
@@ -412,7 +413,7 @@ module Moxml
|
|
|
412
413
|
when ::REXML::Element
|
|
413
414
|
# Extract text recursively from all children to match other adapters
|
|
414
415
|
extract_text_recursively(node)
|
|
415
|
-
end
|
|
416
|
+
end.to_s
|
|
416
417
|
end
|
|
417
418
|
|
|
418
419
|
def extract_text_recursively(element)
|
|
@@ -491,9 +492,25 @@ module Moxml
|
|
|
491
492
|
end
|
|
492
493
|
|
|
493
494
|
def namespace_definitions(node)
|
|
494
|
-
|
|
495
|
-
|
|
495
|
+
return [] unless node.is_a?(::REXML::Element)
|
|
496
|
+
|
|
497
|
+
result = []
|
|
498
|
+
node.attributes.each_attribute do |attr|
|
|
499
|
+
next unless attr.prefix == "xmlns" || (attr.name == "xmlns" && attr.prefix.to_s.empty?)
|
|
500
|
+
|
|
501
|
+
result << attr
|
|
502
|
+
end
|
|
503
|
+
result
|
|
504
|
+
end
|
|
505
|
+
|
|
506
|
+
def in_scope_namespaces(element)
|
|
507
|
+
namespaces = {}
|
|
508
|
+
element.namespaces.each do |prefix, uri|
|
|
509
|
+
key = prefix.to_s.empty? ? "xmlns" : prefix.to_s
|
|
510
|
+
ns = ::REXML::Attribute.new(key, uri, element)
|
|
511
|
+
namespaces[prefix] = ns
|
|
496
512
|
end
|
|
513
|
+
namespaces.values
|
|
497
514
|
end
|
|
498
515
|
|
|
499
516
|
# Doctype accessor methods
|
data/lib/moxml/attribute.rb
CHANGED
data/lib/moxml/config.rb
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
module Moxml
|
|
4
4
|
class Config
|
|
5
5
|
VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
|
|
6
|
-
DEFAULT_ADAPTER =
|
|
6
|
+
DEFAULT_ADAPTER = :nokogiri
|
|
7
|
+
OPAL_DEFAULT_ADAPTER = :oga
|
|
7
8
|
|
|
8
9
|
# Entity loading modes:
|
|
9
10
|
# - :required - Must load entities, raise error if unavailable (default)
|
|
@@ -20,7 +21,21 @@ module Moxml
|
|
|
20
21
|
end
|
|
21
22
|
|
|
22
23
|
def default_adapter
|
|
23
|
-
@default_adapter ||=
|
|
24
|
+
@default_adapter ||= runtime_default_adapter
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def runtime_default_adapter
|
|
28
|
+
return OPAL_DEFAULT_ADAPTER if RUBY_ENGINE == "opal"
|
|
29
|
+
|
|
30
|
+
detect_loaded_adapter || DEFAULT_ADAPTER
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def detect_loaded_adapter
|
|
34
|
+
return :nokogiri if Object.const_defined?(:Nokogiri)
|
|
35
|
+
return :ox if Object.const_defined?(:Ox)
|
|
36
|
+
return :oga if Object.const_defined?(:Oga)
|
|
37
|
+
|
|
38
|
+
nil
|
|
24
39
|
end
|
|
25
40
|
end
|
|
26
41
|
|
data/lib/moxml/element.rb
CHANGED
|
@@ -46,7 +46,8 @@ module Moxml
|
|
|
46
46
|
end
|
|
47
47
|
|
|
48
48
|
def [](name)
|
|
49
|
-
adapter.get_attribute_value(@native, name)
|
|
49
|
+
val = adapter.get_attribute_value(@native, name)
|
|
50
|
+
val ? adapter.restore_entities(val) : val
|
|
50
51
|
end
|
|
51
52
|
|
|
52
53
|
def attribute(name)
|
|
@@ -54,12 +55,7 @@ module Moxml
|
|
|
54
55
|
native_attr && Attribute.new(native_attr, context)
|
|
55
56
|
end
|
|
56
57
|
|
|
57
|
-
#
|
|
58
|
-
def get(attr_name)
|
|
59
|
-
attribute(attr_name)
|
|
60
|
-
end
|
|
61
|
-
|
|
62
|
-
# Alias for getting attribute value (used by XPath engine)
|
|
58
|
+
# Returns attribute value by name (used by XPath engine)
|
|
63
59
|
def get(attr_name)
|
|
64
60
|
self[attr_name]
|
|
65
61
|
end
|
|
@@ -137,7 +133,8 @@ module Moxml
|
|
|
137
133
|
end
|
|
138
134
|
|
|
139
135
|
def text
|
|
140
|
-
adapter.text_content(@native)
|
|
136
|
+
val = adapter.text_content(@native)
|
|
137
|
+
adapter.restore_entities(val)
|
|
141
138
|
end
|
|
142
139
|
|
|
143
140
|
def text=(content)
|
|
@@ -146,6 +143,13 @@ module Moxml
|
|
|
146
143
|
end
|
|
147
144
|
|
|
148
145
|
def inner_text
|
|
146
|
+
text = raw_inner_text
|
|
147
|
+
adapter.restore_entities(text)
|
|
148
|
+
end
|
|
149
|
+
|
|
150
|
+
# Returns inner text without entity marker restoration.
|
|
151
|
+
# Used internally when raw content with markers is needed (e.g., for DOM construction).
|
|
152
|
+
def raw_inner_text
|
|
149
153
|
adapter.inner_text(@native)
|
|
150
154
|
end
|
|
151
155
|
|
data/lib/moxml/node.rb
CHANGED
|
@@ -97,7 +97,10 @@ module Moxml
|
|
|
97
97
|
serialize_options = default_options.merge(options)
|
|
98
98
|
serialize_options[:no_declaration] = !should_include_declaration?(options)
|
|
99
99
|
|
|
100
|
-
adapter.serialize(@native, serialize_options)
|
|
100
|
+
result = adapter.serialize(@native, serialize_options)
|
|
101
|
+
|
|
102
|
+
# Restore entity markers to named entity references
|
|
103
|
+
adapter.restore_entities(result)
|
|
101
104
|
end
|
|
102
105
|
|
|
103
106
|
def xpath(expression, namespaces = {})
|
data/lib/moxml/text.rb
CHANGED
data/lib/moxml/version.rb
CHANGED
data/lib/moxml/xpath/compiler.rb
CHANGED
|
@@ -388,30 +388,38 @@ module Moxml
|
|
|
388
388
|
document_or_node(input).if_true do
|
|
389
389
|
# Create a proper if-else structure that prevents double traversal
|
|
390
390
|
input.is_a?(doc_class).if_true do
|
|
391
|
-
# DOCUMENT PATH: test root, then traverse
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
391
|
+
# DOCUMENT PATH: test document (self), then root, then traverse
|
|
392
|
+
doc_condition = process(ast, input)
|
|
393
|
+
(if block_given?
|
|
394
|
+
doc_condition.if_true { yield input }
|
|
395
|
+
else
|
|
396
|
+
doc_condition.if_true { input }
|
|
397
|
+
end)
|
|
398
|
+
.followed_by do
|
|
399
|
+
root = unique_literal(:root)
|
|
400
|
+
root.assign(input.root).followed_by do
|
|
401
|
+
root.if_true do
|
|
402
|
+
# Test root
|
|
403
|
+
condition = process(ast, root)
|
|
404
|
+
(if block_given?
|
|
405
|
+
condition.if_true { yield root }
|
|
406
|
+
else
|
|
407
|
+
condition.if_true { root }
|
|
408
|
+
end)
|
|
409
|
+
.followed_by do
|
|
410
|
+
# Traverse descendants FROM root only (not document.each_node)
|
|
411
|
+
root.each_node.add_block(node) do
|
|
412
|
+
desc_condition = process(ast, node)
|
|
413
|
+
if block_given?
|
|
414
|
+
desc_condition.if_true { yield node }
|
|
415
|
+
else
|
|
416
|
+
desc_condition.if_true { node }
|
|
417
|
+
end
|
|
418
|
+
end
|
|
410
419
|
end
|
|
411
|
-
end
|
|
412
420
|
end
|
|
421
|
+
end
|
|
413
422
|
end
|
|
414
|
-
end
|
|
415
423
|
end.else do
|
|
416
424
|
# NON-DOCUMENT PATH: test self, then traverse from self
|
|
417
425
|
condition = process(ast, input)
|
|
@@ -497,6 +505,17 @@ module Moxml
|
|
|
497
505
|
element_or_attribute(input)
|
|
498
506
|
end
|
|
499
507
|
|
|
508
|
+
# Handle node type test (node(), text(), comment(), etc.)
|
|
509
|
+
# node() matches any node — always returns truthy
|
|
510
|
+
def on_node_type(ast, input)
|
|
511
|
+
case ast.value
|
|
512
|
+
when "node"
|
|
513
|
+
# node() matches everything — use a truthy literal
|
|
514
|
+
Ruby::Node.new(:lit, ["true"])
|
|
515
|
+
else element_or_attribute(input)
|
|
516
|
+
end
|
|
517
|
+
end
|
|
518
|
+
|
|
500
519
|
# Match element/attribute names and namespaces
|
|
501
520
|
def match_name_and_namespace(ast, input)
|
|
502
521
|
ns = ast.value[:namespace]
|
data/lib/moxml/xpath/parser.rb
CHANGED
|
@@ -311,10 +311,10 @@ module Moxml
|
|
|
311
311
|
return AST::Node.absolute_path(*steps.children)
|
|
312
312
|
elsif match?(:dslash)
|
|
313
313
|
advance
|
|
314
|
-
# Descendant-or-self: //
|
|
314
|
+
# Descendant-or-self: // (expands to /descendant-or-self::node()/)
|
|
315
315
|
steps = parse_relative_path
|
|
316
316
|
return AST::Node.absolute_path(
|
|
317
|
-
AST::Node.axis("descendant-or-self", AST::Node.
|
|
317
|
+
AST::Node.axis("descendant-or-self", AST::Node.node_type("node")),
|
|
318
318
|
*steps.children,
|
|
319
319
|
)
|
|
320
320
|
end
|
|
@@ -330,9 +330,9 @@ module Moxml
|
|
|
330
330
|
while match?(:slash) && !at_end?
|
|
331
331
|
advance
|
|
332
332
|
if match?(:slash)
|
|
333
|
-
# Double slash within path
|
|
333
|
+
# Double slash within path: expands to descendant-or-self::node()
|
|
334
334
|
advance
|
|
335
|
-
steps << AST::Node.axis("descendant-or-self", AST::Node.
|
|
335
|
+
steps << AST::Node.axis("descendant-or-self", AST::Node.node_type("node"))
|
|
336
336
|
end
|
|
337
337
|
steps << parse_step unless at_end? || match?(:pipe, :rbracket,
|
|
338
338
|
:rparen, :comma)
|
|
@@ -352,9 +352,14 @@ module Moxml
|
|
|
352
352
|
return AST::Node.parent
|
|
353
353
|
elsif match?(:at)
|
|
354
354
|
advance
|
|
355
|
-
# Attribute: @name
|
|
356
|
-
|
|
357
|
-
|
|
355
|
+
# Attribute: @name or @*
|
|
356
|
+
if match?(:star)
|
|
357
|
+
advance
|
|
358
|
+
node_test = AST::Node.wildcard
|
|
359
|
+
else
|
|
360
|
+
name = consume(:name, "Expected attribute name after @")
|
|
361
|
+
node_test = AST::Node.test(nil, name[1])
|
|
362
|
+
end
|
|
358
363
|
step = AST::Node.axis("attribute", node_test)
|
|
359
364
|
return parse_predicates(step)
|
|
360
365
|
end
|