moxml 0.1.20 → 0.1.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/opal.yml +37 -0
- data/.rspec-opal +5 -0
- data/Gemfile +6 -0
- data/Rakefile +67 -0
- data/lib/compat/opal/rexml/namespace.rb +56 -0
- data/lib/compat/opal/rexml/parsers/baseparser.rb +952 -0
- data/lib/compat/opal/rexml/source.rb +213 -0
- data/lib/compat/opal/rexml/text.rb +418 -0
- data/lib/compat/opal/rexml/xmltokens.rb +45 -0
- data/lib/compat/opal/rexml_compat.rb +76 -0
- data/lib/moxml/adapter/base.rb +5 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +3 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +6 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -10
- data/lib/moxml/adapter/headed_ox.rb +2 -6
- data/lib/moxml/adapter/libxml/entity_ref_registry.rb +105 -0
- data/lib/moxml/adapter/libxml/entity_restorer.rb +92 -0
- data/lib/moxml/adapter/libxml.rb +386 -382
- data/lib/moxml/adapter/nokogiri.rb +7 -18
- data/lib/moxml/adapter/oga.rb +4 -22
- data/lib/moxml/adapter/ox.rb +8 -23
- data/lib/moxml/adapter/rexml.rb +29 -33
- data/lib/moxml/adapter.rb +38 -8
- data/lib/moxml/config.rb +1 -1
- data/lib/moxml/entity_registry.rb +36 -31
- data/lib/moxml/entity_registry_opal_data.rb +2137 -0
- data/lib/moxml/node.rb +19 -26
- data/lib/moxml/sax/namespace_splitter.rb +54 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +9 -1
- data/spec/consistency/adapter_parity_spec.rb +1 -1
- data/spec/integration/all_adapters_spec.rb +1 -1
- data/spec/integration/w3c_namespace_spec.rb +1 -1
- data/spec/moxml/adapter/libxml_internals_spec.rb +167 -0
- data/spec/moxml/adapter/ox_spec.rb +8 -0
- data/spec/moxml/adapter/platform_spec.rb +69 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +0 -6
- data/spec/moxml/entity_registry_spec.rb +10 -0
- data/spec/moxml/native_attachment/opal_spec.rb +39 -2
- data/spec/moxml/node_type_map_spec.rb +43 -0
- data/spec/moxml/opal_rexml_adapter_spec.rb +14 -0
- data/spec/moxml/opal_smoke_spec.rb +61 -0
- data/spec/moxml/sax/namespace_splitter_spec.rb +67 -0
- data/spec/moxml/text_spec.rb +1 -1
- data/spec/performance/benchmark_spec.rb +1 -1
- data/spec/spec_helper.rb +32 -13
- data/spec/support/opal.rb +16 -0
- metadata +21 -2
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
# backtick_javascript: true
|
|
3
|
+
|
|
4
|
+
require 'corelib/array/pack'
|
|
5
|
+
|
|
6
|
+
unless defined?(StringScanner::Version)
|
|
7
|
+
class StringScanner
|
|
8
|
+
Version = "3.0.8"
|
|
9
|
+
end
|
|
10
|
+
end
|
|
11
|
+
|
|
12
|
+
unless String.method_defined?(:force_encoding)
|
|
13
|
+
class String
|
|
14
|
+
def force_encoding(*)
|
|
15
|
+
self
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
unless defined?(::Encoding)
|
|
21
|
+
module ::Encoding
|
|
22
|
+
UTF_8 = "UTF-8"
|
|
23
|
+
ASCII_8BIT = "ASCII-8BIT"
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
unless String.method_defined?(:encode)
|
|
28
|
+
class String
|
|
29
|
+
def encode(*)
|
|
30
|
+
self
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Opal defines mutable String methods as raising NotImplementedError.
|
|
36
|
+
# Override with functional equivalents that return new strings.
|
|
37
|
+
class String
|
|
38
|
+
def <<(str)
|
|
39
|
+
%x{return self + #{str}.to_s}
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
def chomp!(sep = nil)
|
|
43
|
+
%x{
|
|
44
|
+
var r = #{chomp(sep)};
|
|
45
|
+
return r === self ? nil : r;
|
|
46
|
+
}
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
def gsub!(pattern, replacement, &block)
|
|
50
|
+
%x{
|
|
51
|
+
var r = #{gsub(pattern, replacement, &block)};
|
|
52
|
+
return r === self ? nil : r;
|
|
53
|
+
}
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
def squeeze!(*sets)
|
|
57
|
+
%x{
|
|
58
|
+
var r = #{squeeze(*sets)};
|
|
59
|
+
return r === self ? nil : r;
|
|
60
|
+
}
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def strip!
|
|
64
|
+
%x{
|
|
65
|
+
var r = #{strip};
|
|
66
|
+
return r === self ? nil : r;
|
|
67
|
+
}
|
|
68
|
+
end
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
class StringIO
|
|
72
|
+
def <<(str)
|
|
73
|
+
write(str)
|
|
74
|
+
self
|
|
75
|
+
end
|
|
76
|
+
end
|
data/lib/moxml/adapter/base.rb
CHANGED
|
@@ -46,6 +46,11 @@ module Moxml
|
|
|
46
46
|
else
|
|
47
47
|
xml.encode("UTF-8")
|
|
48
48
|
end
|
|
49
|
+
# Fast path: no `&` means no entity references to mark — skip
|
|
50
|
+
# the regex scan and string allocation entirely. The vast
|
|
51
|
+
# majority of XML payloads contain no entity references.
|
|
52
|
+
return str unless str.include?("&")
|
|
53
|
+
|
|
49
54
|
str.gsub(ENTITY_NAME_RE) do |match|
|
|
50
55
|
STANDARD_ENTITIES.include?(::Regexp.last_match(1)) ? match : "#{ENTITY_MARKER}#{::Regexp.last_match(1)};"
|
|
51
56
|
end
|
|
@@ -8,6 +8,9 @@ module Moxml
|
|
|
8
8
|
# This wrapper hides LibXML's strict document ownership model,
|
|
9
9
|
# allowing nodes to be moved between documents transparently.
|
|
10
10
|
# Similar pattern to Ox adapter's customized classes.
|
|
11
|
+
#
|
|
12
|
+
# The Libxml adapter owns wrapper type mapping in one place so the
|
|
13
|
+
# wrapper classes do not duplicate node-type knowledge.
|
|
11
14
|
class Node
|
|
12
15
|
attr_reader :native
|
|
13
16
|
|
|
@@ -19,7 +19,12 @@ module Moxml
|
|
|
19
19
|
# LibXML's .content already contains escaped text, but it over-escapes
|
|
20
20
|
# quotes which don't need escaping in text nodes (only in attributes)
|
|
21
21
|
def to_xml
|
|
22
|
-
@native.content
|
|
22
|
+
content = @native.content
|
|
23
|
+
# Skip the gsub allocation entirely when there's nothing to undo —
|
|
24
|
+
# the common case for parsed text without literal quotes.
|
|
25
|
+
return content unless content.include?("&quot;")
|
|
26
|
+
|
|
27
|
+
content.gsub("&quot;", '"')
|
|
23
28
|
end
|
|
24
29
|
end
|
|
25
30
|
end
|
|
@@ -27,8 +27,11 @@ module Moxml
|
|
|
27
27
|
end
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
+
def indented?
|
|
31
|
+
!@indentation.empty?
|
|
32
|
+
end
|
|
33
|
+
|
|
30
34
|
def write_element(node, output)
|
|
31
|
-
# output << ' ' * @level
|
|
32
35
|
output << "<#{node.expanded_name}"
|
|
33
36
|
write_attributes(node, output)
|
|
34
37
|
|
|
@@ -45,18 +48,16 @@ module Moxml
|
|
|
45
48
|
|
|
46
49
|
output << ">"
|
|
47
50
|
|
|
48
|
-
# Check for mixed content
|
|
49
51
|
has_text = node.children.any? { |c| c.is_a?(::REXML::Text) && !c.to_s.strip.empty? }
|
|
50
52
|
has_elements = node.children.any?(::REXML::Element)
|
|
51
|
-
|
|
53
|
+
indent_children = indented? && has_elements && !has_text
|
|
52
54
|
|
|
53
55
|
# Handle children based on content type
|
|
54
56
|
all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
|
|
55
57
|
unless all_children_empty
|
|
56
|
-
@level += @indentation.length
|
|
58
|
+
@level += @indentation.length if indent_children
|
|
57
59
|
|
|
58
60
|
if entity_refs && !entity_refs.empty? && child_sequence
|
|
59
|
-
# Interleave native children with entity refs using tracked sequence
|
|
60
61
|
eref_idx = 0
|
|
61
62
|
native_idx = 0
|
|
62
63
|
child_sequence.each do |type|
|
|
@@ -69,10 +70,12 @@ module Moxml
|
|
|
69
70
|
child.to_s.strip.empty? &&
|
|
70
71
|
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
71
72
|
|
|
73
|
+
output << "\n" << (' ' * @level) if indent_children
|
|
72
74
|
write(child, output)
|
|
73
75
|
end
|
|
74
76
|
when :eref
|
|
75
77
|
if eref_idx < entity_refs.size
|
|
78
|
+
output << "\n" << (' ' * @level) if indent_children
|
|
76
79
|
write(entity_refs[eref_idx], output)
|
|
77
80
|
eref_idx += 1
|
|
78
81
|
end
|
|
@@ -80,24 +83,22 @@ module Moxml
|
|
|
80
83
|
end
|
|
81
84
|
else
|
|
82
85
|
node.children.each_with_index do |child, _index|
|
|
83
|
-
# Skip insignificant whitespace
|
|
84
86
|
next if child.is_a?(::REXML::Text) &&
|
|
85
87
|
child.to_s.strip.empty? &&
|
|
86
88
|
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
87
89
|
|
|
90
|
+
output << "\n" << (' ' * @level) if indent_children
|
|
88
91
|
write(child, output)
|
|
89
92
|
end
|
|
90
93
|
end
|
|
91
94
|
|
|
92
|
-
|
|
93
|
-
unless mixed
|
|
95
|
+
if indent_children
|
|
94
96
|
@level -= @indentation.length
|
|
95
|
-
|
|
97
|
+
output << "\n" << (' ' * @level)
|
|
96
98
|
end
|
|
97
99
|
end
|
|
98
100
|
|
|
99
101
|
output << "</#{node.expanded_name}>"
|
|
100
|
-
# output << "\n" unless mixed
|
|
101
102
|
end
|
|
102
103
|
|
|
103
104
|
def write_text(node, output)
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
return if RUBY_ENGINE == "opal"
|
|
4
|
+
|
|
3
5
|
require_relative "ox"
|
|
4
6
|
require_relative "../xpath"
|
|
5
7
|
# Force load XPath modules (autoload doesn't work well with relative requires in examples)
|
|
@@ -66,14 +68,8 @@ module Moxml
|
|
|
66
68
|
# @param [Hash] namespaces Namespace prefix mappings
|
|
67
69
|
# @return [Array, Object] Native node array or scalar value
|
|
68
70
|
def xpath(node, expression, namespaces = {})
|
|
69
|
-
# If we receive a native node, wrap it first
|
|
70
|
-
# Document#xpath passes @native, but our compiled XPath needs Moxml nodes
|
|
71
71
|
unless node.is_a?(Moxml::Node)
|
|
72
|
-
# Determine the context from the node if possible
|
|
73
|
-
# For now, create a basic context for wrapped nodes
|
|
74
72
|
ctx = Context.new(:headed_ox)
|
|
75
|
-
|
|
76
|
-
# Wrap the native node - don't rebuild the whole document
|
|
77
73
|
node = Moxml::Node.wrap(node, ctx)
|
|
78
74
|
end
|
|
79
75
|
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
class Libxml < Base
|
|
6
|
+
# Tracks entity-reference insertions that cannot live in LibXML's native
|
|
7
|
+
# node tree, plus the child sequence needed to serialize them in order.
|
|
8
|
+
class EntityRefRegistry
|
|
9
|
+
ENTITY_REFS_KEY = :_entity_ref_pairs
|
|
10
|
+
CHILD_SEQUENCE_KEY = :_child_seq_pairs
|
|
11
|
+
NON_WHITESPACE_RE = /\S/
|
|
12
|
+
private_constant :ENTITY_REFS_KEY, :CHILD_SEQUENCE_KEY, :NON_WHITESPACE_RE
|
|
13
|
+
|
|
14
|
+
def initialize(attachments, doc)
|
|
15
|
+
@attachments = attachments
|
|
16
|
+
@doc = doc
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def active?
|
|
20
|
+
@doc ? @attachments.key?(@doc, ENTITY_REFS_KEY) : false
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def register(element, ref)
|
|
24
|
+
return unless @doc && element
|
|
25
|
+
|
|
26
|
+
path = path_for(element)
|
|
27
|
+
|
|
28
|
+
refs_by_path = @attachments.get(@doc, ENTITY_REFS_KEY) || {}
|
|
29
|
+
(refs_by_path[path] ||= []) << ref
|
|
30
|
+
@attachments.set(@doc, ENTITY_REFS_KEY, refs_by_path)
|
|
31
|
+
|
|
32
|
+
seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY) || {}
|
|
33
|
+
existing = seq_by_path[path]
|
|
34
|
+
if existing
|
|
35
|
+
existing << :eref
|
|
36
|
+
else
|
|
37
|
+
seq_by_path[path] = Array.new(count_native_children(element), :native)
|
|
38
|
+
seq_by_path[path] << :eref
|
|
39
|
+
@attachments.set(@doc, CHILD_SEQUENCE_KEY, seq_by_path)
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
def append_native(element)
|
|
44
|
+
return unless @doc && element
|
|
45
|
+
|
|
46
|
+
seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY)
|
|
47
|
+
return unless seq_by_path
|
|
48
|
+
|
|
49
|
+
seq = seq_by_path[path_for(element)]
|
|
50
|
+
return unless seq
|
|
51
|
+
|
|
52
|
+
seq << :native
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def refs_for(element)
|
|
56
|
+
return nil unless @doc && element
|
|
57
|
+
|
|
58
|
+
refs_by_path = @attachments.get(@doc, ENTITY_REFS_KEY)
|
|
59
|
+
refs_by_path && refs_by_path[path_for(element)]
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
def sequence_for(element)
|
|
63
|
+
return nil unless @doc && element
|
|
64
|
+
|
|
65
|
+
seq_by_path = @attachments.get(@doc, CHILD_SEQUENCE_KEY)
|
|
66
|
+
seq_by_path && seq_by_path[path_for(element)]
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
def serialization_for(element)
|
|
70
|
+
refs = refs_for(element)
|
|
71
|
+
return [nil, nil] unless refs && !refs.empty?
|
|
72
|
+
|
|
73
|
+
seq = sequence_for(element)
|
|
74
|
+
return [nil, nil] unless seq
|
|
75
|
+
|
|
76
|
+
[refs, seq]
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
private
|
|
80
|
+
|
|
81
|
+
def path_for(element)
|
|
82
|
+
element.path
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def count_native_children(element)
|
|
86
|
+
return 0 unless element.is_a?(::LibXML::XML::Node) && element.children?
|
|
87
|
+
|
|
88
|
+
count = 0
|
|
89
|
+
element.each_child do |child|
|
|
90
|
+
count += 1 unless blank_text_node?(child)
|
|
91
|
+
end
|
|
92
|
+
count
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
def blank_text_node?(child)
|
|
96
|
+
child.text? && blank_content?(child.content)
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
def blank_content?(content)
|
|
100
|
+
content.nil? || !content.match?(NON_WHITESPACE_RE)
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
end
|
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Moxml
|
|
4
|
+
module Adapter
|
|
5
|
+
class Libxml < Base
|
|
6
|
+
# Restores configured character entities into explicit Moxml
|
|
7
|
+
# EntityReference nodes after LibXML has parsed the native tree.
|
|
8
|
+
class EntityRestorer
|
|
9
|
+
def initialize(doc, adapter: Libxml)
|
|
10
|
+
@doc = doc
|
|
11
|
+
@ctx = doc.context
|
|
12
|
+
@registry = @ctx.entity_registry
|
|
13
|
+
@config = @ctx.config
|
|
14
|
+
@adapter = adapter
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
def run
|
|
18
|
+
return unless @registry && @doc.root
|
|
19
|
+
|
|
20
|
+
walk(@doc.root)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
private
|
|
24
|
+
|
|
25
|
+
def walk(element)
|
|
26
|
+
# Snapshot because we may add/remove siblings during the walk.
|
|
27
|
+
element.children.to_a.each do |child|
|
|
28
|
+
if child.is_a?(::Moxml::Text)
|
|
29
|
+
restore_text_node(child)
|
|
30
|
+
elsif child.is_a?(::Moxml::Element)
|
|
31
|
+
walk(child)
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# Matches DocumentBuilder's previous behavior, including the libxml
|
|
37
|
+
# limitation that adjacent native text nodes get merged.
|
|
38
|
+
def restore_text_node(text_node)
|
|
39
|
+
content = text_node.content
|
|
40
|
+
return unless content
|
|
41
|
+
|
|
42
|
+
chunks = chunk_text(content)
|
|
43
|
+
return if chunks.size == 1 && chunks.first.first == :text
|
|
44
|
+
|
|
45
|
+
parent = text_node.parent
|
|
46
|
+
return unless parent
|
|
47
|
+
|
|
48
|
+
text_node.remove
|
|
49
|
+
chunks.each { |type, payload| append_chunk(parent, type, payload) }
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
def chunk_text(content)
|
|
53
|
+
chunks = []
|
|
54
|
+
buffer = +""
|
|
55
|
+
restorable = @registry.restorable_codepoints
|
|
56
|
+
|
|
57
|
+
content.each_char do |char|
|
|
58
|
+
cp = char.ord
|
|
59
|
+
if restorable.include?(cp) &&
|
|
60
|
+
(name = @registry.primary_name_for_codepoint(cp)) &&
|
|
61
|
+
@registry.should_restore?(cp, config: @config)
|
|
62
|
+
unless buffer.empty?
|
|
63
|
+
chunks << [:text, buffer.dup]
|
|
64
|
+
buffer.clear
|
|
65
|
+
end
|
|
66
|
+
chunks << [:eref, name]
|
|
67
|
+
else
|
|
68
|
+
buffer << char
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
chunks << [:text, buffer.dup] unless buffer.empty?
|
|
73
|
+
chunks
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def append_chunk(parent, type, payload)
|
|
77
|
+
case type
|
|
78
|
+
when :text
|
|
79
|
+
parent.add_child(::Moxml::Text.new(@adapter.create_native_text(payload), @ctx))
|
|
80
|
+
when :eref
|
|
81
|
+
parent.add_child(
|
|
82
|
+
::Moxml::EntityReference.new(
|
|
83
|
+
@adapter.create_native_entity_reference(payload),
|
|
84
|
+
@ctx,
|
|
85
|
+
),
|
|
86
|
+
)
|
|
87
|
+
end
|
|
88
|
+
end
|
|
89
|
+
end
|
|
90
|
+
end
|
|
91
|
+
end
|
|
92
|
+
end
|