moxml 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/opal.yml +37 -0
  3. data/.rspec-opal +5 -0
  4. data/Gemfile +6 -0
  5. data/Rakefile +67 -0
  6. data/lib/compat/opal/rexml/namespace.rb +56 -0
  7. data/lib/compat/opal/rexml/parsers/baseparser.rb +952 -0
  8. data/lib/compat/opal/rexml/source.rb +213 -0
  9. data/lib/compat/opal/rexml/text.rb +418 -0
  10. data/lib/compat/opal/rexml/xmltokens.rb +45 -0
  11. data/lib/compat/opal/rexml_compat.rb +76 -0
  12. data/lib/moxml/adapter/base.rb +5 -0
  13. data/lib/moxml/adapter/customized_libxml/node.rb +3 -0
  14. data/lib/moxml/adapter/customized_libxml/text.rb +6 -1
  15. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -10
  16. data/lib/moxml/adapter/headed_ox.rb +2 -6
  17. data/lib/moxml/adapter/libxml/entity_ref_registry.rb +105 -0
  18. data/lib/moxml/adapter/libxml/entity_restorer.rb +92 -0
  19. data/lib/moxml/adapter/libxml.rb +386 -382
  20. data/lib/moxml/adapter/nokogiri.rb +7 -18
  21. data/lib/moxml/adapter/oga.rb +4 -22
  22. data/lib/moxml/adapter/ox.rb +8 -23
  23. data/lib/moxml/adapter/rexml.rb +29 -33
  24. data/lib/moxml/adapter.rb +38 -8
  25. data/lib/moxml/config.rb +1 -1
  26. data/lib/moxml/entity_registry.rb +36 -31
  27. data/lib/moxml/entity_registry_opal_data.rb +2137 -0
  28. data/lib/moxml/node.rb +19 -26
  29. data/lib/moxml/sax/namespace_splitter.rb +54 -0
  30. data/lib/moxml/version.rb +1 -1
  31. data/lib/moxml/xml_utils.rb +9 -1
  32. data/spec/consistency/adapter_parity_spec.rb +1 -1
  33. data/spec/integration/all_adapters_spec.rb +1 -1
  34. data/spec/integration/w3c_namespace_spec.rb +1 -1
  35. data/spec/moxml/adapter/libxml_internals_spec.rb +167 -0
  36. data/spec/moxml/adapter/ox_spec.rb +8 -0
  37. data/spec/moxml/adapter/platform_spec.rb +69 -0
  38. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +0 -6
  39. data/spec/moxml/entity_registry_spec.rb +10 -0
  40. data/spec/moxml/native_attachment/opal_spec.rb +39 -2
  41. data/spec/moxml/node_type_map_spec.rb +43 -0
  42. data/spec/moxml/opal_rexml_adapter_spec.rb +14 -0
  43. data/spec/moxml/opal_smoke_spec.rb +61 -0
  44. data/spec/moxml/sax/namespace_splitter_spec.rb +67 -0
  45. data/spec/moxml/text_spec.rb +1 -1
  46. data/spec/performance/benchmark_spec.rb +1 -1
  47. data/spec/spec_helper.rb +32 -13
  48. data/spec/support/opal.rb +16 -0
  49. metadata +21 -2
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+ # backtick_javascript: true
3
+
4
+ require 'corelib/array/pack'
5
+
6
# Provide StringScanner::Version when nothing has defined it yet. The guard
# leaves an existing constant untouched.
unless defined?(StringScanner::Version)
  class StringScanner
    Version = "3.0.8"
  end
end

# Minimal ::Encoding namespace for runtimes without one. The constants are
# plain strings carrying the encoding names, not real Encoding objects.
unless defined?(::Encoding)
  module ::Encoding
    UTF_8 = "UTF-8"
    ASCII_8BIT = "ASCII-8BIT"
  end
end

# Identity fallbacks for the String encoding API. Each method is only
# defined when String does not already respond to it; both ignore their
# arguments and return the receiver unchanged.
class String
  unless method_defined?(:force_encoding)
    def force_encoding(*)
      self
    end
  end

  unless method_defined?(:encode)
    def encode(*)
      self
    end
  end
end
34
+
35
# Opal defines the mutating String methods as raising NotImplementedError.
# These overrides are functional stand-ins: every method returns a NEW
# string and never changes the receiver, so callers must use the return
# value. The bang variants return nil when the result equals the receiver.
#
# The bodies are plain Ruby rather than inline JavaScript so that:
# * +to_s+ in #<< is a real Ruby method call (inside an x-string, text
#   written after the interpolation is emitted as raw JavaScript), and
# * "unchanged" is decided by string value (==) instead of a JavaScript
#   identity test.
class String
  # Returns the receiver followed by +str.to_s+ as a new string.
  #
  # @param str [#to_s] value to append
  # @return [String]
  def <<(str)
    self + str.to_s
  end

  # Non-mutating chomp!.
  #
  # @param sep [String, nil] record separator; defaults to $/ exactly like
  #   String#chomp. The previous default of nil disabled chomping, so a
  #   bare +chomp!+ never removed anything. An explicit nil still means
  #   "remove nothing".
  # @return [String, nil] the chomped copy, or nil when nothing was removed
  def chomp!(sep = $/)
    result = chomp(sep)
    result == self ? nil : result
  end

  # Non-mutating gsub!; arguments are forwarded unchanged to #gsub.
  #
  # @return [String, nil] the substituted copy, or nil when it equals the receiver
  def gsub!(pattern, replacement, &block)
    result = gsub(pattern, replacement, &block)
    result == self ? nil : result
  end

  # Non-mutating squeeze!; +sets+ are forwarded unchanged to #squeeze.
  #
  # @return [String, nil] the squeezed copy, or nil when it equals the receiver
  def squeeze!(*sets)
    result = squeeze(*sets)
    result == self ? nil : result
  end

  # Non-mutating strip!.
  #
  # @return [String, nil] the stripped copy, or nil when it equals the receiver
  def strip!
    result = strip
    result == self ? nil : result
  end
end
70
+
71
# Chainable append operator for StringIO.
class StringIO
  # Writes +str+ through #write and returns the stream itself, so calls can
  # be chained (io << "a" << "b").
  def <<(str)
    tap { write(str) }
  end
end
@@ -46,6 +46,11 @@ module Moxml
46
46
  else
47
47
  xml.encode("UTF-8")
48
48
  end
49
+ # Fast path: no `&` means no entity references to mark — skip
50
+ # the regex scan and string allocation entirely. The vast
51
+ # majority of XML payloads contain no entity references.
52
+ return str unless str.include?("&")
53
+
49
54
  str.gsub(ENTITY_NAME_RE) do |match|
50
55
  STANDARD_ENTITIES.include?(::Regexp.last_match(1)) ? match : "#{ENTITY_MARKER}#{::Regexp.last_match(1)};"
51
56
  end
@@ -8,6 +8,9 @@ module Moxml
8
8
  # This wrapper hides LibXML's strict document ownership model,
9
9
  # allowing nodes to be moved between documents transparently.
10
10
  # Similar pattern to Ox adapter's customized classes.
11
+ #
12
+ # The Libxml adapter owns wrapper type mapping in one place so the
13
+ # wrapper classes do not duplicate node-type knowledge.
11
14
  class Node
12
15
  attr_reader :native
13
16
 
@@ -19,7 +19,12 @@ module Moxml
19
19
  # LibXML's .content already contains escaped text, but it over-escapes
20
20
  # quotes which don't need escaping in text nodes (only in attributes)
21
21
  def to_xml
22
- @native.content.gsub("&quot;", '"')
22
+ content = @native.content
23
+ # Skip the gsub allocation entirely when there's nothing to undo —
24
+ # the common case for parsed text without literal quotes.
25
+ return content unless content.include?("&quot;")
26
+
27
+ content.gsub("&quot;", '"')
23
28
  end
24
29
  end
25
30
  end
@@ -27,8 +27,11 @@ module Moxml
27
27
  end
28
28
  end
29
29
 
30
+ def indented?
31
+ !@indentation.empty?
32
+ end
33
+
30
34
  def write_element(node, output)
31
- # output << ' ' * @level
32
35
  output << "<#{node.expanded_name}"
33
36
  write_attributes(node, output)
34
37
 
@@ -45,18 +48,16 @@ module Moxml
45
48
 
46
49
  output << ">"
47
50
 
48
- # Check for mixed content
49
51
  has_text = node.children.any? { |c| c.is_a?(::REXML::Text) && !c.to_s.strip.empty? }
50
52
  has_elements = node.children.any?(::REXML::Element)
51
- mixed = has_text && has_elements
53
+ indent_children = indented? && has_elements && !has_text
52
54
 
53
55
  # Handle children based on content type
54
56
  all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
55
57
  unless all_children_empty
56
- @level += @indentation.length unless mixed
58
+ @level += @indentation.length if indent_children
57
59
 
58
60
  if entity_refs && !entity_refs.empty? && child_sequence
59
- # Interleave native children with entity refs using tracked sequence
60
61
  eref_idx = 0
61
62
  native_idx = 0
62
63
  child_sequence.each do |type|
@@ -69,10 +70,12 @@ module Moxml
69
70
  child.to_s.strip.empty? &&
70
71
  !(child.next_sibling.nil? && child.previous_sibling.nil?)
71
72
 
73
+ output << "\n" << (' ' * @level) if indent_children
72
74
  write(child, output)
73
75
  end
74
76
  when :eref
75
77
  if eref_idx < entity_refs.size
78
+ output << "\n" << (' ' * @level) if indent_children
76
79
  write(entity_refs[eref_idx], output)
77
80
  eref_idx += 1
78
81
  end
@@ -80,24 +83,22 @@ module Moxml
80
83
  end
81
84
  else
82
85
  node.children.each_with_index do |child, _index|
83
- # Skip insignificant whitespace
84
86
  next if child.is_a?(::REXML::Text) &&
85
87
  child.to_s.strip.empty? &&
86
88
  !(child.next_sibling.nil? && child.previous_sibling.nil?)
87
89
 
90
+ output << "\n" << (' ' * @level) if indent_children
88
91
  write(child, output)
89
92
  end
90
93
  end
91
94
 
92
- # Reset indentation for closing tag in non-mixed content
93
- unless mixed
95
+ if indent_children
94
96
  @level -= @indentation.length
95
- # output << ' ' * @level
97
+ output << "\n" << (' ' * @level)
96
98
  end
97
99
  end
98
100
 
99
101
  output << "</#{node.expanded_name}>"
100
- # output << "\n" unless mixed
101
102
  end
102
103
 
103
104
  def write_text(node, output)
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ return if RUBY_ENGINE == "opal"
4
+
3
5
  require_relative "ox"
4
6
  require_relative "../xpath"
5
7
  # Force load XPath modules (autoload doesn't work well with relative requires in examples)
@@ -66,14 +68,8 @@ module Moxml
66
68
  # @param [Hash] namespaces Namespace prefix mappings
67
69
  # @return [Array, Object] Native node array or scalar value
68
70
  def xpath(node, expression, namespaces = {})
69
- # If we receive a native node, wrap it first
70
- # Document#xpath passes @native, but our compiled XPath needs Moxml nodes
71
71
  unless node.is_a?(Moxml::Node)
72
- # Determine the context from the node if possible
73
- # For now, create a basic context for wrapped nodes
74
72
  ctx = Context.new(:headed_ox)
75
-
76
- # Wrap the native node - don't rebuild the whole document
77
73
  node = Moxml::Node.wrap(node, ctx)
78
74
  end
79
75
 
@@ -0,0 +1,105 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ class Libxml < Base
6
# Side table for entity-reference insertions that cannot live in LibXML's
# native node tree, together with the per-element child sequence needed to
# serialize them in order.
#
# Both tables are stored on the document through the attachments object
# (which must respond to #key?, #get and #set) and are keyed by the
# element's #path.
class EntityRefRegistry
  ENTITY_REFS_KEY = :_entity_ref_pairs
  CHILD_SEQUENCE_KEY = :_child_seq_pairs
  NON_WHITESPACE_RE = /\S/
  private_constant :ENTITY_REFS_KEY, :CHILD_SEQUENCE_KEY, :NON_WHITESPACE_RE

  # @param attachments [#key?, #get, #set] per-document storage
  # @param doc [Object, nil] the document the tables are attached to
  def initialize(attachments, doc)
    @attachments = attachments
    @doc = doc
  end

  # True once an entity-ref table has been attached to the document.
  def active?
    return false unless @doc

    @attachments.key?(@doc, ENTITY_REFS_KEY)
  end

  # Records +ref+ under +element+ and appends an :eref marker to the
  # element's child sequence. The first registration for an element seeds
  # the sequence with one :native marker per existing non-blank child.
  def register(element, ref)
    return unless element && @doc

    key = path_for(element)

    refs = @attachments.get(@doc, ENTITY_REFS_KEY) || {}
    refs[key] ||= []
    refs[key] << ref
    @attachments.set(@doc, ENTITY_REFS_KEY, refs)

    sequences = @attachments.get(@doc, CHILD_SEQUENCE_KEY) || {}
    if (sequence = sequences[key])
      # Existing sequences are mutated in place; no re-set is needed.
      sequence << :eref
    else
      sequences[key] = Array.new(count_native_children(element), :native) << :eref
      @attachments.set(@doc, CHILD_SEQUENCE_KEY, sequences)
    end
  end

  # Appends a :native marker, but only for elements that already have a
  # tracked sequence; otherwise does nothing.
  def append_native(element)
    sequence = sequence_for(element)
    sequence << :native if sequence
  end

  # @return [Array, nil] entity refs registered for +element+
  def refs_for(element)
    lookup(ENTITY_REFS_KEY, element)
  end

  # @return [Array<Symbol>, nil] :native/:eref markers for +element+
  def sequence_for(element)
    lookup(CHILD_SEQUENCE_KEY, element)
  end

  # @return [Array] +[refs, sequence]+ when the element has at least one
  #   ref and a tracked sequence, otherwise +[nil, nil]+
  def serialization_for(element)
    refs = refs_for(element)
    sequence = sequence_for(element) if refs && !refs.empty?
    sequence ? [refs, sequence] : [nil, nil]
  end

  private

  # Reads the table stored under +key+ and returns the entry for +element+.
  def lookup(key, element)
    return nil unless @doc && element

    table = @attachments.get(@doc, key)
    table && table[path_for(element)]
  end

  def path_for(element)
    element.path
  end

  # Number of children that are not whitespace-only text nodes; 0 for
  # anything that is not a LibXML node or has no children.
  def count_native_children(element)
    return 0 unless element.is_a?(::LibXML::XML::Node) && element.children?

    element.enum_for(:each_child).count { |child| !blank_text_node?(child) }
  end

  # A text node whose content is nil or contains no non-whitespace character.
  def blank_text_node?(child)
    return false unless child.text?

    text = child.content
    text.nil? || !NON_WHITESPACE_RE.match?(text)
  end
end
103
+ end
104
+ end
105
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Moxml
4
+ module Adapter
5
+ class Libxml < Base
6
# Restores configured character entities into explicit Moxml
# EntityReference nodes after LibXML has parsed the native tree.
#
# Each text node containing restorable characters is removed and replaced
# by a run of Text / EntityReference children added to the same parent.
class EntityRestorer
  # @param doc [Object] document wrapper responding to #context and #root
  # @param adapter [Object] factory for native text / entity-reference nodes
  def initialize(doc, adapter: Libxml)
    @doc = doc
    @ctx = doc.context
    @registry = @ctx.entity_registry
    @config = @ctx.config
    @adapter = adapter
  end

  # Walks the tree from the root; a no-op without an entity registry or a
  # root element.
  def run
    walk(@doc.root) if @registry && @doc.root
  end

  private

  # Depth-first visit. The child list is snapshotted with #to_a because
  # restoring a text node adds and removes siblings during the walk.
  def walk(element)
    element.children.to_a.each do |child|
      case child
      when ::Moxml::Text then restore_text_node(child)
      when ::Moxml::Element then walk(child)
      end
    end
  end

  # Matches DocumentBuilder's previous behavior, including the libxml
  # limitation that adjacent native text nodes get merged.
  #
  # NOTE(review): replacement chunks are added with parent.add_child, so a
  # text node that has following siblings presumably ends up after them -
  # confirm whether that ordering is intended.
  # NOTE(review): content "" yields no chunks, so such a node is removed
  # and nothing is added back - confirm.
  def restore_text_node(text_node)
    content = text_node.content
    return unless content

    chunks = chunk_text(content)
    # A single :text chunk means nothing needs restoring.
    return if chunks.length == 1 && chunks[0][0] == :text

    parent = text_node.parent
    return unless parent

    text_node.remove
    chunks.each { |kind, payload| append_chunk(parent, kind, payload) }
  end

  # Splits +content+ into ordered [:text, String] and [:eref, name] pairs.
  def chunk_text(content)
    restorable = @registry.restorable_codepoints
    pieces = []
    run = +""

    content.each_char do |char|
      entity = entity_name_for(char.ord, restorable)
      if entity
        # Flush the pending literal text before the entity reference.
        unless run.empty?
          pieces << [:text, run]
          run = +""
        end
        pieces << [:eref, entity]
      else
        run << char
      end
    end

    pieces << [:text, run] unless run.empty?
    pieces
  end

  # Entity name for +codepoint+ when it is restorable, has a primary name
  # and the registry says it should be restored under the current config;
  # nil otherwise.
  def entity_name_for(codepoint, restorable)
    return unless restorable.include?(codepoint)

    name = @registry.primary_name_for_codepoint(codepoint)
    name if name && @registry.should_restore?(codepoint, config: @config)
  end

  # Builds the Moxml wrapper for one chunk and adds it to +parent+.
  # Unknown chunk types are ignored.
  def append_chunk(parent, type, payload)
    node =
      case type
      when :text
        ::Moxml::Text.new(@adapter.create_native_text(payload), @ctx)
      when :eref
        ::Moxml::EntityReference.new(@adapter.create_native_entity_reference(payload), @ctx)
      end
    parent.add_child(node) if node
  end
end
90
+ end
91
+ end
92
+ end