moxml 0.1.21 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/opal.yml +37 -0
  3. data/.rspec-opal +5 -0
  4. data/Gemfile +6 -0
  5. data/Rakefile +67 -0
  6. data/lib/compat/opal/rexml/namespace.rb +56 -0
  7. data/lib/compat/opal/rexml/parsers/baseparser.rb +952 -0
  8. data/lib/compat/opal/rexml/source.rb +213 -0
  9. data/lib/compat/opal/rexml/text.rb +418 -0
  10. data/lib/compat/opal/rexml/xmltokens.rb +45 -0
  11. data/lib/compat/opal/rexml_compat.rb +76 -0
  12. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -10
  13. data/lib/moxml/adapter/headed_ox.rb +2 -6
  14. data/lib/moxml/adapter/libxml.rb +5 -20
  15. data/lib/moxml/adapter/nokogiri.rb +7 -18
  16. data/lib/moxml/adapter/oga.rb +4 -22
  17. data/lib/moxml/adapter/ox.rb +8 -23
  18. data/lib/moxml/adapter/rexml.rb +29 -33
  19. data/lib/moxml/adapter.rb +38 -8
  20. data/lib/moxml/config.rb +1 -1
  21. data/lib/moxml/entity_registry.rb +36 -31
  22. data/lib/moxml/entity_registry_opal_data.rb +2137 -0
  23. data/lib/moxml/node.rb +19 -26
  24. data/lib/moxml/sax/namespace_splitter.rb +54 -0
  25. data/lib/moxml/version.rb +1 -1
  26. data/lib/moxml/xml_utils.rb +9 -1
  27. data/spec/consistency/adapter_parity_spec.rb +1 -1
  28. data/spec/integration/all_adapters_spec.rb +1 -1
  29. data/spec/integration/w3c_namespace_spec.rb +1 -1
  30. data/spec/moxml/adapter/ox_spec.rb +8 -0
  31. data/spec/moxml/adapter/platform_spec.rb +69 -0
  32. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +0 -6
  33. data/spec/moxml/entity_registry_spec.rb +10 -0
  34. data/spec/moxml/native_attachment/opal_spec.rb +39 -2
  35. data/spec/moxml/node_type_map_spec.rb +43 -0
  36. data/spec/moxml/opal_rexml_adapter_spec.rb +14 -0
  37. data/spec/moxml/opal_smoke_spec.rb +61 -0
  38. data/spec/moxml/sax/namespace_splitter_spec.rb +67 -0
  39. data/spec/moxml/text_spec.rb +1 -1
  40. data/spec/spec_helper.rb +32 -13
  41. data/spec/support/opal.rb +16 -0
  42. metadata +17 -1
@@ -0,0 +1,76 @@
1
+ # frozen_string_literal: true
2
+ # backtick_javascript: true
3
+
4
+ require 'corelib/array/pack'
5
+
6
+ unless defined?(StringScanner::Version)
7
+ class StringScanner
8
+ Version = "3.0.8"
9
+ end
10
+ end
11
+
12
+ unless String.method_defined?(:force_encoding)
13
+ class String
14
+ def force_encoding(*)
15
+ self
16
+ end
17
+ end
18
+ end
19
+
20
+ unless defined?(::Encoding)
21
+ module ::Encoding
22
+ UTF_8 = "UTF-8"
23
+ ASCII_8BIT = "ASCII-8BIT"
24
+ end
25
+ end
26
+
27
+ unless String.method_defined?(:encode)
28
+ class String
29
+ def encode(*)
30
+ self
31
+ end
32
+ end
33
+ end
34
+
35
+ # Opal defines mutable String methods as raising NotImplementedError.
36
+ # Override with functional equivalents that return new strings.
37
+ class String
38
+ def <<(str)
39
+ %x{return self + #{str}.to_s}
40
+ end
41
+
42
+ def chomp!(sep = nil)
43
+ %x{
44
+ var r = #{chomp(sep)};
45
+ return r === self ? nil : r;
46
+ }
47
+ end
48
+
49
+ def gsub!(pattern, replacement, &block)
50
+ %x{
51
+ var r = #{gsub(pattern, replacement, &block)};
52
+ return r === self ? nil : r;
53
+ }
54
+ end
55
+
56
+ def squeeze!(*sets)
57
+ %x{
58
+ var r = #{squeeze(*sets)};
59
+ return r === self ? nil : r;
60
+ }
61
+ end
62
+
63
+ def strip!
64
+ %x{
65
+ var r = #{strip};
66
+ return r === self ? nil : r;
67
+ }
68
+ end
69
+ end
70
+
71
+ class StringIO
72
+ def <<(str)
73
+ write(str)
74
+ self
75
+ end
76
+ end
@@ -27,8 +27,11 @@ module Moxml
27
27
  end
28
28
  end
29
29
 
30
+ def indented?
31
+ !@indentation.empty?
32
+ end
33
+
30
34
  def write_element(node, output)
31
- # output << ' ' * @level
32
35
  output << "<#{node.expanded_name}"
33
36
  write_attributes(node, output)
34
37
 
@@ -45,18 +48,16 @@ module Moxml
45
48
 
46
49
  output << ">"
47
50
 
48
- # Check for mixed content
49
51
  has_text = node.children.any? { |c| c.is_a?(::REXML::Text) && !c.to_s.strip.empty? }
50
52
  has_elements = node.children.any?(::REXML::Element)
51
- mixed = has_text && has_elements
53
+ indent_children = indented? && has_elements && !has_text
52
54
 
53
55
  # Handle children based on content type
54
56
  all_children_empty = node.children.empty? && !(entity_refs && !entity_refs.empty?)
55
57
  unless all_children_empty
56
- @level += @indentation.length unless mixed
58
+ @level += @indentation.length if indent_children
57
59
 
58
60
  if entity_refs && !entity_refs.empty? && child_sequence
59
- # Interleave native children with entity refs using tracked sequence
60
61
  eref_idx = 0
61
62
  native_idx = 0
62
63
  child_sequence.each do |type|
@@ -69,10 +70,12 @@ module Moxml
69
70
  child.to_s.strip.empty? &&
70
71
  !(child.next_sibling.nil? && child.previous_sibling.nil?)
71
72
 
73
+ output << "\n" << (' ' * @level) if indent_children
72
74
  write(child, output)
73
75
  end
74
76
  when :eref
75
77
  if eref_idx < entity_refs.size
78
+ output << "\n" << (' ' * @level) if indent_children
76
79
  write(entity_refs[eref_idx], output)
77
80
  eref_idx += 1
78
81
  end
@@ -80,24 +83,22 @@ module Moxml
80
83
  end
81
84
  else
82
85
  node.children.each_with_index do |child, _index|
83
- # Skip insignificant whitespace
84
86
  next if child.is_a?(::REXML::Text) &&
85
87
  child.to_s.strip.empty? &&
86
88
  !(child.next_sibling.nil? && child.previous_sibling.nil?)
87
89
 
90
+ output << "\n" << (' ' * @level) if indent_children
88
91
  write(child, output)
89
92
  end
90
93
  end
91
94
 
92
- # Reset indentation for closing tag in non-mixed content
93
- unless mixed
95
+ if indent_children
94
96
  @level -= @indentation.length
95
- # output << ' ' * @level
97
+ output << "\n" << (' ' * @level)
96
98
  end
97
99
  end
98
100
 
99
101
  output << "</#{node.expanded_name}>"
100
- # output << "\n" unless mixed
101
102
  end
102
103
 
103
104
  def write_text(node, output)
@@ -1,5 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ return if RUBY_ENGINE == "opal"
4
+
3
5
  require_relative "ox"
4
6
  require_relative "../xpath"
5
7
  # Force load XPath modules (autoload doesn't work well with relative requires in examples)
@@ -66,14 +68,8 @@ module Moxml
66
68
  # @param [Hash] namespaces Namespace prefix mappings
67
69
  # @return [Array, Object] Native node array or scalar value
68
70
  def xpath(node, expression, namespaces = {})
69
- # If we receive a native node, wrap it first
70
- # Document#xpath passes @native, but our compiled XPath needs Moxml nodes
71
71
  unless node.is_a?(Moxml::Node)
72
- # Determine the context from the node if possible
73
- # For now, create a basic context for wrapped nodes
74
72
  ctx = Context.new(:headed_ox)
75
-
76
- # Wrap the native node - don't rebuild the whole document
77
73
  node = Moxml::Node.wrap(node, ctx)
78
74
  end
79
75
 
@@ -1,8 +1,11 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ return if RUBY_ENGINE == "opal"
4
+
3
5
  require_relative "base"
4
6
  require "libxml"
5
7
  require_relative "customized_libxml"
8
+ require_relative "../sax/namespace_splitter"
6
9
 
7
10
  module Moxml
8
11
  module Adapter
@@ -1656,6 +1659,7 @@ module Moxml
1656
1659
  # @private
1657
1660
  class LibXMLSAXBridge
1658
1661
  include ::LibXML::XML::SaxParser::Callbacks
1662
+ include Moxml::SAX::NamespaceSplitter
1659
1663
 
1660
1664
  def initialize(handler)
1661
1665
  @handler = handler
@@ -1672,26 +1676,7 @@ module Moxml
1672
1676
  end
1673
1677
 
1674
1678
  def on_start_element(name, attributes)
1675
- # Convert LibXML attributes hash to separate attrs and namespaces
1676
- attr_hash = {}
1677
- ns_hash = {}
1678
-
1679
- attributes&.each do |attr_name, attr_value|
1680
- if attr_name.to_s.start_with?("xmlns")
1681
- # Namespace declaration
1682
- prefix = if attr_name.to_s == "xmlns"
1683
- nil
1684
- else
1685
- attr_name.to_s.sub(
1686
- "xmlns:", ""
1687
- )
1688
- end
1689
- ns_hash[prefix] = attr_value
1690
- else
1691
- attr_hash[attr_name.to_s] = attr_value
1692
- end
1693
- end
1694
-
1679
+ attr_hash, ns_hash = split_attributes_and_namespaces(attributes)
1695
1680
  @handler.on_start_element(name.to_s, attr_hash, ns_hash)
1696
1681
  end
1697
1682
 
@@ -1,7 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ return if RUBY_ENGINE == "opal"
4
+
3
5
  require_relative "base"
4
6
  require "nokogiri"
7
+ require_relative "../sax/namespace_splitter"
5
8
 
6
9
  module Moxml
7
10
  module Adapter
@@ -446,6 +449,8 @@ module Moxml
446
449
  #
447
450
  # @private
448
451
  class NokogiriSAXBridge < ::Nokogiri::XML::SAX::Document
452
+ include Moxml::SAX::NamespaceSplitter
453
+
449
454
  def initialize(handler)
450
455
  super()
451
456
  @handler = handler
@@ -462,24 +467,8 @@ module Moxml
462
467
  end
463
468
 
464
469
  def start_element(name, attributes = [])
465
- # Convert Nokogiri attributes array to hash
466
- attr_hash = {}
467
- namespaces_hash = {}
468
-
469
- attributes.each do |attr|
470
- attr_name = attr[0]
471
- attr_value = attr[1]
472
-
473
- if attr_name.start_with?("xmlns")
474
- # Namespace declaration
475
- prefix = attr_name == "xmlns" ? nil : attr_name.sub("xmlns:", "")
476
- namespaces_hash[prefix] = attr_value
477
- else
478
- attr_hash[attr_name] = attr_value
479
- end
480
- end
481
-
482
- @handler.on_start_element(name, attr_hash, namespaces_hash)
470
+ attr_hash, ns_hash = split_attributes_and_namespaces(attributes)
471
+ @handler.on_start_element(name, attr_hash, ns_hash)
483
472
  end
484
473
 
485
474
  def end_element(name)
@@ -3,6 +3,7 @@
3
3
  require_relative "base"
4
4
  require_relative "customized_oga"
5
5
  require "oga"
6
+ require_relative "../sax/namespace_splitter"
6
7
 
7
8
  module Moxml
8
9
  module Adapter
@@ -555,6 +556,8 @@ module Moxml
555
556
  #
556
557
  # @private
557
558
  class OgaSAXBridge
559
+ include Moxml::SAX::NamespaceSplitter
560
+
558
561
  def initialize(handler)
559
562
  @handler = handler
560
563
  end
@@ -563,29 +566,8 @@ module Moxml
563
566
  # namespace may be nil
564
567
  # attributes is an array of [name, value] pairs
565
568
  def on_element(namespace, name, attributes)
566
- # Build full qualified name if namespace present
567
569
  element_name = namespace ? "#{namespace}:#{name}" : name
568
-
569
- # Convert Oga attributes to hash
570
- attr_hash = {}
571
- ns_hash = {}
572
-
573
- # Oga delivers attributes as array of [name, value] pairs
574
- attributes.each do |attr_name, attr_value|
575
- if attr_name.to_s.start_with?("xmlns")
576
- prefix = if attr_name.to_s == "xmlns"
577
- nil
578
- else
579
- attr_name.to_s.sub(
580
- "xmlns:", ""
581
- )
582
- end
583
- ns_hash[prefix] = attr_value
584
- else
585
- attr_hash[attr_name.to_s] = attr_value
586
- end
587
- end
588
-
570
+ attr_hash, ns_hash = split_attributes_and_namespaces(attributes)
589
571
  @handler.on_start_element(element_name, attr_hash, ns_hash)
590
572
  end
591
573
 
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ return if RUBY_ENGINE == "opal"
4
+
3
5
  require_relative "base"
4
6
  require "ox"
5
7
  require "stringio"
6
8
  require_relative "customized_ox"
9
+ require_relative "../sax/namespace_splitter"
7
10
 
8
11
  # insert :parent methods to all Ox classes inherit the Node class
9
12
  Ox::Node.attr_accessor :parent
@@ -188,7 +191,7 @@ module Moxml
188
191
  when ::Ox::Element then :element
189
192
  when ::Ox::DocType then :doctype
190
193
  when ::Moxml::Adapter::CustomizedOx::EntityReference then :entity_reference
191
- when ::Moxml::Adapter::CustomizedOx::Namespace then :banespace
194
+ when ::Moxml::Adapter::CustomizedOx::Namespace then :namespace
192
195
  when ::Moxml::Adapter::CustomizedOx::Attribute then :attribute
193
196
  else :unknown
194
197
  end
@@ -903,6 +906,8 @@ module Moxml
903
906
  #
904
907
  # @private
905
908
  class OxSAXBridge
909
+ include Moxml::SAX::NamespaceSplitter
910
+
906
911
  def initialize(handler)
907
912
  @handler = handler
908
913
  @pending_attrs = {}
@@ -972,28 +977,8 @@ module Moxml
972
977
  private
973
978
 
974
979
  def finalize_pending_element
975
- # Separate namespace declarations from regular attributes
976
- attr_hash = {}
977
- namespaces_hash = {}
978
-
979
- @pending_attrs.each do |attr_name, attr_value|
980
- if attr_name.to_s.start_with?("xmlns")
981
- # Namespace declaration
982
- prefix = if attr_name.to_s == "xmlns"
983
- nil
984
- else
985
- attr_name.to_s.sub(
986
- "xmlns:", ""
987
- )
988
- end
989
- namespaces_hash[prefix] = attr_value
990
- else
991
- attr_hash[attr_name.to_s] = attr_value
992
- end
993
- end
994
-
995
- @handler.on_start_element(@pending_element_name, attr_hash,
996
- namespaces_hash)
980
+ attr_hash, ns_hash = split_attributes_and_namespaces(@pending_attrs)
981
+ @handler.on_start_element(@pending_element_name, attr_hash, ns_hash)
997
982
 
998
983
  # Clear for next element
999
984
  @pending_attrs = {}
@@ -3,8 +3,10 @@
3
3
  require_relative "base"
4
4
  require "rexml/document"
5
5
  require "rexml/xpath"
6
- require "set"
6
+ require "set" unless RUBY_ENGINE == "opal"
7
+ require "stringio" if RUBY_ENGINE == "opal"
7
8
  require_relative "customized_rexml"
9
+ require_relative "../sax/namespace_splitter"
8
10
 
9
11
  module Moxml
10
12
  module Adapter
@@ -45,9 +47,13 @@ module Moxml
45
47
  end
46
48
 
47
49
  def extract_encoding_from_xml(xml)
48
- # Match XML declaration pattern: <?xml version="..." encoding="..."?>
49
- # Use atomic group (?>) to prevent polynomial backtracking ReDoS
50
- match = xml.match(/<\?xml(?>[^>]*)\bencoding\s*=\s*["']([^"']+)["']/i)
50
+ return "UTF-8" unless xml.start_with?("<?xml")
51
+
52
+ decl_end = xml.index("?>")
53
+ return "UTF-8" unless decl_end
54
+
55
+ decl = xml[0...decl_end]
56
+ match = decl.match(/encoding\s*=\s*["']([^"']+)["']/i)
51
57
  match ? match[1] : "UTF-8"
52
58
  end
53
59
 
@@ -195,21 +201,19 @@ module Moxml
195
201
  def next_sibling(node)
196
202
  current = node.next_sibling
197
203
 
198
- # Skip empty text nodes and duplicates
199
- seen = Set.new
204
+ seen = {}
200
205
  while current
201
206
  if current.is_a?(::REXML::Text) && current.to_s.strip.empty?
202
207
  current = current.next_sibling
203
208
  next
204
209
  end
205
210
 
206
- # Check for duplicates
207
- if seen.include?(current.object_id)
211
+ if seen[current.object_id]
208
212
  current = current.next_sibling
209
213
  next
210
214
  end
211
215
 
212
- seen.add(current.object_id)
216
+ seen[current.object_id] = true
213
217
  break
214
218
  end
215
219
 
@@ -219,21 +223,19 @@ module Moxml
219
223
  def previous_sibling(node)
220
224
  current = node.previous_sibling
221
225
 
222
- # Skip empty text nodes and duplicates
223
- seen = Set.new
226
+ seen = {}
224
227
  while current
225
228
  if current.is_a?(::REXML::Text) && current.to_s.strip.empty?
226
229
  current = current.previous_sibling
227
230
  next
228
231
  end
229
232
 
230
- # Check for duplicates
231
- if seen.include?(current.object_id)
233
+ if seen[current.object_id]
232
234
  current = current.previous_sibling
233
235
  next
234
236
  end
235
237
 
236
- seen.add(current.object_id)
238
+ seen[current.object_id] = true
237
239
  break
238
240
  end
239
241
 
@@ -546,8 +548,12 @@ module Moxml
546
548
  ns
547
549
  end
548
550
 
549
- def xpath(node, expression, _namespaces = {})
550
- node.get_elements(expression).to_a
551
+ def xpath(node, expression, namespaces = {})
552
+ if namespaces && !namespaces.empty?
553
+ ::REXML::XPath.match(node, expression, namespaces)
554
+ else
555
+ node.get_elements(expression).to_a
556
+ end
551
557
  rescue ::REXML::ParseException => e
552
558
  raise Moxml::XPathError.new(
553
559
  e.message,
@@ -563,7 +569,8 @@ module Moxml
563
569
  end
564
570
 
565
571
  def serialize(node, options = {})
566
- output = +""
572
+ output = StringIO.new("") if RUBY_ENGINE == "opal"
573
+ output ||= +""
567
574
 
568
575
  if node.is_a?(::REXML::Document)
569
576
  # Check if we should include declaration
@@ -606,7 +613,8 @@ module Moxml
606
613
  write_with_formatter(node, output, options[:indent] || 2)
607
614
  end
608
615
 
609
- output.strip
616
+ result = output.is_a?(StringIO) ? output.string : output
617
+ result.strip
610
618
  end
611
619
 
612
620
  def has_declaration?(native_doc, wrapper)
@@ -641,27 +649,15 @@ module Moxml
641
649
  #
642
650
  # @private
643
651
  class REXMLSAX2Bridge
652
+ include Moxml::SAX::NamespaceSplitter
653
+
644
654
  def initialize(handler)
645
655
  @handler = handler
646
656
  end
647
657
 
648
658
  # REXML splits element name into uri/localname/qname
649
659
  def start_element(_uri, _localname, qname, attributes)
650
- # Convert REXML attributes to hash
651
- attr_hash = {}
652
- ns_hash = {}
653
-
654
- attributes.each do |name, value|
655
- if name.to_s.start_with?("xmlns")
656
- # Namespace declaration
657
- prefix = name.to_s == "xmlns" ? nil : name.to_s.sub("xmlns:", "")
658
- ns_hash[prefix] = value
659
- else
660
- attr_hash[name.to_s] = value
661
- end
662
- end
663
-
664
- # Use qname (qualified name) for element name
660
+ attr_hash, ns_hash = split_attributes_and_namespaces(attributes)
665
661
  @handler.on_start_element(qname, attr_hash, ns_hash)
666
662
  end
667
663
 
data/lib/moxml/adapter.rb CHANGED
@@ -4,18 +4,25 @@ require_relative "adapter/base"
4
4
 
5
5
  module Moxml
6
6
  module Adapter
7
- AVALIABLE_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
7
+ AVAILABLE_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
8
+
9
+ # Adapters that work under the Opal (JavaScript) runtime.
10
+ # REXML is pure Ruby and Opal reimplements strscan/stringio in its stdlib,
11
+ # enabling REXML to compile cleanly to JavaScript.
12
+ OPAL_AVAILABLE_ADAPTERS = %i[rexml].freeze
13
+
14
+ # Registry mapping adapter names to their class name suffixes.
15
+ # Special cases (like :headed_ox → "HeadedOx") live here instead of
16
+ # a case statement, keeping the dispatch open for extension.
17
+ CONST_NAME_MAP = {
18
+ headed_ox: "HeadedOx",
19
+ }.freeze
8
20
 
9
21
  class << self
10
22
  def load(name)
23
+ validate_platform!(name)
11
24
  require_adapter(name)
12
- # Handle special case for headed_ox -> HeadedOx
13
- const_name = case name
14
- when :headed_ox
15
- "HeadedOx"
16
- else
17
- name.to_s.capitalize
18
- end
25
+ const_name = const_name_for(name)
19
26
  const_get(const_name)
20
27
  rescue LoadError => e
21
28
  raise Moxml::AdapterError.new(
@@ -26,8 +33,31 @@ module Moxml
26
33
  )
27
34
  end
28
35
 
36
+ def available?(name)
37
+ platform_adapters.include?(name.to_sym)
38
+ end
39
+
40
+ def platform_adapters
41
+ RUBY_ENGINE == "opal" ? OPAL_AVAILABLE_ADAPTERS : AVAILABLE_ADAPTERS
42
+ end
43
+
29
44
  private
30
45
 
46
+ def validate_platform!(name)
47
+ return if platform_adapters.include?(name.to_sym)
48
+
49
+ available = platform_adapters.map(&:to_s).join(", ")
50
+ raise Moxml::AdapterError.new(
51
+ "The '#{name}' adapter is not available on this platform. Available: #{available}",
52
+ adapter: name,
53
+ operation: "platform_check",
54
+ )
55
+ end
56
+
57
+ def const_name_for(name)
58
+ CONST_NAME_MAP[name.to_sym] || name.to_s.capitalize
59
+ end
60
+
31
61
  def require_adapter(name)
32
62
  require "#{__dir__}/adapter/#{name}"
33
63
  rescue LoadError
data/lib/moxml/config.rb CHANGED
@@ -4,7 +4,7 @@ module Moxml
4
4
  class Config
5
5
  VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
6
6
  DEFAULT_ADAPTER = :nokogiri
7
- OPAL_DEFAULT_ADAPTER = :oga
7
+ OPAL_DEFAULT_ADAPTER = :rexml
8
8
 
9
9
  # Entity loading modes:
10
10
  # - :required - Must load entities, raise error if unavailable (default)