moxml 0.1.20 → 0.1.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/opal.yml +37 -0
  3. data/.rspec-opal +5 -0
  4. data/Gemfile +6 -0
  5. data/Rakefile +67 -0
  6. data/lib/compat/opal/rexml/namespace.rb +56 -0
  7. data/lib/compat/opal/rexml/parsers/baseparser.rb +952 -0
  8. data/lib/compat/opal/rexml/source.rb +213 -0
  9. data/lib/compat/opal/rexml/text.rb +418 -0
  10. data/lib/compat/opal/rexml/xmltokens.rb +45 -0
  11. data/lib/compat/opal/rexml_compat.rb +76 -0
  12. data/lib/moxml/adapter/base.rb +5 -0
  13. data/lib/moxml/adapter/customized_libxml/node.rb +3 -0
  14. data/lib/moxml/adapter/customized_libxml/text.rb +6 -1
  15. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -10
  16. data/lib/moxml/adapter/headed_ox.rb +2 -6
  17. data/lib/moxml/adapter/libxml/entity_ref_registry.rb +105 -0
  18. data/lib/moxml/adapter/libxml/entity_restorer.rb +92 -0
  19. data/lib/moxml/adapter/libxml.rb +386 -382
  20. data/lib/moxml/adapter/nokogiri.rb +7 -18
  21. data/lib/moxml/adapter/oga.rb +4 -22
  22. data/lib/moxml/adapter/ox.rb +8 -23
  23. data/lib/moxml/adapter/rexml.rb +29 -33
  24. data/lib/moxml/adapter.rb +38 -8
  25. data/lib/moxml/config.rb +1 -1
  26. data/lib/moxml/entity_registry.rb +36 -31
  27. data/lib/moxml/entity_registry_opal_data.rb +2137 -0
  28. data/lib/moxml/node.rb +19 -26
  29. data/lib/moxml/sax/namespace_splitter.rb +54 -0
  30. data/lib/moxml/version.rb +1 -1
  31. data/lib/moxml/xml_utils.rb +9 -1
  32. data/spec/consistency/adapter_parity_spec.rb +1 -1
  33. data/spec/integration/all_adapters_spec.rb +1 -1
  34. data/spec/integration/w3c_namespace_spec.rb +1 -1
  35. data/spec/moxml/adapter/libxml_internals_spec.rb +167 -0
  36. data/spec/moxml/adapter/ox_spec.rb +8 -0
  37. data/spec/moxml/adapter/platform_spec.rb +69 -0
  38. data/spec/moxml/adapter/shared_examples/adapter_contract.rb +0 -6
  39. data/spec/moxml/entity_registry_spec.rb +10 -0
  40. data/spec/moxml/native_attachment/opal_spec.rb +39 -2
  41. data/spec/moxml/node_type_map_spec.rb +43 -0
  42. data/spec/moxml/opal_rexml_adapter_spec.rb +14 -0
  43. data/spec/moxml/opal_smoke_spec.rb +61 -0
  44. data/spec/moxml/sax/namespace_splitter_spec.rb +67 -0
  45. data/spec/moxml/text_spec.rb +1 -1
  46. data/spec/performance/benchmark_spec.rb +1 -1
  47. data/spec/spec_helper.rb +32 -13
  48. data/spec/support/opal.rb +16 -0
  49. metadata +21 -2
@@ -1,7 +1,10 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ return if RUBY_ENGINE == "opal"
4
+
3
5
  require_relative "base"
4
6
  require "nokogiri"
7
+ require_relative "../sax/namespace_splitter"
5
8
 
6
9
  module Moxml
7
10
  module Adapter
@@ -446,6 +449,8 @@ module Moxml
446
449
  #
447
450
  # @private
448
451
  class NokogiriSAXBridge < ::Nokogiri::XML::SAX::Document
452
+ include Moxml::SAX::NamespaceSplitter
453
+
449
454
  def initialize(handler)
450
455
  super()
451
456
  @handler = handler
@@ -462,24 +467,8 @@ module Moxml
462
467
  end
463
468
 
464
469
  def start_element(name, attributes = [])
465
- # Convert Nokogiri attributes array to hash
466
- attr_hash = {}
467
- namespaces_hash = {}
468
-
469
- attributes.each do |attr|
470
- attr_name = attr[0]
471
- attr_value = attr[1]
472
-
473
- if attr_name.start_with?("xmlns")
474
- # Namespace declaration
475
- prefix = attr_name == "xmlns" ? nil : attr_name.sub("xmlns:", "")
476
- namespaces_hash[prefix] = attr_value
477
- else
478
- attr_hash[attr_name] = attr_value
479
- end
480
- end
481
-
482
- @handler.on_start_element(name, attr_hash, namespaces_hash)
470
+ attr_hash, ns_hash = split_attributes_and_namespaces(attributes)
471
+ @handler.on_start_element(name, attr_hash, ns_hash)
483
472
  end
484
473
 
485
474
  def end_element(name)
@@ -3,6 +3,7 @@
3
3
  require_relative "base"
4
4
  require_relative "customized_oga"
5
5
  require "oga"
6
+ require_relative "../sax/namespace_splitter"
6
7
 
7
8
  module Moxml
8
9
  module Adapter
@@ -555,6 +556,8 @@ module Moxml
555
556
  #
556
557
  # @private
557
558
  class OgaSAXBridge
559
+ include Moxml::SAX::NamespaceSplitter
560
+
558
561
  def initialize(handler)
559
562
  @handler = handler
560
563
  end
@@ -563,29 +566,8 @@ module Moxml
563
566
  # namespace may be nil
564
567
  # attributes is an array of [name, value] pairs
565
568
  def on_element(namespace, name, attributes)
566
- # Build full qualified name if namespace present
567
569
  element_name = namespace ? "#{namespace}:#{name}" : name
568
-
569
- # Convert Oga attributes to hash
570
- attr_hash = {}
571
- ns_hash = {}
572
-
573
- # Oga delivers attributes as array of [name, value] pairs
574
- attributes.each do |attr_name, attr_value|
575
- if attr_name.to_s.start_with?("xmlns")
576
- prefix = if attr_name.to_s == "xmlns"
577
- nil
578
- else
579
- attr_name.to_s.sub(
580
- "xmlns:", ""
581
- )
582
- end
583
- ns_hash[prefix] = attr_value
584
- else
585
- attr_hash[attr_name.to_s] = attr_value
586
- end
587
- end
588
-
570
+ attr_hash, ns_hash = split_attributes_and_namespaces(attributes)
589
571
  @handler.on_start_element(element_name, attr_hash, ns_hash)
590
572
  end
591
573
 
@@ -1,9 +1,12 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ return if RUBY_ENGINE == "opal"
4
+
3
5
  require_relative "base"
4
6
  require "ox"
5
7
  require "stringio"
6
8
  require_relative "customized_ox"
9
+ require_relative "../sax/namespace_splitter"
7
10
 
8
11
  # insert :parent methods to all Ox classes inherit the Node class
9
12
  Ox::Node.attr_accessor :parent
@@ -188,7 +191,7 @@ module Moxml
188
191
  when ::Ox::Element then :element
189
192
  when ::Ox::DocType then :doctype
190
193
  when ::Moxml::Adapter::CustomizedOx::EntityReference then :entity_reference
191
- when ::Moxml::Adapter::CustomizedOx::Namespace then :banespace
194
+ when ::Moxml::Adapter::CustomizedOx::Namespace then :namespace
192
195
  when ::Moxml::Adapter::CustomizedOx::Attribute then :attribute
193
196
  else :unknown
194
197
  end
@@ -903,6 +906,8 @@ module Moxml
903
906
  #
904
907
  # @private
905
908
  class OxSAXBridge
909
+ include Moxml::SAX::NamespaceSplitter
910
+
906
911
  def initialize(handler)
907
912
  @handler = handler
908
913
  @pending_attrs = {}
@@ -972,28 +977,8 @@ module Moxml
972
977
  private
973
978
 
974
979
  def finalize_pending_element
975
- # Separate namespace declarations from regular attributes
976
- attr_hash = {}
977
- namespaces_hash = {}
978
-
979
- @pending_attrs.each do |attr_name, attr_value|
980
- if attr_name.to_s.start_with?("xmlns")
981
- # Namespace declaration
982
- prefix = if attr_name.to_s == "xmlns"
983
- nil
984
- else
985
- attr_name.to_s.sub(
986
- "xmlns:", ""
987
- )
988
- end
989
- namespaces_hash[prefix] = attr_value
990
- else
991
- attr_hash[attr_name.to_s] = attr_value
992
- end
993
- end
994
-
995
- @handler.on_start_element(@pending_element_name, attr_hash,
996
- namespaces_hash)
980
+ attr_hash, ns_hash = split_attributes_and_namespaces(@pending_attrs)
981
+ @handler.on_start_element(@pending_element_name, attr_hash, ns_hash)
997
982
 
998
983
  # Clear for next element
999
984
  @pending_attrs = {}
@@ -3,8 +3,10 @@
3
3
  require_relative "base"
4
4
  require "rexml/document"
5
5
  require "rexml/xpath"
6
- require "set"
6
+ require "set" unless RUBY_ENGINE == "opal"
7
+ require "stringio" if RUBY_ENGINE == "opal"
7
8
  require_relative "customized_rexml"
9
+ require_relative "../sax/namespace_splitter"
8
10
 
9
11
  module Moxml
10
12
  module Adapter
@@ -45,9 +47,13 @@ module Moxml
45
47
  end
46
48
 
47
49
  def extract_encoding_from_xml(xml)
48
- # Match XML declaration pattern: <?xml version="..." encoding="..."?>
49
- # Use atomic group (?>) to prevent polynomial backtracking ReDoS
50
- match = xml.match(/<\?xml(?>[^>]*)\bencoding\s*=\s*["']([^"']+)["']/i)
50
+ return "UTF-8" unless xml.start_with?("<?xml")
51
+
52
+ decl_end = xml.index("?>")
53
+ return "UTF-8" unless decl_end
54
+
55
+ decl = xml[0...decl_end]
56
+ match = decl.match(/encoding\s*=\s*["']([^"']+)["']/i)
51
57
  match ? match[1] : "UTF-8"
52
58
  end
53
59
 
@@ -195,21 +201,19 @@ module Moxml
195
201
  def next_sibling(node)
196
202
  current = node.next_sibling
197
203
 
198
- # Skip empty text nodes and duplicates
199
- seen = Set.new
204
+ seen = {}
200
205
  while current
201
206
  if current.is_a?(::REXML::Text) && current.to_s.strip.empty?
202
207
  current = current.next_sibling
203
208
  next
204
209
  end
205
210
 
206
- # Check for duplicates
207
- if seen.include?(current.object_id)
211
+ if seen[current.object_id]
208
212
  current = current.next_sibling
209
213
  next
210
214
  end
211
215
 
212
- seen.add(current.object_id)
216
+ seen[current.object_id] = true
213
217
  break
214
218
  end
215
219
 
@@ -219,21 +223,19 @@ module Moxml
219
223
  def previous_sibling(node)
220
224
  current = node.previous_sibling
221
225
 
222
- # Skip empty text nodes and duplicates
223
- seen = Set.new
226
+ seen = {}
224
227
  while current
225
228
  if current.is_a?(::REXML::Text) && current.to_s.strip.empty?
226
229
  current = current.previous_sibling
227
230
  next
228
231
  end
229
232
 
230
- # Check for duplicates
231
- if seen.include?(current.object_id)
233
+ if seen[current.object_id]
232
234
  current = current.previous_sibling
233
235
  next
234
236
  end
235
237
 
236
- seen.add(current.object_id)
238
+ seen[current.object_id] = true
237
239
  break
238
240
  end
239
241
 
@@ -546,8 +548,12 @@ module Moxml
546
548
  ns
547
549
  end
548
550
 
549
- def xpath(node, expression, _namespaces = {})
550
- node.get_elements(expression).to_a
551
+ def xpath(node, expression, namespaces = {})
552
+ if namespaces && !namespaces.empty?
553
+ ::REXML::XPath.match(node, expression, namespaces)
554
+ else
555
+ node.get_elements(expression).to_a
556
+ end
551
557
  rescue ::REXML::ParseException => e
552
558
  raise Moxml::XPathError.new(
553
559
  e.message,
@@ -563,7 +569,8 @@ module Moxml
563
569
  end
564
570
 
565
571
  def serialize(node, options = {})
566
- output = +""
572
+ output = StringIO.new("") if RUBY_ENGINE == "opal"
573
+ output ||= +""
567
574
 
568
575
  if node.is_a?(::REXML::Document)
569
576
  # Check if we should include declaration
@@ -606,7 +613,8 @@ module Moxml
606
613
  write_with_formatter(node, output, options[:indent] || 2)
607
614
  end
608
615
 
609
- output.strip
616
+ result = output.is_a?(StringIO) ? output.string : output
617
+ result.strip
610
618
  end
611
619
 
612
620
  def has_declaration?(native_doc, wrapper)
@@ -641,27 +649,15 @@ module Moxml
641
649
  #
642
650
  # @private
643
651
  class REXMLSAX2Bridge
652
+ include Moxml::SAX::NamespaceSplitter
653
+
644
654
  def initialize(handler)
645
655
  @handler = handler
646
656
  end
647
657
 
648
658
  # REXML splits element name into uri/localname/qname
649
659
  def start_element(_uri, _localname, qname, attributes)
650
- # Convert REXML attributes to hash
651
- attr_hash = {}
652
- ns_hash = {}
653
-
654
- attributes.each do |name, value|
655
- if name.to_s.start_with?("xmlns")
656
- # Namespace declaration
657
- prefix = name.to_s == "xmlns" ? nil : name.to_s.sub("xmlns:", "")
658
- ns_hash[prefix] = value
659
- else
660
- attr_hash[name.to_s] = value
661
- end
662
- end
663
-
664
- # Use qname (qualified name) for element name
660
+ attr_hash, ns_hash = split_attributes_and_namespaces(attributes)
665
661
  @handler.on_start_element(qname, attr_hash, ns_hash)
666
662
  end
667
663
 
data/lib/moxml/adapter.rb CHANGED
@@ -4,18 +4,25 @@ require_relative "adapter/base"
4
4
 
5
5
  module Moxml
6
6
  module Adapter
7
- AVALIABLE_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
7
+ AVAILABLE_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
8
+
9
+ # Adapters that work under the Opal (JavaScript) runtime.
10
+ # REXML is pure Ruby and Opal reimplements strscan/stringio in its stdlib,
11
+ # enabling REXML to compile cleanly to JavaScript.
12
+ OPAL_AVAILABLE_ADAPTERS = %i[rexml].freeze
13
+
14
+ # Registry mapping adapter names to their class name suffixes.
15
+ # Special cases (like :headed_ox → "HeadedOx") live here instead of
16
+ # a case statement, keeping the dispatch open for extension.
17
+ CONST_NAME_MAP = {
18
+ headed_ox: "HeadedOx",
19
+ }.freeze
8
20
 
9
21
  class << self
10
22
  def load(name)
23
+ validate_platform!(name)
11
24
  require_adapter(name)
12
- # Handle special case for headed_ox -> HeadedOx
13
- const_name = case name
14
- when :headed_ox
15
- "HeadedOx"
16
- else
17
- name.to_s.capitalize
18
- end
25
+ const_name = const_name_for(name)
19
26
  const_get(const_name)
20
27
  rescue LoadError => e
21
28
  raise Moxml::AdapterError.new(
@@ -26,8 +33,31 @@ module Moxml
26
33
  )
27
34
  end
28
35
 
36
+ def available?(name)
37
+ platform_adapters.include?(name.to_sym)
38
+ end
39
+
40
+ def platform_adapters
41
+ RUBY_ENGINE == "opal" ? OPAL_AVAILABLE_ADAPTERS : AVAILABLE_ADAPTERS
42
+ end
43
+
29
44
  private
30
45
 
46
+ def validate_platform!(name)
47
+ return if platform_adapters.include?(name.to_sym)
48
+
49
+ available = platform_adapters.map(&:to_s).join(", ")
50
+ raise Moxml::AdapterError.new(
51
+ "The '#{name}' adapter is not available on this platform. Available: #{available}",
52
+ adapter: name,
53
+ operation: "platform_check",
54
+ )
55
+ end
56
+
57
+ def const_name_for(name)
58
+ CONST_NAME_MAP[name.to_sym] || name.to_s.capitalize
59
+ end
60
+
31
61
  def require_adapter(name)
32
62
  require "#{__dir__}/adapter/#{name}"
33
63
  rescue LoadError
data/lib/moxml/config.rb CHANGED
@@ -4,7 +4,7 @@ module Moxml
4
4
  class Config
5
5
  VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
6
6
  DEFAULT_ADAPTER = :nokogiri
7
- OPAL_DEFAULT_ADAPTER = :oga
7
+ OPAL_DEFAULT_ADAPTER = :rexml
8
8
 
9
9
  # Entity loading modes:
10
10
  # - :required - Must load entities, raise error if unavailable (default)
@@ -1,7 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require "json"
3
+ require "json" unless RUBY_ENGINE == "opal"
4
4
  require "set"
5
+ require_relative "entity_registry_opal_data" if RUBY_ENGINE == "opal"
5
6
 
6
7
  module Moxml
7
8
  # EntityRegistry maintains a knowledge base of XML entity definitions.
@@ -55,6 +56,10 @@ module Moxml
55
56
  # Load entity data from bundled gem data or local file
56
57
  # @return [Hash{String => String}]
57
58
  def load_entity_data
59
+ if RUBY_ENGINE == "opal"
60
+ return OPAL_ENTITY_DATA
61
+ end
62
+
58
63
  # Try multiple paths in order of priority
59
64
  paths_to_try = []
60
65
 
@@ -216,33 +221,40 @@ module Moxml
216
221
  self
217
222
  end
218
223
 
219
- # Load all entities from the W3C HTMLMathML entity set
220
- # This is called automatically by initialize
224
+ # Load all entities from the W3C HTMLMathML entity set.
225
+ # All entities are loaded during initialize; this method is a no-op
226
+ # kept for backward compatibility.
221
227
  # @return [self]
222
228
  def load_html5
223
- # All entities are loaded by default from initialize
229
+ warn "EntityRegistry#load_html5 is a no-op (all entities load during initialize)", uplevel: 1
224
230
  self
225
231
  end
226
232
 
227
- # Load MathML entity set (included in HTMLMathML)
233
+ # Load MathML entity set (included in HTMLMathML).
234
+ # All entities are loaded during initialize; this method is a no-op
235
+ # kept for backward compatibility.
228
236
  # @return [self]
229
237
  def load_mathml
230
- # All entities are loaded by default from initialize
238
+ warn "EntityRegistry#load_mathml is a no-op (all entities load during initialize)", uplevel: 1
231
239
  self
232
240
  end
233
241
 
234
- # Load ISO entity sets (included in HTMLMathML)
242
+ # Load ISO entity sets (included in HTMLMathML).
243
+ # All entities are loaded during initialize; this method is a no-op
244
+ # kept for backward compatibility.
235
245
  # @param _set_name [Symbol] (ignored, all loaded together)
236
246
  # @return [self]
237
247
  def load_iso(_set_name = :iso8879)
238
- # All entities are loaded by default from initialize
248
+ warn "EntityRegistry#load_iso is a no-op (all entities load during initialize)", uplevel: 1
239
249
  self
240
250
  end
241
251
 
242
- # Load all standard entity sets
252
+ # Load all standard entity sets.
253
+ # All entities are loaded during initialize; this method is a no-op
254
+ # kept for backward compatibility.
243
255
  # @return [self]
244
256
  def load_all
245
- # All entities are loaded by default from initialize
257
+ warn "EntityRegistry#load_all is a no-op (all entities load during initialize)", uplevel: 1
246
258
  self
247
259
  end
248
260
 
@@ -256,6 +268,17 @@ module Moxml
256
268
 
257
269
  private
258
270
 
271
+ def populate_from_hash(data)
272
+ data.each do |name, char_or_codepoint|
273
+ codepoint = char_or_codepoint.is_a?(Integer) ? char_or_codepoint : parse_codepoint(char_or_codepoint)
274
+ next unless codepoint
275
+
276
+ @by_name[name] = codepoint
277
+ @by_codepoint[codepoint] ||= []
278
+ @by_codepoint[codepoint] << name unless @by_codepoint[codepoint].include?(name)
279
+ end
280
+ end
281
+
259
282
  # Load entities from the centralized JSON data source
260
283
  # @raise [EntityDataError] if entity data is required but cannot be loaded
261
284
  # @return [void]
@@ -267,14 +290,7 @@ module Moxml
267
290
  "Entity data is not available. Set entity_load_mode to :optional or :disabled to skip entity loading."
268
291
  end
269
292
 
270
- data.each do |name, char|
271
- codepoint = parse_codepoint(char)
272
- next unless codepoint
273
-
274
- @by_name[name] = codepoint
275
- @by_codepoint[codepoint] ||= []
276
- @by_codepoint[codepoint] << name unless @by_codepoint[codepoint].include?(name)
277
- end
293
+ populate_from_hash(data)
278
294
  end
279
295
 
280
296
  # Load entities from the centralized JSON data source (optional mode)
@@ -284,14 +300,7 @@ module Moxml
284
300
  data = self.class.entity_data
285
301
  return unless data
286
302
 
287
- data.each do |name, char|
288
- codepoint = parse_codepoint(char)
289
- next unless codepoint
290
-
291
- @by_name[name] = codepoint
292
- @by_codepoint[codepoint] ||= []
293
- @by_codepoint[codepoint] << name unless @by_codepoint[codepoint].include?(name)
294
- end
303
+ populate_from_hash(data)
295
304
  rescue EntityDataError
296
305
  # Silently ignore - optional mode
297
306
  end
@@ -304,11 +313,7 @@ module Moxml
304
313
  entities = @entity_provider.call
305
314
  return unless entities
306
315
 
307
- entities.each do |name, codepoint|
308
- @by_name[name] = codepoint
309
- @by_codepoint[codepoint] ||= []
310
- @by_codepoint[codepoint] << name unless @by_codepoint[codepoint].include?(name)
311
- end
316
+ populate_from_hash(entities)
312
317
  end
313
318
 
314
319
  # Parse a Unicode character escape to codepoint