moxml 0.1.21 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/opal.yml +37 -0
- data/.gitignore +1 -0
- data/.rspec-opal +5 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +680 -110
- data/Gemfile +6 -0
- data/Rakefile +70 -0
- data/lib/compat/opal/rexml/namespace.rb +59 -0
- data/lib/compat/opal/rexml/parsers/baseparser.rb +1016 -0
- data/lib/compat/opal/rexml/source.rb +214 -0
- data/lib/compat/opal/rexml/text.rb +426 -0
- data/lib/compat/opal/rexml/xmltokens.rb +45 -0
- data/lib/compat/opal/rexml_compat.rb +77 -0
- data/lib/moxml/adapter/customized_oga/xml_declaration.rb +8 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -10
- data/lib/moxml/adapter/headed_ox.rb +2 -6
- data/lib/moxml/adapter/libxml/entity_ref_registry.rb +4 -2
- data/lib/moxml/adapter/libxml/entity_restorer.rb +3 -1
- data/lib/moxml/adapter/libxml.rb +22 -24
- data/lib/moxml/adapter/nokogiri.rb +24 -33
- data/lib/moxml/adapter/oga.rb +47 -84
- data/lib/moxml/adapter/ox.rb +43 -41
- data/lib/moxml/adapter/rexml.rb +29 -33
- data/lib/moxml/adapter.rb +38 -8
- data/lib/moxml/config.rb +16 -3
- data/lib/moxml/document.rb +2 -8
- data/lib/moxml/entity_registry.rb +40 -31
- data/lib/moxml/entity_registry_opal_data.rb +2138 -0
- data/lib/moxml/node.rb +27 -26
- data/lib/moxml/sax/namespace_splitter.rb +54 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +10 -1
- data/lib/moxml.rb +7 -0
- data/spec/consistency/adapter_parity_spec.rb +1 -1
- data/spec/integration/all_adapters_spec.rb +2 -1
- data/spec/integration/shared_examples/line_ending_behavior.rb +56 -0
- data/spec/integration/w3c_namespace_spec.rb +1 -1
- data/spec/moxml/adapter/libxml_internals_spec.rb +4 -2
- data/spec/moxml/adapter/ox_spec.rb +8 -0
- data/spec/moxml/adapter/platform_spec.rb +70 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +0 -6
- data/spec/moxml/config_spec.rb +33 -0
- data/spec/moxml/entity_registry_spec.rb +10 -0
- data/spec/moxml/native_attachment/opal_spec.rb +39 -2
- data/spec/moxml/node_type_map_spec.rb +43 -0
- data/spec/moxml/opal_rexml_adapter_spec.rb +14 -0
- data/spec/moxml/opal_smoke_spec.rb +61 -0
- data/spec/moxml/sax/namespace_splitter_spec.rb +67 -0
- data/spec/moxml/text_spec.rb +1 -1
- data/spec/spec_helper.rb +32 -13
- data/spec/support/opal.rb +16 -0
- metadata +19 -2
data/lib/moxml/adapter/ox.rb
CHANGED
|
@@ -1,9 +1,12 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
return if RUBY_ENGINE == "opal"
|
|
4
|
+
|
|
3
5
|
require_relative "base"
|
|
4
6
|
require "ox"
|
|
5
7
|
require "stringio"
|
|
6
8
|
require_relative "customized_ox"
|
|
9
|
+
require_relative "../sax/namespace_splitter"
|
|
7
10
|
|
|
8
11
|
# insert :parent methods to all Ox classes inherit the Node class
|
|
9
12
|
Ox::Node.attr_accessor :parent
|
|
@@ -16,7 +19,17 @@ module Moxml
|
|
|
16
19
|
end
|
|
17
20
|
|
|
18
21
|
def set_root(doc, element)
|
|
19
|
-
|
|
22
|
+
existing_root = root(doc)
|
|
23
|
+
if existing_root
|
|
24
|
+
# Replace the existing root element, preserving other children
|
|
25
|
+
element.parent = doc if element.is_a?(::Ox::Node)
|
|
26
|
+
idx = doc.nodes.index(existing_root)
|
|
27
|
+
doc.nodes[idx] = element
|
|
28
|
+
else
|
|
29
|
+
# No root yet, just append the element
|
|
30
|
+
element.parent = doc if element.is_a?(::Ox::Node)
|
|
31
|
+
doc << element
|
|
32
|
+
end
|
|
20
33
|
end
|
|
21
34
|
|
|
22
35
|
def parse(xml, options = {}, _context = nil)
|
|
@@ -98,9 +111,14 @@ module Moxml
|
|
|
98
111
|
end
|
|
99
112
|
|
|
100
113
|
def create_native_doctype(name, external_id, system_id)
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
114
|
+
value = if external_id
|
|
115
|
+
"#{name} PUBLIC \"#{external_id}\" \"#{system_id}\""
|
|
116
|
+
elsif system_id
|
|
117
|
+
"#{name} SYSTEM \"#{system_id}\""
|
|
118
|
+
else
|
|
119
|
+
"#{name}"
|
|
120
|
+
end
|
|
121
|
+
::Ox::DocType.new(value)
|
|
104
122
|
end
|
|
105
123
|
|
|
106
124
|
def create_native_processing_instruction(target, content)
|
|
@@ -188,7 +206,7 @@ module Moxml
|
|
|
188
206
|
when ::Ox::Element then :element
|
|
189
207
|
when ::Ox::DocType then :doctype
|
|
190
208
|
when ::Moxml::Adapter::CustomizedOx::EntityReference then :entity_reference
|
|
191
|
-
when ::Moxml::Adapter::CustomizedOx::Namespace then :
|
|
209
|
+
when ::Moxml::Adapter::CustomizedOx::Namespace then :namespace
|
|
192
210
|
when ::Moxml::Adapter::CustomizedOx::Attribute then :attribute
|
|
193
211
|
else :unknown
|
|
194
212
|
end
|
|
@@ -367,25 +385,27 @@ module Moxml
|
|
|
367
385
|
def add_child(element, child)
|
|
368
386
|
# Special handling for declarations on Ox documents
|
|
369
387
|
if element.is_a?(::Ox::Document) && child.is_a?(::Ox::Instruct) && child.target == "xml"
|
|
370
|
-
# Transfer declaration attributes to document
|
|
371
388
|
element.attributes ||= {}
|
|
372
|
-
if child.attributes["version"]
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
if child.attributes["encoding"]
|
|
377
|
-
element.attributes[:encoding] =
|
|
378
|
-
child.attributes["encoding"]
|
|
379
|
-
end
|
|
380
|
-
if child.attributes["standalone"]
|
|
381
|
-
element.attributes[:standalone] =
|
|
382
|
-
child.attributes["standalone"]
|
|
383
|
-
end
|
|
389
|
+
element.attributes[:version] = child.attributes["version"] if child.attributes["version"]
|
|
390
|
+
element.attributes[:encoding] = child.attributes["encoding"] if child.attributes["encoding"]
|
|
391
|
+
element.attributes[:standalone] = child.attributes["standalone"] if child.attributes["standalone"]
|
|
392
|
+
return
|
|
384
393
|
end
|
|
385
394
|
|
|
386
395
|
child.parent = element if child.is_a?(::Ox::Node)
|
|
387
396
|
element.nodes ||= []
|
|
388
|
-
|
|
397
|
+
|
|
398
|
+
# Insert doctype before root element in document
|
|
399
|
+
if element.is_a?(::Ox::Document) && child.is_a?(::Ox::DocType)
|
|
400
|
+
root_idx = element.nodes.index { |n| n.is_a?(::Ox::Element) }
|
|
401
|
+
if root_idx
|
|
402
|
+
element.nodes.insert(root_idx, child)
|
|
403
|
+
else
|
|
404
|
+
element.nodes << child
|
|
405
|
+
end
|
|
406
|
+
else
|
|
407
|
+
element.nodes << child
|
|
408
|
+
end
|
|
389
409
|
|
|
390
410
|
# Mark document if EntityReference is added (avoids tree scan in serialize)
|
|
391
411
|
if child.is_a?(::Moxml::Adapter::CustomizedOx::EntityReference)
|
|
@@ -903,6 +923,8 @@ module Moxml
|
|
|
903
923
|
#
|
|
904
924
|
# @private
|
|
905
925
|
class OxSAXBridge
|
|
926
|
+
include Moxml::SAX::NamespaceSplitter
|
|
927
|
+
|
|
906
928
|
def initialize(handler)
|
|
907
929
|
@handler = handler
|
|
908
930
|
@pending_attrs = {}
|
|
@@ -972,28 +994,8 @@ module Moxml
|
|
|
972
994
|
private
|
|
973
995
|
|
|
974
996
|
def finalize_pending_element
|
|
975
|
-
|
|
976
|
-
attr_hash
|
|
977
|
-
namespaces_hash = {}
|
|
978
|
-
|
|
979
|
-
@pending_attrs.each do |attr_name, attr_value|
|
|
980
|
-
if attr_name.to_s.start_with?("xmlns")
|
|
981
|
-
# Namespace declaration
|
|
982
|
-
prefix = if attr_name.to_s == "xmlns"
|
|
983
|
-
nil
|
|
984
|
-
else
|
|
985
|
-
attr_name.to_s.sub(
|
|
986
|
-
"xmlns:", ""
|
|
987
|
-
)
|
|
988
|
-
end
|
|
989
|
-
namespaces_hash[prefix] = attr_value
|
|
990
|
-
else
|
|
991
|
-
attr_hash[attr_name.to_s] = attr_value
|
|
992
|
-
end
|
|
993
|
-
end
|
|
994
|
-
|
|
995
|
-
@handler.on_start_element(@pending_element_name, attr_hash,
|
|
996
|
-
namespaces_hash)
|
|
997
|
+
attr_hash, ns_hash = split_attributes_and_namespaces(@pending_attrs)
|
|
998
|
+
@handler.on_start_element(@pending_element_name, attr_hash, ns_hash)
|
|
997
999
|
|
|
998
1000
|
# Clear for next element
|
|
999
1001
|
@pending_attrs = {}
|
data/lib/moxml/adapter/rexml.rb
CHANGED
|
@@ -3,8 +3,10 @@
|
|
|
3
3
|
require_relative "base"
|
|
4
4
|
require "rexml/document"
|
|
5
5
|
require "rexml/xpath"
|
|
6
|
-
require "set"
|
|
6
|
+
require "set" unless RUBY_ENGINE == "opal"
|
|
7
|
+
require "stringio" if RUBY_ENGINE == "opal"
|
|
7
8
|
require_relative "customized_rexml"
|
|
9
|
+
require_relative "../sax/namespace_splitter"
|
|
8
10
|
|
|
9
11
|
module Moxml
|
|
10
12
|
module Adapter
|
|
@@ -45,9 +47,13 @@ module Moxml
|
|
|
45
47
|
end
|
|
46
48
|
|
|
47
49
|
def extract_encoding_from_xml(xml)
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
50
|
+
return "UTF-8" unless xml.start_with?("<?xml")
|
|
51
|
+
|
|
52
|
+
decl_end = xml.index("?>")
|
|
53
|
+
return "UTF-8" unless decl_end
|
|
54
|
+
|
|
55
|
+
decl = xml[0...decl_end]
|
|
56
|
+
match = decl.match(/encoding\s*=\s*["']([^"']+)["']/i)
|
|
51
57
|
match ? match[1] : "UTF-8"
|
|
52
58
|
end
|
|
53
59
|
|
|
@@ -195,21 +201,19 @@ module Moxml
|
|
|
195
201
|
def next_sibling(node)
|
|
196
202
|
current = node.next_sibling
|
|
197
203
|
|
|
198
|
-
|
|
199
|
-
seen = Set.new
|
|
204
|
+
seen = {}
|
|
200
205
|
while current
|
|
201
206
|
if current.is_a?(::REXML::Text) && current.to_s.strip.empty?
|
|
202
207
|
current = current.next_sibling
|
|
203
208
|
next
|
|
204
209
|
end
|
|
205
210
|
|
|
206
|
-
|
|
207
|
-
if seen.include?(current.object_id)
|
|
211
|
+
if seen[current.object_id]
|
|
208
212
|
current = current.next_sibling
|
|
209
213
|
next
|
|
210
214
|
end
|
|
211
215
|
|
|
212
|
-
seen
|
|
216
|
+
seen[current.object_id] = true
|
|
213
217
|
break
|
|
214
218
|
end
|
|
215
219
|
|
|
@@ -219,21 +223,19 @@ module Moxml
|
|
|
219
223
|
def previous_sibling(node)
|
|
220
224
|
current = node.previous_sibling
|
|
221
225
|
|
|
222
|
-
|
|
223
|
-
seen = Set.new
|
|
226
|
+
seen = {}
|
|
224
227
|
while current
|
|
225
228
|
if current.is_a?(::REXML::Text) && current.to_s.strip.empty?
|
|
226
229
|
current = current.previous_sibling
|
|
227
230
|
next
|
|
228
231
|
end
|
|
229
232
|
|
|
230
|
-
|
|
231
|
-
if seen.include?(current.object_id)
|
|
233
|
+
if seen[current.object_id]
|
|
232
234
|
current = current.previous_sibling
|
|
233
235
|
next
|
|
234
236
|
end
|
|
235
237
|
|
|
236
|
-
seen
|
|
238
|
+
seen[current.object_id] = true
|
|
237
239
|
break
|
|
238
240
|
end
|
|
239
241
|
|
|
@@ -546,8 +548,12 @@ module Moxml
|
|
|
546
548
|
ns
|
|
547
549
|
end
|
|
548
550
|
|
|
549
|
-
def xpath(node, expression,
|
|
550
|
-
|
|
551
|
+
def xpath(node, expression, namespaces = {})
|
|
552
|
+
if namespaces && !namespaces.empty?
|
|
553
|
+
::REXML::XPath.match(node, expression, namespaces)
|
|
554
|
+
else
|
|
555
|
+
node.get_elements(expression).to_a
|
|
556
|
+
end
|
|
551
557
|
rescue ::REXML::ParseException => e
|
|
552
558
|
raise Moxml::XPathError.new(
|
|
553
559
|
e.message,
|
|
@@ -563,7 +569,8 @@ module Moxml
|
|
|
563
569
|
end
|
|
564
570
|
|
|
565
571
|
def serialize(node, options = {})
|
|
566
|
-
output =
|
|
572
|
+
output = StringIO.new("") if RUBY_ENGINE == "opal"
|
|
573
|
+
output ||= +""
|
|
567
574
|
|
|
568
575
|
if node.is_a?(::REXML::Document)
|
|
569
576
|
# Check if we should include declaration
|
|
@@ -606,7 +613,8 @@ module Moxml
|
|
|
606
613
|
write_with_formatter(node, output, options[:indent] || 2)
|
|
607
614
|
end
|
|
608
615
|
|
|
609
|
-
output.
|
|
616
|
+
result = output.is_a?(StringIO) ? output.string : output
|
|
617
|
+
result.strip
|
|
610
618
|
end
|
|
611
619
|
|
|
612
620
|
def has_declaration?(native_doc, wrapper)
|
|
@@ -641,27 +649,15 @@ module Moxml
|
|
|
641
649
|
#
|
|
642
650
|
# @private
|
|
643
651
|
class REXMLSAX2Bridge
|
|
652
|
+
include Moxml::SAX::NamespaceSplitter
|
|
653
|
+
|
|
644
654
|
def initialize(handler)
|
|
645
655
|
@handler = handler
|
|
646
656
|
end
|
|
647
657
|
|
|
648
658
|
# REXML splits element name into uri/localname/qname
|
|
649
659
|
def start_element(_uri, _localname, qname, attributes)
|
|
650
|
-
|
|
651
|
-
attr_hash = {}
|
|
652
|
-
ns_hash = {}
|
|
653
|
-
|
|
654
|
-
attributes.each do |name, value|
|
|
655
|
-
if name.to_s.start_with?("xmlns")
|
|
656
|
-
# Namespace declaration
|
|
657
|
-
prefix = name.to_s == "xmlns" ? nil : name.to_s.sub("xmlns:", "")
|
|
658
|
-
ns_hash[prefix] = value
|
|
659
|
-
else
|
|
660
|
-
attr_hash[name.to_s] = value
|
|
661
|
-
end
|
|
662
|
-
end
|
|
663
|
-
|
|
664
|
-
# Use qname (qualified name) for element name
|
|
660
|
+
attr_hash, ns_hash = split_attributes_and_namespaces(attributes)
|
|
665
661
|
@handler.on_start_element(qname, attr_hash, ns_hash)
|
|
666
662
|
end
|
|
667
663
|
|
data/lib/moxml/adapter.rb
CHANGED
|
@@ -4,18 +4,25 @@ require_relative "adapter/base"
|
|
|
4
4
|
|
|
5
5
|
module Moxml
|
|
6
6
|
module Adapter
|
|
7
|
-
|
|
7
|
+
AVAILABLE_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
|
|
8
|
+
|
|
9
|
+
# Adapters that work under the Opal (JavaScript) runtime.
|
|
10
|
+
# REXML is pure Ruby and Opal reimplements strscan/stringio in its stdlib,
|
|
11
|
+
# enabling REXML to compile cleanly to JavaScript.
|
|
12
|
+
OPAL_AVAILABLE_ADAPTERS = %i[rexml].freeze
|
|
13
|
+
|
|
14
|
+
# Registry mapping adapter names to their class name suffixes.
|
|
15
|
+
# Special cases (like :headed_ox → "HeadedOx") live here instead of
|
|
16
|
+
# a case statement, keeping the dispatch open for extension.
|
|
17
|
+
CONST_NAME_MAP = {
|
|
18
|
+
headed_ox: "HeadedOx",
|
|
19
|
+
}.freeze
|
|
8
20
|
|
|
9
21
|
class << self
|
|
10
22
|
def load(name)
|
|
23
|
+
validate_platform!(name)
|
|
11
24
|
require_adapter(name)
|
|
12
|
-
|
|
13
|
-
const_name = case name
|
|
14
|
-
when :headed_ox
|
|
15
|
-
"HeadedOx"
|
|
16
|
-
else
|
|
17
|
-
name.to_s.capitalize
|
|
18
|
-
end
|
|
25
|
+
const_name = const_name_for(name)
|
|
19
26
|
const_get(const_name)
|
|
20
27
|
rescue LoadError => e
|
|
21
28
|
raise Moxml::AdapterError.new(
|
|
@@ -26,8 +33,31 @@ module Moxml
|
|
|
26
33
|
)
|
|
27
34
|
end
|
|
28
35
|
|
|
36
|
+
def available?(name)
|
|
37
|
+
platform_adapters.include?(name.to_sym)
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
def platform_adapters
|
|
41
|
+
RUBY_ENGINE == "opal" ? OPAL_AVAILABLE_ADAPTERS : AVAILABLE_ADAPTERS
|
|
42
|
+
end
|
|
43
|
+
|
|
29
44
|
private
|
|
30
45
|
|
|
46
|
+
def validate_platform!(name)
|
|
47
|
+
return if platform_adapters.include?(name.to_sym)
|
|
48
|
+
|
|
49
|
+
available = platform_adapters.join(", ")
|
|
50
|
+
raise Moxml::AdapterError.new(
|
|
51
|
+
"The '#{name}' adapter is not available on this platform. Available: #{available}",
|
|
52
|
+
adapter: name,
|
|
53
|
+
operation: "platform_check",
|
|
54
|
+
)
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
def const_name_for(name)
|
|
58
|
+
CONST_NAME_MAP[name.to_sym] || name.to_s.capitalize
|
|
59
|
+
end
|
|
60
|
+
|
|
31
61
|
def require_adapter(name)
|
|
32
62
|
require "#{__dir__}/adapter/#{name}"
|
|
33
63
|
rescue LoadError
|
data/lib/moxml/config.rb
CHANGED
|
@@ -2,9 +2,12 @@
|
|
|
2
2
|
|
|
3
3
|
module Moxml
|
|
4
4
|
class Config
|
|
5
|
+
LINE_ENDING_LF = "\n"
|
|
6
|
+
LINE_ENDING_CRLF = "\r\n"
|
|
7
|
+
VALID_LINE_ENDINGS = [LINE_ENDING_LF, LINE_ENDING_CRLF].freeze
|
|
5
8
|
VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
|
|
6
9
|
DEFAULT_ADAPTER = :nokogiri
|
|
7
|
-
OPAL_DEFAULT_ADAPTER = :
|
|
10
|
+
OPAL_DEFAULT_ADAPTER = :rexml
|
|
8
11
|
|
|
9
12
|
# Entity loading modes:
|
|
10
13
|
# - :required - Must load entities, raise error if unavailable (default)
|
|
@@ -46,7 +49,7 @@ module Moxml
|
|
|
46
49
|
# - :strict — only restore DTD-declared entities (falls back to lenient until DTD parsing is implemented)
|
|
47
50
|
ENTITY_RESTORATION_MODES = %i[strict lenient].freeze
|
|
48
51
|
|
|
49
|
-
attr_reader :adapter_name
|
|
52
|
+
attr_reader :adapter_name, :default_line_ending
|
|
50
53
|
attr_accessor :strict_parsing,
|
|
51
54
|
:default_encoding,
|
|
52
55
|
:entity_encoding,
|
|
@@ -58,13 +61,23 @@ module Moxml
|
|
|
58
61
|
:namespace_validation_mode,
|
|
59
62
|
:entity_restoration_mode
|
|
60
63
|
|
|
64
|
+
def default_line_ending=(value)
|
|
65
|
+
unless VALID_LINE_ENDINGS.include?(value)
|
|
66
|
+
raise ArgumentError,
|
|
67
|
+
"Invalid line_ending: #{value.inspect}. " \
|
|
68
|
+
"Must be Config::LINE_ENDING_LF or Config::LINE_ENDING_CRLF"
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
@default_line_ending = value
|
|
72
|
+
end
|
|
73
|
+
|
|
61
74
|
def initialize(adapter_name = nil, strict_parsing = nil,
|
|
62
75
|
default_encoding = nil)
|
|
63
76
|
self.adapter = adapter_name || Config.default.adapter_name
|
|
64
77
|
@strict_parsing = strict_parsing || Config.default.strict_parsing
|
|
65
78
|
@default_encoding = default_encoding || Config.default.default_encoding
|
|
66
|
-
# reserved for future use
|
|
67
79
|
@default_indent = 2
|
|
80
|
+
@default_line_ending = LINE_ENDING_LF
|
|
68
81
|
@entity_encoding = :basic
|
|
69
82
|
@restore_entities = false
|
|
70
83
|
@preload_entity_sets = []
|
data/lib/moxml/document.rb
CHANGED
|
@@ -81,14 +81,8 @@ module Moxml
|
|
|
81
81
|
if node.is_a?(Declaration)
|
|
82
82
|
# Mark that document now has a declaration
|
|
83
83
|
@has_xml_declaration = true
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
adapter.add_child(@native, node.native)
|
|
87
|
-
else
|
|
88
|
-
adapter.add_previous_sibling(adapter.children(@native).first,
|
|
89
|
-
node.native)
|
|
90
|
-
end
|
|
91
|
-
elsif root && !node.is_a?(ProcessingInstruction) && !node.is_a?(Comment)
|
|
84
|
+
adapter.add_child(@native, node.native)
|
|
85
|
+
elsif root && !node.is_a?(ProcessingInstruction) && !node.is_a?(Comment) && !node.is_a?(Doctype)
|
|
92
86
|
raise Error, "Document already has a root element"
|
|
93
87
|
else
|
|
94
88
|
adapter.add_child(@native, node.native)
|
data/lib/moxml/entity_registry.rb
CHANGED
|
@@ -1,7 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require "json"
|
|
3
|
+
require "json" unless RUBY_ENGINE == "opal"
|
|
4
4
|
require "set"
|
|
5
|
+
require_relative "entity_registry_opal_data" if RUBY_ENGINE == "opal"
|
|
5
6
|
|
|
6
7
|
module Moxml
|
|
7
8
|
# EntityRegistry maintains a knowledge base of XML entity definitions.
|
|
@@ -55,6 +56,10 @@ module Moxml
|
|
|
55
56
|
# Load entity data from bundled gem data or local file
|
|
56
57
|
# @return [Hash{String => String}]
|
|
57
58
|
def load_entity_data
|
|
59
|
+
if RUBY_ENGINE == "opal"
|
|
60
|
+
return OPAL_ENTITY_DATA
|
|
61
|
+
end
|
|
62
|
+
|
|
58
63
|
# Try multiple paths in order of priority
|
|
59
64
|
paths_to_try = []
|
|
60
65
|
|
|
@@ -216,33 +221,44 @@ module Moxml
|
|
|
216
221
|
self
|
|
217
222
|
end
|
|
218
223
|
|
|
219
|
-
# Load all entities from the W3C HTMLMathML entity set
|
|
220
|
-
#
|
|
224
|
+
# Load all entities from the W3C HTMLMathML entity set.
|
|
225
|
+
# All entities are loaded during initialize; this method is a no-op
|
|
226
|
+
# kept for backward compatibility.
|
|
221
227
|
# @return [self]
|
|
222
228
|
def load_html5
|
|
223
|
-
#
|
|
229
|
+
warn "EntityRegistry#load_html5 is a no-op (all entities load during initialize)",
|
|
230
|
+
uplevel: 1
|
|
224
231
|
self
|
|
225
232
|
end
|
|
226
233
|
|
|
227
|
-
# Load MathML entity set (included in HTMLMathML)
|
|
234
|
+
# Load MathML entity set (included in HTMLMathML).
|
|
235
|
+
# All entities are loaded during initialize; this method is a no-op
|
|
236
|
+
# kept for backward compatibility.
|
|
228
237
|
# @return [self]
|
|
229
238
|
def load_mathml
|
|
230
|
-
#
|
|
239
|
+
warn "EntityRegistry#load_mathml is a no-op (all entities load during initialize)",
|
|
240
|
+
uplevel: 1
|
|
231
241
|
self
|
|
232
242
|
end
|
|
233
243
|
|
|
234
|
-
# Load ISO entity sets (included in HTMLMathML)
|
|
244
|
+
# Load ISO entity sets (included in HTMLMathML).
|
|
245
|
+
# All entities are loaded during initialize; this method is a no-op
|
|
246
|
+
# kept for backward compatibility.
|
|
235
247
|
# @param _set_name [Symbol] (ignored, all loaded together)
|
|
236
248
|
# @return [self]
|
|
237
249
|
def load_iso(_set_name = :iso8879)
|
|
238
|
-
#
|
|
250
|
+
warn "EntityRegistry#load_iso is a no-op (all entities load during initialize)",
|
|
251
|
+
uplevel: 1
|
|
239
252
|
self
|
|
240
253
|
end
|
|
241
254
|
|
|
242
|
-
# Load all standard entity sets
|
|
255
|
+
# Load all standard entity sets.
|
|
256
|
+
# All entities are loaded during initialize; this method is a no-op
|
|
257
|
+
# kept for backward compatibility.
|
|
243
258
|
# @return [self]
|
|
244
259
|
def load_all
|
|
245
|
-
#
|
|
260
|
+
warn "EntityRegistry#load_all is a no-op (all entities load during initialize)",
|
|
261
|
+
uplevel: 1
|
|
246
262
|
self
|
|
247
263
|
end
|
|
248
264
|
|
|
@@ -256,6 +272,17 @@ module Moxml
|
|
|
256
272
|
|
|
257
273
|
private
|
|
258
274
|
|
|
275
|
+
def populate_from_hash(data)
|
|
276
|
+
data.each do |name, char_or_codepoint|
|
|
277
|
+
codepoint = char_or_codepoint.is_a?(Integer) ? char_or_codepoint : parse_codepoint(char_or_codepoint)
|
|
278
|
+
next unless codepoint
|
|
279
|
+
|
|
280
|
+
@by_name[name] = codepoint
|
|
281
|
+
@by_codepoint[codepoint] ||= []
|
|
282
|
+
@by_codepoint[codepoint] << name unless @by_codepoint[codepoint].include?(name)
|
|
283
|
+
end
|
|
284
|
+
end
|
|
285
|
+
|
|
259
286
|
# Load entities from the centralized JSON data source
|
|
260
287
|
# @raise [EntityDataError] if entity data is required but cannot be loaded
|
|
261
288
|
# @return [void]
|
|
@@ -267,14 +294,7 @@ module Moxml
|
|
|
267
294
|
"Entity data is not available. Set entity_load_mode to :optional or :disabled to skip entity loading."
|
|
268
295
|
end
|
|
269
296
|
|
|
270
|
-
data
|
|
271
|
-
codepoint = parse_codepoint(char)
|
|
272
|
-
next unless codepoint
|
|
273
|
-
|
|
274
|
-
@by_name[name] = codepoint
|
|
275
|
-
@by_codepoint[codepoint] ||= []
|
|
276
|
-
@by_codepoint[codepoint] << name unless @by_codepoint[codepoint].include?(name)
|
|
277
|
-
end
|
|
297
|
+
populate_from_hash(data)
|
|
278
298
|
end
|
|
279
299
|
|
|
280
300
|
# Load entities from the centralized JSON data source (optional mode)
|
|
@@ -284,14 +304,7 @@ module Moxml
|
|
|
284
304
|
data = self.class.entity_data
|
|
285
305
|
return unless data
|
|
286
306
|
|
|
287
|
-
data
|
|
288
|
-
codepoint = parse_codepoint(char)
|
|
289
|
-
next unless codepoint
|
|
290
|
-
|
|
291
|
-
@by_name[name] = codepoint
|
|
292
|
-
@by_codepoint[codepoint] ||= []
|
|
293
|
-
@by_codepoint[codepoint] << name unless @by_codepoint[codepoint].include?(name)
|
|
294
|
-
end
|
|
307
|
+
populate_from_hash(data)
|
|
295
308
|
rescue EntityDataError
|
|
296
309
|
# Silently ignore - optional mode
|
|
297
310
|
end
|
|
@@ -304,11 +317,7 @@ module Moxml
|
|
|
304
317
|
entities = @entity_provider.call
|
|
305
318
|
return unless entities
|
|
306
319
|
|
|
307
|
-
entities
|
|
308
|
-
@by_name[name] = codepoint
|
|
309
|
-
@by_codepoint[codepoint] ||= []
|
|
310
|
-
@by_codepoint[codepoint] << name unless @by_codepoint[codepoint].include?(name)
|
|
311
|
-
end
|
|
320
|
+
populate_from_hash(entities)
|
|
312
321
|
end
|
|
313
322
|
|
|
314
323
|
# Parse a Unicode character escape to codepoint
|