moxml 0.1.22 → 0.1.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +1 -0
- data/.rubocop_todo.yml +680 -110
- data/Rakefile +12 -9
- data/lib/compat/opal/rexml/namespace.rb +8 -5
- data/lib/compat/opal/rexml/parsers/baseparser.rb +276 -212
- data/lib/compat/opal/rexml/source.rb +28 -27
- data/lib/compat/opal/rexml/text.rb +112 -104
- data/lib/compat/opal/rexml/xmltokens.rb +8 -8
- data/lib/compat/opal/rexml_compat.rb +12 -11
- data/lib/moxml/adapter/customized_oga/xml_declaration.rb +8 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +4 -4
- data/lib/moxml/adapter/libxml/entity_ref_registry.rb +4 -2
- data/lib/moxml/adapter/libxml/entity_restorer.rb +3 -1
- data/lib/moxml/adapter/libxml.rb +17 -4
- data/lib/moxml/adapter/nokogiri.rb +17 -15
- data/lib/moxml/adapter/oga.rb +43 -62
- data/lib/moxml/adapter/ox.rb +35 -18
- data/lib/moxml/adapter.rb +1 -1
- data/lib/moxml/config.rb +15 -2
- data/lib/moxml/document.rb +2 -8
- data/lib/moxml/entity_registry.rb +8 -4
- data/lib/moxml/entity_registry_opal_data.rb +3 -2
- data/lib/moxml/node.rb +8 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -0
- data/lib/moxml.rb +7 -0
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/shared_examples/line_ending_behavior.rb +56 -0
- data/spec/moxml/adapter/libxml_internals_spec.rb +4 -2
- data/spec/moxml/adapter/platform_spec.rb +2 -1
- data/spec/moxml/config_spec.rb +33 -0
- metadata +3 -2
|
@@ -8,10 +8,17 @@ module Moxml
|
|
|
8
8
|
class XmlDeclaration < ::Oga::XML::XmlDeclaration
|
|
9
9
|
def initialize(options = {})
|
|
10
10
|
@version = options[:version] || "1.0"
|
|
11
|
-
# encoding is optional, but Oga sets it to UTF-8 by default
|
|
12
11
|
@encoding = options[:encoding]
|
|
13
12
|
@standalone = options[:standalone]
|
|
14
13
|
end
|
|
14
|
+
|
|
15
|
+
def to_xml
|
|
16
|
+
parts = ["<?xml"]
|
|
17
|
+
parts << %( version="#{version}") if version
|
|
18
|
+
parts << %( encoding="#{encoding}") if encoding
|
|
19
|
+
parts << %( standalone="#{standalone}") if standalone
|
|
20
|
+
"#{parts.join}?>"
|
|
21
|
+
end
|
|
15
22
|
end
|
|
16
23
|
end
|
|
17
24
|
end
|
|
@@ -70,12 +70,12 @@ module Moxml
|
|
|
70
70
|
child.to_s.strip.empty? &&
|
|
71
71
|
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
72
72
|
|
|
73
|
-
output << "\n" << (
|
|
73
|
+
output << "\n" << (" " * @level) if indent_children
|
|
74
74
|
write(child, output)
|
|
75
75
|
end
|
|
76
76
|
when :eref
|
|
77
77
|
if eref_idx < entity_refs.size
|
|
78
|
-
output << "\n" << (
|
|
78
|
+
output << "\n" << (" " * @level) if indent_children
|
|
79
79
|
write(entity_refs[eref_idx], output)
|
|
80
80
|
eref_idx += 1
|
|
81
81
|
end
|
|
@@ -87,14 +87,14 @@ module Moxml
|
|
|
87
87
|
child.to_s.strip.empty? &&
|
|
88
88
|
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
89
89
|
|
|
90
|
-
output << "\n" << (
|
|
90
|
+
output << "\n" << (" " * @level) if indent_children
|
|
91
91
|
write(child, output)
|
|
92
92
|
end
|
|
93
93
|
end
|
|
94
94
|
|
|
95
95
|
if indent_children
|
|
96
96
|
@level -= @indentation.length
|
|
97
|
-
output << "\n" << (
|
|
97
|
+
output << "\n" << (" " * @level)
|
|
98
98
|
end
|
|
99
99
|
end
|
|
100
100
|
|
|
@@ -9,7 +9,8 @@ module Moxml
|
|
|
9
9
|
ENTITY_REFS_KEY = :_entity_ref_pairs
|
|
10
10
|
CHILD_SEQUENCE_KEY = :_child_seq_pairs
|
|
11
11
|
NON_WHITESPACE_RE = /\S/
|
|
12
|
-
private_constant :ENTITY_REFS_KEY, :CHILD_SEQUENCE_KEY,
|
|
12
|
+
private_constant :ENTITY_REFS_KEY, :CHILD_SEQUENCE_KEY,
|
|
13
|
+
:NON_WHITESPACE_RE
|
|
13
14
|
|
|
14
15
|
def initialize(attachments, doc)
|
|
15
16
|
@attachments = attachments
|
|
@@ -34,7 +35,8 @@ module Moxml
|
|
|
34
35
|
if existing
|
|
35
36
|
existing << :eref
|
|
36
37
|
else
|
|
37
|
-
seq_by_path[path] =
|
|
38
|
+
seq_by_path[path] =
|
|
39
|
+
Array.new(count_native_children(element), :native)
|
|
38
40
|
seq_by_path[path] << :eref
|
|
39
41
|
@attachments.set(@doc, CHILD_SEQUENCE_KEY, seq_by_path)
|
|
40
42
|
end
|
|
@@ -76,7 +76,9 @@ module Moxml
|
|
|
76
76
|
def append_chunk(parent, type, payload)
|
|
77
77
|
case type
|
|
78
78
|
when :text
|
|
79
|
-
parent.add_child(::Moxml::Text.new(
|
|
79
|
+
parent.add_child(::Moxml::Text.new(
|
|
80
|
+
@adapter.create_native_text(payload), @ctx
|
|
81
|
+
))
|
|
80
82
|
when :eref
|
|
81
83
|
parent.add_child(
|
|
82
84
|
::Moxml::EntityReference.new(
|
data/lib/moxml/adapter/libxml.rb
CHANGED
|
@@ -1182,7 +1182,8 @@ module Moxml
|
|
|
1182
1182
|
end
|
|
1183
1183
|
|
|
1184
1184
|
ESCAPE_XML_RE = /[&<>"]/
|
|
1185
|
-
ESCAPE_XML_MAP = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;",
|
|
1185
|
+
ESCAPE_XML_MAP = { "&" => "&amp;", "<" => "&lt;", ">" => "&gt;",
|
|
1186
|
+
'"' => "&quot;" }.freeze
|
|
1186
1187
|
private_constant :ESCAPE_XML_RE, :ESCAPE_XML_MAP
|
|
1187
1188
|
|
|
1188
1189
|
def escape_xml(text)
|
|
@@ -1278,7 +1279,13 @@ module Moxml
|
|
|
1278
1279
|
# attachment query that otherwise fires for every element under
|
|
1279
1280
|
# Monitor#synchronize.
|
|
1280
1281
|
eref_active = doc_eref_active?(elem.doc) if eref_active.nil?
|
|
1281
|
-
entity_refs, child_sequence = eref_active
|
|
1282
|
+
entity_refs, child_sequence = if eref_active
|
|
1283
|
+
lookup_entity_ref_serialization(elem)
|
|
1284
|
+
else
|
|
1285
|
+
[
|
|
1286
|
+
nil, nil
|
|
1287
|
+
]
|
|
1288
|
+
end
|
|
1282
1289
|
|
|
1283
1290
|
# Always use verbose format <tag></tag> for consistency with other adapters
|
|
1284
1291
|
output << ">"
|
|
@@ -1622,8 +1629,14 @@ module Moxml
|
|
|
1622
1629
|
# duplicated — callers that need the subtree use deep_duplicate_node.
|
|
1623
1630
|
def shallow_duplicate_element(native_node)
|
|
1624
1631
|
new_node = ::LibXML::XML::Node.new(native_node.name)
|
|
1625
|
-
|
|
1626
|
-
|
|
1632
|
+
if native_node.is_a?(::LibXML::XML::Node)
|
|
1633
|
+
copy_element_namespaces(native_node,
|
|
1634
|
+
new_node)
|
|
1635
|
+
end
|
|
1636
|
+
if native_node.attributes?
|
|
1637
|
+
copy_element_attributes(native_node,
|
|
1638
|
+
new_node)
|
|
1639
|
+
end
|
|
1627
1640
|
new_node
|
|
1628
1641
|
end
|
|
1629
1642
|
|
|
@@ -245,25 +245,22 @@ module Moxml
|
|
|
245
245
|
end
|
|
246
246
|
|
|
247
247
|
def add_child(element, child)
|
|
248
|
-
# Special handling for declarations on Nokogiri documents
|
|
249
248
|
if element.is_a?(::Nokogiri::XML::Document) &&
|
|
250
249
|
child.is_a?(::Nokogiri::XML::ProcessingInstruction) &&
|
|
251
250
|
child.name == "xml"
|
|
252
|
-
# Set document's xml_decl property
|
|
253
251
|
version = declaration_attribute(child, "version") || "1.0"
|
|
254
252
|
encoding = declaration_attribute(child, "encoding")
|
|
255
253
|
standalone = declaration_attribute(child, "standalone")
|
|
256
254
|
|
|
257
|
-
# Store declaration state in attachment map
|
|
258
255
|
attachments.set(element, :xml_decl, {
|
|
259
256
|
version: version,
|
|
260
257
|
encoding: encoding,
|
|
261
258
|
standalone: standalone,
|
|
262
259
|
}.compact)
|
|
260
|
+
return
|
|
263
261
|
end
|
|
264
262
|
|
|
265
263
|
if node_type(child) == :doctype
|
|
266
|
-
# avoid exceptions: cannot reparent Nokogiri::XML::DTD there
|
|
267
264
|
element.create_internal_subset(
|
|
268
265
|
child.name, child.external_id, child.system_id
|
|
269
266
|
)
|
|
@@ -397,23 +394,28 @@ module Moxml
|
|
|
397
394
|
save_options |= ::Nokogiri::XML::Node::SaveOptions::FORMAT
|
|
398
395
|
end
|
|
399
396
|
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if xml_decl.nil?
|
|
397
|
+
custom_decl = nil
|
|
398
|
+
if options[:no_declaration]
|
|
399
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
|
|
400
|
+
elsif attachments.key?(node, :xml_decl) && (xml_decl = attachments.get(node, :xml_decl))
|
|
401
|
+
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION
|
|
402
|
+
attrs = ["version=\"#{xml_decl[:version]}\""]
|
|
403
|
+
attrs << "encoding=\"#{xml_decl[:encoding]}\"" if xml_decl[:encoding]
|
|
404
|
+
attrs << "standalone=\"#{xml_decl[:standalone]}\"" if xml_decl[:standalone]
|
|
405
|
+
custom_decl = "<?xml #{attrs.join(' ')}?>"
|
|
410
406
|
end
|
|
411
407
|
|
|
412
|
-
node.to_xml(
|
|
408
|
+
result = node.to_xml(
|
|
413
409
|
indent: options[:indent],
|
|
414
410
|
encoding: options[:encoding],
|
|
415
411
|
save_with: save_options,
|
|
416
412
|
)
|
|
413
|
+
|
|
414
|
+
if custom_decl
|
|
415
|
+
result = "#{custom_decl}\n#{result}"
|
|
416
|
+
end
|
|
417
|
+
|
|
418
|
+
result
|
|
417
419
|
end
|
|
418
420
|
|
|
419
421
|
def has_declaration?(native_doc, wrapper)
|
data/lib/moxml/adapter/oga.rb
CHANGED
|
@@ -289,11 +289,25 @@ module Moxml
|
|
|
289
289
|
child_or_text
|
|
290
290
|
end
|
|
291
291
|
|
|
292
|
-
# Special handling for declarations on Oga documents
|
|
293
292
|
if element.is_a?(::Oga::XML::Document) &&
|
|
294
293
|
child.is_a?(::Oga::XML::XmlDeclaration)
|
|
295
|
-
# Track declaration state in attachment map
|
|
296
294
|
attachments.set(element, :xml_declaration, child)
|
|
295
|
+
return
|
|
296
|
+
end
|
|
297
|
+
|
|
298
|
+
# Insert doctype before root element in document
|
|
299
|
+
if element.is_a?(::Oga::XML::Document) && child.is_a?(::Oga::XML::Doctype)
|
|
300
|
+
root_idx = nil
|
|
301
|
+
element.children.each_with_index do |n, i|
|
|
302
|
+
if n.is_a?(::Oga::XML::Element)
|
|
303
|
+
root_idx = i
|
|
304
|
+
break
|
|
305
|
+
end
|
|
306
|
+
end
|
|
307
|
+
if root_idx
|
|
308
|
+
element.children.insert(root_idx, child)
|
|
309
|
+
return
|
|
310
|
+
end
|
|
297
311
|
end
|
|
298
312
|
|
|
299
313
|
element.children << child
|
|
@@ -465,86 +479,53 @@ module Moxml
|
|
|
465
479
|
|
|
466
480
|
private
|
|
467
481
|
|
|
482
|
+
def declaration_to_xml(decl)
|
|
483
|
+
parts = ["<?xml"]
|
|
484
|
+
parts << %( version="#{decl.version}") if decl.version
|
|
485
|
+
parts << %( encoding="#{decl.encoding}") if decl.encoding
|
|
486
|
+
parts << %( standalone="#{decl.standalone}") if decl.standalone
|
|
487
|
+
"#{parts.join}?>"
|
|
488
|
+
end
|
|
489
|
+
|
|
468
490
|
def serialize_without_entity_processing(node, options = {})
|
|
469
|
-
# Oga's XmlGenerator doesn't support options directly
|
|
470
|
-
# We need to handle declaration options ourselves for Document nodes
|
|
471
491
|
if node.is_a?(::Oga::XML::Document)
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
effective_xml_declaration = node.xml_declaration || attachments.get(
|
|
475
|
-
node, :xml_declaration
|
|
476
|
-
)
|
|
492
|
+
effective_xml_declaration = attachments.get(node, :xml_declaration)
|
|
493
|
+
|
|
477
494
|
should_include_decl = if options.key?(:no_declaration)
|
|
478
495
|
!options[:no_declaration]
|
|
479
496
|
elsif options.key?(:declaration)
|
|
480
497
|
options[:declaration]
|
|
481
498
|
else
|
|
482
|
-
|
|
483
|
-
effective_xml_declaration ? true : false
|
|
499
|
+
effective_xml_declaration || node.xml_declaration ? true : false
|
|
484
500
|
end
|
|
485
501
|
|
|
486
|
-
|
|
487
|
-
# This prevents duplicate declarations when document already has one
|
|
488
|
-
has_existing_declaration = node.children.any?(::Oga::XML::XmlDeclaration)
|
|
489
|
-
|
|
490
|
-
if should_include_decl && !effective_xml_declaration && !has_existing_declaration
|
|
491
|
-
# Need to add declaration - create default one
|
|
492
|
-
output = []
|
|
493
|
-
output << '<?xml version="1.0" encoding="UTF-8"?>'
|
|
494
|
-
output << "\n"
|
|
495
|
-
|
|
496
|
-
# Serialize doctype if present
|
|
497
|
-
output << node.doctype.to_xml << "\n" if node.doctype
|
|
498
|
-
|
|
499
|
-
# Serialize children
|
|
500
|
-
node.children.each do |child|
|
|
501
|
-
output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
|
|
502
|
-
end
|
|
503
|
-
|
|
504
|
-
return output.join
|
|
505
|
-
elsif !should_include_decl
|
|
506
|
-
# Skip xml_declaration
|
|
507
|
-
output = []
|
|
508
|
-
|
|
509
|
-
# Serialize doctype if present
|
|
510
|
-
output << node.doctype.to_xml << "\n" if node.doctype
|
|
511
|
-
|
|
512
|
-
# Serialize root and other children
|
|
513
|
-
node.children.each do |child|
|
|
514
|
-
next if child.is_a?(::Oga::XML::XmlDeclaration)
|
|
502
|
+
output = []
|
|
515
503
|
|
|
516
|
-
|
|
504
|
+
if should_include_decl
|
|
505
|
+
decl = effective_xml_declaration || node.xml_declaration
|
|
506
|
+
if decl
|
|
507
|
+
output << declaration_to_xml(decl)
|
|
508
|
+
else
|
|
509
|
+
output << '<?xml version="1.0" encoding="UTF-8"?>'
|
|
517
510
|
end
|
|
518
|
-
|
|
519
|
-
return output.join
|
|
511
|
+
output << "\n"
|
|
520
512
|
end
|
|
521
|
-
end
|
|
522
513
|
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
))
|
|
528
|
-
if node.is_a?(::Oga::XML::Document) && effective_xml_declaration
|
|
529
|
-
# Document has declaration - use custom handling to avoid duplicates
|
|
530
|
-
output = []
|
|
531
|
-
xml_declaration_serialized = false
|
|
514
|
+
if node.doctype
|
|
515
|
+
output << node.doctype.to_xml
|
|
516
|
+
output << "\n"
|
|
517
|
+
end
|
|
532
518
|
|
|
533
|
-
# Serialize children, but skip XmlDeclaration if it would cause duplication
|
|
534
519
|
node.children.each do |child|
|
|
535
|
-
|
|
536
|
-
next if xml_declaration && xml_declaration_serialized
|
|
537
|
-
|
|
538
|
-
xml_declaration_serialized = true if xml_declaration
|
|
520
|
+
next if child.is_a?(::Oga::XML::XmlDeclaration)
|
|
539
521
|
|
|
540
522
|
output << ::Moxml::Adapter::CustomizedOga::XmlGenerator.new(child).to_xml
|
|
541
523
|
end
|
|
542
524
|
|
|
543
|
-
output.join
|
|
544
|
-
else
|
|
545
|
-
# Normal case - use XmlGenerator directly
|
|
546
|
-
::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
|
|
525
|
+
return output.join
|
|
547
526
|
end
|
|
527
|
+
|
|
528
|
+
::Moxml::Adapter::CustomizedOga::XmlGenerator.new(node).to_xml
|
|
548
529
|
end
|
|
549
530
|
end
|
|
550
531
|
end
|
data/lib/moxml/adapter/ox.rb
CHANGED
|
@@ -19,7 +19,17 @@ module Moxml
|
|
|
19
19
|
end
|
|
20
20
|
|
|
21
21
|
def set_root(doc, element)
|
|
22
|
-
|
|
22
|
+
existing_root = root(doc)
|
|
23
|
+
if existing_root
|
|
24
|
+
# Replace the existing root element, preserving other children
|
|
25
|
+
element.parent = doc if element.is_a?(::Ox::Node)
|
|
26
|
+
idx = doc.nodes.index(existing_root)
|
|
27
|
+
doc.nodes[idx] = element
|
|
28
|
+
else
|
|
29
|
+
# No root yet, just append the element
|
|
30
|
+
element.parent = doc if element.is_a?(::Ox::Node)
|
|
31
|
+
doc << element
|
|
32
|
+
end
|
|
23
33
|
end
|
|
24
34
|
|
|
25
35
|
def parse(xml, options = {}, _context = nil)
|
|
@@ -101,9 +111,14 @@ module Moxml
|
|
|
101
111
|
end
|
|
102
112
|
|
|
103
113
|
def create_native_doctype(name, external_id, system_id)
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
114
|
+
value = if external_id
|
|
115
|
+
"#{name} PUBLIC \"#{external_id}\" \"#{system_id}\""
|
|
116
|
+
elsif system_id
|
|
117
|
+
"#{name} SYSTEM \"#{system_id}\""
|
|
118
|
+
else
|
|
119
|
+
"#{name}"
|
|
120
|
+
end
|
|
121
|
+
::Ox::DocType.new(value)
|
|
107
122
|
end
|
|
108
123
|
|
|
109
124
|
def create_native_processing_instruction(target, content)
|
|
@@ -370,25 +385,27 @@ module Moxml
|
|
|
370
385
|
def add_child(element, child)
|
|
371
386
|
# Special handling for declarations on Ox documents
|
|
372
387
|
if element.is_a?(::Ox::Document) && child.is_a?(::Ox::Instruct) && child.target == "xml"
|
|
373
|
-
# Transfer declaration attributes to document
|
|
374
388
|
element.attributes ||= {}
|
|
375
|
-
if child.attributes["version"]
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
if child.attributes["encoding"]
|
|
380
|
-
element.attributes[:encoding] =
|
|
381
|
-
child.attributes["encoding"]
|
|
382
|
-
end
|
|
383
|
-
if child.attributes["standalone"]
|
|
384
|
-
element.attributes[:standalone] =
|
|
385
|
-
child.attributes["standalone"]
|
|
386
|
-
end
|
|
389
|
+
element.attributes[:version] = child.attributes["version"] if child.attributes["version"]
|
|
390
|
+
element.attributes[:encoding] = child.attributes["encoding"] if child.attributes["encoding"]
|
|
391
|
+
element.attributes[:standalone] = child.attributes["standalone"] if child.attributes["standalone"]
|
|
392
|
+
return
|
|
387
393
|
end
|
|
388
394
|
|
|
389
395
|
child.parent = element if child.is_a?(::Ox::Node)
|
|
390
396
|
element.nodes ||= []
|
|
391
|
-
|
|
397
|
+
|
|
398
|
+
# Insert doctype before root element in document
|
|
399
|
+
if element.is_a?(::Ox::Document) && child.is_a?(::Ox::DocType)
|
|
400
|
+
root_idx = element.nodes.index { |n| n.is_a?(::Ox::Element) }
|
|
401
|
+
if root_idx
|
|
402
|
+
element.nodes.insert(root_idx, child)
|
|
403
|
+
else
|
|
404
|
+
element.nodes << child
|
|
405
|
+
end
|
|
406
|
+
else
|
|
407
|
+
element.nodes << child
|
|
408
|
+
end
|
|
392
409
|
|
|
393
410
|
# Mark document if EntityReference is added (avoids tree scan in serialize)
|
|
394
411
|
if child.is_a?(::Moxml::Adapter::CustomizedOx::EntityReference)
|
data/lib/moxml/adapter.rb
CHANGED
|
@@ -46,7 +46,7 @@ module Moxml
|
|
|
46
46
|
def validate_platform!(name)
|
|
47
47
|
return if platform_adapters.include?(name.to_sym)
|
|
48
48
|
|
|
49
|
-
available = platform_adapters.
|
|
49
|
+
available = platform_adapters.join(", ")
|
|
50
50
|
raise Moxml::AdapterError.new(
|
|
51
51
|
"The '#{name}' adapter is not available on this platform. Available: #{available}",
|
|
52
52
|
adapter: name,
|
data/lib/moxml/config.rb
CHANGED
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
|
|
3
3
|
module Moxml
|
|
4
4
|
class Config
|
|
5
|
+
LINE_ENDING_LF = "\n"
|
|
6
|
+
LINE_ENDING_CRLF = "\r\n"
|
|
7
|
+
VALID_LINE_ENDINGS = [LINE_ENDING_LF, LINE_ENDING_CRLF].freeze
|
|
5
8
|
VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
|
|
6
9
|
DEFAULT_ADAPTER = :nokogiri
|
|
7
10
|
OPAL_DEFAULT_ADAPTER = :rexml
|
|
@@ -46,7 +49,7 @@ module Moxml
|
|
|
46
49
|
# - :strict — only restore DTD-declared entities (falls back to lenient until DTD parsing is implemented)
|
|
47
50
|
ENTITY_RESTORATION_MODES = %i[strict lenient].freeze
|
|
48
51
|
|
|
49
|
-
attr_reader :adapter_name
|
|
52
|
+
attr_reader :adapter_name, :default_line_ending
|
|
50
53
|
attr_accessor :strict_parsing,
|
|
51
54
|
:default_encoding,
|
|
52
55
|
:entity_encoding,
|
|
@@ -58,13 +61,23 @@ module Moxml
|
|
|
58
61
|
:namespace_validation_mode,
|
|
59
62
|
:entity_restoration_mode
|
|
60
63
|
|
|
64
|
+
def default_line_ending=(value)
|
|
65
|
+
unless VALID_LINE_ENDINGS.include?(value)
|
|
66
|
+
raise ArgumentError,
|
|
67
|
+
"Invalid line_ending: #{value.inspect}. " \
|
|
68
|
+
"Must be Config::LINE_ENDING_LF or Config::LINE_ENDING_CRLF"
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
@default_line_ending = value
|
|
72
|
+
end
|
|
73
|
+
|
|
61
74
|
def initialize(adapter_name = nil, strict_parsing = nil,
|
|
62
75
|
default_encoding = nil)
|
|
63
76
|
self.adapter = adapter_name || Config.default.adapter_name
|
|
64
77
|
@strict_parsing = strict_parsing || Config.default.strict_parsing
|
|
65
78
|
@default_encoding = default_encoding || Config.default.default_encoding
|
|
66
|
-
# reserved for future use
|
|
67
79
|
@default_indent = 2
|
|
80
|
+
@default_line_ending = LINE_ENDING_LF
|
|
68
81
|
@entity_encoding = :basic
|
|
69
82
|
@restore_entities = false
|
|
70
83
|
@preload_entity_sets = []
|
data/lib/moxml/document.rb
CHANGED
|
@@ -81,14 +81,8 @@ module Moxml
|
|
|
81
81
|
if node.is_a?(Declaration)
|
|
82
82
|
# Mark that document now has a declaration
|
|
83
83
|
@has_xml_declaration = true
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
adapter.add_child(@native, node.native)
|
|
87
|
-
else
|
|
88
|
-
adapter.add_previous_sibling(adapter.children(@native).first,
|
|
89
|
-
node.native)
|
|
90
|
-
end
|
|
91
|
-
elsif root && !node.is_a?(ProcessingInstruction) && !node.is_a?(Comment)
|
|
84
|
+
adapter.add_child(@native, node.native)
|
|
85
|
+
elsif root && !node.is_a?(ProcessingInstruction) && !node.is_a?(Comment) && !node.is_a?(Doctype)
|
|
92
86
|
raise Error, "Document already has a root element"
|
|
93
87
|
else
|
|
94
88
|
adapter.add_child(@native, node.native)
|
|
@@ -226,7 +226,8 @@ module Moxml
|
|
|
226
226
|
# kept for backward compatibility.
|
|
227
227
|
# @return [self]
|
|
228
228
|
def load_html5
|
|
229
|
-
warn "EntityRegistry#load_html5 is a no-op (all entities load during initialize)",
|
|
229
|
+
warn "EntityRegistry#load_html5 is a no-op (all entities load during initialize)",
|
|
230
|
+
uplevel: 1
|
|
230
231
|
self
|
|
231
232
|
end
|
|
232
233
|
|
|
@@ -235,7 +236,8 @@ module Moxml
|
|
|
235
236
|
# kept for backward compatibility.
|
|
236
237
|
# @return [self]
|
|
237
238
|
def load_mathml
|
|
238
|
-
warn "EntityRegistry#load_mathml is a no-op (all entities load during initialize)",
|
|
239
|
+
warn "EntityRegistry#load_mathml is a no-op (all entities load during initialize)",
|
|
240
|
+
uplevel: 1
|
|
239
241
|
self
|
|
240
242
|
end
|
|
241
243
|
|
|
@@ -245,7 +247,8 @@ module Moxml
|
|
|
245
247
|
# @param _set_name [Symbol] (ignored, all loaded together)
|
|
246
248
|
# @return [self]
|
|
247
249
|
def load_iso(_set_name = :iso8879)
|
|
248
|
-
warn "EntityRegistry#load_iso is a no-op (all entities load during initialize)",
|
|
250
|
+
warn "EntityRegistry#load_iso is a no-op (all entities load during initialize)",
|
|
251
|
+
uplevel: 1
|
|
249
252
|
self
|
|
250
253
|
end
|
|
251
254
|
|
|
@@ -254,7 +257,8 @@ module Moxml
|
|
|
254
257
|
# kept for backward compatibility.
|
|
255
258
|
# @return [self]
|
|
256
259
|
def load_all
|
|
257
|
-
warn "EntityRegistry#load_all is a no-op (all entities load during initialize)",
|
|
260
|
+
warn "EntityRegistry#load_all is a no-op (all entities load during initialize)",
|
|
261
|
+
uplevel: 1
|
|
258
262
|
self
|
|
259
263
|
end
|
|
260
264
|
|
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
|
+
|
|
2
3
|
#
|
|
3
4
|
# Auto-generated entity data for Opal runtime.
|
|
4
|
-
#
|
|
5
|
-
#
|
|
5
|
+
# Source: data/w3c_entities.json (2125 entities)
|
|
6
|
+
# Regenerate with: rake opal:generate_entity_data
|
|
6
7
|
|
|
7
8
|
module Moxml
|
|
8
9
|
class EntityRegistry
|
data/lib/moxml/node.rb
CHANGED
|
@@ -98,6 +98,7 @@ module Moxml
|
|
|
98
98
|
serialize_options[:no_declaration] = !should_include_declaration?(options)
|
|
99
99
|
|
|
100
100
|
result = adapter.serialize(@native, serialize_options)
|
|
101
|
+
result = apply_line_ending(result, serialize_options[:line_ending])
|
|
101
102
|
|
|
102
103
|
# Restore entity markers to named entity references
|
|
103
104
|
adapter.restore_entities(result)
|
|
@@ -279,6 +280,7 @@ module Moxml
|
|
|
279
280
|
{
|
|
280
281
|
encoding: context.config.default_encoding,
|
|
281
282
|
indent: context.config.default_indent,
|
|
283
|
+
line_ending: context.config.default_line_ending,
|
|
282
284
|
# The short format of empty tags in Oga and Nokogiri isn't configurable
|
|
283
285
|
# Oga: <empty /> (with a space)
|
|
284
286
|
# Nokogiri: <empty/> (without a space)
|
|
@@ -294,5 +296,11 @@ module Moxml
|
|
|
294
296
|
# For Document nodes, delegate to adapter for native state check
|
|
295
297
|
adapter.has_declaration?(@native, self)
|
|
296
298
|
end
|
|
299
|
+
|
|
300
|
+
def apply_line_ending(xml, line_ending)
|
|
301
|
+
return xml if line_ending == Config::LINE_ENDING_LF || !xml.include?("\n")
|
|
302
|
+
|
|
303
|
+
xml.gsub(/\r?\n/, line_ending)
|
|
304
|
+
end
|
|
297
305
|
end
|
|
298
306
|
end
|
data/lib/moxml/version.rb
CHANGED
data/lib/moxml/xml_utils.rb
CHANGED
data/lib/moxml.rb
CHANGED
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.shared_examples "Moxml Line Ending" do
|
|
4
|
+
describe "Line ending configuration" do
|
|
5
|
+
let(:context) { Moxml.new }
|
|
6
|
+
let(:xml) { "<root><child>text</child></root>" }
|
|
7
|
+
|
|
8
|
+
it "produces no CRLF with LF default" do
|
|
9
|
+
doc = context.parse(xml)
|
|
10
|
+
expect(doc.to_xml).not_to include("\r\n")
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it "produces no bare LF with CRLF configured" do
|
|
14
|
+
context.config.default_line_ending = Moxml::Config::LINE_ENDING_CRLF
|
|
15
|
+
doc = context.parse(xml)
|
|
16
|
+
expect(doc.to_xml).not_to match(/(?<!\r)\n/)
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it "allows per-call CRLF override producing no bare LF" do
|
|
20
|
+
doc = context.parse(xml)
|
|
21
|
+
output = doc.to_xml(line_ending: Moxml::Config::LINE_ENDING_CRLF)
|
|
22
|
+
expect(output).not_to match(/(?<!\r)\n/)
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it "per-call LF override wins over config CRLF" do
|
|
26
|
+
context.config.default_line_ending = Moxml::Config::LINE_ENDING_CRLF
|
|
27
|
+
doc = context.parse(xml)
|
|
28
|
+
expect(doc.to_xml(line_ending: Moxml::Config::LINE_ENDING_LF))
|
|
29
|
+
.not_to include("\r\n")
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
it "produces identical bytes on re-serialization with CRLF" do
|
|
33
|
+
context.config.default_line_ending = Moxml::Config::LINE_ENDING_CRLF
|
|
34
|
+
doc = context.parse(xml)
|
|
35
|
+
first = doc.to_xml
|
|
36
|
+
|
|
37
|
+
ctx2 = Moxml.new
|
|
38
|
+
ctx2.config.default_line_ending = Moxml::Config::LINE_ENDING_CRLF
|
|
39
|
+
result = ctx2.parse(first)
|
|
40
|
+
expect(result.to_xml).to eq(first)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it "preserves element structure through CRLF round-trip" do
|
|
44
|
+
doc = context.parse("<root><a>text</a><b>more</b></root>")
|
|
45
|
+
context.config.default_line_ending = Moxml::Config::LINE_ENDING_CRLF
|
|
46
|
+
crlf_output = doc.to_xml
|
|
47
|
+
|
|
48
|
+
ctx2 = Moxml.new
|
|
49
|
+
result = ctx2.parse(crlf_output)
|
|
50
|
+
elements = result.root.children.select(&:element?)
|
|
51
|
+
expect(elements.map(&:name)).to eq(%w[a b])
|
|
52
|
+
expect(elements[0].children.first.content).to eq("text")
|
|
53
|
+
expect(elements[1].children.first.content).to eq("more")
|
|
54
|
+
end
|
|
55
|
+
end
|
|
56
|
+
end
|