moxml 0.1.14 → 0.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +117 -66
- data/Gemfile +1 -0
- data/README.adoc +11 -9
- data/Rakefile +34 -1
- data/TODO.remaining/1-entity-reference-adapter-support.md +157 -0
- data/TODO.remaining/2-entity-restoration-model-driven.md +169 -0
- data/TODO.remaining/3-entity-reference-test-coverage.md +170 -0
- data/TODO.remaining/4-lenient-entities-mode.md +106 -0
- data/TODO.remaining/5-fixture-integrity.md +65 -0
- data/TODO.remaining/6-ox-element-ordering-bug.md +36 -0
- data/TODO.remaining/7-headed-ox-limitations.md +95 -0
- data/TODO.remaining/8-xpath-predicate-gaps.md +68 -0
- data/TODO.remaining/9-cleanup-hygiene.md +42 -0
- data/TODO.remaining/README.md +54 -0
- data/benchmarks/generate_report.rb +1 -1
- data/docs/_pages/configuration.adoc +22 -19
- data/docs/_tutorials/namespace-handling.adoc +5 -5
- data/lib/moxml/adapter/base.rb +22 -3
- data/lib/moxml/adapter/customized_libxml/declaration.rb +1 -1
- data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
- data/lib/moxml/adapter/customized_libxml.rb +18 -0
- data/lib/moxml/adapter/customized_oga.rb +10 -0
- data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
- data/lib/moxml/adapter/customized_ox.rb +12 -0
- data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +44 -20
- data/lib/moxml/adapter/customized_rexml.rb +11 -0
- data/lib/moxml/adapter/headed_ox.rb +37 -14
- data/lib/moxml/adapter/libxml.rb +233 -119
- data/lib/moxml/adapter/nokogiri.rb +22 -11
- data/lib/moxml/adapter/oga.rb +64 -25
- data/lib/moxml/adapter/ox.rb +198 -42
- data/lib/moxml/adapter/rexml.rb +64 -13
- data/lib/moxml/attribute.rb +3 -0
- data/lib/moxml/builder.rb +78 -24
- data/lib/moxml/config.rb +24 -7
- data/lib/moxml/declaration.rb +4 -2
- data/lib/moxml/document.rb +8 -1
- data/lib/moxml/document_builder.rb +44 -37
- data/lib/moxml/element.rb +18 -5
- data/lib/moxml/entity_registry.rb +51 -1
- data/lib/moxml/native_attachment.rb +65 -0
- data/lib/moxml/node.rb +39 -50
- data/lib/moxml/node_set.rb +43 -15
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +4 -1
- data/lib/moxml.rb +1 -0
- data/scripts/format_xml.rb +16 -0
- data/scripts/pretty_format_xml.rb +14 -0
- data/spec/consistency/round_trip_spec.rb +3 -30
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/headed_ox_integration_spec.rb +0 -2
- data/spec/integration/shared_examples/edge_cases.rb +7 -4
- data/spec/integration/shared_examples/integration_workflows.rb +3 -3
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/entity_reference_behavior.rb +224 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +1 -1
- data/spec/moxml/adapter/headed_ox_spec.rb +8 -8
- data/spec/moxml/adapter/oga_spec.rb +46 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
- data/spec/moxml/allocation_benchmark_spec.rb +96 -0
- data/spec/moxml/allocation_guard_spec.rb +282 -0
- data/spec/moxml/builder_spec.rb +256 -0
- data/spec/moxml/config_spec.rb +11 -11
- data/spec/moxml/doctype_spec.rb +41 -0
- data/spec/moxml/lazy_parse_spec.rb +115 -0
- data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
- data/spec/moxml/node_cache_spec.rb +110 -0
- data/spec/moxml/node_set_cache_spec.rb +90 -0
- data/spec/moxml/xml_utils_spec.rb +32 -0
- data/spec/moxml/xpath/axes_spec.rb +1 -1
- data/spec/moxml/xpath/compiler_spec.rb +2 -2
- data/spec/moxml/xpath/functions/position_functions_spec.rb +5 -5
- data/spec/moxml/xpath/functions/special_functions_spec.rb +1 -1
- data/spec/performance/memory_usage_spec.rb +0 -4
- data/spec/support/allocation_helper.rb +165 -0
- data/spec/support/w3c_namespace_helpers.rb +2 -1
- metadata +29 -2
|
@@ -7,6 +7,10 @@ module Moxml
|
|
|
7
7
|
module Adapter
|
|
8
8
|
class Nokogiri < Base
|
|
9
9
|
class << self
|
|
10
|
+
def attachments
|
|
11
|
+
@attachments ||= Moxml::NativeAttachment.new
|
|
12
|
+
end
|
|
13
|
+
|
|
10
14
|
def set_root(doc, element)
|
|
11
15
|
doc.root = element
|
|
12
16
|
end
|
|
@@ -31,7 +35,7 @@ module Moxml
|
|
|
31
35
|
|
|
32
36
|
# Use provided context if available, otherwise create new one
|
|
33
37
|
ctx = _context || Context.new(:nokogiri)
|
|
34
|
-
|
|
38
|
+
Document.new(native_doc, ctx)
|
|
35
39
|
end
|
|
36
40
|
|
|
37
41
|
# SAX parsing implementation for Nokogiri
|
|
@@ -47,7 +51,7 @@ module Moxml
|
|
|
47
51
|
parser = ::Nokogiri::XML::SAX::Parser.new(bridge)
|
|
48
52
|
|
|
49
53
|
# Parse
|
|
50
|
-
if xml.
|
|
54
|
+
if xml.is_a?(IO) || xml.is_a?(StringIO)
|
|
51
55
|
parser.parse(xml)
|
|
52
56
|
else
|
|
53
57
|
parser.parse(xml.to_s)
|
|
@@ -202,7 +206,7 @@ module Moxml
|
|
|
202
206
|
end
|
|
203
207
|
|
|
204
208
|
def root(document)
|
|
205
|
-
document.
|
|
209
|
+
document.is_a?(::Nokogiri::XML::Document) ? document.root : document.children.first
|
|
206
210
|
end
|
|
207
211
|
|
|
208
212
|
def attribute_element(attr)
|
|
@@ -241,8 +245,8 @@ module Moxml
|
|
|
241
245
|
encoding = declaration_attribute(child, "encoding")
|
|
242
246
|
standalone = declaration_attribute(child, "standalone")
|
|
243
247
|
|
|
244
|
-
#
|
|
245
|
-
|
|
248
|
+
# Store declaration state in attachment map
|
|
249
|
+
attachments.set(element, :xml_decl, {
|
|
246
250
|
version: version,
|
|
247
251
|
encoding: encoding,
|
|
248
252
|
standalone: standalone,
|
|
@@ -273,7 +277,7 @@ module Moxml
|
|
|
273
277
|
node.name == "xml" &&
|
|
274
278
|
node.parent.is_a?(::Nokogiri::XML::Document)
|
|
275
279
|
# Clear document's xml_decl when removing declaration
|
|
276
|
-
node.parent
|
|
280
|
+
attachments.set(node.parent, :xml_decl, nil)
|
|
277
281
|
end
|
|
278
282
|
|
|
279
283
|
node.remove
|
|
@@ -387,13 +391,12 @@ module Moxml
|
|
|
387
391
|
# Handle declaration option
|
|
388
392
|
# Priority:
|
|
389
393
|
# 1. Explicit no_declaration option
|
|
390
|
-
# 2. Check
|
|
394
|
+
# 2. Check attachment-stored xml_decl (when remove is called, this becomes nil)
|
|
391
395
|
if options.key?(:no_declaration)
|
|
392
396
|
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]
|
|
393
|
-
elsif
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
xml_decl = node.instance_variable_get(:@xml_decl)
|
|
397
|
+
elsif attachments.key?(node, :xml_decl)
|
|
398
|
+
# State stored in attachment - if nil, declaration was removed
|
|
399
|
+
xml_decl = attachments.get(node, :xml_decl)
|
|
397
400
|
save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if xml_decl.nil?
|
|
398
401
|
end
|
|
399
402
|
|
|
@@ -404,6 +407,14 @@ module Moxml
|
|
|
404
407
|
)
|
|
405
408
|
end
|
|
406
409
|
|
|
410
|
+
def has_declaration?(native_doc, wrapper)
|
|
411
|
+
if attachments.key?(native_doc, :xml_decl)
|
|
412
|
+
!attachments.get(native_doc, :xml_decl).nil?
|
|
413
|
+
else
|
|
414
|
+
wrapper.has_xml_declaration
|
|
415
|
+
end
|
|
416
|
+
end
|
|
417
|
+
|
|
407
418
|
private
|
|
408
419
|
|
|
409
420
|
def build_declaration_attrs(version, encoding, standalone)
|
data/lib/moxml/adapter/oga.rb
CHANGED
|
@@ -1,8 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative "base"
|
|
4
|
-
require_relative "customized_oga
|
|
5
|
-
require_relative "customized_oga/xml_declaration"
|
|
4
|
+
require_relative "customized_oga"
|
|
6
5
|
require "oga"
|
|
7
6
|
|
|
8
7
|
module Moxml
|
|
@@ -12,6 +11,10 @@ module Moxml
|
|
|
12
11
|
# Standard XML entities handled natively by parsers
|
|
13
12
|
STANDARD_XML_ENTITIES = %w[amp lt gt quot apos].freeze
|
|
14
13
|
|
|
14
|
+
def attachments
|
|
15
|
+
@attachments ||= Moxml::NativeAttachment.new
|
|
16
|
+
end
|
|
17
|
+
|
|
15
18
|
def set_root(doc, element)
|
|
16
19
|
# Clear existing root element if any - Oga's NodeSet needs special handling
|
|
17
20
|
# We need to manually remove elements since NodeSet doesn't support clear or delete_if
|
|
@@ -46,7 +49,7 @@ module Moxml
|
|
|
46
49
|
def sax_parse(xml, handler)
|
|
47
50
|
bridge = OgaSAXBridge.new(handler)
|
|
48
51
|
|
|
49
|
-
xml_string = xml.
|
|
52
|
+
xml_string = xml.is_a?(IO) || xml.is_a?(StringIO) ? xml.read : xml.to_s
|
|
50
53
|
|
|
51
54
|
# Manually call start_document (Oga doesn't)
|
|
52
55
|
handler.on_start_document
|
|
@@ -72,6 +75,17 @@ module Moxml
|
|
|
72
75
|
::Oga::XML::Text.new(text: encode_entity_markers(content))
|
|
73
76
|
end
|
|
74
77
|
|
|
78
|
+
def create_native_entity_reference(name)
|
|
79
|
+
text = ::Oga::XML::Text.new
|
|
80
|
+
text.text = "#{ENTITY_MARKER}#{name};"
|
|
81
|
+
attachments.set(text, :entity_name, name)
|
|
82
|
+
text
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def entity_reference_name(node)
|
|
86
|
+
attachments.get(node, :entity_name)
|
|
87
|
+
end
|
|
88
|
+
|
|
75
89
|
def create_native_cdata(content, _owner_doc = nil)
|
|
76
90
|
::Oga::XML::Cdata.new(text: content)
|
|
77
91
|
end
|
|
@@ -132,10 +146,9 @@ module Moxml
|
|
|
132
146
|
end
|
|
133
147
|
|
|
134
148
|
def namespace(element)
|
|
135
|
-
|
|
149
|
+
case element
|
|
150
|
+
when ::Oga::XML::Element, ::Oga::XML::Attribute
|
|
136
151
|
element.namespace
|
|
137
|
-
elsif element.respond_to?(:namespaces)
|
|
138
|
-
element.namespaces.values.last
|
|
139
152
|
end
|
|
140
153
|
rescue NoMethodError
|
|
141
154
|
# Oga attributes fail with NoMethodError:
|
|
@@ -150,7 +163,12 @@ module Moxml
|
|
|
150
163
|
def node_type(node)
|
|
151
164
|
case node
|
|
152
165
|
when ::Oga::XML::Element then :element
|
|
153
|
-
when ::Oga::XML::Text
|
|
166
|
+
when ::Oga::XML::Text
|
|
167
|
+
if attachments.key?(node, :entity_name)
|
|
168
|
+
:entity_reference
|
|
169
|
+
else
|
|
170
|
+
:text
|
|
171
|
+
end
|
|
154
172
|
when ::Oga::XML::Cdata then :cdata
|
|
155
173
|
when ::Oga::XML::Comment then :comment
|
|
156
174
|
when ::Oga::XML::Attribute then :attribute
|
|
@@ -178,7 +196,7 @@ module Moxml
|
|
|
178
196
|
node.doctype].compact
|
|
179
197
|
end
|
|
180
198
|
|
|
181
|
-
return all_children unless node.
|
|
199
|
+
return all_children unless node.is_a?(::Oga::XML::Node) || node.is_a?(::Oga::XML::Document)
|
|
182
200
|
|
|
183
201
|
all_children + node.children.reject do |child|
|
|
184
202
|
child.is_a?(::Oga::XML::Text) &&
|
|
@@ -188,7 +206,7 @@ module Moxml
|
|
|
188
206
|
end
|
|
189
207
|
|
|
190
208
|
def parent(node)
|
|
191
|
-
node.parent if node.
|
|
209
|
+
node.parent if node.is_a?(::Oga::XML::Node)
|
|
192
210
|
end
|
|
193
211
|
|
|
194
212
|
def next_sibling(node)
|
|
@@ -215,7 +233,7 @@ module Moxml
|
|
|
215
233
|
end
|
|
216
234
|
|
|
217
235
|
def attributes(element)
|
|
218
|
-
return [] unless element.
|
|
236
|
+
return [] unless element.is_a?(::Oga::XML::Element)
|
|
219
237
|
|
|
220
238
|
# remove attributes-namespaces
|
|
221
239
|
element.attributes.reject do |attr|
|
|
@@ -262,8 +280,8 @@ module Moxml
|
|
|
262
280
|
# Special handling for declarations on Oga documents
|
|
263
281
|
if element.is_a?(::Oga::XML::Document) &&
|
|
264
282
|
child.is_a?(::Oga::XML::XmlDeclaration)
|
|
265
|
-
#
|
|
266
|
-
|
|
283
|
+
# Track declaration state in attachment map
|
|
284
|
+
attachments.set(element, :xml_declaration, child)
|
|
267
285
|
end
|
|
268
286
|
|
|
269
287
|
element.children << child
|
|
@@ -295,8 +313,8 @@ module Moxml
|
|
|
295
313
|
# Special handling for declarations on Oga documents
|
|
296
314
|
if node.is_a?(::Oga::XML::XmlDeclaration) &&
|
|
297
315
|
node.parent.is_a?(::Oga::XML::Document)
|
|
298
|
-
# Clear
|
|
299
|
-
node.parent
|
|
316
|
+
# Clear declaration state in attachment map
|
|
317
|
+
attachments.set(node.parent, :xml_declaration, nil)
|
|
300
318
|
end
|
|
301
319
|
|
|
302
320
|
node.remove
|
|
@@ -316,10 +334,9 @@ module Moxml
|
|
|
316
334
|
end
|
|
317
335
|
|
|
318
336
|
def inner_text(node)
|
|
319
|
-
text = if node.
|
|
337
|
+
text = if node.is_a?(::Oga::XML::Element)
|
|
320
338
|
node.inner_text
|
|
321
339
|
else
|
|
322
|
-
# Oga::XML::Text node for example
|
|
323
340
|
node.text
|
|
324
341
|
end
|
|
325
342
|
restore_entity_markers(text)
|
|
@@ -327,7 +344,7 @@ module Moxml
|
|
|
327
344
|
|
|
328
345
|
def set_text_content(node, content)
|
|
329
346
|
encoded = encode_entity_markers(content)
|
|
330
|
-
if node.
|
|
347
|
+
if node.is_a?(::Oga::XML::Element)
|
|
331
348
|
node.inner_text = encoded
|
|
332
349
|
else
|
|
333
350
|
node.text = encoded
|
|
@@ -370,22 +387,32 @@ module Moxml
|
|
|
370
387
|
end
|
|
371
388
|
|
|
372
389
|
def namespace_definitions(node)
|
|
373
|
-
return [] unless node.
|
|
390
|
+
return [] unless node.is_a?(::Oga::XML::Element)
|
|
374
391
|
|
|
375
392
|
node.namespaces.values
|
|
376
393
|
end
|
|
377
394
|
|
|
378
395
|
# Doctype accessor methods
|
|
396
|
+
# Note: Oga stores SYSTEM identifier in public_id for SYSTEM doctypes.
|
|
397
|
+
# See: Oga::XML::Doctype puts SYSTEM dtd in public_id, system_id is nil.
|
|
379
398
|
def doctype_name(native)
|
|
380
399
|
native.name
|
|
381
400
|
end
|
|
382
401
|
|
|
383
402
|
def doctype_external_id(native)
|
|
384
|
-
native.
|
|
403
|
+
if native.type == "SYSTEM"
|
|
404
|
+
nil
|
|
405
|
+
else
|
|
406
|
+
native.public_id
|
|
407
|
+
end
|
|
385
408
|
end
|
|
386
409
|
|
|
387
410
|
def doctype_system_id(native)
|
|
388
|
-
native.
|
|
411
|
+
if native.type == "SYSTEM"
|
|
412
|
+
native.public_id
|
|
413
|
+
else
|
|
414
|
+
native.system_id
|
|
415
|
+
end
|
|
389
416
|
end
|
|
390
417
|
|
|
391
418
|
def xpath(node, expression, namespaces = nil)
|
|
@@ -430,6 +457,16 @@ module Moxml
|
|
|
430
457
|
# Simple entity-only regex with no nested quantifiers
|
|
431
458
|
ENTITY_REF_REGEX = /&#{ENTITY_PATTERN};/
|
|
432
459
|
|
|
460
|
+
def has_declaration?(native_doc, _wrapper)
|
|
461
|
+
decl = attachments.get(native_doc, :xml_declaration)
|
|
462
|
+
if decl.nil? && !attachments.key?(native_doc, :xml_declaration)
|
|
463
|
+
# No attachment entry - check native doc (for parsed documents)
|
|
464
|
+
native_doc.respond_to?(:xml_declaration) && !native_doc.xml_declaration.nil?
|
|
465
|
+
else
|
|
466
|
+
!decl.nil?
|
|
467
|
+
end
|
|
468
|
+
end
|
|
469
|
+
|
|
433
470
|
private
|
|
434
471
|
|
|
435
472
|
# Convert &entity; back to \x01entity; for Oga text storage.
|
|
@@ -463,21 +500,22 @@ module Moxml
|
|
|
463
500
|
# We need to handle declaration options ourselves for Document nodes
|
|
464
501
|
if node.is_a?(::Oga::XML::Document)
|
|
465
502
|
# Check if we should include declaration
|
|
466
|
-
# Priority: explicit option > existence of xml_declaration
|
|
503
|
+
# Priority: explicit option > existence of xml_declaration (native or attachment)
|
|
504
|
+
effective_xml_declaration = node.xml_declaration || attachments.get(node, :xml_declaration)
|
|
467
505
|
should_include_decl = if options.key?(:no_declaration)
|
|
468
506
|
!options[:no_declaration]
|
|
469
507
|
elsif options.key?(:declaration)
|
|
470
508
|
options[:declaration]
|
|
471
509
|
else
|
|
472
|
-
# Default: include if document has xml_declaration
|
|
473
|
-
|
|
510
|
+
# Default: include if document has xml_declaration
|
|
511
|
+
effective_xml_declaration ? true : false
|
|
474
512
|
end
|
|
475
513
|
|
|
476
514
|
# Fix: Check if declaration already exists in children
|
|
477
515
|
# This prevents duplicate declarations when document already has one
|
|
478
516
|
has_existing_declaration = node.children.any?(::Oga::XML::XmlDeclaration)
|
|
479
517
|
|
|
480
|
-
if should_include_decl && !
|
|
518
|
+
if should_include_decl && !effective_xml_declaration && !has_existing_declaration
|
|
481
519
|
# Need to add declaration - create default one
|
|
482
520
|
output = []
|
|
483
521
|
output << '<?xml version="1.0" encoding="UTF-8"?>'
|
|
@@ -512,7 +550,8 @@ module Moxml
|
|
|
512
550
|
|
|
513
551
|
# Default: use XmlGenerator
|
|
514
552
|
# But first check if we need to handle declaration specially
|
|
515
|
-
|
|
553
|
+
effective_xml_declaration = node.is_a?(::Oga::XML::Document) && (node.xml_declaration || attachments.get(node, :xml_declaration))
|
|
554
|
+
if node.is_a?(::Oga::XML::Document) && effective_xml_declaration
|
|
516
555
|
# Document has declaration - use custom handling to avoid duplicates
|
|
517
556
|
output = []
|
|
518
557
|
xml_declaration_serialized = false
|