moxml 0.1.13 → 0.1.15
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop_todo.yml +117 -66
- data/Gemfile +1 -0
- data/README.adoc +11 -9
- data/Rakefile +3 -1
- data/docs/_pages/configuration.adoc +22 -19
- data/docs/_tutorials/namespace-handling.adoc +5 -5
- data/lib/moxml/adapter/base.rb +8 -3
- data/lib/moxml/adapter/customized_libxml/entity_reference.rb +23 -0
- data/lib/moxml/adapter/customized_libxml.rb +18 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +2 -2
- data/lib/moxml/adapter/customized_oga.rb +10 -0
- data/lib/moxml/adapter/customized_ox/entity_reference.rb +25 -0
- data/lib/moxml/adapter/customized_ox.rb +12 -0
- data/lib/moxml/adapter/customized_rexml/entity_reference.rb +19 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +2 -0
- data/lib/moxml/adapter/customized_rexml.rb +11 -0
- data/lib/moxml/adapter/headed_ox.rb +9 -3
- data/lib/moxml/adapter/libxml.rb +76 -62
- data/lib/moxml/adapter/nokogiri.rb +4 -5
- data/lib/moxml/adapter/oga.rb +50 -26
- data/lib/moxml/adapter/ox.rb +189 -41
- data/lib/moxml/adapter/rexml.rb +27 -8
- data/lib/moxml/attribute.rb +3 -0
- data/lib/moxml/builder.rb +1 -0
- data/lib/moxml/config.rb +7 -7
- data/lib/moxml/document.rb +5 -1
- data/lib/moxml/document_builder.rb +37 -31
- data/lib/moxml/element.rb +13 -5
- data/lib/moxml/entity_registry.rb +36 -0
- data/lib/moxml/node.rb +23 -2
- data/lib/moxml/node_set.rb +43 -15
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils.rb +1 -1
- data/spec/integration/shared_examples/edge_cases.rb +3 -0
- data/spec/moxml/adapter/oga_spec.rb +62 -0
- data/spec/moxml/adapter/shared_examples/adapter_contract.rb +1 -12
- data/spec/moxml/allocation_benchmark_spec.rb +96 -0
- data/spec/moxml/allocation_guard_spec.rb +282 -0
- data/spec/moxml/builder_spec.rb +22 -0
- data/spec/moxml/config_spec.rb +11 -11
- data/spec/moxml/doctype_spec.rb +41 -0
- data/spec/moxml/lazy_parse_spec.rb +115 -0
- data/spec/moxml/namespace_uri_validation_spec.rb +11 -3
- data/spec/moxml/node_cache_spec.rb +110 -0
- data/spec/moxml/node_set_cache_spec.rb +90 -0
- data/spec/moxml/xml_utils_spec.rb +32 -0
- data/spec/support/allocation_helper.rb +165 -0
- data/spec/support/w3c_namespace_helpers.rb +2 -1
- metadata +15 -2
|
@@ -13,15 +13,13 @@ module Moxml
|
|
|
13
13
|
@current_doc = context.create_document(native_doc)
|
|
14
14
|
|
|
15
15
|
# Transfer has_declaration flag if present
|
|
16
|
-
if native_doc.
|
|
17
|
-
native_doc.instance_variable_defined?(:@moxml_has_declaration)
|
|
16
|
+
if native_doc.instance_variable_defined?(:@moxml_has_declaration)
|
|
18
17
|
has_declaration = native_doc.instance_variable_get(:@moxml_has_declaration)
|
|
19
18
|
@current_doc.has_xml_declaration = has_declaration
|
|
20
19
|
end
|
|
21
20
|
|
|
22
21
|
# Transfer DOCTYPE from parsed document if it exists
|
|
23
|
-
if native_doc.
|
|
24
|
-
native_doc.instance_variable_defined?(:@moxml_doctype)
|
|
22
|
+
if native_doc.instance_variable_defined?(:@moxml_doctype)
|
|
25
23
|
doctype = native_doc.instance_variable_get(:@moxml_doctype)
|
|
26
24
|
if doctype
|
|
27
25
|
@current_doc.native.instance_variable_set(:@moxml_doctype,
|
|
@@ -70,43 +68,54 @@ module Moxml
|
|
|
70
68
|
content = adapter.text_content(node)
|
|
71
69
|
|
|
72
70
|
# Check if we should restore entity references for this text
|
|
73
|
-
if context.config.restore_entities && content
|
|
71
|
+
if context.config.restore_entities && text_has_restorable_entities?(content)
|
|
74
72
|
restore_entities_in_text(content)
|
|
75
73
|
else
|
|
76
74
|
@node_stack.last&.add_child(Text.new(prepared, context))
|
|
77
75
|
end
|
|
78
76
|
end
|
|
79
77
|
|
|
78
|
+
def text_has_restorable_entities?(content)
|
|
79
|
+
return false unless content
|
|
80
|
+
|
|
81
|
+
registry = context.entity_registry
|
|
82
|
+
codepoints = registry.restorable_codepoints
|
|
83
|
+
content.each_char do |char|
|
|
84
|
+
return true if codepoints.include?(char.ord)
|
|
85
|
+
end
|
|
86
|
+
false
|
|
87
|
+
end
|
|
88
|
+
|
|
80
89
|
def restore_entities_in_text(content)
|
|
81
90
|
parent = @node_stack.last
|
|
82
91
|
return unless parent
|
|
83
92
|
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
"<" => "lt",
|
|
88
|
-
">" => "gt",
|
|
89
|
-
"&" => "amp",
|
|
90
|
-
'"' => "quot",
|
|
91
|
-
"'" => "apos",
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
# Process character by character
|
|
95
|
-
chars = content.to_s.chars
|
|
96
|
-
chars.each do |char|
|
|
97
|
-
codepoint = char.ord
|
|
98
|
-
entity_name = context.entity_registry.primary_name_for_codepoint(codepoint)
|
|
93
|
+
registry = context.entity_registry
|
|
94
|
+
config = context.config
|
|
95
|
+
buffer = +""
|
|
99
96
|
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
97
|
+
content.to_s.each_char do |char|
|
|
98
|
+
codepoint = char.ord
|
|
99
|
+
name = registry.primary_name_for_codepoint(codepoint)
|
|
100
|
+
|
|
101
|
+
if name && registry.should_restore?(codepoint, config: config)
|
|
102
|
+
# Flush buffered text before the entity
|
|
103
|
+
unless buffer.empty?
|
|
104
|
+
parent.add_child(Text.new(adapter.create_text(buffer), context))
|
|
105
|
+
buffer.clear
|
|
106
|
+
end
|
|
107
|
+
parent.add_child(
|
|
108
|
+
EntityReference.new(adapter.create_entity_reference(name), context),
|
|
109
|
+
)
|
|
104
110
|
else
|
|
105
|
-
|
|
106
|
-
text_node = adapter.create_text(char)
|
|
107
|
-
parent.add_child(Text.new(text_node, context))
|
|
111
|
+
buffer << char
|
|
108
112
|
end
|
|
109
113
|
end
|
|
114
|
+
|
|
115
|
+
# Flush remaining buffer
|
|
116
|
+
unless buffer.empty?
|
|
117
|
+
parent.add_child(Text.new(adapter.create_text(buffer), context))
|
|
118
|
+
end
|
|
110
119
|
end
|
|
111
120
|
|
|
112
121
|
def visit_cdata(node)
|
|
@@ -135,10 +144,7 @@ module Moxml
|
|
|
135
144
|
end
|
|
136
145
|
|
|
137
146
|
def visit_children(node)
|
|
138
|
-
|
|
139
|
-
node_children.each do |child|
|
|
140
|
-
visit_node(child)
|
|
141
|
-
end
|
|
147
|
+
children(node).each { |child| visit_node(child) }
|
|
142
148
|
end
|
|
143
149
|
|
|
144
150
|
def node_type(node)
|
data/lib/moxml/element.rb
CHANGED
|
@@ -42,6 +42,7 @@ module Moxml
|
|
|
42
42
|
|
|
43
43
|
def []=(name, value)
|
|
44
44
|
adapter.set_attribute(@native, name, normalize_xml_value(value))
|
|
45
|
+
@attributes_cache = nil
|
|
45
46
|
end
|
|
46
47
|
|
|
47
48
|
def [](name)
|
|
@@ -64,19 +65,23 @@ module Moxml
|
|
|
64
65
|
end
|
|
65
66
|
|
|
66
67
|
def attributes
|
|
67
|
-
adapter.attributes(@native).map do |attr|
|
|
68
|
-
Attribute.new(attr, context)
|
|
68
|
+
@attributes ||= adapter.attributes(@native).map do |attr|
|
|
69
|
+
a = Attribute.new(attr, context)
|
|
70
|
+
a.instance_variable_set(:@parent_node, self)
|
|
71
|
+
a
|
|
69
72
|
end
|
|
70
73
|
end
|
|
71
74
|
|
|
72
75
|
def remove_attribute(name)
|
|
73
76
|
adapter.remove_attribute(@native, name)
|
|
77
|
+
@attributes_cache = nil
|
|
74
78
|
self
|
|
75
79
|
end
|
|
76
80
|
|
|
77
81
|
def add_namespace(prefix, uri)
|
|
78
82
|
adapter.create_namespace(@native, prefix, uri,
|
|
79
|
-
|
|
83
|
+
namespace_validation_mode: context.config.namespace_validation_mode)
|
|
84
|
+
@namespaces_cache = nil
|
|
80
85
|
self
|
|
81
86
|
rescue ValidationError => e
|
|
82
87
|
# Re-raise as NamespaceError, provide attributes for error context
|
|
@@ -103,15 +108,16 @@ module Moxml
|
|
|
103
108
|
adapter.set_namespace(
|
|
104
109
|
@native,
|
|
105
110
|
adapter.create_namespace(@native, *ns_or_hash.to_a.first,
|
|
106
|
-
|
|
111
|
+
namespace_validation_mode: context.config.namespace_validation_mode),
|
|
107
112
|
)
|
|
108
113
|
else
|
|
109
114
|
adapter.set_namespace(@native, ns_or_hash&.native)
|
|
110
115
|
end
|
|
116
|
+
@namespaces_cache = nil
|
|
111
117
|
end
|
|
112
118
|
|
|
113
119
|
def namespaces
|
|
114
|
-
adapter.namespace_definitions(@native).map do |ns|
|
|
120
|
+
@namespaces ||= adapter.namespace_definitions(@native).map do |ns|
|
|
115
121
|
Namespace.new(ns, context)
|
|
116
122
|
end
|
|
117
123
|
end
|
|
@@ -136,6 +142,7 @@ module Moxml
|
|
|
136
142
|
|
|
137
143
|
def text=(content)
|
|
138
144
|
adapter.set_text_content(@native, normalize_xml_value(content))
|
|
145
|
+
invalidate_children_cache!
|
|
139
146
|
end
|
|
140
147
|
|
|
141
148
|
def inner_text
|
|
@@ -149,6 +156,7 @@ module Moxml
|
|
|
149
156
|
def inner_xml=(xml)
|
|
150
157
|
doc = context.parse("<root>#{xml}</root>")
|
|
151
158
|
adapter.replace_children(@native, doc.root.children.map(&:native))
|
|
159
|
+
invalidate_children_cache!
|
|
152
160
|
end
|
|
153
161
|
|
|
154
162
|
# Fluent interface methods
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "json"
|
|
4
|
+
require "set"
|
|
4
5
|
|
|
5
6
|
module Moxml
|
|
6
7
|
# EntityRegistry maintains a knowledge base of XML entity definitions.
|
|
@@ -26,6 +27,9 @@ module Moxml
|
|
|
26
27
|
# W3C entity data file name
|
|
27
28
|
ENTITY_DATA_FILE = "w3c_entities.json"
|
|
28
29
|
|
|
30
|
+
# Standard XML predefined entities (XML spec §4.6)
|
|
31
|
+
STANDARD_CODEPOINTS = Set[0x26, 0x3C, 0x3E, 0x22, 0x27].freeze
|
|
32
|
+
|
|
29
33
|
class << self
|
|
30
34
|
# Get the raw entity data from the bundled JSON source
|
|
31
35
|
# @return [Hash{String => String}] entity name to character mapping
|
|
@@ -153,6 +157,38 @@ module Moxml
|
|
|
153
157
|
@by_codepoint[codepoint]&.first
|
|
154
158
|
end
|
|
155
159
|
|
|
160
|
+
# Check if a codepoint is one of the 5 standard XML predefined entities
|
|
161
|
+
# @param codepoint [Integer] Unicode codepoint
|
|
162
|
+
# @return [Boolean]
|
|
163
|
+
def standard_entity?(codepoint)
|
|
164
|
+
STANDARD_CODEPOINTS.include?(codepoint)
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# Determine if an entity reference should be restored for a codepoint.
|
|
168
|
+
# Standard XML entities are always restored (required by XML spec).
|
|
169
|
+
# Non-standard entities are only restored when restore_entities is enabled.
|
|
170
|
+
# @param codepoint [Integer] Unicode codepoint
|
|
171
|
+
# @param config [Moxml::Config] configuration object
|
|
172
|
+
# @return [Boolean]
|
|
173
|
+
def should_restore?(codepoint, config:)
|
|
174
|
+
name = primary_name_for_codepoint(codepoint)
|
|
175
|
+
return false unless name
|
|
176
|
+
return true if standard_entity?(codepoint)
|
|
177
|
+
|
|
178
|
+
config.restore_entities
|
|
179
|
+
end
|
|
180
|
+
|
|
181
|
+
# Returns the set of codepoints that could potentially be restored as entities.
|
|
182
|
+
# Used by DocumentBuilder for O(1) fast-path checks.
|
|
183
|
+
# @return [Set<Integer>]
|
|
184
|
+
def restorable_codepoints
|
|
185
|
+
@restorable_codepoints ||= if @by_name.empty?
|
|
186
|
+
STANDARD_CODEPOINTS
|
|
187
|
+
else
|
|
188
|
+
Set.new(@by_name.values).freeze
|
|
189
|
+
end
|
|
190
|
+
end
|
|
191
|
+
|
|
156
192
|
# Register additional entities
|
|
157
193
|
# @param entities [Hash{String => Integer}] name => codepoint mapping
|
|
158
194
|
# @return [self]
|
data/lib/moxml/node.rb
CHANGED
|
@@ -16,8 +16,8 @@ module Moxml
|
|
|
16
16
|
|
|
17
17
|
def initialize(native, context)
|
|
18
18
|
@context = context
|
|
19
|
-
# @native = adapter.patch_node(native)
|
|
20
19
|
@native = native
|
|
20
|
+
@parent_node = nil
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
def document
|
|
@@ -29,9 +29,10 @@ module Moxml
|
|
|
29
29
|
end
|
|
30
30
|
|
|
31
31
|
def children
|
|
32
|
-
NodeSet.new(
|
|
32
|
+
@children ||= NodeSet.new(
|
|
33
33
|
adapter.children(@native).map { adapter.patch_node(_1, @native) },
|
|
34
34
|
context,
|
|
35
|
+
self,
|
|
35
36
|
)
|
|
36
37
|
end
|
|
37
38
|
|
|
@@ -46,29 +47,37 @@ module Moxml
|
|
|
46
47
|
def add_child(node)
|
|
47
48
|
node = prepare_node(node)
|
|
48
49
|
adapter.add_child(@native, node.native)
|
|
50
|
+
node.instance_variable_set(:@parent_node, self)
|
|
51
|
+
invalidate_children_cache!
|
|
49
52
|
self
|
|
50
53
|
end
|
|
51
54
|
|
|
52
55
|
def add_previous_sibling(node)
|
|
53
56
|
node = prepare_node(node)
|
|
54
57
|
adapter.add_previous_sibling(@native, node.native)
|
|
58
|
+
invalidate_parent_children_cache!
|
|
55
59
|
self
|
|
56
60
|
end
|
|
57
61
|
|
|
58
62
|
def add_next_sibling(node)
|
|
59
63
|
node = prepare_node(node)
|
|
60
64
|
adapter.add_next_sibling(@native, node.native)
|
|
65
|
+
invalidate_parent_children_cache!
|
|
61
66
|
self
|
|
62
67
|
end
|
|
63
68
|
|
|
64
69
|
def remove
|
|
70
|
+
invalidate_parent_children_cache!
|
|
65
71
|
adapter.remove(@native)
|
|
72
|
+
invalidate_children_cache!
|
|
66
73
|
self
|
|
67
74
|
end
|
|
68
75
|
|
|
69
76
|
def replace(node)
|
|
70
77
|
node = prepare_node(node)
|
|
78
|
+
invalidate_parent_children_cache!
|
|
71
79
|
adapter.replace(@native, node.native)
|
|
80
|
+
invalidate_children_cache!
|
|
72
81
|
self
|
|
73
82
|
end
|
|
74
83
|
|
|
@@ -229,6 +238,18 @@ module Moxml
|
|
|
229
238
|
context.config.adapter
|
|
230
239
|
end
|
|
231
240
|
|
|
241
|
+
# Invalidate cached children. Called by mutation methods
|
|
242
|
+
# and by Element attribute/namespace caches.
|
|
243
|
+
def invalidate_children_cache!
|
|
244
|
+
@children = nil
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
# Invalidate parent's cached children when this node
|
|
248
|
+
# is removed/replaced from its parent's child list.
|
|
249
|
+
def invalidate_parent_children_cache!
|
|
250
|
+
@parent_node&.invalidate_children_cache!
|
|
251
|
+
end
|
|
252
|
+
|
|
232
253
|
private
|
|
233
254
|
|
|
234
255
|
def prepare_node(node)
|
data/lib/moxml/node_set.rb
CHANGED
|
@@ -6,60 +6,72 @@ module Moxml
|
|
|
6
6
|
|
|
7
7
|
attr_reader :nodes, :context
|
|
8
8
|
|
|
9
|
-
def initialize(nodes, context)
|
|
9
|
+
def initialize(nodes, context, parent_node = nil)
|
|
10
10
|
@nodes = Array(nodes)
|
|
11
11
|
@context = context
|
|
12
|
+
@wrapped = Array.new(@nodes.size)
|
|
13
|
+
@parent_node = parent_node
|
|
12
14
|
end
|
|
13
15
|
|
|
14
16
|
def each
|
|
15
17
|
return to_enum(:each) unless block_given?
|
|
16
18
|
|
|
17
|
-
nodes.
|
|
19
|
+
@nodes.each_with_index do |node, i|
|
|
20
|
+
@wrapped[i] ||= wrap_with_parent(node)
|
|
21
|
+
yield @wrapped[i]
|
|
22
|
+
end
|
|
18
23
|
self
|
|
19
24
|
end
|
|
20
25
|
|
|
21
26
|
def [](index)
|
|
22
27
|
case index
|
|
23
28
|
when Integer
|
|
24
|
-
|
|
29
|
+
actual = index.negative? ? @nodes.size + index : index
|
|
30
|
+
return nil unless actual >= 0 && actual < @nodes.size
|
|
31
|
+
|
|
32
|
+
@wrapped[actual] ||= wrap_with_parent(@nodes[actual])
|
|
25
33
|
when Range
|
|
26
|
-
|
|
34
|
+
self.class.new(@nodes[index], @context)
|
|
27
35
|
end
|
|
28
36
|
end
|
|
29
37
|
|
|
30
38
|
def first(n = nil)
|
|
31
39
|
if n.nil?
|
|
32
|
-
|
|
40
|
+
@nodes.empty? ? nil : self[0]
|
|
33
41
|
else
|
|
34
|
-
|
|
42
|
+
n.times.filter_map { |i| self[i] }
|
|
35
43
|
end
|
|
36
44
|
end
|
|
37
45
|
|
|
38
46
|
def last
|
|
39
|
-
|
|
47
|
+
@nodes.empty? ? nil : self[@nodes.size - 1]
|
|
40
48
|
end
|
|
41
49
|
|
|
42
50
|
def empty?
|
|
43
|
-
nodes.empty?
|
|
51
|
+
@nodes.empty?
|
|
44
52
|
end
|
|
45
53
|
|
|
46
54
|
def size
|
|
47
|
-
nodes.size
|
|
55
|
+
@nodes.size
|
|
48
56
|
end
|
|
49
57
|
alias length size
|
|
50
58
|
|
|
51
59
|
def to_a
|
|
52
|
-
|
|
60
|
+
@nodes.each_with_index do |_node, i|
|
|
61
|
+
@wrapped[i] ||= wrap_with_parent(@nodes[i])
|
|
62
|
+
end
|
|
63
|
+
@wrapped.compact
|
|
53
64
|
end
|
|
54
65
|
|
|
55
66
|
def +(other)
|
|
56
|
-
self.class.new(nodes + other.nodes, context)
|
|
67
|
+
self.class.new(@nodes + other.nodes, @context)
|
|
57
68
|
end
|
|
58
69
|
|
|
59
70
|
def <<(node)
|
|
60
71
|
# If it's a wrapped Moxml node, unwrap to native before storing
|
|
61
72
|
native_node = node.respond_to?(:native) ? node.native : node
|
|
62
73
|
@nodes << native_node
|
|
74
|
+
@wrapped << nil
|
|
63
75
|
self
|
|
64
76
|
end
|
|
65
77
|
alias push <<
|
|
@@ -78,14 +90,14 @@ module Moxml
|
|
|
78
90
|
true
|
|
79
91
|
end
|
|
80
92
|
end
|
|
81
|
-
self.class.new(unique_natives, context)
|
|
93
|
+
self.class.new(unique_natives, @context)
|
|
82
94
|
end
|
|
83
95
|
|
|
84
96
|
def ==(other)
|
|
85
97
|
self.class == other.class &&
|
|
86
98
|
length == other.length &&
|
|
87
|
-
nodes.each_with_index.all? do |
|
|
88
|
-
|
|
99
|
+
@nodes.each_with_index.all? do |_node, index|
|
|
100
|
+
self[index] == other[index]
|
|
89
101
|
end
|
|
90
102
|
end
|
|
91
103
|
|
|
@@ -103,8 +115,24 @@ module Moxml
|
|
|
103
115
|
def delete(node)
|
|
104
116
|
# If it's a wrapped Moxml node, unwrap to native
|
|
105
117
|
native_node = node.respond_to?(:native) ? node.native : node
|
|
106
|
-
@nodes.
|
|
118
|
+
idx = @nodes.index(native_node)
|
|
119
|
+
if idx
|
|
120
|
+
@nodes.delete_at(idx)
|
|
121
|
+
@wrapped.delete_at(idx)
|
|
122
|
+
else
|
|
123
|
+
@nodes.delete(native_node)
|
|
124
|
+
end
|
|
107
125
|
self
|
|
108
126
|
end
|
|
127
|
+
|
|
128
|
+
private
|
|
129
|
+
|
|
130
|
+
def wrap_with_parent(native_node)
|
|
131
|
+
wrapped = Moxml::Node.wrap(native_node, @context)
|
|
132
|
+
if @parent_node && wrapped
|
|
133
|
+
wrapped.instance_variable_set(:@parent_node, @parent_node)
|
|
134
|
+
end
|
|
135
|
+
wrapped
|
|
136
|
+
end
|
|
109
137
|
end
|
|
110
138
|
end
|
data/lib/moxml/version.rb
CHANGED
data/lib/moxml/xml_utils.rb
CHANGED
|
@@ -95,6 +95,9 @@ RSpec.shared_examples "Moxml Edge Cases" do
|
|
|
95
95
|
if context.config.adapter_name == :libxml
|
|
96
96
|
skip "LibXML cannot query empty default namespace with XPath (documented limitation)"
|
|
97
97
|
end
|
|
98
|
+
if context.config.adapter_name == :nokogiri
|
|
99
|
+
skip "Nokogiri XPath does not support querying empty namespace with xmlns prefix mapping"
|
|
100
|
+
end
|
|
98
101
|
xml = <<~XML
|
|
99
102
|
<root xmlns="http://default1.org">
|
|
100
103
|
<child xmlns="http://default2.org">
|
|
@@ -12,6 +12,22 @@ RSpec.describe Moxml::Adapter::Oga do
|
|
|
12
12
|
|
|
13
13
|
it_behaves_like "xml adapter"
|
|
14
14
|
|
|
15
|
+
describe "serialization" do
|
|
16
|
+
it "does not duplicate XML declarations when declaration nodes repeat" do
|
|
17
|
+
context = Moxml::Context.new(:oga)
|
|
18
|
+
doc = context.create_document
|
|
19
|
+
|
|
20
|
+
doc.add_child(doc.create_declaration("1.0", "UTF-8"))
|
|
21
|
+
doc.add_child(doc.create_declaration("1.0", "UTF-8"))
|
|
22
|
+
doc.add_child(doc.create_element("root"))
|
|
23
|
+
|
|
24
|
+
serialized = doc.to_xml
|
|
25
|
+
|
|
26
|
+
expect(serialized.scan("<?xml").size).to eq(1)
|
|
27
|
+
expect(serialized).to include("<root></root>")
|
|
28
|
+
end
|
|
29
|
+
end
|
|
30
|
+
|
|
15
31
|
describe "entity handling" do
|
|
16
32
|
it "preserves non-breaking space through parse and serialize round-trip" do
|
|
17
33
|
xml = "<root>Item One</root>"
|
|
@@ -103,4 +119,50 @@ RSpec.describe Moxml::Adapter::Oga do
|
|
|
103
119
|
expect(serialized).not_to include("\x01")
|
|
104
120
|
end
|
|
105
121
|
end
|
|
122
|
+
|
|
123
|
+
describe "doctype handling" do
|
|
124
|
+
it "correctly parses PUBLIC doctype" do
|
|
125
|
+
xml = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html/>'
|
|
126
|
+
doc = described_class.parse(xml)
|
|
127
|
+
doctype = doc.children.find { |c| c.is_a?(Moxml::Doctype) }
|
|
128
|
+
|
|
129
|
+
expect(doctype.name).to eq("html")
|
|
130
|
+
expect(doctype.external_id).to eq("-//W3C//DTD XHTML 1.0 Strict//EN")
|
|
131
|
+
expect(doctype.system_id).to eq("http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd")
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
it "correctly parses SYSTEM doctype" do
|
|
135
|
+
xml = '<!DOCTYPE config SYSTEM "config.dtd"><config/>'
|
|
136
|
+
doc = described_class.parse(xml)
|
|
137
|
+
doctype = doc.children.find { |c| c.is_a?(Moxml::Doctype) }
|
|
138
|
+
|
|
139
|
+
expect(doctype.name).to eq("config")
|
|
140
|
+
expect(doctype.external_id).to be_nil
|
|
141
|
+
expect(doctype.system_id).to eq("config.dtd")
|
|
142
|
+
end
|
|
143
|
+
|
|
144
|
+
it "correctly parses simple doctype" do
|
|
145
|
+
xml = "<!DOCTYPE html><html/>"
|
|
146
|
+
doc = described_class.parse(xml)
|
|
147
|
+
doctype = doc.children.find { |c| c.is_a?(Moxml::Doctype) }
|
|
148
|
+
|
|
149
|
+
expect(doctype.name).to eq("html")
|
|
150
|
+
expect(doctype.external_id).to be_nil
|
|
151
|
+
expect(doctype.system_id).to be_nil
|
|
152
|
+
end
|
|
153
|
+
|
|
154
|
+
it "round-trips PUBLIC doctype" do
|
|
155
|
+
xml = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"><html/>'
|
|
156
|
+
doc = described_class.parse(xml)
|
|
157
|
+
|
|
158
|
+
expect(doc.to_xml).to include('PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd"')
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
it "round-trips SYSTEM doctype" do
|
|
162
|
+
xml = '<!DOCTYPE config SYSTEM "config.dtd"><config/>'
|
|
163
|
+
doc = described_class.parse(xml)
|
|
164
|
+
|
|
165
|
+
expect(doc.to_xml).to include('SYSTEM "config.dtd"')
|
|
166
|
+
end
|
|
167
|
+
end
|
|
106
168
|
end
|
|
@@ -4,18 +4,7 @@
|
|
|
4
4
|
# A better way is to run it through Moxml wrappers
|
|
5
5
|
RSpec.shared_examples "xml adapter" do
|
|
6
6
|
let(:xml) do
|
|
7
|
-
|
|
8
|
-
<?xml version="1.0"?>
|
|
9
|
-
<root xmlns="http://example.org" xmlns:x="http://example.org/x">
|
|
10
|
-
<child id="1">Text</child>
|
|
11
|
-
<child id="2"/>
|
|
12
|
-
<x:special>
|
|
13
|
-
<![CDATA[Some <special> text]]>
|
|
14
|
-
<!-- A comment -->
|
|
15
|
-
<?pi target?>
|
|
16
|
-
</x:special>
|
|
17
|
-
</root>
|
|
18
|
-
XML
|
|
7
|
+
'<?xml version="1.0"?><root xmlns="http://example.org" xmlns:x="http://example.org/x"><child id="1">Text</child><child id="2"/><x:special><![CDATA[Some <special> text]]><!-- A comment --><?pi target?></x:special></root>'
|
|
19
8
|
end
|
|
20
9
|
|
|
21
10
|
describe ".parse" do
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
require "support/allocation_helper"
|
|
5
|
+
|
|
6
|
+
# Detailed allocation benchmarks — only run with RUN_PERFORMANCE=1.
|
|
7
|
+
# These measure exact allocation counts and compare across adapters.
|
|
8
|
+
RSpec.describe "Moxml allocation benchmarks", :performance do
|
|
9
|
+
shared_examples "reduced allocations" do |adapter_name|
|
|
10
|
+
let(:ctx) { Moxml::Context.new(adapter_name) }
|
|
11
|
+
|
|
12
|
+
it "parse allocates fewer objects than a 100-element baseline" do
|
|
13
|
+
xml = generate_xml(100)
|
|
14
|
+
allocs = AllocationHelper.count_allocations { ctx.parse(xml) }
|
|
15
|
+
# Before lazy parse: ~18,000 allocations for 100 elements via DocumentBuilder
|
|
16
|
+
# After lazy parse: should be dramatically less (document wrapper + root only)
|
|
17
|
+
expect(allocs).to be < 5000,
|
|
18
|
+
"Expected <5000 allocations for 100-element parse, got #{allocs}"
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
it "parse + root access is allocation-efficient" do
|
|
22
|
+
xml = generate_xml(50)
|
|
23
|
+
allocs = AllocationHelper.count_allocations do
|
|
24
|
+
doc = ctx.parse(xml)
|
|
25
|
+
doc.root.name
|
|
26
|
+
end
|
|
27
|
+
expect(allocs).to be < 2000,
|
|
28
|
+
"Expected <2000 allocations for parse + root.name, got #{allocs}"
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it "children access is cached (repeated calls don't increase allocations)" do
|
|
32
|
+
xml = "<root><a/><b/><c/></root>"
|
|
33
|
+
doc = ctx.parse(xml)
|
|
34
|
+
root = doc.root
|
|
35
|
+
|
|
36
|
+
allocs1 = AllocationHelper.count_allocations { root.children.to_a }
|
|
37
|
+
allocs2 = AllocationHelper.count_allocations { root.children.to_a }
|
|
38
|
+
|
|
39
|
+
# Second call should allocate fewer objects because children are cached
|
|
40
|
+
expect(allocs2).to be <= allocs1,
|
|
41
|
+
"Second children.to_a (#{allocs2}) should allocate <= first (#{allocs1})"
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
it "attributes access is cached" do
|
|
45
|
+
xml = '<root a="1" b="2" c="3"><child d="4"/></root>'
|
|
46
|
+
doc = ctx.parse(xml)
|
|
47
|
+
root = doc.root
|
|
48
|
+
|
|
49
|
+
allocs1 = AllocationHelper.count_allocations { root.attributes }
|
|
50
|
+
allocs2 = AllocationHelper.count_allocations { root.attributes }
|
|
51
|
+
|
|
52
|
+
expect(allocs2).to be <= allocs1,
|
|
53
|
+
"Second attributes call (#{allocs2}) should allocate <= first (#{allocs1})"
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
it "namespaces access is cached" do
|
|
57
|
+
xml = '<root xmlns:a="http://a.com" xmlns:b="http://b.com"><a:child/></root>'
|
|
58
|
+
doc = ctx.parse(xml)
|
|
59
|
+
root = doc.root
|
|
60
|
+
|
|
61
|
+
allocs1 = AllocationHelper.count_allocations { root.namespaces }
|
|
62
|
+
allocs2 = AllocationHelper.count_allocations { root.namespaces }
|
|
63
|
+
|
|
64
|
+
expect(allocs2).to be <= allocs1,
|
|
65
|
+
"Second namespaces call (#{allocs2}) should allocate <= first (#{allocs1})"
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
it "NodeSet iteration is cached (second iteration allocates less)" do
|
|
69
|
+
xml = generate_xml(20)
|
|
70
|
+
doc = ctx.parse(xml)
|
|
71
|
+
root = doc.root
|
|
72
|
+
|
|
73
|
+
allocs1 = AllocationHelper.count_allocations do
|
|
74
|
+
root.children.each do |_c|
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
allocs2 = AllocationHelper.count_allocations do
|
|
78
|
+
root.children.each do |_c|
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
expect(allocs2).to be <= allocs1,
|
|
83
|
+
"Second NodeSet iteration (#{allocs2}) should allocate <= first (#{allocs1})"
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
AllocationHelper::GUARDED_ADAPTERS.each do |adapter_name|
|
|
88
|
+
describe "#{adapter_name} adapter" do
|
|
89
|
+
before(:all) do
|
|
90
|
+
skip("#{adapter_name} adapter not available") unless AllocationHelper.adapter_available?(adapter_name)
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it_behaves_like "reduced allocations", adapter_name
|
|
94
|
+
end
|
|
95
|
+
end
|
|
96
|
+
end
|