moxml 0.1.17 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/moxml/adapter/libxml.rb +0 -3
- data/lib/moxml/adapter/nokogiri.rb +1 -5
- data/lib/moxml/adapter/oga.rb +11 -5
- data/lib/moxml/adapter/rexml.rb +2 -6
- data/lib/moxml/config.rb +17 -2
- data/lib/moxml/version.rb +1 -1
- data/spec/integration/shared_examples/edge_cases.rb +85 -0
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +5 -3
- data/spec/integration/shared_examples/high_level/document_builder_behavior.rb +8 -6
- data/spec/integration/shared_examples/integration_workflows.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/node_set_behavior.rb +3 -1
- data/spec/moxml/moxml_spec.rb +39 -0
- data/spec/performance/benchmark_spec.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: c1daf227e9effc582c66e780516135481aa48e467226a4267974633a4673f786
|
|
4
|
+
data.tar.gz: 5b230e79a208eb4b1c5e32175df364e5b42f9e766c4c8189eb18fcad09bb79bf
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e39941f6f51567655c246f1e8d6225c6ab572c68958fd9ca4a74ca191af4493e1bf74ff0feed84792cc87e5681cce1d8ae291533e992d07b70fdfb1063d96a67
|
|
7
|
+
data.tar.gz: 41ca4a3954bf2e713703124758309e5d1e056d44826cdacf3b4693c5bf37c463579462a6c0ecb3aa65e4c3cafaeef1e2ad0d8a5af9ebe2e1b9f4a695e7678ff4
|
data/lib/moxml/adapter/libxml.rb
CHANGED
|
@@ -182,11 +182,7 @@ module Moxml
|
|
|
182
182
|
end
|
|
183
183
|
|
|
184
184
|
def children(node)
|
|
185
|
-
node.children
|
|
186
|
-
child.text? && child.content.strip.empty? &&
|
|
187
|
-
!(child.previous_sibling.nil? && child.next_sibling.nil?) &&
|
|
188
|
-
!adjacent_to_entity_reference?(child)
|
|
189
|
-
end
|
|
185
|
+
node.children
|
|
190
186
|
end
|
|
191
187
|
|
|
192
188
|
def adjacent_to_entity_reference?(node)
|
data/lib/moxml/adapter/oga.rb
CHANGED
|
@@ -193,12 +193,18 @@ module Moxml
|
|
|
193
193
|
|
|
194
194
|
return all_children unless node.is_a?(::Oga::XML::Node) || node.is_a?(::Oga::XML::Document)
|
|
195
195
|
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
196
|
+
child_nodes = node.children.to_a
|
|
197
|
+
# Filter out whitespace-only text nodes at document level only.
|
|
198
|
+
# Document-level whitespace (between <?xml?> and <root>) is
|
|
199
|
+
# formatting, not content, and differs across adapters.
|
|
200
|
+
# Whitespace inside elements (e.g. "FigureA.1" spacing) is
|
|
201
|
+
# meaningful and must be preserved.
|
|
202
|
+
if node.is_a?(::Oga::XML::Document)
|
|
203
|
+
child_nodes = child_nodes.reject do |child|
|
|
204
|
+
child.is_a?(::Oga::XML::Text) && child.text.strip.empty?
|
|
205
|
+
end
|
|
201
206
|
end
|
|
207
|
+
all_children + child_nodes
|
|
202
208
|
end
|
|
203
209
|
|
|
204
210
|
def adjacent_to_entity_reference?(node)
|
data/lib/moxml/adapter/rexml.rb
CHANGED
|
@@ -177,12 +177,8 @@ module Moxml
|
|
|
177
177
|
def children(node)
|
|
178
178
|
return [] unless node.is_a?(::REXML::Parent)
|
|
179
179
|
|
|
180
|
-
#
|
|
181
|
-
result = node.children.
|
|
182
|
-
child.is_a?(::REXML::Text) &&
|
|
183
|
-
child.to_s.strip.empty? &&
|
|
184
|
-
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
|
185
|
-
end
|
|
180
|
+
# Return all children preserving whitespace text nodes
|
|
181
|
+
result = node.children.dup
|
|
186
182
|
|
|
187
183
|
# Include any EntityReference wrappers stored alongside native children
|
|
188
184
|
entity_refs = attachments.get(node, :entity_refs)
|
data/lib/moxml/config.rb
CHANGED
|
@@ -3,7 +3,8 @@
|
|
|
3
3
|
module Moxml
|
|
4
4
|
class Config
|
|
5
5
|
VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
|
|
6
|
-
DEFAULT_ADAPTER =
|
|
6
|
+
DEFAULT_ADAPTER = :nokogiri
|
|
7
|
+
OPAL_DEFAULT_ADAPTER = :oga
|
|
7
8
|
|
|
8
9
|
# Entity loading modes:
|
|
9
10
|
# - :required - Must load entities, raise error if unavailable (default)
|
|
@@ -20,7 +21,21 @@ module Moxml
|
|
|
20
21
|
end
|
|
21
22
|
|
|
22
23
|
def default_adapter
|
|
23
|
-
@default_adapter ||=
|
|
24
|
+
@default_adapter ||= runtime_default_adapter
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
def runtime_default_adapter
|
|
28
|
+
return OPAL_DEFAULT_ADAPTER if RUBY_ENGINE == "opal"
|
|
29
|
+
|
|
30
|
+
detect_loaded_adapter || DEFAULT_ADAPTER
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def detect_loaded_adapter
|
|
34
|
+
return :nokogiri if Object.const_defined?(:Nokogiri)
|
|
35
|
+
return :ox if Object.const_defined?(:Ox)
|
|
36
|
+
return :oga if Object.const_defined?(:Oga)
|
|
37
|
+
|
|
38
|
+
nil
|
|
24
39
|
end
|
|
25
40
|
end
|
|
26
41
|
|
data/lib/moxml/version.rb
CHANGED
|
data/spec/integration/shared_examples/edge_cases.rb
CHANGED
|
@@ -167,6 +167,91 @@ RSpec.shared_examples "Moxml Edge Cases" do
|
|
|
167
167
|
end
|
|
168
168
|
end
|
|
169
169
|
|
|
170
|
+
describe "whitespace text node preservation" do
|
|
171
|
+
# Ox/HeadedOx do not generate whitespace-only text nodes in their parser,
|
|
172
|
+
# so these tests only apply to adapters that do (Nokogiri, OGA, REXML, LibXML)
|
|
173
|
+
let(:preserves_ws) { !%i[ox headed_ox].include?(context.config.adapter_name) }
|
|
174
|
+
|
|
175
|
+
it "preserves whitespace-only text nodes between sibling elements" do
|
|
176
|
+
unless preserves_ws
|
|
177
|
+
skip "Ox/HeadedOx parser does not generate whitespace-only text nodes"
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
xml = <<~XML
|
|
181
|
+
<root>
|
|
182
|
+
<a>1</a>
|
|
183
|
+
<b>2</b>
|
|
184
|
+
<c>3</c>
|
|
185
|
+
</root>
|
|
186
|
+
XML
|
|
187
|
+
|
|
188
|
+
doc = context.parse(xml)
|
|
189
|
+
children = doc.root.children
|
|
190
|
+
|
|
191
|
+
# Should have whitespace text nodes between elements
|
|
192
|
+
expect(children.size).to be > 3
|
|
193
|
+
|
|
194
|
+
# Whitespace text nodes should be Text nodes
|
|
195
|
+
ws_nodes = children.select { |c| c.is_a?(Moxml::Text) && c.content.strip.empty? }
|
|
196
|
+
expect(ws_nodes).not_to be_empty
|
|
197
|
+
|
|
198
|
+
# Element children should still be accessible
|
|
199
|
+
elements = children.select { |c| c.is_a?(Moxml::Element) }
|
|
200
|
+
expect(elements.map(&:name)).to eq(%w[a b c])
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
it "preserves inline whitespace text nodes between text and elements" do
|
|
204
|
+
xml = "<p>Figure <sub>A</sub>.1</p>"
|
|
205
|
+
doc = context.parse(xml)
|
|
206
|
+
|
|
207
|
+
children = doc.root.children
|
|
208
|
+
expect(children.size).to eq(3)
|
|
209
|
+
|
|
210
|
+
# First child: "Figure " text node
|
|
211
|
+
expect(children[0]).to be_a(Moxml::Text)
|
|
212
|
+
expect(children[0].content).to eq("Figure ")
|
|
213
|
+
|
|
214
|
+
# Second child: <sub> element
|
|
215
|
+
expect(children[1]).to be_a(Moxml::Element)
|
|
216
|
+
expect(children[1].name).to eq("sub")
|
|
217
|
+
expect(children[1].text).to eq("A")
|
|
218
|
+
|
|
219
|
+
# Third child: ".1" text node
|
|
220
|
+
expect(children[2]).to be_a(Moxml::Text)
|
|
221
|
+
expect(children[2].content).to eq(".1")
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
it "preserves space-only text node as meaningful content" do
|
|
225
|
+
xml = "<p>Hello <b>world</b>!</p>"
|
|
226
|
+
doc = context.parse(xml)
|
|
227
|
+
|
|
228
|
+
children = doc.root.children
|
|
229
|
+
expect(children.size).to eq(3)
|
|
230
|
+
|
|
231
|
+
expect(children[0].content).to eq("Hello ")
|
|
232
|
+
expect(children[1]).to be_a(Moxml::Element)
|
|
233
|
+
expect(children[2].content).to eq("!")
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
it "distinguishes whitespace text nodes from element children" do
|
|
237
|
+
unless preserves_ws
|
|
238
|
+
skip "Ox/HeadedOx parser does not generate whitespace-only text nodes"
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
xml = "<root> <child/> </root>"
|
|
242
|
+
doc = context.parse(xml)
|
|
243
|
+
|
|
244
|
+
children = doc.root.children
|
|
245
|
+
# " " before child, " " after child
|
|
246
|
+
expect(children.size).to eq(3)
|
|
247
|
+
expect(children[0]).to be_a(Moxml::Text)
|
|
248
|
+
expect(children[0].content).to eq(" ")
|
|
249
|
+
expect(children[1]).to be_a(Moxml::Element)
|
|
250
|
+
expect(children[2]).to be_a(Moxml::Text)
|
|
251
|
+
expect(children[2].content).to eq(" ")
|
|
252
|
+
end
|
|
253
|
+
end
|
|
254
|
+
|
|
170
255
|
describe "document structure edge cases" do
|
|
171
256
|
it "handles deeply nested elements" do
|
|
172
257
|
doc = context.create_document
|
data/spec/integration/shared_examples/entity_reference_whitespace.rb
CHANGED
|
@@ -104,7 +104,7 @@ RSpec.shared_examples "Entity Reference Whitespace Preservation" do
|
|
|
104
104
|
end
|
|
105
105
|
|
|
106
106
|
describe "structural whitespace filtering" do
|
|
107
|
-
it "
|
|
107
|
+
it "preserves whitespace text nodes between elements" do
|
|
108
108
|
xml = <<~XML
|
|
109
109
|
<root>
|
|
110
110
|
<child1/>
|
|
@@ -115,8 +115,10 @@ RSpec.shared_examples "Entity Reference Whitespace Preservation" do
|
|
|
115
115
|
doc = context.parse(xml)
|
|
116
116
|
children = doc.root.children
|
|
117
117
|
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
# Whitespace text nodes between elements are preserved
|
|
119
|
+
elements = children.select { |c| c.is_a?(Moxml::Element) }
|
|
120
|
+
expect(elements.length).to eq(2)
|
|
121
|
+
expect(elements.map(&:name)).to eq(%w[child1 child2])
|
|
120
122
|
end
|
|
121
123
|
end
|
|
122
124
|
end
|
data/spec/integration/shared_examples/high_level/document_builder_behavior.rb
CHANGED
|
@@ -32,12 +32,14 @@ RSpec.shared_examples "Moxml::DocumentBuilder" do
|
|
|
32
32
|
|
|
33
33
|
expect(doc.root.namespaces.count).to eq(1)
|
|
34
34
|
expect(doc.root.namespaces.first.uri).to eq("http://example.org")
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
expect(
|
|
38
|
-
expect(
|
|
39
|
-
expect(
|
|
40
|
-
expect(
|
|
35
|
+
# Whitespace text nodes are preserved between elements
|
|
36
|
+
non_ws_children = doc.root.children.reject { |c| c.is_a?(Moxml::Text) && c.content.strip.empty? }
|
|
37
|
+
expect(non_ws_children[0]).to be_a(Moxml::Comment)
|
|
38
|
+
expect(non_ws_children[1]).to be_a(Moxml::Element)
|
|
39
|
+
expect(non_ws_children[1].name).to eq("child")
|
|
40
|
+
expect(non_ws_children[1]["id"]).to eq("1")
|
|
41
|
+
expect(non_ws_children[1].children.find { |c| c.is_a?(Moxml::Cdata) }).to be_a(Moxml::Cdata)
|
|
42
|
+
expect(non_ws_children[2]).to be_a(Moxml::ProcessingInstruction)
|
|
41
43
|
end
|
|
42
44
|
end
|
|
43
45
|
end
|
data/spec/integration/shared_examples/integration_workflows.rb
CHANGED
|
@@ -113,7 +113,7 @@ RSpec.shared_examples "Moxml Integration" do
|
|
|
113
113
|
expect(attr).to eq("value")
|
|
114
114
|
|
|
115
115
|
# Test namespace override
|
|
116
|
-
deeper = a_element.children.
|
|
116
|
+
deeper = a_element.children.find { |c| c.is_a?(Moxml::Element) }
|
|
117
117
|
expect(deeper.namespace.uri).to eq("http://other.org")
|
|
118
118
|
end
|
|
119
119
|
end
|
data/spec/integration/shared_examples/node_wrappers/node_set_behavior.rb
CHANGED
|
@@ -41,7 +41,9 @@ RSpec.shared_examples "Moxml::NodeSet" do
|
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
it "compares nodes" do
|
|
44
|
-
|
|
44
|
+
xpath_results = doc.xpath("//child")
|
|
45
|
+
element_children = doc.root.children.select { |c| c.is_a?(Moxml::Element) }
|
|
46
|
+
expect(xpath_results.map(&:native)).to eq(element_children.map(&:native))
|
|
45
47
|
end
|
|
46
48
|
end
|
|
47
49
|
|
data/spec/moxml/moxml_spec.rb
CHANGED
|
@@ -2,6 +2,28 @@
|
|
|
2
2
|
|
|
3
3
|
# spec/moxml_spec.rb
|
|
4
4
|
RSpec.describe Moxml do
|
|
5
|
+
around do |example|
|
|
6
|
+
original_default = Moxml::Config.instance_variable_get(:@default)
|
|
7
|
+
original_default_adapter = Moxml::Config.instance_variable_get(:@default_adapter)
|
|
8
|
+
|
|
9
|
+
Moxml::Config.remove_instance_variable(:@default) if Moxml::Config.instance_variable_defined?(:@default)
|
|
10
|
+
Moxml::Config.remove_instance_variable(:@default_adapter) if Moxml::Config.instance_variable_defined?(:@default_adapter)
|
|
11
|
+
|
|
12
|
+
example.run
|
|
13
|
+
ensure
|
|
14
|
+
if original_default.nil?
|
|
15
|
+
Moxml::Config.remove_instance_variable(:@default) if Moxml::Config.instance_variable_defined?(:@default)
|
|
16
|
+
else
|
|
17
|
+
Moxml::Config.instance_variable_set(:@default, original_default)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
if original_default_adapter.nil?
|
|
21
|
+
Moxml::Config.remove_instance_variable(:@default_adapter) if Moxml::Config.instance_variable_defined?(:@default_adapter)
|
|
22
|
+
else
|
|
23
|
+
Moxml::Config.instance_variable_set(:@default_adapter, original_default_adapter)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
5
27
|
it "has a version number" do
|
|
6
28
|
expect(Moxml::VERSION).not_to be_nil
|
|
7
29
|
end
|
|
@@ -34,6 +56,23 @@ RSpec.describe Moxml do
|
|
|
34
56
|
expect(context.config.adapter_name).to eq(:nokogiri)
|
|
35
57
|
end
|
|
36
58
|
|
|
59
|
+
it "defaults to oga on Opal" do
|
|
60
|
+
stub_const("RUBY_ENGINE", "opal")
|
|
61
|
+
|
|
62
|
+
context = described_class.new
|
|
63
|
+
expect(context.config.adapter_name).to eq(:oga)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
it "prefers ox when it is already loaded" do
|
|
67
|
+
allow(Object).to receive(:const_defined?).and_call_original
|
|
68
|
+
allow(Object).to receive(:const_defined?).with(:Nokogiri).and_return(false)
|
|
69
|
+
allow(Object).to receive(:const_defined?).with(:Ox).and_return(true)
|
|
70
|
+
allow(Object).to receive(:const_defined?).with(:Oga).and_return(false)
|
|
71
|
+
|
|
72
|
+
context = described_class.new
|
|
73
|
+
expect(context.config.adapter_name).to eq(:ox)
|
|
74
|
+
end
|
|
75
|
+
|
|
37
76
|
it "uses configured options from the block" do
|
|
38
77
|
described_class.configure do |config|
|
|
39
78
|
config.default_adapter = :oga
|
data/spec/performance/benchmark_spec.rb
CHANGED
|
@@ -27,7 +27,7 @@ RSpec.shared_examples "Performance Examples" do
|
|
|
27
27
|
{
|
|
28
28
|
nokogiri: { parser: 15, serializer: 1000 },
|
|
29
29
|
oga: { parser: 10, serializer: 100 },
|
|
30
|
-
rexml: { parser: 0, serializer:
|
|
30
|
+
rexml: { parser: 0, serializer: 5 },
|
|
31
31
|
ox: { parser: 2, serializer: 1000 },
|
|
32
32
|
headed_ox: { parser: 2, serializer: 1000 },
|
|
33
33
|
libxml: { parser: 10, serializer: 30 },
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: moxml
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.
|
|
4
|
+
version: 0.1.18
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: |
|
|
14
14
|
Moxml is a unified XML manipulation library that provides a common API
|