moxml 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bbd69145e9a360635af848bf0bdda2883e35760b2763021f6bf6f1d6dca9827e
4
- data.tar.gz: aa492e21514fd80a01f98709eddf8c3aa323b584210d56534ad5e2c2b467df18
3
+ metadata.gz: c1daf227e9effc582c66e780516135481aa48e467226a4267974633a4673f786
4
+ data.tar.gz: 5b230e79a208eb4b1c5e32175df364e5b42f9e766c4c8189eb18fcad09bb79bf
5
5
  SHA512:
6
- metadata.gz: 1cdb7d6c934f1ea788a40d81d987c97d4c1fc21ad71d22eaac73abf45d093680667f3303b35934378b8cce0d99e3fc9db47c85632678247426527d7fb3491bed
7
- data.tar.gz: 79c352eb8df9b86831d554e17538abd4da8a6dfce61b4e566bc236334601e43bff7c670894ea05c72abac40bb1b3b90375ef8caaf6b488a9c43bc33fc70d6785
6
+ metadata.gz: e39941f6f51567655c246f1e8d6225c6ab572c68958fd9ca4a74ca191af4493e1bf74ff0feed84792cc87e5681cce1d8ae291533e992d07b70fdfb1063d96a67
7
+ data.tar.gz: 41ca4a3954bf2e713703124758309e5d1e056d44826cdacf3b4693c5bf37c463579462a6c0ecb3aa65e4c3cafaeef1e2ad0d8a5af9ebe2e1b9f4a695e7678ff4
@@ -294,9 +294,6 @@ module Moxml
294
294
  result = []
295
295
  if native_node.children?
296
296
  native_node.each_child do |child|
297
- # Skip whitespace-only text nodes
298
- next if child.text? && child.content.to_s.strip.empty?
299
-
300
297
  result << patch_node(child)
301
298
  end
302
299
  end
@@ -182,11 +182,7 @@ module Moxml
182
182
  end
183
183
 
184
184
  def children(node)
185
- node.children.reject do |child|
186
- child.text? && child.content.strip.empty? &&
187
- !(child.previous_sibling.nil? && child.next_sibling.nil?) &&
188
- !adjacent_to_entity_reference?(child)
189
- end
185
+ node.children
190
186
  end
191
187
 
192
188
  def adjacent_to_entity_reference?(node)
@@ -193,12 +193,18 @@ module Moxml
193
193
 
194
194
  return all_children unless node.is_a?(::Oga::XML::Node) || node.is_a?(::Oga::XML::Document)
195
195
 
196
- all_children + node.children.reject do |child|
197
- child.is_a?(::Oga::XML::Text) &&
198
- child.text.strip.empty? &&
199
- !(child.previous.nil? && child.next.nil?) &&
200
- !adjacent_to_entity_reference?(child)
196
+ child_nodes = node.children.to_a
197
+ # Filter out whitespace-only text nodes at document level only.
198
+ # Document-level whitespace (between <?xml?> and <root>) is
199
+ # formatting, not content, and differs across adapters.
200
+ # Whitespace inside elements (e.g. "FigureA.1" spacing) is
201
+ # meaningful and must be preserved.
202
+ if node.is_a?(::Oga::XML::Document)
203
+ child_nodes = child_nodes.reject do |child|
204
+ child.is_a?(::Oga::XML::Text) && child.text.strip.empty?
205
+ end
201
206
  end
207
+ all_children + child_nodes
202
208
  end
203
209
 
204
210
  def adjacent_to_entity_reference?(node)
@@ -177,12 +177,8 @@ module Moxml
177
177
  def children(node)
178
178
  return [] unless node.is_a?(::REXML::Parent)
179
179
 
180
- # Get all children and filter out empty text nodes between elements
181
- result = node.children.reject do |child|
182
- child.is_a?(::REXML::Text) &&
183
- child.to_s.strip.empty? &&
184
- !(child.next_sibling.nil? && child.previous_sibling.nil?)
185
- end
180
+ # Return all children preserving whitespace text nodes
181
+ result = node.children.dup
186
182
 
187
183
  # Include any EntityReference wrappers stored alongside native children
188
184
  entity_refs = attachments.get(node, :entity_refs)
data/lib/moxml/config.rb CHANGED
@@ -3,7 +3,8 @@
3
3
  module Moxml
4
4
  class Config
5
5
  VALID_ADAPTERS = %i[nokogiri oga rexml ox headed_ox libxml].freeze
6
- DEFAULT_ADAPTER = VALID_ADAPTERS.first
6
+ DEFAULT_ADAPTER = :nokogiri
7
+ OPAL_DEFAULT_ADAPTER = :oga
7
8
 
8
9
  # Entity loading modes:
9
10
  # - :required - Must load entities, raise error if unavailable (default)
@@ -20,7 +21,21 @@ module Moxml
20
21
  end
21
22
 
22
23
  def default_adapter
23
- @default_adapter ||= DEFAULT_ADAPTER
24
+ @default_adapter ||= runtime_default_adapter
25
+ end
26
+
27
+ def runtime_default_adapter
28
+ return OPAL_DEFAULT_ADAPTER if RUBY_ENGINE == "opal"
29
+
30
+ detect_loaded_adapter || DEFAULT_ADAPTER
31
+ end
32
+
33
+ def detect_loaded_adapter
34
+ return :nokogiri if Object.const_defined?(:Nokogiri)
35
+ return :ox if Object.const_defined?(:Ox)
36
+ return :oga if Object.const_defined?(:Oga)
37
+
38
+ nil
24
39
  end
25
40
  end
26
41
 
data/lib/moxml/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- VERSION = "0.1.17"
4
+ VERSION = "0.1.18"
5
5
  end
@@ -167,6 +167,91 @@ RSpec.shared_examples "Moxml Edge Cases" do
167
167
  end
168
168
  end
169
169
 
170
+ describe "whitespace text node preservation" do
171
+ # Ox/HeadedOx do not generate whitespace-only text nodes in their parser,
172
+ # so these tests only apply to adapters that do (Nokogiri, OGA, REXML, LibXML)
173
+ let(:preserves_ws) { !%i[ox headed_ox].include?(context.config.adapter_name) }
174
+
175
+ it "preserves whitespace-only text nodes between sibling elements" do
176
+ unless preserves_ws
177
+ skip "Ox/HeadedOx parser does not generate whitespace-only text nodes"
178
+ end
179
+
180
+ xml = <<~XML
181
+ <root>
182
+ <a>1</a>
183
+ <b>2</b>
184
+ <c>3</c>
185
+ </root>
186
+ XML
187
+
188
+ doc = context.parse(xml)
189
+ children = doc.root.children
190
+
191
+ # Should have whitespace text nodes between elements
192
+ expect(children.size).to be > 3
193
+
194
+ # Whitespace text nodes should be Text nodes
195
+ ws_nodes = children.select { |c| c.is_a?(Moxml::Text) && c.content.strip.empty? }
196
+ expect(ws_nodes).not_to be_empty
197
+
198
+ # Element children should still be accessible
199
+ elements = children.select { |c| c.is_a?(Moxml::Element) }
200
+ expect(elements.map(&:name)).to eq(%w[a b c])
201
+ end
202
+
203
+ it "preserves inline whitespace text nodes between text and elements" do
204
+ xml = "<p>Figure <sub>A</sub>.1</p>"
205
+ doc = context.parse(xml)
206
+
207
+ children = doc.root.children
208
+ expect(children.size).to eq(3)
209
+
210
+ # First child: "Figure " text node
211
+ expect(children[0]).to be_a(Moxml::Text)
212
+ expect(children[0].content).to eq("Figure ")
213
+
214
+ # Second child: <sub> element
215
+ expect(children[1]).to be_a(Moxml::Element)
216
+ expect(children[1].name).to eq("sub")
217
+ expect(children[1].text).to eq("A")
218
+
219
+ # Third child: ".1" text node
220
+ expect(children[2]).to be_a(Moxml::Text)
221
+ expect(children[2].content).to eq(".1")
222
+ end
223
+
224
+ it "preserves space-only text node as meaningful content" do
225
+ xml = "<p>Hello <b>world</b>!</p>"
226
+ doc = context.parse(xml)
227
+
228
+ children = doc.root.children
229
+ expect(children.size).to eq(3)
230
+
231
+ expect(children[0].content).to eq("Hello ")
232
+ expect(children[1]).to be_a(Moxml::Element)
233
+ expect(children[2].content).to eq("!")
234
+ end
235
+
236
+ it "distinguishes whitespace text nodes from element children" do
237
+ unless preserves_ws
238
+ skip "Ox/HeadedOx parser does not generate whitespace-only text nodes"
239
+ end
240
+
241
+ xml = "<root> <child/> </root>"
242
+ doc = context.parse(xml)
243
+
244
+ children = doc.root.children
245
+ # " " before child, " " after child
246
+ expect(children.size).to eq(3)
247
+ expect(children[0]).to be_a(Moxml::Text)
248
+ expect(children[0].content).to eq(" ")
249
+ expect(children[1]).to be_a(Moxml::Element)
250
+ expect(children[2]).to be_a(Moxml::Text)
251
+ expect(children[2].content).to eq(" ")
252
+ end
253
+ end
254
+
170
255
  describe "document structure edge cases" do
171
256
  it "handles deeply nested elements" do
172
257
  doc = context.create_document
@@ -104,7 +104,7 @@ RSpec.shared_examples "Entity Reference Whitespace Preservation" do
104
104
  end
105
105
 
106
106
  describe "structural whitespace filtering" do
107
- it "still filters whitespace between elements" do
107
+ it "preserves whitespace text nodes between elements" do
108
108
  xml = <<~XML
109
109
  <root>
110
110
  <child1/>
@@ -115,8 +115,10 @@ RSpec.shared_examples "Entity Reference Whitespace Preservation" do
115
115
  doc = context.parse(xml)
116
116
  children = doc.root.children
117
117
 
118
- expect(children.length).to eq(2)
119
- expect(children.all?(Moxml::Element)).to be true
118
+ # Whitespace text nodes between elements are preserved
119
+ elements = children.select { |c| c.is_a?(Moxml::Element) }
120
+ expect(elements.length).to eq(2)
121
+ expect(elements.map(&:name)).to eq(%w[child1 child2])
120
122
  end
121
123
  end
122
124
  end
@@ -32,12 +32,14 @@ RSpec.shared_examples "Moxml::DocumentBuilder" do
32
32
 
33
33
  expect(doc.root.namespaces.count).to eq(1)
34
34
  expect(doc.root.namespaces.first.uri).to eq("http://example.org")
35
- expect(doc.root.children[0]).to be_a(Moxml::Comment)
36
- expect(doc.root.children[1]).to be_a(Moxml::Element)
37
- expect(doc.root.children[1].name).to eq("child")
38
- expect(doc.root.children[1]["id"]).to eq("1")
39
- expect(doc.root.children[1].children.first).to be_a(Moxml::Cdata)
40
- expect(doc.root.children[2]).to be_a(Moxml::ProcessingInstruction)
35
+ # Whitespace text nodes are preserved between elements
36
+ non_ws_children = doc.root.children.reject { |c| c.is_a?(Moxml::Text) && c.content.strip.empty? }
37
+ expect(non_ws_children[0]).to be_a(Moxml::Comment)
38
+ expect(non_ws_children[1]).to be_a(Moxml::Element)
39
+ expect(non_ws_children[1].name).to eq("child")
40
+ expect(non_ws_children[1]["id"]).to eq("1")
41
+ expect(non_ws_children[1].children.find { |c| c.is_a?(Moxml::Cdata) }).to be_a(Moxml::Cdata)
42
+ expect(non_ws_children[2]).to be_a(Moxml::ProcessingInstruction)
41
43
  end
42
44
  end
43
45
  end
@@ -113,7 +113,7 @@ RSpec.shared_examples "Moxml Integration" do
113
113
  expect(attr).to eq("value")
114
114
 
115
115
  # Test namespace override
116
- deeper = a_element.children.first
116
+ deeper = a_element.children.find { |c| c.is_a?(Moxml::Element) }
117
117
  expect(deeper.namespace.uri).to eq("http://other.org")
118
118
  end
119
119
  end
@@ -41,7 +41,9 @@ RSpec.shared_examples "Moxml::NodeSet" do
41
41
  end
42
42
 
43
43
  it "compares nodes" do
44
- expect(doc.xpath("//child")).to eq(doc.root.children)
44
+ xpath_results = doc.xpath("//child")
45
+ element_children = doc.root.children.select { |c| c.is_a?(Moxml::Element) }
46
+ expect(xpath_results.map(&:native)).to eq(element_children.map(&:native))
45
47
  end
46
48
  end
47
49
 
@@ -2,6 +2,28 @@
2
2
 
3
3
  # spec/moxml_spec.rb
4
4
  RSpec.describe Moxml do
5
+ around do |example|
6
+ original_default = Moxml::Config.instance_variable_get(:@default)
7
+ original_default_adapter = Moxml::Config.instance_variable_get(:@default_adapter)
8
+
9
+ Moxml::Config.remove_instance_variable(:@default) if Moxml::Config.instance_variable_defined?(:@default)
10
+ Moxml::Config.remove_instance_variable(:@default_adapter) if Moxml::Config.instance_variable_defined?(:@default_adapter)
11
+
12
+ example.run
13
+ ensure
14
+ if original_default.nil?
15
+ Moxml::Config.remove_instance_variable(:@default) if Moxml::Config.instance_variable_defined?(:@default)
16
+ else
17
+ Moxml::Config.instance_variable_set(:@default, original_default)
18
+ end
19
+
20
+ if original_default_adapter.nil?
21
+ Moxml::Config.remove_instance_variable(:@default_adapter) if Moxml::Config.instance_variable_defined?(:@default_adapter)
22
+ else
23
+ Moxml::Config.instance_variable_set(:@default_adapter, original_default_adapter)
24
+ end
25
+ end
26
+
5
27
  it "has a version number" do
6
28
  expect(Moxml::VERSION).not_to be_nil
7
29
  end
@@ -34,6 +56,23 @@ RSpec.describe Moxml do
34
56
  expect(context.config.adapter_name).to eq(:nokogiri)
35
57
  end
36
58
 
59
+ it "defaults to oga on Opal" do
60
+ stub_const("RUBY_ENGINE", "opal")
61
+
62
+ context = described_class.new
63
+ expect(context.config.adapter_name).to eq(:oga)
64
+ end
65
+
66
+ it "prefers ox when it is already loaded" do
67
+ allow(Object).to receive(:const_defined?).and_call_original
68
+ allow(Object).to receive(:const_defined?).with(:Nokogiri).and_return(false)
69
+ allow(Object).to receive(:const_defined?).with(:Ox).and_return(true)
70
+ allow(Object).to receive(:const_defined?).with(:Oga).and_return(false)
71
+
72
+ context = described_class.new
73
+ expect(context.config.adapter_name).to eq(:ox)
74
+ end
75
+
37
76
  it "uses configured options from the block" do
38
77
  described_class.configure do |config|
39
78
  config.default_adapter = :oga
@@ -27,7 +27,7 @@ RSpec.shared_examples "Performance Examples" do
27
27
  {
28
28
  nokogiri: { parser: 15, serializer: 1000 },
29
29
  oga: { parser: 10, serializer: 100 },
30
- rexml: { parser: 0, serializer: 60 },
30
+ rexml: { parser: 0, serializer: 5 },
31
31
  ox: { parser: 2, serializer: 1000 },
32
32
  headed_ox: { parser: 2, serializer: 1000 },
33
33
  libxml: { parser: 10, serializer: 30 },
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.17
4
+ version: 0.1.18
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-04-24 00:00:00.000000000 Z
11
+ date: 2026-04-27 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  Moxml is a unified XML manipulation library that provides a common API