moxml 0.1.19 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -470,7 +470,9 @@ module Moxml
470
470
  if node.is_a?(::Oga::XML::Document)
471
471
  # Check if we should include declaration
472
472
  # Priority: explicit option > existence of xml_declaration (native or attachment)
473
- effective_xml_declaration = node.xml_declaration || attachments.get(node, :xml_declaration)
473
+ effective_xml_declaration = node.xml_declaration || attachments.get(
474
+ node, :xml_declaration
475
+ )
474
476
  should_include_decl = if options.key?(:no_declaration)
475
477
  !options[:no_declaration]
476
478
  elsif options.key?(:declaration)
@@ -519,7 +521,9 @@ module Moxml
519
521
 
520
522
  # Default: use XmlGenerator
521
523
  # But first check if we need to handle declaration specially
522
- effective_xml_declaration = node.is_a?(::Oga::XML::Document) && (node.xml_declaration || attachments.get(node, :xml_declaration))
524
+ effective_xml_declaration = node.is_a?(::Oga::XML::Document) && (node.xml_declaration || attachments.get(
525
+ node, :xml_declaration
526
+ ))
523
527
  if node.is_a?(::Oga::XML::Document) && effective_xml_declaration
524
528
  # Document has declaration - use custom handling to avoid duplicates
525
529
  output = []
@@ -74,8 +74,7 @@ module Moxml
74
74
  end
75
75
 
76
76
  def create_native_element(name, _owner_doc = nil)
77
- element = ::Ox::Element.new(name)
78
- element
77
+ ::Ox::Element.new(name)
79
78
  end
80
79
 
81
80
  def create_native_text(content, _owner_doc = nil)
@@ -622,6 +621,12 @@ module Moxml
622
621
  end
623
622
 
624
623
  def serialize(node, options = {})
624
+ # CustomizedOx::Text subclasses ::Ox::Node so it can carry a @parent
625
+ # back-reference, but that makes it unknown to Ox.dump's XML emitter,
626
+ # which then falls back to generic object marshalling. Short-circuit
627
+ # here with proper XML escaping.
628
+ return escape_xml_text(node.value) if node.is_a?(CustomizedOx::Text)
629
+
625
630
  needs_custom = needs_custom_serialize?(node)
626
631
 
627
632
  unless needs_custom
@@ -643,7 +648,7 @@ module Moxml
643
648
  return true if attachments.get(node, :has_entity_refs)
644
649
  return true if attachments.get(node, :has_cdata_end_markers)
645
650
  return false if attachments.key?(node, :has_entity_refs) &&
646
- attachments.key?(node, :has_cdata_end_markers)
651
+ attachments.key?(node, :has_cdata_end_markers)
647
652
  end
648
653
 
649
654
  # Only scan tree on first call — short-circuit on first hit
@@ -694,9 +699,8 @@ module Moxml
694
699
  encoding: options[:encoding],
695
700
  no_empty: options[:expand_empty],
696
701
  }
697
- result = output + ::Ox.dump(node, ox_options)
702
+ output + ::Ox.dump(node, ox_options)
698
703
  # Fix CDATA ]]> end markers that Ox doesn't escape
699
- result
700
704
  end
701
705
 
702
706
  def tree_has_entity_references?(node)
@@ -721,9 +725,13 @@ module Moxml
721
725
  when ::Ox::CData
722
726
  node.value&.include?("]]>") || false
723
727
  when ::Ox::Element
724
- node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
728
+ node.nodes&.any? do |child|
729
+ tree_has_cdata_end_markers?(child)
730
+ end || false
725
731
  when ::Ox::Document
726
- node.nodes&.any? { |child| tree_has_cdata_end_markers?(child) } || false
732
+ node.nodes&.any? do |child|
733
+ tree_has_cdata_end_markers?(child)
734
+ end || false
727
735
  else
728
736
  false
729
737
  end
@@ -814,7 +822,6 @@ module Moxml
814
822
  end
815
823
  end
816
824
 
817
-
818
825
  # Translate a subset of XPath to Ox locate() syntax
819
826
  # Supports: //element, /path/to/element, .//element, element[@attr]
820
827
  # Note: Ox locate() doesn't support namespace prefixes in the path
data/lib/moxml/builder.rb CHANGED
@@ -30,12 +30,14 @@ module Moxml
30
30
  # and a valid XML tag name (XSD/RelaxNG).
31
31
  def element(name_or_attrs = nil, attributes = {}, &block)
32
32
  if name_or_attrs.is_a?(Hash)
33
- return create_element_node("element", name_or_attrs, block: block, eval_block: false)
33
+ return create_element_node("element", name_or_attrs, block: block,
34
+ eval_block: false)
34
35
  end
35
36
 
36
37
  raise ArgumentError, "element requires a tag name" if name_or_attrs.nil?
37
38
 
38
- create_element_node(name_or_attrs, attributes, block: block, eval_block: true)
39
+ create_element_node(name_or_attrs, attributes, block: block,
40
+ eval_block: true)
39
41
  end
40
42
 
41
43
  def text(content)
@@ -101,10 +103,14 @@ module Moxml
101
103
  text_content = args.first.is_a?(String) ? args.shift : nil
102
104
  attrs = args.first.is_a?(Hash) ? args.shift : {}
103
105
 
104
- raise ArgumentError, "unexpected arguments for #{method_name}: #{args.inspect}" unless args.empty?
106
+ unless args.empty?
107
+ raise ArgumentError,
108
+ "unexpected arguments for #{method_name}: #{args.inspect}"
109
+ end
105
110
 
106
111
  if text_content && block
107
- raise ArgumentError, "#{method_name}: cannot combine text content with a block"
112
+ raise ArgumentError,
113
+ "#{method_name}: cannot combine text content with a block"
108
114
  end
109
115
 
110
116
  # Strip trailing underscore to allow reserved Ruby method names as tags
@@ -126,7 +132,8 @@ module Moxml
126
132
  # Single method for all element creation.
127
133
  # eval_block: true → instance_eval (build DSL context)
128
134
  # eval_block: false → yield (preserves caller's self)
129
- def create_element_node(tag_name, attrs = {}, text_content: nil, block: nil, eval_block: true)
135
+ def create_element_node(tag_name, attrs = {}, text_content: nil,
136
+ block: nil, eval_block: true)
130
137
  el = @document.create_element(tag_name)
131
138
 
132
139
  attrs.each do |key, value|
@@ -156,6 +156,7 @@ module Moxml
156
156
  def primary_name_for_codepoint(codepoint)
157
157
  names = @by_codepoint[codepoint]
158
158
  return nil unless names&.any?
159
+
159
160
  # Prefer lowercase names (e.g., "amp" over "AMP") for XML compatibility
160
161
  names.find { |n| n == n.downcase } || names.first
161
162
  end
data/lib/moxml/text.rb CHANGED
@@ -15,5 +15,9 @@ module Moxml
15
15
  def content=(text)
16
16
  adapter.set_text_content(@native, normalize_xml_value(text))
17
17
  end
18
+
19
+ def to_s
20
+ content
21
+ end
18
22
  end
19
23
  end
data/lib/moxml/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- VERSION = "0.1.19"
4
+ VERSION = "0.1.21"
5
5
  end
@@ -1758,7 +1758,8 @@ module Moxml
1758
1758
  current = visit.pop
1759
1759
 
1760
1760
  # Function name is stored in :value field, not children
1761
- if (current.type == :call || current.type == :function) && current.value == name
1761
+ if %i[call
1762
+ function].include?(current.type) && current.value == name
1762
1763
  return true
1763
1764
  end
1764
1765
 
@@ -170,7 +170,9 @@ RSpec.shared_examples "Moxml Edge Cases" do
170
170
  describe "whitespace text node preservation" do
171
171
  # Ox/HeadedOx do not generate whitespace-only text nodes in their parser,
172
172
  # so these tests only apply to adapters that do (Nokogiri, OGA, REXML, LibXML)
173
- let(:preserves_ws) { !%i[ox headed_ox].include?(context.config.adapter_name) }
173
+ let(:preserves_ws) do
174
+ !%i[ox headed_ox].include?(context.config.adapter_name)
175
+ end
174
176
 
175
177
  it "preserves whitespace-only text nodes between sibling elements" do
176
178
  unless preserves_ws
@@ -196,7 +198,7 @@ RSpec.shared_examples "Moxml Edge Cases" do
196
198
  expect(ws_nodes).not_to be_empty
197
199
 
198
200
  # Element children should still be accessible
199
- elements = children.select { |c| c.is_a?(Moxml::Element) }
201
+ elements = children.grep(Moxml::Element)
200
202
  expect(elements.map(&:name)).to eq(%w[a b c])
201
203
  end
202
204
 
@@ -116,7 +116,7 @@ RSpec.shared_examples "Entity Reference Whitespace Preservation" do
116
116
  children = doc.root.children
117
117
 
118
118
  # Whitespace text nodes between elements are preserved
119
- elements = children.select { |c| c.is_a?(Moxml::Element) }
119
+ elements = children.grep(Moxml::Element)
120
120
  expect(elements.length).to eq(2)
121
121
  expect(elements.map(&:name)).to eq(%w[child1 child2])
122
122
  end
@@ -38,7 +38,9 @@ RSpec.shared_examples "Moxml::DocumentBuilder" do
38
38
  expect(non_ws_children[1]).to be_a(Moxml::Element)
39
39
  expect(non_ws_children[1].name).to eq("child")
40
40
  expect(non_ws_children[1]["id"]).to eq("1")
41
- expect(non_ws_children[1].children.find { |c| c.is_a?(Moxml::Cdata) }).to be_a(Moxml::Cdata)
41
+ expect(non_ws_children[1].children.find do |c|
42
+ c.is_a?(Moxml::Cdata)
43
+ end).to be_a(Moxml::Cdata)
42
44
  expect(non_ws_children[2]).to be_a(Moxml::ProcessingInstruction)
43
45
  end
44
46
  end
@@ -129,16 +129,20 @@ RSpec.shared_examples "Moxml::EntityReference" do
129
129
 
130
130
  describe "entity restoration" do
131
131
  it "restores standard XML entities when enabled" do
132
- ctx_restore = Moxml.new(context.config.adapter_name) { |c| c.restore_entities = true }
132
+ ctx_restore = Moxml.new(context.config.adapter_name) do |c|
133
+ c.restore_entities = true
134
+ end
133
135
  doc = ctx_restore.parse("<p>a&amp;b</p>")
134
136
  output = doc.to_xml
135
137
  expect(output).to include("&amp;")
136
138
  end
137
139
 
138
140
  it "does not create entity references when disabled" do
139
- ctx_no_restore = Moxml.new(context.config.adapter_name) { |c| c.restore_entities = false }
141
+ ctx_no_restore = Moxml.new(context.config.adapter_name) do |c|
142
+ c.restore_entities = false
143
+ end
140
144
  doc = ctx_no_restore.parse("<p>text</p>")
141
- refs = doc.root.children.select { |c| c.is_a?(Moxml::EntityReference) }
145
+ refs = doc.root.children.grep(Moxml::EntityReference)
142
146
  expect(refs).to be_empty
143
147
  end
144
148
  end
@@ -197,7 +201,9 @@ RSpec.shared_examples "Moxml::EntityReference" do
197
201
 
198
202
  it "rejects invalid modes" do
199
203
  cfg = Moxml::Config.new(context.config.adapter_name)
200
- expect { cfg.entity_restoration_mode = :bogus }.to raise_error(ArgumentError)
204
+ expect do
205
+ cfg.entity_restoration_mode = :bogus
206
+ end.to raise_error(ArgumentError)
201
207
  end
202
208
 
203
209
  it "restores non-standard entities in lenient mode" do
@@ -42,7 +42,7 @@ RSpec.shared_examples "Moxml::NodeSet" do
42
42
 
43
43
  it "compares nodes" do
44
44
  xpath_results = doc.xpath("//child")
45
- element_children = doc.root.children.select { |c| c.is_a?(Moxml::Element) }
45
+ element_children = doc.root.children.grep(Moxml::Element)
46
46
  expect(xpath_results.map(&:native)).to eq(element_children.map(&:native))
47
47
  end
48
48
  end
@@ -145,7 +145,7 @@ RSpec.describe Moxml::Adapter::HeadedOx do
145
145
  it "returns first matching node" do
146
146
  result = adapter.at_xpath(doc, "//book")
147
147
 
148
- expect(result).to be_a(::Ox::Element)
148
+ expect(result).to be_a(Ox::Element)
149
149
  expect(result.name).to eq("book")
150
150
  end
151
151
 
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "libxml"
5
+ rescue LoadError
6
+ return
7
+ end
8
+
9
+ require "moxml/adapter/libxml"
10
+
11
+ # Targeted tests for private helpers extracted during the perf refactor.
12
+ # The public `serialize` path covers them transitively, but a future
13
+ # refactor of those helpers benefits from fine-grained safety nets.
14
+ RSpec.describe Moxml::Adapter::Libxml do
15
+ describe ".emit_children_with_layout" do
16
+ let(:adapter) { described_class }
17
+
18
+ def parse_root(xml)
19
+ doc = LibXML::XML::Parser.string(xml).parse
20
+ doc.root
21
+ end
22
+
23
+ def emit(root, indent_size: 2, depth: 0, eref_active: false)
24
+ output = +""
25
+ adapter.send(:emit_children_with_layout, output, root, indent_size, depth,
26
+ eref_active: eref_active)
27
+ output
28
+ end
29
+
30
+ context "with all-element children" do
31
+ it "emits a newline + per-level padding before each child" do
32
+ root = parse_root("<root><a/><b/></root>")
33
+ expect(emit(root)).to eq("\n <a></a>\n <b></b>")
34
+ end
35
+
36
+ it "emits no padding when indent_size is zero" do
37
+ root = parse_root("<root><a/><b/></root>")
38
+ expect(emit(root, indent_size: 0)).to eq("\n<a></a>\n<b></b>")
39
+ end
40
+
41
+ it "scales padding with depth" do
42
+ root = parse_root("<root><a/></root>")
43
+ expect(emit(root, depth: 2)).to eq("\n <a></a>")
44
+ end
45
+ end
46
+
47
+ context "with mixed text + element content" do
48
+ it "does not emit newlines (text suppresses block layout)" do
49
+ root = parse_root("<p>hello<b>world</b>!</p>")
50
+ expect(emit(root)).to eq("hello<b>world</b>!")
51
+ end
52
+
53
+ it "emits a newline only before the first element when text follows" do
54
+ root = parse_root("<p><b>world</b>!</p>")
55
+ # First child is element, prev_block starts true → newline before it.
56
+ # Then text "!" sets prev_block false; no further block-level
57
+ # children follow, so no additional newlines.
58
+ expect(emit(root)).to eq("\n <b>world</b>!")
59
+ end
60
+ end
61
+
62
+ context "with CDATA + element children" do
63
+ it "treats cdata as text-like and suppresses surrounding newlines" do
64
+ cdata_xml = "<r><![CDATA[X]]><a/></r>"
65
+ root = parse_root(cdata_xml)
66
+ # CDATA is text-like → no newline before it, prev_block goes false,
67
+ # then <a/> follows but prev_block was set false by CDATA, so no \n.
68
+ expect(emit(root)).to eq("<![CDATA[X]]><a></a>")
69
+ end
70
+ end
71
+
72
+ context "with comment + element children" do
73
+ it "treats comment as block-level and emits newlines between siblings" do
74
+ root = parse_root("<x><!-- c --><y/></x>")
75
+ expect(emit(root)).to eq("\n <!-- c -->\n <y></y>")
76
+ end
77
+ end
78
+
79
+ context "with whitespace-only text children" do
80
+ it "skips them and produces the same layout as a doc without them" do
81
+ with_ws = emit(parse_root("<root> <a/> <b/> </root>"))
82
+ without_ws = emit(parse_root("<root><a/><b/></root>"))
83
+ expect(with_ws).to eq(without_ws)
84
+ end
85
+ end
86
+
87
+ context "with no children" do
88
+ it "appends nothing" do
89
+ root = parse_root("<empty/>")
90
+ expect(emit(root)).to eq("")
91
+ end
92
+ end
93
+ end
94
+
95
+ describe ".lookup_entity_ref_serialization" do
96
+ let(:adapter) { described_class }
97
+ let(:context) { Moxml.new(:libxml) }
98
+
99
+ # `lookup_entity_ref_serialization` is called from the recursive
100
+ # serialize path with a raw libxml ::Node (not the wrapper), so we
101
+ # unwrap to match the call-site contract.
102
+ def libxml_native(moxml_node)
103
+ adapter.send(:unpatch_node, moxml_node.native)
104
+ end
105
+
106
+ it "returns [nil, nil] when the document has no entity-ref attachments" do
107
+ root = libxml_native(context.parse("<root><a/></root>").root)
108
+ expect(adapter.send(:lookup_entity_ref_serialization, root)).to eq([nil, nil])
109
+ end
110
+
111
+ it "returns [nil, nil] for an element with no entity refs even when the doc has erefs elsewhere" do
112
+ doc = context.parse("<root><a/><b/></root>")
113
+ a = doc.root.children.first
114
+ b = doc.root.children.last
115
+ eref = Moxml::EntityReference.new(
116
+ adapter.create_native_entity_reference("amp"), context
117
+ )
118
+ a.add_child(eref)
119
+
120
+ expect(adapter.send(:lookup_entity_ref_serialization, libxml_native(b)))
121
+ .to eq([nil, nil])
122
+ end
123
+
124
+ it "returns [refs, sequence] when both are registered for the element" do
125
+ doc = context.parse("<root><a>text</a></root>")
126
+ a = doc.root.children.first
127
+ eref = Moxml::EntityReference.new(
128
+ adapter.create_native_entity_reference("amp"), context
129
+ )
130
+ a.add_child(eref)
131
+
132
+ refs, seq = adapter.send(:lookup_entity_ref_serialization, libxml_native(a))
133
+ expect(refs).to be_an(Array).and(satisfy { |r| !r.empty? })
134
+ expect(seq).to be_an(Array).and(include(:eref))
135
+ end
136
+ end
137
+
138
+ describe "entity-ref interleaved serialization" do
139
+ let(:adapter) { described_class }
140
+ let(:context) { Moxml.new(:libxml) }
141
+
142
+ it "preserves normal child indentation when entity refs are present" do
143
+ doc = context.parse("<root><a><b/></a></root>")
144
+ a = doc.root.children.first
145
+ eref = Moxml::EntityReference.new(
146
+ adapter.create_native_entity_reference("amp"), context
147
+ )
148
+ a.add_child(eref)
149
+
150
+ expect(doc.to_xml(no_declaration: true, indent: 2))
151
+ .to eq("<root>\n <a>\n <b></b>&amp;</a></root>")
152
+ end
153
+ end
154
+
155
+ describe Moxml::Adapter::Libxml::EntityRestorer do
156
+ let(:context) { Moxml.new(:libxml) }
157
+
158
+ it "restores entities through its public entry point" do
159
+ doc = context.parse("<p>\u00A9</p>")
160
+ context.config.restore_entities = true
161
+
162
+ described_class.new(doc).run
163
+
164
+ expect(doc.to_xml(no_declaration: true)).to eq("<p>&copy;</p>")
165
+ end
166
+ end
167
+ end
@@ -19,6 +19,29 @@ RSpec.describe Moxml::Text do
19
19
  text = doc.root.children.first
20
20
  expect(text.to_xml).to eq("plain text")
21
21
  end
22
+
23
+ it "escapes XML special characters" do
24
+ escaped_doc = context.parse("<root>a &lt; b &amp; c</root>")
25
+ text = escaped_doc.root.children.first
26
+ expect(text.to_xml).to eq("a &lt; b &amp; c")
27
+ end
28
+ end
29
+
30
+ describe "#to_s" do
31
+ it "returns text content" do
32
+ text = doc.root.children.first
33
+ expect(text.to_s).to eq("plain text")
34
+ end
35
+
36
+ it "is consistent across adapters" do
37
+ Moxml::Adapter::AVALIABLE_ADAPTERS.each do |adapter_name|
38
+ ctx = Moxml.new(adapter_name)
39
+ d = ctx.parse("<root>hello world</root>")
40
+ text = d.root.children.first
41
+ expect(text.to_s).to eq("hello world"),
42
+ "Text#to_s for #{adapter_name} adapter"
43
+ end
44
+ end
22
45
  end
23
46
 
24
47
  describe "creation" do
@@ -127,7 +127,7 @@ RSpec.describe "XPath Node Functions" do
127
127
  it "inherits language from parent element" do
128
128
  ast = Moxml::XPath::Parser.parse('lang("en")')
129
129
  proc = Moxml::XPath::Compiler.compile_with_cache(ast)
130
- child = doc_with_lang.root.children.select { |c| c.is_a?(Moxml::Element) }.first
130
+ child = doc_with_lang.root.children.grep(Moxml::Element).first
131
131
  result = proc.call(child)
132
132
 
133
133
  expect(result).to be true
@@ -136,7 +136,7 @@ RSpec.describe "XPath Node Functions" do
136
136
  it "uses closest xml:lang attribute" do
137
137
  ast = Moxml::XPath::Parser.parse('lang("fr")')
138
138
  proc = Moxml::XPath::Compiler.compile_with_cache(ast)
139
- elements = doc_with_lang.root.children.select { |c| c.is_a?(Moxml::Element) }
139
+ elements = doc_with_lang.root.children.grep(Moxml::Element)
140
140
  other = elements[1]
141
141
  result = proc.call(other)
142
142
 
@@ -30,7 +30,7 @@ RSpec.shared_examples "Performance Examples" do
30
30
  rexml: { parser: 0, serializer: 5 },
31
31
  ox: { parser: 2, serializer: 1000 },
32
32
  headed_ox: { parser: 2, serializer: 1000 },
33
- libxml: { parser: 2, serializer: 3 },
33
+ libxml: { parser: 500, serializer: 60 },
34
34
  }
35
35
  end
36
36
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.19
4
+ version: 0.1.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-01 00:00:00.000000000 Z
11
+ date: 2026-05-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  Moxml is a unified XML manipulation library that provides a common API
@@ -121,6 +121,8 @@ files:
121
121
  - lib/moxml/adapter/customized_rexml/formatter.rb
122
122
  - lib/moxml/adapter/headed_ox.rb
123
123
  - lib/moxml/adapter/libxml.rb
124
+ - lib/moxml/adapter/libxml/entity_ref_registry.rb
125
+ - lib/moxml/adapter/libxml/entity_restorer.rb
124
126
  - lib/moxml/adapter/nokogiri.rb
125
127
  - lib/moxml/adapter/oga.rb
126
128
  - lib/moxml/adapter/ox.rb
@@ -297,6 +299,7 @@ files:
297
299
  - spec/moxml/adapter/base_spec.rb
298
300
  - spec/moxml/adapter/entity_restoration_spec.rb
299
301
  - spec/moxml/adapter/headed_ox_spec.rb
302
+ - spec/moxml/adapter/libxml_internals_spec.rb
300
303
  - spec/moxml/adapter/libxml_spec.rb
301
304
  - spec/moxml/adapter/nokogiri_spec.rb
302
305
  - spec/moxml/adapter/oga_spec.rb