moxml 0.1.20 → 0.1.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/moxml/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Moxml
4
- VERSION = "0.1.20"
4
+ VERSION = "0.1.21"
5
5
  end
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ begin
4
+ require "libxml"
5
+ rescue LoadError
6
+ return
7
+ end
8
+
9
+ require "moxml/adapter/libxml"
10
+
11
+ # Targeted tests for private helpers extracted during the perf refactor.
12
+ # The public `serialize` path covers them transitively, but a future
13
+ # refactor of those helpers benefits from fine-grained safety nets.
14
+ RSpec.describe Moxml::Adapter::Libxml do
15
+ describe ".emit_children_with_layout" do
16
+ let(:adapter) { described_class }
17
+
18
+ def parse_root(xml)
19
+ doc = LibXML::XML::Parser.string(xml).parse
20
+ doc.root
21
+ end
22
+
23
+ def emit(root, indent_size: 2, depth: 0, eref_active: false)
24
+ output = +""
25
+ adapter.send(:emit_children_with_layout, output, root, indent_size, depth,
26
+ eref_active: eref_active)
27
+ output
28
+ end
29
+
30
+ context "with all-element children" do
31
+ it "emits a newline + per-level padding before each child" do
32
+ root = parse_root("<root><a/><b/></root>")
33
+ expect(emit(root)).to eq("\n <a></a>\n <b></b>")
34
+ end
35
+
36
+ it "emits no padding when indent_size is zero" do
37
+ root = parse_root("<root><a/><b/></root>")
38
+ expect(emit(root, indent_size: 0)).to eq("\n<a></a>\n<b></b>")
39
+ end
40
+
41
+ it "scales padding with depth" do
42
+ root = parse_root("<root><a/></root>")
43
+ expect(emit(root, depth: 2)).to eq("\n <a></a>")
44
+ end
45
+ end
46
+
47
+ context "with mixed text + element content" do
48
+ it "does not emit newlines (text suppresses block layout)" do
49
+ root = parse_root("<p>hello<b>world</b>!</p>")
50
+ expect(emit(root)).to eq("hello<b>world</b>!")
51
+ end
52
+
53
+ it "emits a newline only before the first element when text follows" do
54
+ root = parse_root("<p><b>world</b>!</p>")
55
+ # First child is element, prev_block starts true → newline before it.
56
+ # Then text "!" sets prev_block false; no further block-level
57
+ # children follow, so no additional newlines.
58
+ expect(emit(root)).to eq("\n <b>world</b>!")
59
+ end
60
+ end
61
+
62
+ context "with CDATA + element children" do
63
+ it "treats cdata as text-like and suppresses surrounding newlines" do
64
+ cdata_xml = "<r><![CDATA[X]]><a/></r>"
65
+ root = parse_root(cdata_xml)
66
+ # CDATA is text-like → no newline before it, prev_block goes false,
67
+ # then <a/> follows but prev_block was set false by CDATA, so no \n.
68
+ expect(emit(root)).to eq("<![CDATA[X]]><a></a>")
69
+ end
70
+ end
71
+
72
+ context "with comment + element children" do
73
+ it "treats comment as block-level and emits newlines between siblings" do
74
+ root = parse_root("<x><!-- c --><y/></x>")
75
+ expect(emit(root)).to eq("\n <!-- c -->\n <y></y>")
76
+ end
77
+ end
78
+
79
+ context "with whitespace-only text children" do
80
+ it "skips them and produces the same layout as a doc without them" do
81
+ with_ws = emit(parse_root("<root> <a/> <b/> </root>"))
82
+ without_ws = emit(parse_root("<root><a/><b/></root>"))
83
+ expect(with_ws).to eq(without_ws)
84
+ end
85
+ end
86
+
87
+ context "with no children" do
88
+ it "appends nothing" do
89
+ root = parse_root("<empty/>")
90
+ expect(emit(root)).to eq("")
91
+ end
92
+ end
93
+ end
94
+
95
+ describe ".lookup_entity_ref_serialization" do
96
+ let(:adapter) { described_class }
97
+ let(:context) { Moxml.new(:libxml) }
98
+
99
+ # `lookup_entity_ref_serialization` is called from the recursive
100
+ # serialize path with a raw libxml ::Node (not the wrapper), so we
101
+ # unwrap to match the call-site contract.
102
+ def libxml_native(moxml_node)
103
+ adapter.send(:unpatch_node, moxml_node.native)
104
+ end
105
+
106
+ it "returns [nil, nil] when the document has no entity-ref attachments" do
107
+ root = libxml_native(context.parse("<root><a/></root>").root)
108
+ expect(adapter.send(:lookup_entity_ref_serialization, root)).to eq([nil, nil])
109
+ end
110
+
111
+ it "returns [nil, nil] for an element with no entity refs even when the doc has erefs elsewhere" do
112
+ doc = context.parse("<root><a/><b/></root>")
113
+ a = doc.root.children.first
114
+ b = doc.root.children.last
115
+ eref = Moxml::EntityReference.new(
116
+ adapter.create_native_entity_reference("amp"), context
117
+ )
118
+ a.add_child(eref)
119
+
120
+ expect(adapter.send(:lookup_entity_ref_serialization, libxml_native(b)))
121
+ .to eq([nil, nil])
122
+ end
123
+
124
+ it "returns [refs, sequence] when both are registered for the element" do
125
+ doc = context.parse("<root><a>text</a></root>")
126
+ a = doc.root.children.first
127
+ eref = Moxml::EntityReference.new(
128
+ adapter.create_native_entity_reference("amp"), context
129
+ )
130
+ a.add_child(eref)
131
+
132
+ refs, seq = adapter.send(:lookup_entity_ref_serialization, libxml_native(a))
133
+ expect(refs).to be_an(Array).and(satisfy { |r| !r.empty? })
134
+ expect(seq).to be_an(Array).and(include(:eref))
135
+ end
136
+ end
137
+
138
+ describe "entity-ref interleaved serialization" do
139
+ let(:adapter) { described_class }
140
+ let(:context) { Moxml.new(:libxml) }
141
+
142
+ it "preserves normal child indentation when entity refs are present" do
143
+ doc = context.parse("<root><a><b/></a></root>")
144
+ a = doc.root.children.first
145
+ eref = Moxml::EntityReference.new(
146
+ adapter.create_native_entity_reference("amp"), context
147
+ )
148
+ a.add_child(eref)
149
+
150
+ expect(doc.to_xml(no_declaration: true, indent: 2))
151
+ .to eq("<root>\n <a>\n <b></b>&amp;</a></root>")
152
+ end
153
+ end
154
+
155
+ describe Moxml::Adapter::Libxml::EntityRestorer do
156
+ let(:context) { Moxml.new(:libxml) }
157
+
158
+ it "restores entities through its public entry point" do
159
+ doc = context.parse("<p>\u00A9</p>")
160
+ context.config.restore_entities = true
161
+
162
+ described_class.new(doc).run
163
+
164
+ expect(doc.to_xml(no_declaration: true)).to eq("<p>&copy;</p>")
165
+ end
166
+ end
167
+ end
@@ -30,7 +30,7 @@ RSpec.shared_examples "Performance Examples" do
30
30
  rexml: { parser: 0, serializer: 5 },
31
31
  ox: { parser: 2, serializer: 1000 },
32
32
  headed_ox: { parser: 2, serializer: 1000 },
33
- libxml: { parser: 2, serializer: 3 },
33
+ libxml: { parser: 500, serializer: 60 },
34
34
  }
35
35
  end
36
36
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: moxml
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.20
4
+ version: 0.1.21
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2026-05-03 00:00:00.000000000 Z
11
+ date: 2026-05-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: |
14
14
  Moxml is a unified XML manipulation library that provides a common API
@@ -121,6 +121,8 @@ files:
121
121
  - lib/moxml/adapter/customized_rexml/formatter.rb
122
122
  - lib/moxml/adapter/headed_ox.rb
123
123
  - lib/moxml/adapter/libxml.rb
124
+ - lib/moxml/adapter/libxml/entity_ref_registry.rb
125
+ - lib/moxml/adapter/libxml/entity_restorer.rb
124
126
  - lib/moxml/adapter/nokogiri.rb
125
127
  - lib/moxml/adapter/oga.rb
126
128
  - lib/moxml/adapter/ox.rb
@@ -297,6 +299,7 @@ files:
297
299
  - spec/moxml/adapter/base_spec.rb
298
300
  - spec/moxml/adapter/entity_restoration_spec.rb
299
301
  - spec/moxml/adapter/headed_ox_spec.rb
302
+ - spec/moxml/adapter/libxml_internals_spec.rb
300
303
  - spec/moxml/adapter/libxml_spec.rb
301
304
  - spec/moxml/adapter/nokogiri_spec.rb
302
305
  - spec/moxml/adapter/oga_spec.rb