moxml 0.1.20 → 0.1.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/moxml/adapter/base.rb +5 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +3 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +6 -1
- data/lib/moxml/adapter/libxml/entity_ref_registry.rb +105 -0
- data/lib/moxml/adapter/libxml/entity_restorer.rb +92 -0
- data/lib/moxml/adapter/libxml.rb +381 -362
- data/lib/moxml/version.rb +1 -1
- data/spec/moxml/adapter/libxml_internals_spec.rb +167 -0
- data/spec/performance/benchmark_spec.rb +1 -1
- metadata +5 -2
data/spec/moxml/adapter/libxml_internals_spec.rb
CHANGED
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
begin
|
|
4
|
+
require "libxml"
|
|
5
|
+
rescue LoadError
|
|
6
|
+
return
|
|
7
|
+
end
|
|
8
|
+
|
|
9
|
+
require "moxml/adapter/libxml"
|
|
10
|
+
|
|
11
|
+
# Targeted tests for private helpers extracted during the perf refactor.
|
|
12
|
+
# The public `serialize` path covers them transitively, but a future
|
|
13
|
+
# refactor of those helpers benefits from fine-grained safety nets.
|
|
14
|
+
RSpec.describe Moxml::Adapter::Libxml do
|
|
15
|
+
describe ".emit_children_with_layout" do
|
|
16
|
+
let(:adapter) { described_class }
|
|
17
|
+
|
|
18
|
+
def parse_root(xml)
|
|
19
|
+
doc = LibXML::XML::Parser.string(xml).parse
|
|
20
|
+
doc.root
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def emit(root, indent_size: 2, depth: 0, eref_active: false)
|
|
24
|
+
output = +""
|
|
25
|
+
adapter.send(:emit_children_with_layout, output, root, indent_size, depth,
|
|
26
|
+
eref_active: eref_active)
|
|
27
|
+
output
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
context "with all-element children" do
|
|
31
|
+
it "emits a newline + per-level padding before each child" do
|
|
32
|
+
root = parse_root("<root><a/><b/></root>")
|
|
33
|
+
expect(emit(root)).to eq("\n <a></a>\n <b></b>")
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it "emits no padding when indent_size is zero" do
|
|
37
|
+
root = parse_root("<root><a/><b/></root>")
|
|
38
|
+
expect(emit(root, indent_size: 0)).to eq("\n<a></a>\n<b></b>")
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
it "scales padding with depth" do
|
|
42
|
+
root = parse_root("<root><a/></root>")
|
|
43
|
+
expect(emit(root, depth: 2)).to eq("\n <a></a>")
|
|
44
|
+
end
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
context "with mixed text + element content" do
|
|
48
|
+
it "does not emit newlines (text suppresses block layout)" do
|
|
49
|
+
root = parse_root("<p>hello<b>world</b>!</p>")
|
|
50
|
+
expect(emit(root)).to eq("hello<b>world</b>!")
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
it "emits a newline only before the first element when text follows" do
|
|
54
|
+
root = parse_root("<p><b>world</b>!</p>")
|
|
55
|
+
# First child is element, prev_block starts true → newline before it.
|
|
56
|
+
# Then text "!" sets prev_block false; no further block-level
|
|
57
|
+
# children follow, so no additional newlines.
|
|
58
|
+
expect(emit(root)).to eq("\n <b>world</b>!")
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
context "with CDATA + element children" do
|
|
63
|
+
it "treats cdata as text-like and suppresses surrounding newlines" do
|
|
64
|
+
cdata_xml = "<r><![CDATA[X]]><a/></r>"
|
|
65
|
+
root = parse_root(cdata_xml)
|
|
66
|
+
# CDATA is text-like → no newline before it, prev_block goes false,
|
|
67
|
+
# then <a/> follows but prev_block was set false by CDATA, so no \n.
|
|
68
|
+
expect(emit(root)).to eq("<![CDATA[X]]><a></a>")
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
context "with comment + element children" do
|
|
73
|
+
it "treats comment as block-level and emits newlines between siblings" do
|
|
74
|
+
root = parse_root("<x><!-- c --><y/></x>")
|
|
75
|
+
expect(emit(root)).to eq("\n <!-- c -->\n <y></y>")
|
|
76
|
+
end
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
context "with whitespace-only text children" do
|
|
80
|
+
it "skips them and produces the same layout as a doc without them" do
|
|
81
|
+
with_ws = emit(parse_root("<root> <a/> <b/> </root>"))
|
|
82
|
+
without_ws = emit(parse_root("<root><a/><b/></root>"))
|
|
83
|
+
expect(with_ws).to eq(without_ws)
|
|
84
|
+
end
|
|
85
|
+
end
|
|
86
|
+
|
|
87
|
+
context "with no children" do
|
|
88
|
+
it "appends nothing" do
|
|
89
|
+
root = parse_root("<empty/>")
|
|
90
|
+
expect(emit(root)).to eq("")
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
describe ".lookup_entity_ref_serialization" do
|
|
96
|
+
let(:adapter) { described_class }
|
|
97
|
+
let(:context) { Moxml.new(:libxml) }
|
|
98
|
+
|
|
99
|
+
# `lookup_entity_ref_serialization` is called from the recursive
|
|
100
|
+
# serialize path with a raw libxml ::Node (not the wrapper), so we
|
|
101
|
+
# unwrap to match the call-site contract.
|
|
102
|
+
def libxml_native(moxml_node)
|
|
103
|
+
adapter.send(:unpatch_node, moxml_node.native)
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
it "returns [nil, nil] when the document has no entity-ref attachments" do
|
|
107
|
+
root = libxml_native(context.parse("<root><a/></root>").root)
|
|
108
|
+
expect(adapter.send(:lookup_entity_ref_serialization, root)).to eq([nil, nil])
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
it "returns [nil, nil] for an element with no entity refs even when the doc has erefs elsewhere" do
|
|
112
|
+
doc = context.parse("<root><a/><b/></root>")
|
|
113
|
+
a = doc.root.children.first
|
|
114
|
+
b = doc.root.children.last
|
|
115
|
+
eref = Moxml::EntityReference.new(
|
|
116
|
+
adapter.create_native_entity_reference("amp"), context
|
|
117
|
+
)
|
|
118
|
+
a.add_child(eref)
|
|
119
|
+
|
|
120
|
+
expect(adapter.send(:lookup_entity_ref_serialization, libxml_native(b)))
|
|
121
|
+
.to eq([nil, nil])
|
|
122
|
+
end
|
|
123
|
+
|
|
124
|
+
it "returns [refs, sequence] when both are registered for the element" do
|
|
125
|
+
doc = context.parse("<root><a>text</a></root>")
|
|
126
|
+
a = doc.root.children.first
|
|
127
|
+
eref = Moxml::EntityReference.new(
|
|
128
|
+
adapter.create_native_entity_reference("amp"), context
|
|
129
|
+
)
|
|
130
|
+
a.add_child(eref)
|
|
131
|
+
|
|
132
|
+
refs, seq = adapter.send(:lookup_entity_ref_serialization, libxml_native(a))
|
|
133
|
+
expect(refs).to be_an(Array).and(satisfy { |r| !r.empty? })
|
|
134
|
+
expect(seq).to be_an(Array).and(include(:eref))
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
describe "entity-ref interleaved serialization" do
|
|
139
|
+
let(:adapter) { described_class }
|
|
140
|
+
let(:context) { Moxml.new(:libxml) }
|
|
141
|
+
|
|
142
|
+
it "preserves normal child indentation when entity refs are present" do
|
|
143
|
+
doc = context.parse("<root><a><b/></a></root>")
|
|
144
|
+
a = doc.root.children.first
|
|
145
|
+
eref = Moxml::EntityReference.new(
|
|
146
|
+
adapter.create_native_entity_reference("amp"), context
|
|
147
|
+
)
|
|
148
|
+
a.add_child(eref)
|
|
149
|
+
|
|
150
|
+
expect(doc.to_xml(no_declaration: true, indent: 2))
|
|
151
|
+
.to eq("<root>\n <a>\n <b></b>&amp;</a></root>")
|
|
152
|
+
end
|
|
153
|
+
end
|
|
154
|
+
|
|
155
|
+
describe Moxml::Adapter::Libxml::EntityRestorer do
|
|
156
|
+
let(:context) { Moxml.new(:libxml) }
|
|
157
|
+
|
|
158
|
+
it "restores entities through its public entry point" do
|
|
159
|
+
doc = context.parse("<p>\u00A9</p>")
|
|
160
|
+
context.config.restore_entities = true
|
|
161
|
+
|
|
162
|
+
described_class.new(doc).run
|
|
163
|
+
|
|
164
|
+
expect(doc.to_xml(no_declaration: true)).to eq("<p>&copy;</p>")
|
|
165
|
+
end
|
|
166
|
+
end
|
|
167
|
+
end
|
|
@@ -30,7 +30,7 @@ RSpec.shared_examples "Performance Examples" do
|
|
|
30
30
|
rexml: { parser: 0, serializer: 5 },
|
|
31
31
|
ox: { parser: 2, serializer: 1000 },
|
|
32
32
|
headed_ox: { parser: 2, serializer: 1000 },
|
|
33
|
-
libxml: { parser:
|
|
33
|
+
libxml: { parser: 500, serializer: 60 },
|
|
34
34
|
}
|
|
35
35
|
end
|
|
36
36
|
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: moxml
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.20
|
|
4
|
+
version: 0.1.21
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-05-
|
|
11
|
+
date: 2026-05-25 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: |
|
|
14
14
|
Moxml is a unified XML manipulation library that provides a common API
|
|
@@ -121,6 +121,8 @@ files:
|
|
|
121
121
|
- lib/moxml/adapter/customized_rexml/formatter.rb
|
|
122
122
|
- lib/moxml/adapter/headed_ox.rb
|
|
123
123
|
- lib/moxml/adapter/libxml.rb
|
|
124
|
+
- lib/moxml/adapter/libxml/entity_ref_registry.rb
|
|
125
|
+
- lib/moxml/adapter/libxml/entity_restorer.rb
|
|
124
126
|
- lib/moxml/adapter/nokogiri.rb
|
|
125
127
|
- lib/moxml/adapter/oga.rb
|
|
126
128
|
- lib/moxml/adapter/ox.rb
|
|
@@ -297,6 +299,7 @@ files:
|
|
|
297
299
|
- spec/moxml/adapter/base_spec.rb
|
|
298
300
|
- spec/moxml/adapter/entity_restoration_spec.rb
|
|
299
301
|
- spec/moxml/adapter/headed_ox_spec.rb
|
|
302
|
+
- spec/moxml/adapter/libxml_internals_spec.rb
|
|
300
303
|
- spec/moxml/adapter/libxml_spec.rb
|
|
301
304
|
- spec/moxml/adapter/nokogiri_spec.rb
|
|
302
305
|
- spec/moxml/adapter/oga_spec.rb
|