moxml 0.1.3 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.rubocop_todo.yml +48 -20
- data/Gemfile +3 -0
- data/LICENSE.md +33 -0
- data/README.adoc +95 -23
- data/lib/moxml/adapter/base.rb +20 -2
- data/lib/moxml/adapter/customized_ox/attribute.rb +29 -0
- data/lib/moxml/adapter/customized_ox/namespace.rb +34 -0
- data/lib/moxml/adapter/customized_ox/text.rb +12 -0
- data/lib/moxml/adapter/customized_rexml/formatter.rb +195 -0
- data/lib/moxml/adapter/nokogiri.rb +4 -2
- data/lib/moxml/adapter/oga.rb +25 -9
- data/lib/moxml/adapter/ox.rb +238 -92
- data/lib/moxml/adapter/rexml.rb +462 -0
- data/lib/moxml/adapter.rb +1 -1
- data/lib/moxml/attribute.rb +2 -2
- data/lib/moxml/cdata.rb +0 -4
- data/lib/moxml/comment.rb +0 -4
- data/lib/moxml/config.rb +1 -1
- data/lib/moxml/context.rb +2 -2
- data/lib/moxml/doctype.rb +1 -5
- data/lib/moxml/document.rb +1 -1
- data/lib/moxml/document_builder.rb +14 -18
- data/lib/moxml/element.rb +4 -3
- data/lib/moxml/namespace.rb +5 -1
- data/lib/moxml/node.rb +17 -2
- data/lib/moxml/node_set.rb +8 -1
- data/lib/moxml/processing_instruction.rb +0 -4
- data/lib/moxml/text.rb +0 -4
- data/lib/moxml/version.rb +1 -1
- data/lib/ox/node.rb +9 -0
- data/spec/fixtures/small.xml +1 -0
- data/spec/moxml/adapter/rexml_spec.rb +14 -0
- data/spec/moxml/all_with_adapters_spec.rb +2 -3
- data/spec/support/shared_examples/builder.rb +19 -2
- data/spec/support/shared_examples/cdata.rb +7 -5
- data/spec/support/shared_examples/declaration.rb +17 -4
- data/spec/support/shared_examples/doctype.rb +2 -1
- data/spec/support/shared_examples/document.rb +10 -0
- data/spec/support/shared_examples/edge_cases.rb +9 -3
- data/spec/support/shared_examples/element.rb +5 -1
- data/spec/support/shared_examples/examples/benchmark_spec.rb +51 -0
- data/spec/support/shared_examples/examples/memory.rb +30 -17
- data/spec/support/shared_examples/examples/readme_examples.rb +5 -0
- data/spec/support/shared_examples/examples/thread_safety.rb +2 -0
- data/spec/support/shared_examples/examples/xpath.rb +34 -3
- data/spec/support/shared_examples/integration.rb +6 -2
- data/spec/support/shared_examples/namespace.rb +16 -0
- data/spec/support/shared_examples/node.rb +4 -0
- data/spec/support/shared_examples/node_set.rb +20 -0
- data/spec/support/shared_examples/processing_instruction.rb +1 -1
- data/spec/support/shared_examples/text.rb +2 -1
- data/spec/support/shared_examples/xml_adapter.rb +169 -7
- metadata +13 -3
data/lib/moxml/adapter/customized_rexml/formatter.rb
ADDED
@@ -0,0 +1,195 @@
|
|
1
|
+
require "rexml/formatters/pretty"
|
2
|
+
|
3
|
+
module Moxml
|
4
|
+
module Adapter
|
5
|
+
module CustomizedRexml
|
6
|
+
# Custom REXML formatter that fixes indentation and wrapping issues
|
7
|
+
class Formatter < ::REXML::Formatters::Pretty
|
8
|
+
def initialize(indentation: 2, self_close_empty: false)
|
9
|
+
@indentation = " " * indentation
|
10
|
+
@level = 0
|
11
|
+
@compact = true
|
12
|
+
@width = -1 # Disable line wrapping
|
13
|
+
@self_close_empty = self_close_empty
|
14
|
+
end
|
15
|
+
|
16
|
+
def write(node, output)
|
17
|
+
case node
|
18
|
+
when ::REXML::XMLDecl
|
19
|
+
write_declaration(node, output)
|
20
|
+
else
|
21
|
+
super
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
def write_element(node, output)
|
26
|
+
# output << ' ' * @level
|
27
|
+
output << "<#{node.expanded_name}"
|
28
|
+
write_attributes(node, output)
|
29
|
+
|
30
|
+
if node.children.empty? && @self_close_empty
|
31
|
+
output << "/>"
|
32
|
+
return
|
33
|
+
end
|
34
|
+
|
35
|
+
output << ">"
|
36
|
+
|
37
|
+
# Check for mixed content
|
38
|
+
has_text = node.children.any? { |c| c.is_a?(::REXML::Text) && !c.to_s.strip.empty? }
|
39
|
+
has_elements = node.children.any? { |c| c.is_a?(::REXML::Element) }
|
40
|
+
mixed = has_text && has_elements
|
41
|
+
|
42
|
+
# Handle children based on content type
|
43
|
+
unless node.children.empty?
|
44
|
+
@level += @indentation.length unless mixed
|
45
|
+
|
46
|
+
node.children.each_with_index do |child, _index|
|
47
|
+
# Skip insignificant whitespace
|
48
|
+
next if child.is_a?(::REXML::Text) &&
|
49
|
+
child.to_s.strip.empty? &&
|
50
|
+
!(child.next_sibling.nil? && child.previous_sibling.nil?)
|
51
|
+
|
52
|
+
# Indent non-text nodes in non-mixed content
|
53
|
+
# if !mixed && !child.is_a?(::REXML::Text)
|
54
|
+
# output << ' ' * @level
|
55
|
+
# end
|
56
|
+
|
57
|
+
write(child, output)
|
58
|
+
|
59
|
+
# Add newlines between elements in non-mixed content
|
60
|
+
# if !mixed && !child.is_a?(::REXML::Text) && index < node.children.size - 1
|
61
|
+
# output << "\n"
|
62
|
+
# end
|
63
|
+
end
|
64
|
+
|
65
|
+
# Reset indentation for closing tag in non-mixed content
|
66
|
+
unless mixed
|
67
|
+
@level -= @indentation.length
|
68
|
+
# output << ' ' * @level
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
output << "</#{node.expanded_name}>"
|
73
|
+
# output << "\n" unless mixed
|
74
|
+
end
|
75
|
+
|
76
|
+
def write_text(node, output)
|
77
|
+
text = node.value
|
78
|
+
return if text.empty?
|
79
|
+
|
80
|
+
output << escape_text(text)
|
81
|
+
end
|
82
|
+
|
83
|
+
def escape_text(text)
|
84
|
+
text.to_s.gsub(/[<>&]/) do |match|
|
85
|
+
case match
|
86
|
+
when "<" then "&lt;"
|
87
|
+
when ">" then "&gt;"
|
88
|
+
when "&" then "&amp;"
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
92
|
+
|
93
|
+
private
|
94
|
+
|
95
|
+
def find_significant_sibling(node, direction)
|
96
|
+
method = direction == :next ? :next_sibling : :previous_sibling
|
97
|
+
sibling = node.send(method)
|
98
|
+
sibling = sibling.send(method) while sibling && sibling.is_a?(::REXML::Text) && sibling.to_s.strip.empty?
|
99
|
+
sibling
|
100
|
+
end
|
101
|
+
|
102
|
+
def write_cdata(node, output)
|
103
|
+
# output << ' ' * @level
|
104
|
+
output << ::REXML::CData::START
|
105
|
+
output << node.to_s.gsub(::REXML::CData::STOP, "]]]]><![CDATA[>")
|
106
|
+
output << ::REXML::CData::STOP
|
107
|
+
# output << "\n"
|
108
|
+
end
|
109
|
+
|
110
|
+
def write_comment(node, output)
|
111
|
+
# output << ' ' * @level
|
112
|
+
output << "<!--"
|
113
|
+
output << node.to_s
|
114
|
+
output << "-->"
|
115
|
+
# output << "\n"
|
116
|
+
end
|
117
|
+
|
118
|
+
def write_instruction(node, output)
|
119
|
+
# output << ' ' * @level
|
120
|
+
output << "<?"
|
121
|
+
output << node.target
|
122
|
+
output << " "
|
123
|
+
output << node.content if node.content
|
124
|
+
output << "?>"
|
125
|
+
# output << "\n"
|
126
|
+
end
|
127
|
+
|
128
|
+
def write_document(node, output)
|
129
|
+
node.children.each do |child|
|
130
|
+
write(child, output)
|
131
|
+
# output << "\n" unless child == node.children.last
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
def write_doctype(node, output)
|
136
|
+
output << "<!DOCTYPE "
|
137
|
+
output << node.name
|
138
|
+
output << " "
|
139
|
+
output << node.external_id if node.external_id
|
140
|
+
output << ">"
|
141
|
+
# output << "\n"
|
142
|
+
end
|
143
|
+
|
144
|
+
def write_declaration(node, output)
|
145
|
+
output << "<?xml"
|
146
|
+
output << %( version="#{node.version}") if node.version
|
147
|
+
output << %( encoding="#{node.encoding.to_s.upcase}") if node.writeencoding
|
148
|
+
output << %( standalone="#{node.standalone}") if node.standalone
|
149
|
+
output << "?>"
|
150
|
+
# output << "\n"
|
151
|
+
end
|
152
|
+
|
153
|
+
def write_attributes(node, output)
|
154
|
+
# First write namespace declarations
|
155
|
+
node.attributes.each do |name, attr|
|
156
|
+
next unless name.to_s.start_with?("xmlns:") || name.to_s == "xmlns"
|
157
|
+
|
158
|
+
name = "xmlns" if name.to_s == "xmlns:" # convert the default namespace
|
159
|
+
value = attr.respond_to?(:value) ? attr.value : attr
|
160
|
+
output << " #{name}=\"#{value}\""
|
161
|
+
end
|
162
|
+
|
163
|
+
# Then write regular attributes
|
164
|
+
node.attributes.each do |name, attr|
|
165
|
+
next if name.to_s.start_with?("xmlns:") || name.to_s == "xmlns"
|
166
|
+
|
167
|
+
output << " "
|
168
|
+
output << if attr.respond_to?(:prefix) && attr.prefix
|
169
|
+
"#{attr.prefix}:#{attr.name}"
|
170
|
+
else
|
171
|
+
name.to_s
|
172
|
+
end
|
173
|
+
|
174
|
+
output << "=\""
|
175
|
+
value = attr.respond_to?(:value) ? attr.value : attr
|
176
|
+
output << escape_attribute_value(value.to_s)
|
177
|
+
output << "\""
|
178
|
+
end
|
179
|
+
end
|
180
|
+
|
181
|
+
def escape_attribute_value(value)
|
182
|
+
value.to_s.gsub(/[<>&"]/) do |match|
|
183
|
+
case match
|
184
|
+
when "<" then "&lt;"
|
185
|
+
when ">" then "&gt;"
|
186
|
+
when "&" then "&amp;"
|
187
|
+
when '"' then "&quot;"
|
188
|
+
# when "'" then '&apos;'
|
189
|
+
end
|
190
|
+
end
|
191
|
+
end
|
192
|
+
end
|
193
|
+
end
|
194
|
+
end
|
195
|
+
end
|
data/lib/moxml/adapter/nokogiri.rb
CHANGED
@@ -31,7 +31,7 @@ module Moxml
|
|
31
31
|
DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
|
32
32
|
end
|
33
33
|
|
34
|
-
def create_document
|
34
|
+
def create_document(_native_doc = nil)
|
35
35
|
::Nokogiri::XML::Document.new
|
36
36
|
end
|
37
37
|
|
@@ -120,6 +120,8 @@ module Moxml
|
|
120
120
|
when ::Nokogiri::XML::CDATA then :cdata
|
121
121
|
when ::Nokogiri::XML::Text then :text
|
122
122
|
when ::Nokogiri::XML::Comment then :comment
|
123
|
+
when ::Nokogiri::XML::Attr then :attribute
|
124
|
+
when ::Nokogiri::XML::Namespace then :namespace
|
123
125
|
when ::Nokogiri::XML::ProcessingInstruction then :processing_instruction
|
124
126
|
when ::Nokogiri::XML::Document, ::Nokogiri::XML::DocumentFragment then :document
|
125
127
|
when ::Nokogiri::XML::DTD then :doctype
|
@@ -221,7 +223,7 @@ module Moxml
|
|
221
223
|
end
|
222
224
|
|
223
225
|
def text_content(node)
|
224
|
-
node.
|
226
|
+
node.text
|
225
227
|
end
|
226
228
|
|
227
229
|
def inner_text(node)
|
data/lib/moxml/adapter/oga.rb
CHANGED
@@ -24,7 +24,7 @@ module Moxml
|
|
24
24
|
DocumentBuilder.new(Context.new(:oga)).build(native_doc)
|
25
25
|
end
|
26
26
|
|
27
|
-
def create_document
|
27
|
+
def create_document(_native_doc = nil)
|
28
28
|
::Oga::XML::Document.new
|
29
29
|
end
|
30
30
|
|
@@ -111,6 +111,8 @@ module Moxml
|
|
111
111
|
when ::Oga::XML::Text then :text
|
112
112
|
when ::Oga::XML::Cdata then :cdata
|
113
113
|
when ::Oga::XML::Comment then :comment
|
114
|
+
when ::Oga::XML::Attribute then :attribute
|
115
|
+
when ::Oga::XML::Namespace then :namespace
|
114
116
|
when ::Oga::XML::ProcessingInstruction then :processing_instruction
|
115
117
|
when ::Oga::XML::Document then :document
|
116
118
|
when ::Oga::XML::Doctype then :doctype
|
@@ -213,11 +215,25 @@ module Moxml
|
|
213
215
|
end
|
214
216
|
|
215
217
|
def add_previous_sibling(node, sibling)
|
216
|
-
node.
|
218
|
+
if node.parent == sibling.parent
|
219
|
+
# Oga doesn't manipulate children of the same parent
|
220
|
+
dup_sibling = node.node_set.delete(sibling)
|
221
|
+
index = node.node_set.index(node)
|
222
|
+
node.node_set.insert(index, dup_sibling)
|
223
|
+
else
|
224
|
+
node.before(sibling)
|
225
|
+
end
|
217
226
|
end
|
218
227
|
|
219
228
|
def add_next_sibling(node, sibling)
|
220
|
-
node.
|
229
|
+
if node.parent == sibling.parent
|
230
|
+
# Oga doesn't manipulate children of the same parent
|
231
|
+
dup_sibling = node.node_set.delete(sibling)
|
232
|
+
index = node.node_set.index(node) + 1
|
233
|
+
node.node_set.insert(index, dup_sibling)
|
234
|
+
else
|
235
|
+
node.after(sibling)
|
236
|
+
end
|
221
237
|
end
|
222
238
|
|
223
239
|
def remove(node)
|
@@ -229,7 +245,7 @@ module Moxml
|
|
229
245
|
end
|
230
246
|
|
231
247
|
def replace_children(node, new_children)
|
232
|
-
node.
|
248
|
+
node.children = []
|
233
249
|
new_children.each { |child| add_child(node, child) }
|
234
250
|
end
|
235
251
|
|
@@ -247,7 +263,7 @@ module Moxml
|
|
247
263
|
end
|
248
264
|
|
249
265
|
def set_text_content(node, content)
|
250
|
-
if node.respond_to?(:inner_text)
|
266
|
+
if node.respond_to?(:inner_text=)
|
251
267
|
node.inner_text = content
|
252
268
|
else
|
253
269
|
# Oga::XML::Text node for example
|
@@ -296,14 +312,14 @@ module Moxml
|
|
296
312
|
node.namespaces.values
|
297
313
|
end
|
298
314
|
|
299
|
-
def xpath(node, expression,
|
300
|
-
node.xpath(expression).to_a
|
315
|
+
def xpath(node, expression, namespaces = nil)
|
316
|
+
node.xpath(expression, {}, namespaces: namespaces&.transform_keys(&:to_s)).to_a
|
301
317
|
rescue ::LL::ParserError => e
|
302
318
|
raise Moxml::XPathError, e.message
|
303
319
|
end
|
304
320
|
|
305
|
-
def at_xpath(node, expression,
|
306
|
-
node.at_xpath(expression)
|
321
|
+
def at_xpath(node, expression, namespaces = nil)
|
322
|
+
node.at_xpath(expression, namespaces: namespaces)
|
307
323
|
rescue ::Oga::XPath::Error => e
|
308
324
|
raise Moxml::XPathError, e.message
|
309
325
|
end
|