moxml 0.1.3 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55)
  1. checksums.yaml +4 -4
  2. data/.gitignore +0 -1
  3. data/.rubocop_todo.yml +48 -20
  4. data/Gemfile +3 -0
  5. data/LICENSE.md +33 -0
  6. data/README.adoc +95 -23
  7. data/lib/moxml/adapter/base.rb +20 -2
  8. data/lib/moxml/adapter/customized_ox/attribute.rb +29 -0
  9. data/lib/moxml/adapter/customized_ox/namespace.rb +34 -0
  10. data/lib/moxml/adapter/customized_ox/text.rb +12 -0
  11. data/lib/moxml/adapter/customized_rexml/formatter.rb +195 -0
  12. data/lib/moxml/adapter/nokogiri.rb +4 -2
  13. data/lib/moxml/adapter/oga.rb +25 -9
  14. data/lib/moxml/adapter/ox.rb +238 -92
  15. data/lib/moxml/adapter/rexml.rb +462 -0
  16. data/lib/moxml/adapter.rb +1 -1
  17. data/lib/moxml/attribute.rb +2 -2
  18. data/lib/moxml/cdata.rb +0 -4
  19. data/lib/moxml/comment.rb +0 -4
  20. data/lib/moxml/config.rb +1 -1
  21. data/lib/moxml/context.rb +2 -2
  22. data/lib/moxml/doctype.rb +1 -5
  23. data/lib/moxml/document.rb +1 -1
  24. data/lib/moxml/document_builder.rb +14 -18
  25. data/lib/moxml/element.rb +4 -3
  26. data/lib/moxml/namespace.rb +5 -1
  27. data/lib/moxml/node.rb +17 -2
  28. data/lib/moxml/node_set.rb +8 -1
  29. data/lib/moxml/processing_instruction.rb +0 -4
  30. data/lib/moxml/text.rb +0 -4
  31. data/lib/moxml/version.rb +1 -1
  32. data/lib/ox/node.rb +9 -0
  33. data/spec/fixtures/small.xml +1 -0
  34. data/spec/moxml/adapter/rexml_spec.rb +14 -0
  35. data/spec/moxml/all_with_adapters_spec.rb +2 -3
  36. data/spec/support/shared_examples/builder.rb +19 -2
  37. data/spec/support/shared_examples/cdata.rb +7 -5
  38. data/spec/support/shared_examples/declaration.rb +17 -4
  39. data/spec/support/shared_examples/doctype.rb +2 -1
  40. data/spec/support/shared_examples/document.rb +10 -0
  41. data/spec/support/shared_examples/edge_cases.rb +9 -3
  42. data/spec/support/shared_examples/element.rb +5 -1
  43. data/spec/support/shared_examples/examples/benchmark_spec.rb +51 -0
  44. data/spec/support/shared_examples/examples/memory.rb +30 -17
  45. data/spec/support/shared_examples/examples/readme_examples.rb +5 -0
  46. data/spec/support/shared_examples/examples/thread_safety.rb +2 -0
  47. data/spec/support/shared_examples/examples/xpath.rb +34 -3
  48. data/spec/support/shared_examples/integration.rb +6 -2
  49. data/spec/support/shared_examples/namespace.rb +16 -0
  50. data/spec/support/shared_examples/node.rb +4 -0
  51. data/spec/support/shared_examples/node_set.rb +20 -0
  52. data/spec/support/shared_examples/processing_instruction.rb +1 -1
  53. data/spec/support/shared_examples/text.rb +2 -1
  54. data/spec/support/shared_examples/xml_adapter.rb +169 -7
  55. metadata +13 -3
@@ -0,0 +1,195 @@
1
+ require "rexml/formatters/pretty"
2
+
3
+ module Moxml
4
+ module Adapter
5
+ module CustomizedRexml
6
+ # Custom REXML formatter that fixes indentation and wrapping issues
7
+ class Formatter < ::REXML::Formatters::Pretty
8
+ def initialize(indentation: 2, self_close_empty: false)
9
+ @indentation = " " * indentation
10
+ @level = 0
11
+ @compact = true
12
+ @width = -1 # Disable line wrapping
13
+ @self_close_empty = self_close_empty
14
+ end
15
+
16
+ def write(node, output)
17
+ case node
18
+ when ::REXML::XMLDecl
19
+ write_declaration(node, output)
20
+ else
21
+ super
22
+ end
23
+ end
24
+
25
+ def write_element(node, output)
26
+ # output << ' ' * @level
27
+ output << "<#{node.expanded_name}"
28
+ write_attributes(node, output)
29
+
30
+ if node.children.empty? && @self_close_empty
31
+ output << "/>"
32
+ return
33
+ end
34
+
35
+ output << ">"
36
+
37
+ # Check for mixed content
38
+ has_text = node.children.any? { |c| c.is_a?(::REXML::Text) && !c.to_s.strip.empty? }
39
+ has_elements = node.children.any? { |c| c.is_a?(::REXML::Element) }
40
+ mixed = has_text && has_elements
41
+
42
+ # Handle children based on content type
43
+ unless node.children.empty?
44
+ @level += @indentation.length unless mixed
45
+
46
+ node.children.each_with_index do |child, _index|
47
+ # Skip insignificant whitespace
48
+ next if child.is_a?(::REXML::Text) &&
49
+ child.to_s.strip.empty? &&
50
+ !(child.next_sibling.nil? && child.previous_sibling.nil?)
51
+
52
+ # Indent non-text nodes in non-mixed content
53
+ # if !mixed && !child.is_a?(::REXML::Text)
54
+ # output << ' ' * @level
55
+ # end
56
+
57
+ write(child, output)
58
+
59
+ # Add newlines between elements in non-mixed content
60
+ # if !mixed && !child.is_a?(::REXML::Text) && index < node.children.size - 1
61
+ # output << "\n"
62
+ # end
63
+ end
64
+
65
+ # Reset indentation for closing tag in non-mixed content
66
+ unless mixed
67
+ @level -= @indentation.length
68
+ # output << ' ' * @level
69
+ end
70
+ end
71
+
72
+ output << "</#{node.expanded_name}>"
73
+ # output << "\n" unless mixed
74
+ end
75
+
76
+ def write_text(node, output)
77
+ text = node.value
78
+ return if text.empty?
79
+
80
+ output << escape_text(text)
81
+ end
82
+
83
+ def escape_text(text)
84
+ text.to_s.gsub(/[<>&]/) do |match|
85
+ case match
86
+ when "<" then "&lt;"
87
+ when ">" then "&gt;"
88
+ when "&" then "&amp;"
89
+ end
90
+ end
91
+ end
92
+
93
+ private
94
+
95
+ def find_significant_sibling(node, direction)
96
+ method = direction == :next ? :next_sibling : :previous_sibling
97
+ sibling = node.send(method)
98
+ sibling = sibling.send(method) while sibling && sibling.is_a?(::REXML::Text) && sibling.to_s.strip.empty?
99
+ sibling
100
+ end
101
+
102
+ def write_cdata(node, output)
103
+ # output << ' ' * @level
104
+ output << ::REXML::CData::START
105
+ output << node.to_s.gsub(::REXML::CData::STOP, "]]]]><![CDATA[>")
106
+ output << ::REXML::CData::STOP
107
+ # output << "\n"
108
+ end
109
+
110
+ def write_comment(node, output)
111
+ # output << ' ' * @level
112
+ output << "<!--"
113
+ output << node.to_s
114
+ output << "-->"
115
+ # output << "\n"
116
+ end
117
+
118
+ def write_instruction(node, output)
119
+ # output << ' ' * @level
120
+ output << "<?"
121
+ output << node.target
122
+ output << " "
123
+ output << node.content if node.content
124
+ output << "?>"
125
+ # output << "\n"
126
+ end
127
+
128
+ def write_document(node, output)
129
+ node.children.each do |child|
130
+ write(child, output)
131
+ # output << "\n" unless child == node.children.last
132
+ end
133
+ end
134
+
135
+ def write_doctype(node, output)
136
+ output << "<!DOCTYPE "
137
+ output << node.name
138
+ output << " "
139
+ output << node.external_id if node.external_id
140
+ output << ">"
141
+ # output << "\n"
142
+ end
143
+
144
+ def write_declaration(node, output)
145
+ output << "<?xml"
146
+ output << %( version="#{node.version}") if node.version
147
+ output << %( encoding="#{node.encoding.to_s.upcase}") if node.writeencoding
148
+ output << %( standalone="#{node.standalone}") if node.standalone
149
+ output << "?>"
150
+ # output << "\n"
151
+ end
152
+
153
+ def write_attributes(node, output)
154
+ # First write namespace declarations
155
+ node.attributes.each do |name, attr|
156
+ next unless name.to_s.start_with?("xmlns:") || name.to_s == "xmlns"
157
+
158
+ name = "xmlns" if name.to_s == "xmlns:" # convert the default namespace
159
+ value = attr.respond_to?(:value) ? attr.value : attr
160
+ output << " #{name}=\"#{value}\""
161
+ end
162
+
163
+ # Then write regular attributes
164
+ node.attributes.each do |name, attr|
165
+ next if name.to_s.start_with?("xmlns:") || name.to_s == "xmlns"
166
+
167
+ output << " "
168
+ output << if attr.respond_to?(:prefix) && attr.prefix
169
+ "#{attr.prefix}:#{attr.name}"
170
+ else
171
+ name.to_s
172
+ end
173
+
174
+ output << "=\""
175
+ value = attr.respond_to?(:value) ? attr.value : attr
176
+ output << escape_attribute_value(value.to_s)
177
+ output << "\""
178
+ end
179
+ end
180
+
181
+ def escape_attribute_value(value)
182
+ value.to_s.gsub(/[<>&"]/) do |match|
183
+ case match
184
+ when "<" then "&lt;"
185
+ when ">" then "&gt;"
186
+ when "&" then "&amp;"
187
+ when '"' then "&quot;"
188
+ # when "'" then '&apos;'
189
+ end
190
+ end
191
+ end
192
+ end
193
+ end
194
+ end
195
+ end
@@ -31,7 +31,7 @@ module Moxml
31
31
  DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
32
32
  end
33
33
 
34
- def create_document
34
+ def create_document(_native_doc = nil)
  # Returns a new, empty Nokogiri XML document.
  # The optional argument is accepted but not used (underscore-prefixed).
  # NOTE(review): presumably added to match a shared adapter signature - confirm.
35
35
  ::Nokogiri::XML::Document.new
36
36
  end
37
37
 
@@ -120,6 +120,8 @@ module Moxml
120
120
  when ::Nokogiri::XML::CDATA then :cdata
121
121
  when ::Nokogiri::XML::Text then :text
122
122
  when ::Nokogiri::XML::Comment then :comment
123
+ when ::Nokogiri::XML::Attr then :attribute
124
+ when ::Nokogiri::XML::Namespace then :namespace
123
125
  when ::Nokogiri::XML::ProcessingInstruction then :processing_instruction
124
126
  when ::Nokogiri::XML::Document, ::Nokogiri::XML::DocumentFragment then :document
125
127
  when ::Nokogiri::XML::DTD then :doctype
@@ -221,7 +223,7 @@ module Moxml
221
223
  end
222
224
 
223
225
  def text_content(node)
224
- node.content
226
+ node.text
225
227
  end
226
228
 
227
229
  def inner_text(node)
@@ -24,7 +24,7 @@ module Moxml
24
24
  DocumentBuilder.new(Context.new(:oga)).build(native_doc)
25
25
  end
26
26
 
27
- def create_document
27
+ def create_document(_native_doc = nil)
  # Returns a new, empty Oga XML document.
  # The optional argument is accepted but not used (underscore-prefixed).
  # NOTE(review): presumably added to match a shared adapter signature - confirm.
28
28
  ::Oga::XML::Document.new
29
29
  end
30
30
 
@@ -111,6 +111,8 @@ module Moxml
111
111
  when ::Oga::XML::Text then :text
112
112
  when ::Oga::XML::Cdata then :cdata
113
113
  when ::Oga::XML::Comment then :comment
114
+ when ::Oga::XML::Attribute then :attribute
115
+ when ::Oga::XML::Namespace then :namespace
114
116
  when ::Oga::XML::ProcessingInstruction then :processing_instruction
115
117
  when ::Oga::XML::Document then :document
116
118
  when ::Oga::XML::Doctype then :doctype
@@ -213,11 +215,25 @@ module Moxml
213
215
  end
214
216
 
215
217
  # Places `sibling` immediately before `node`.
  # When both already share a parent, `sibling` is first removed from the
  # shared node set and then re-inserted at `node`'s index (looked up after
  # the removal, so it reflects the shifted positions). Otherwise the call
  # is delegated to `node.before`.
  def add_previous_sibling(node, sibling)
216
- node.before(sibling)
218
+ if node.parent == sibling.parent
219
+ # Oga doesn't manipulate children of the same parent
220
+ dup_sibling = node.node_set.delete(sibling)
221
+ index = node.node_set.index(node)
222
+ node.node_set.insert(index, dup_sibling)
223
+ else
224
+ node.before(sibling)
225
+ end
217
226
  end
218
227
 
219
228
  # Places `sibling` immediately after `node`.
  # When both already share a parent, `sibling` is first removed from the
  # shared node set and then re-inserted one position past `node`'s index
  # (looked up after the removal). Otherwise the call is delegated to
  # `node.after`.
  def add_next_sibling(node, sibling)
220
- node.after(sibling)
229
+ if node.parent == sibling.parent
230
+ # Oga doesn't manipulate children of the same parent
231
+ dup_sibling = node.node_set.delete(sibling)
232
+ index = node.node_set.index(node) + 1
233
+ node.node_set.insert(index, dup_sibling)
234
+ else
235
+ node.after(sibling)
236
+ end
221
237
  end
222
238
 
223
239
  def remove(node)
@@ -229,7 +245,7 @@ module Moxml
229
245
  end
230
246
 
231
247
  def replace_children(node, new_children)
232
- node.inner_text = ""
248
+ node.children = []
233
249
  new_children.each { |child| add_child(node, child) }
234
250
  end
235
251
 
@@ -247,7 +263,7 @@ module Moxml
247
263
  end
248
264
 
249
265
  def set_text_content(node, content)
250
- if node.respond_to?(:inner_text)
266
+ if node.respond_to?(:inner_text=)
251
267
  node.inner_text = content
252
268
  else
253
269
  # Oga::XML::Text node for example
@@ -296,14 +312,14 @@ module Moxml
296
312
  node.namespaces.values
297
313
  end
298
314
 
299
- def xpath(node, expression, _namespaces = {})
300
- node.xpath(expression).to_a
315
+ def xpath(node, expression, namespaces = nil)
  # Evaluates `expression` against `node` and returns the result as an Array.
  # An empty hash is passed as the second positional argument; `namespaces`
  # (optional, may be nil) is forwarded with its keys converted to strings.
  # ::LL::ParserError is re-raised as Moxml::XPathError with the same message.
316
+ node.xpath(expression, {}, namespaces: namespaces&.transform_keys(&:to_s)).to_a
301
317
  rescue ::LL::ParserError => e
302
318
  raise Moxml::XPathError, e.message
303
319
  end
304
320
 
305
# Evaluates `expression` against `node` via the node's own `at_xpath`
# and returns whatever that call returns.
#
# Kept consistent with `xpath`: namespace keys are converted to strings
# before being forwarded, and ::LL::ParserError is mapped to
# Moxml::XPathError in addition to ::Oga::XPath::Error.
#
# @param node [#at_xpath] node to query
# @param expression [String] XPath expression
# @param namespaces [Hash, nil] prefix => URI mapping; keys may be symbols
# @raise [Moxml::XPathError] when the expression cannot be parsed/evaluated
def at_xpath(node, expression, namespaces = nil)
  node.at_xpath(expression, namespaces: namespaces&.transform_keys(&:to_s))
rescue ::LL::ParserError, ::Oga::XPath::Error => e
  raise Moxml::XPathError, e.message
end