moxml 0.1.0 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +15 -0
  3. data/.github/workflows/release.yml +23 -0
  4. data/.gitignore +3 -0
  5. data/.rubocop.yml +2 -0
  6. data/.rubocop_todo.yml +65 -0
  7. data/.ruby-version +1 -0
  8. data/Gemfile +10 -3
  9. data/README.adoc +401 -594
  10. data/lib/moxml/adapter/base.rb +102 -0
  11. data/lib/moxml/adapter/customized_oga/xml_declaration.rb +18 -0
  12. data/lib/moxml/adapter/customized_oga/xml_generator.rb +104 -0
  13. data/lib/moxml/adapter/nokogiri.rb +319 -0
  14. data/lib/moxml/adapter/oga.rb +318 -0
  15. data/lib/moxml/adapter/ox.rb +325 -0
  16. data/lib/moxml/adapter.rb +26 -170
  17. data/lib/moxml/attribute.rb +47 -14
  18. data/lib/moxml/builder.rb +64 -0
  19. data/lib/moxml/cdata.rb +4 -26
  20. data/lib/moxml/comment.rb +6 -22
  21. data/lib/moxml/config.rb +39 -15
  22. data/lib/moxml/context.rb +29 -0
  23. data/lib/moxml/declaration.rb +16 -26
  24. data/lib/moxml/doctype.rb +9 -0
  25. data/lib/moxml/document.rb +51 -63
  26. data/lib/moxml/document_builder.rb +87 -0
  27. data/lib/moxml/element.rb +63 -97
  28. data/lib/moxml/error.rb +20 -0
  29. data/lib/moxml/namespace.rb +12 -37
  30. data/lib/moxml/node.rb +78 -58
  31. data/lib/moxml/node_set.rb +19 -222
  32. data/lib/moxml/processing_instruction.rb +6 -25
  33. data/lib/moxml/text.rb +4 -26
  34. data/lib/moxml/version.rb +1 -1
  35. data/lib/moxml/xml_utils/encoder.rb +55 -0
  36. data/lib/moxml/xml_utils.rb +80 -0
  37. data/lib/moxml.rb +33 -33
  38. data/moxml.gemspec +1 -1
  39. data/spec/moxml/adapter/nokogiri_spec.rb +14 -0
  40. data/spec/moxml/adapter/oga_spec.rb +14 -0
  41. data/spec/moxml/adapter/ox_spec.rb +49 -0
  42. data/spec/moxml/all_with_adapters_spec.rb +46 -0
  43. data/spec/moxml/config_spec.rb +55 -0
  44. data/spec/moxml/error_spec.rb +71 -0
  45. data/spec/moxml/examples/adapter_spec.rb +27 -0
  46. data/spec/moxml_spec.rb +50 -0
  47. data/spec/spec_helper.rb +32 -0
  48. data/spec/support/shared_examples/attribute.rb +165 -0
  49. data/spec/support/shared_examples/builder.rb +25 -0
  50. data/spec/support/shared_examples/cdata.rb +70 -0
  51. data/spec/support/shared_examples/comment.rb +65 -0
  52. data/spec/support/shared_examples/context.rb +35 -0
  53. data/spec/support/shared_examples/declaration.rb +93 -0
  54. data/spec/support/shared_examples/doctype.rb +25 -0
  55. data/spec/support/shared_examples/document.rb +110 -0
  56. data/spec/support/shared_examples/document_builder.rb +43 -0
  57. data/spec/support/shared_examples/edge_cases.rb +185 -0
  58. data/spec/support/shared_examples/element.rb +130 -0
  59. data/spec/support/shared_examples/examples/attribute.rb +42 -0
  60. data/spec/support/shared_examples/examples/basic_usage.rb +67 -0
  61. data/spec/support/shared_examples/examples/memory.rb +54 -0
  62. data/spec/support/shared_examples/examples/namespace.rb +65 -0
  63. data/spec/support/shared_examples/examples/readme_examples.rb +100 -0
  64. data/spec/support/shared_examples/examples/thread_safety.rb +43 -0
  65. data/spec/support/shared_examples/examples/xpath.rb +39 -0
  66. data/spec/support/shared_examples/integration.rb +135 -0
  67. data/spec/support/shared_examples/namespace.rb +96 -0
  68. data/spec/support/shared_examples/node.rb +110 -0
  69. data/spec/support/shared_examples/node_set.rb +90 -0
  70. data/spec/support/shared_examples/processing_instruction.rb +88 -0
  71. data/spec/support/shared_examples/text.rb +66 -0
  72. data/spec/support/shared_examples/xml_adapter.rb +191 -0
  73. data/spec/support/xml_matchers.rb +27 -0
  74. metadata +55 -6
  75. data/.github/workflows/main.yml +0 -27
  76. data/lib/moxml/error_handler.rb +0 -77
  77. data/lib/moxml/errors.rb +0 -169
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../xml_utils"
4
+ require_relative "../document_builder"
5
+
6
module Moxml
  module Adapter
    # Abstract base for XML backend adapters.
    #
    # Every method lives on the singleton class, so an adapter is used as
    # +SomeAdapter.create_element("a")+ rather than through an instance.
    # The public +create_*+ factories call validation / normalization helpers
    # (not defined in this file; presumably supplied by XmlUtils -- confirm)
    # and then delegate to a +create_native_*+ hook that a concrete adapter
    # is expected to override.
    #
    # Unimplemented methods raise NotImplementedError. Note that in Ruby this
    # class descends from ScriptError, not StandardError, so a bare +rescue+
    # does not catch it.
    class Base
      class << self
        # Mixed into the singleton class so the helpers are callable from
        # the class-level methods below.
        include XmlUtils

        # Makes +element+ the root of +doc+. Must be overridden.
        def set_root(doc, element)
          not_implemented!(__method__)
        end

        # Parses +xml+ using adapter-specific +options+. Must be overridden.
        def parse(xml, options = {})
          not_implemented!(__method__)
        end

        # Creates an empty native document. Must be overridden.
        def create_document
          not_implemented!(__method__)
        end

        # Validates +name+, then builds a native element.
        def create_element(name)
          validate_element_name(name)
          create_native_element(name)
        end

        # Builds a native text node from the normalized +content+.
        def create_text(content)
          create_native_text(normalize_xml_value(content))
        end

        # Builds a native CDATA node from the normalized +content+.
        def create_cdata(content)
          create_native_cdata(normalize_xml_value(content))
        end

        # Validates the raw +content+, then builds a native comment from its
        # normalized form.
        def create_comment(content)
          validate_comment_content(content)
          create_native_comment(normalize_xml_value(content))
        end

        # Builds a native doctype; the arguments are passed through unchanged.
        def create_doctype(name, external_id, system_id)
          create_native_doctype(name, external_id, system_id)
        end

        # Validates +target+, then builds a native processing instruction
        # with normalized +content+.
        def create_processing_instruction(target, content)
          validate_pi_target(target)
          create_native_processing_instruction(target, normalize_xml_value(content))
        end

        # Validates each declaration field, then builds a native declaration.
        def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
          validate_declaration_version(version)
          validate_declaration_encoding(encoding)
          validate_declaration_standalone(standalone)
          create_native_declaration(version, encoding, standalone)
        end

        # Validates +uri+ (and +prefix+ when one is given; a nil prefix skips
        # prefix validation), then builds a native namespace on +element+.
        def create_namespace(element, prefix, uri)
          validate_prefix(prefix) if prefix
          validate_uri(uri)
          create_native_namespace(element, prefix, uri)
        end

        protected

        # Backend hooks: each one must be overridden by a concrete adapter.

        def create_native_element(name)
          not_implemented!(__method__)
        end

        def create_native_text(content)
          not_implemented!(__method__)
        end

        def create_native_cdata(content)
          not_implemented!(__method__)
        end

        def create_native_comment(content)
          not_implemented!(__method__)
        end

        def create_native_doctype(name, external_id, system_id)
          not_implemented!(__method__)
        end

        def create_native_processing_instruction(target, content)
          not_implemented!(__method__)
        end

        def create_native_declaration(version, encoding, standalone)
          not_implemented!(__method__)
        end

        def create_native_namespace(element, prefix, uri)
          not_implemented!(__method__)
        end

        private

        # Raises NotImplementedError naming the receiving adapter and the
        # missing method, so the failure points at what has to be written.
        def not_implemented!(hook)
          raise NotImplementedError, "#{self}.#{hook} must be implemented by the adapter"
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "oga"
4
+
5
module Moxml
  module Adapter
    module CustomizedOga
      # XML declaration node that leaves the encoding unset unless the caller
      # supplies one.
      class XmlDeclaration < ::Oga::XML::XmlDeclaration
        # @param options [Hash] may carry :version, :encoding and :standalone
        #
        # NOTE(review): the parent initializer is not invoked here --
        # presumably to bypass Oga's UTF-8 encoding default; confirm that no
        # other parent setup is needed.
        def initialize(options = {})
          version, encoding, standalone =
            options.values_at(:version, :encoding, :standalone)

          @version = version || "1.0"
          # encoding is optional, but Oga sets it to UTF-8 by default;
          # here it is stored exactly as given (possibly nil)
          @encoding = encoding
          @standalone = standalone
        end
      end
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # rubocop:disable Style/FrozenStringLiteralComment
2
+
3
+ require "oga"
4
+
5
+ # monkey patch the Oga generator because it's not configurable
6
+ # https://github.com/yorickpeterse/oga/blob/main/lib/oga/xml/generator.rb
7
module Moxml
  module Adapter
    module CustomizedOga
      # Oga XML generator with a few serialization tweaks: empty elements are
      # always written as an open/close pair, attribute values containing an
      # apostrophe are emitted in double quotes with the apostrophe kept
      # literal, a "]]>" inside CDATA is split across two CDATA sections,
      # processing instructions get a space between name and text, and the
      # space before the closing "?>" of the XML declaration is removed.
      class XmlGenerator < ::Oga::XML::Generator
        # The CDATA terminator, which may not appear inside a CDATA section.
        CDATA_END = "]]>".freeze

        # Replacement for an embedded terminator: finish the current section
        # after "]]" and start a new one that begins with ">".
        CDATA_END_SPLIT = "]]]]><![CDATA[>".freeze

        # Always expand tags: no element is ever written self-closed.
        def self_closing?(_element)
          false
        end

        # Appends the opening tag of +element+, including its attributes,
        # to +output+.
        def on_element(element, output)
          name = element.expanded_name

          # Unary plus yields an unfrozen buffer even if string literals are
          # frozen in this file.
          attrs = +""
          element.attributes.each do |attr|
            attrs << " "
            on_attribute(attr, attrs)
          end

          closing_tag = if self_closing?(element)
                          html_void_element?(element) ? ">" : " />"
                        else
                          ">"
                        end

          output << "<#{name}#{attrs}#{closing_tag}"
        end

        # Appends an xmlns declaration (prefixed when the namespace has a
        # name) to +output+. The URI is written as-is, without escaping.
        def on_namespace_definition(ns, output)
          name = "xmlns"
          name += ":#{ns.name}" unless ns.name.nil?

          output << %(#{name}="#{ns.uri}")
        end

        # Values without an apostrophe (or nil values) use Oga's default
        # output; values with one are double-quoted via #encode so the
        # apostrophe stays literal.
        def on_attribute(attr, output)
          return super unless attr.value&.include?("'")

          output << %(#{attr.expanded_name}="#{encode(attr.value)}")
        end

        # Appends a CDATA node to +output+. Text without "]]>" uses Oga's
        # default output. Otherwise every "]]>" is split so that "]]" ends one
        # section and ">" starts the next, e.g. "a]]>b" becomes
        # "<![CDATA[a]]]]><![CDATA[>b]]>".
        #
        # @return [String] +output+
        def on_cdata(node, output)
          text = node.text
          return super unless text.include?(CDATA_END)

          output << "<![CDATA[#{text.gsub(CDATA_END, CDATA_END_SPLIT)}]]>"
        end

        # Appends a processing instruction with a space between the name
        # and the text.
        def on_processing_instruction(node, output)
          output << "<?#{node.name} #{node.text}?>"
        end

        # Appends the XML declaration, then removes the space before the
        # closing tag. Only the very end of the buffer (where the declaration
        # was just appended) is touched, so earlier content is never altered.
        #
        # @return [String] +output+
        def on_xml_declaration(node, output)
          super
          output.sub!(/ \?>\z/, "?>")
          output
        end

        protected

        # Similar to ::Oga::XML::Entities.encode_attribute, except that
        # apostrophes are kept as they are. Returns nil for nil input.
        def encode(input)
          input&.gsub(
            ::Oga::XML::Entities::ENCODE_ATTRIBUTE_REGEXP,
            # Keep apostrophes in attributes
            ::Oga::XML::Entities::ENCODE_ATTRIBUTE_MAPPING.merge("'" => "'")
          )
        end
      end
    end
  end
end
@@ -0,0 +1,319 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+ require "nokogiri"
5
+
6
module Moxml
  module Adapter
    # Adapter that maps the Moxml adapter interface onto Nokogiri.
    # All methods are class-level and operate on native Nokogiri nodes.
    class Nokogiri < Base
      class << self
        # Makes +element+ the root of the native document +doc+.
        def set_root(doc, element)
          doc.root = element
        end

        # Parses +xml+ and wraps the result through DocumentBuilder.
        #
        # Options read here: +:fragment+ (parse as a document fragment),
        # +:strict+ (do not enable recovery), +:encoding+ (documents only).
        #
        # @raise [Moxml::ParseError] when Nokogiri reports a syntax error
        def parse(xml, options = {})
          native_doc = begin
            if options[:fragment]
              ::Nokogiri::XML::DocumentFragment.parse(xml) do |config|
                config.strict.nonet
                config.recover unless options[:strict]
              end
            else
              ::Nokogiri::XML(xml, nil, options[:encoding]) do |config|
                config.strict.nonet
                config.recover unless options[:strict]
              end
            end
          rescue ::Nokogiri::XML::SyntaxError => e
            raise Moxml::ParseError.new(e.message, line: e.line, column: e.column)
          end

          DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
        end

        # Returns a new, empty native document.
        def create_document
          ::Nokogiri::XML::Document.new
        end

        # Returns a new fragment attached to a fresh document.
        def create_fragment
          # document fragments are weird and should be used with caution:
          # https://github.com/sparklemotion/nokogiri/issues/572
          ::Nokogiri::XML::DocumentFragment.new(create_document)
        end

        # The create_native_* factories each attach the new node to a fresh
        # document of its own.

        def create_native_element(name)
          ::Nokogiri::XML::Element.new(name, create_document)
        end

        def create_native_text(content)
          ::Nokogiri::XML::Text.new(content, create_document)
        end

        def create_native_cdata(content)
          ::Nokogiri::XML::CDATA.new(create_document, content)
        end

        def create_native_comment(content)
          ::Nokogiri::XML::Comment.new(create_document, content)
        end

        def create_native_doctype(name, external_id, system_id)
          create_document.create_internal_subset(name, external_id, system_id)
        end

        def create_native_processing_instruction(target, content)
          ::Nokogiri::XML::ProcessingInstruction.new(create_document, target, content)
        end

        # The declaration is represented as an "xml" processing instruction
        # whose content holds the name="value" pairs.
        def create_native_declaration(version, encoding, standalone)
          ::Nokogiri::XML::ProcessingInstruction.new(
            create_document,
            "xml",
            build_declaration_attrs(version, encoding, standalone)
          )
        end

        # Reads one pseudo-attribute out of the declaration content.
        # Both double- and single-quoted values are recognised.
        #
        # @return [String, nil] the value, or nil when the declaration has no
        #   content or the attribute is absent
        def declaration_attribute(declaration, attr_name)
          content = declaration.content
          return nil unless content

          # Escape the name so it is always matched literally.
          pattern = /#{Regexp.escape(attr_name.to_s)}=(?:"([^"]*)"|'([^']*)')/
          match = content.match(pattern)
          match && (match[1] || match[2])
        end

        # Sets (or, for a nil +value+, removes) one pseudo-attribute and
        # rewrites the declaration content from the resulting set.
        def set_declaration_attribute(declaration, attr_name, value)
          attrs = current_declaration_attributes(declaration)
          if value.nil?
            attrs.delete(attr_name)
          else
            attrs[attr_name] = value
          end

          declaration.native_content = format_declaration_attrs(attrs)
        end

        def set_namespace(element, ns)
          element.namespace = ns
        end

        def namespace(element)
          element.namespace
        end

        def processing_instruction_target(node)
          node.name
        end

        def create_native_namespace(element, prefix, uri)
          element.add_namespace_definition(prefix, uri)
        end

        # Maps a native node to a type symbol. CDATA is tested before Text so
        # that CDATA nodes are not reported as :text; documents and fragments
        # both map to :document.
        def node_type(node)
          case node
          when ::Nokogiri::XML::Element then :element
          when ::Nokogiri::XML::CDATA then :cdata
          when ::Nokogiri::XML::Text then :text
          when ::Nokogiri::XML::Comment then :comment
          when ::Nokogiri::XML::ProcessingInstruction then :processing_instruction
          when ::Nokogiri::XML::Document, ::Nokogiri::XML::DocumentFragment then :document
          when ::Nokogiri::XML::DTD then :doctype
          else :unknown
          end
        end

        def node_name(node)
          node.name
        end

        def set_node_name(node, name)
          node.name = name
        end

        # Returns the children of +node+ without whitespace-only text nodes,
        # except that a whitespace-only text node is kept when it is the only
        # child (it has neither a previous nor a next sibling).
        def children(node)
          node.children.reject do |child|
            child.text? && child.content.strip.empty? &&
              !(child.previous_sibling.nil? && child.next_sibling.nil?)
          end
        end

        # Unlinks all current children, then appends +new_children+ in order.
        def replace_children(node, new_children)
          node.children.unlink
          new_children.each { |child| add_child(node, child) }
        end

        def parent(node)
          node.parent
        end

        def next_sibling(node)
          node.next_sibling
        end

        def previous_sibling(node)
          node.previous_sibling
        end

        def document(node)
          node.document
        end

        # Uses +root+ when the object responds to it, otherwise falls back to
        # the first child.
        def root(document)
          document.respond_to?(:root) ? document.root : document.children.first
        end

        def attribute_element(attr)
          attr.parent
        end

        def attributes(element)
          element.attributes.values
        end

        # Name and value are both converted to strings before assignment.
        def set_attribute(element, name, value)
          element[name.to_s] = value.to_s
        end

        # Returns the attribute node.
        def get_attribute(element, name)
          # attributes keys don't include attribute namespaces
          element.attributes[name.to_s]
        end

        # Returns the attribute value.
        def get_attribute_value(element, name)
          # get the attribute value by its name including a namespace
          element[name.to_s]
        end

        def remove_attribute(element, name)
          element.remove_attribute(name.to_s)
        end

        # Appends +child+ to +element+. A doctype is recreated on +element+
        # through create_internal_subset instead of being moved.
        def add_child(element, child)
          if node_type(child) == :doctype
            # avoid exceptions: cannot reparent Nokogiri::XML::DTD there
            element.create_internal_subset(
              child.name, child.external_id, child.system_id
            )
          else
            element.add_child(child)
          end
        end

        def add_previous_sibling(node, sibling)
          node.add_previous_sibling(sibling)
        end

        def add_next_sibling(node, sibling)
          node.add_next_sibling(sibling)
        end

        def remove(node)
          node.remove
        end

        def replace(node, new_node)
          node.replace(new_node)
        end

        def text_content(node)
          node.content
        end

        # Joins the content of the direct children that are not elements.
        def inner_text(node)
          text_children = node.children - node.element_children
          text_children.map(&:content).join
        end

        def set_text_content(node, content)
          node.native_content = content
        end

        def cdata_content(node)
          node.content
        end

        def set_cdata_content(node, content)
          node.content = content
        end

        def comment_content(node)
          node.content
        end

        def set_comment_content(node, content)
          node.native_content = content
        end

        def processing_instruction_content(node)
          node.content
        end

        def set_processing_instruction_content(node, content)
          node.native_content = content
        end

        def namespace_prefix(namespace)
          namespace.prefix
        end

        def namespace_uri(namespace)
          namespace.href
        end

        def namespace_definitions(node)
          node.namespace_definitions
        end

        # Evaluates +expression+ and returns the matches as an Array.
        #
        # @raise [Moxml::XPathError] when the expression is not valid XPath
        def xpath(node, expression, namespaces = nil)
          node.xpath(expression, namespaces).to_a
        rescue ::Nokogiri::XML::XPath::SyntaxError => e
          raise Moxml::XPathError, e.message
        end

        # Evaluates +expression+ and returns what Nokogiri's at_xpath returns.
        #
        # @raise [Moxml::XPathError] when the expression is not valid XPath
        def at_xpath(node, expression, namespaces = nil)
          node.at_xpath(expression, namespaces)
        rescue ::Nokogiri::XML::XPath::SyntaxError => e
          raise Moxml::XPathError, e.message
        end

        # Serializes +node+ to an XML string.
        #
        # Options read here: +:expand_empty+ (adds NO_EMPTY_TAGS), +:indent+
        # (a positive value adds FORMAT), +:no_declaration+ (adds
        # NO_DECLARATION) and +:encoding+.
        def serialize(node, options = {})
          save_options = ::Nokogiri::XML::Node::SaveOptions::AS_XML

          # Don't force expand empty elements if they're really empty
          save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS if options[:expand_empty]
          save_options |= ::Nokogiri::XML::Node::SaveOptions::FORMAT if options[:indent].to_i.positive?
          save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]

          node.to_xml(
            indent: options[:indent],
            encoding: options[:encoding],
            save_with: save_options
          )
        end

        private

        # Builds the declaration content; encoding and standalone are left
        # out when falsy.
        def build_declaration_attrs(version, encoding, standalone)
          attrs = { "version" => version }
          attrs["encoding"] = encoding if encoding
          attrs["standalone"] = standalone if standalone
          format_declaration_attrs(attrs)
        end

        # Renders a hash as space-separated name="value" pairs.
        def format_declaration_attrs(attrs)
          attrs.map { |k, v| %(#{k}="#{v}") }.join(" ")
        end

        # Collects the allowed declaration attributes that are currently
        # present, keyed by attribute name.
        def current_declaration_attributes(declaration)
          ::Moxml::Declaration::ALLOWED_ATTRIBUTES.each_with_object({}) do |attr_name, attrs|
            value = declaration_attribute(declaration, attr_name)
            attrs[attr_name] = value unless value.nil?
          end
        end
      end
    end
  end
end