moxml 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +15 -0
  3. data/.github/workflows/release.yml +23 -0
  4. data/.gitignore +3 -0
  5. data/.rubocop.yml +2 -0
  6. data/.rubocop_todo.yml +65 -0
  7. data/.ruby-version +1 -0
  8. data/Gemfile +10 -3
  9. data/README.adoc +400 -594
  10. data/lib/moxml/adapter/base.rb +102 -0
  11. data/lib/moxml/adapter/customized_oga/xml_declaration.rb +18 -0
  12. data/lib/moxml/adapter/customized_oga/xml_generator.rb +104 -0
  13. data/lib/moxml/adapter/nokogiri.rb +314 -0
  14. data/lib/moxml/adapter/oga.rb +309 -0
  15. data/lib/moxml/adapter/ox.rb +325 -0
  16. data/lib/moxml/adapter.rb +26 -170
  17. data/lib/moxml/attribute.rb +47 -14
  18. data/lib/moxml/builder.rb +64 -0
  19. data/lib/moxml/cdata.rb +4 -26
  20. data/lib/moxml/comment.rb +6 -22
  21. data/lib/moxml/config.rb +39 -15
  22. data/lib/moxml/context.rb +29 -0
  23. data/lib/moxml/declaration.rb +16 -26
  24. data/lib/moxml/doctype.rb +9 -0
  25. data/lib/moxml/document.rb +51 -63
  26. data/lib/moxml/document_builder.rb +87 -0
  27. data/lib/moxml/element.rb +61 -99
  28. data/lib/moxml/error.rb +20 -0
  29. data/lib/moxml/namespace.rb +12 -37
  30. data/lib/moxml/node.rb +78 -58
  31. data/lib/moxml/node_set.rb +19 -222
  32. data/lib/moxml/processing_instruction.rb +6 -25
  33. data/lib/moxml/text.rb +4 -26
  34. data/lib/moxml/version.rb +1 -1
  35. data/lib/moxml/xml_utils/encoder.rb +55 -0
  36. data/lib/moxml/xml_utils.rb +80 -0
  37. data/lib/moxml.rb +33 -33
  38. data/moxml.gemspec +1 -1
  39. data/spec/moxml/adapter/nokogiri_spec.rb +14 -0
  40. data/spec/moxml/adapter/oga_spec.rb +14 -0
  41. data/spec/moxml/adapter/ox_spec.rb +49 -0
  42. data/spec/moxml/all_with_adapters_spec.rb +46 -0
  43. data/spec/moxml/config_spec.rb +55 -0
  44. data/spec/moxml/error_spec.rb +71 -0
  45. data/spec/moxml/examples/adapter_spec.rb +27 -0
  46. data/spec/moxml_spec.rb +50 -0
  47. data/spec/spec_helper.rb +32 -0
  48. data/spec/support/shared_examples/attribute.rb +165 -0
  49. data/spec/support/shared_examples/builder.rb +25 -0
  50. data/spec/support/shared_examples/cdata.rb +70 -0
  51. data/spec/support/shared_examples/comment.rb +65 -0
  52. data/spec/support/shared_examples/context.rb +35 -0
  53. data/spec/support/shared_examples/declaration.rb +93 -0
  54. data/spec/support/shared_examples/doctype.rb +25 -0
  55. data/spec/support/shared_examples/document.rb +110 -0
  56. data/spec/support/shared_examples/document_builder.rb +43 -0
  57. data/spec/support/shared_examples/edge_cases.rb +185 -0
  58. data/spec/support/shared_examples/element.rb +110 -0
  59. data/spec/support/shared_examples/examples/attribute.rb +42 -0
  60. data/spec/support/shared_examples/examples/basic_usage.rb +67 -0
  61. data/spec/support/shared_examples/examples/memory.rb +54 -0
  62. data/spec/support/shared_examples/examples/namespace.rb +65 -0
  63. data/spec/support/shared_examples/examples/readme_examples.rb +100 -0
  64. data/spec/support/shared_examples/examples/thread_safety.rb +43 -0
  65. data/spec/support/shared_examples/examples/xpath.rb +39 -0
  66. data/spec/support/shared_examples/integration.rb +135 -0
  67. data/spec/support/shared_examples/namespace.rb +96 -0
  68. data/spec/support/shared_examples/node.rb +110 -0
  69. data/spec/support/shared_examples/node_set.rb +90 -0
  70. data/spec/support/shared_examples/processing_instruction.rb +88 -0
  71. data/spec/support/shared_examples/text.rb +66 -0
  72. data/spec/support/shared_examples/xml_adapter.rb +191 -0
  73. data/spec/support/xml_matchers.rb +27 -0
  74. metadata +55 -6
  75. data/.github/workflows/main.yml +0 -27
  76. data/lib/moxml/error_handler.rb +0 -77
  77. data/lib/moxml/errors.rb +0 -169
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../xml_utils"
4
+ require_relative "../document_builder"
5
+
6
module Moxml
  module Adapter
    # Abstract base for the XML backend adapters.
    #
    # Everything is defined on the singleton class, so adapters are called as
    # class-level methods. The public +create_*+ factories run the validation /
    # normalization helpers (presumably supplied by XmlUtils, which is mixed in
    # below -- confirm there) and then delegate to a +create_native_*+ hook.
    # Every hook, plus +set_root+, +parse+ and +create_document+, raises
    # NotImplementedError here and is expected to be overridden by a subclass.
    class Base
      class << self
        include XmlUtils

        # Abstract: attach +element+ to +doc+ as its root.
        def set_root(doc, element)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        # Abstract: parse +xml+ using backend-specific +options+.
        def parse(xml, options = {})
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        # Abstract: build an empty native document.
        def create_document
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        # Validates +name+ and builds a native element.
        def create_element(name)
          validate_element_name(name)
          create_native_element(name)
        end

        # Builds a native text node from the normalized +content+.
        def create_text(content)
          create_native_text(normalize_xml_value(content))
        end

        # Builds a native CDATA node from the normalized +content+.
        def create_cdata(content)
          create_native_cdata(normalize_xml_value(content))
        end

        # Validates the raw +content+, then builds a native comment from its
        # normalized form.
        def create_comment(content)
          validate_comment_content(content)
          create_native_comment(normalize_xml_value(content))
        end

        # Builds a native doctype; the arguments are passed through unchecked.
        def create_doctype(name, external_id, system_id)
          create_native_doctype(name, external_id, system_id)
        end

        # Validates +target+ and builds a native processing instruction with
        # the normalized +content+.
        def create_processing_instruction(target, content)
          validate_pi_target(target)
          create_native_processing_instruction(target, normalize_xml_value(content))
        end

        # Validates all three declaration fields and builds a native XML
        # declaration. Defaults: version "1.0", encoding "UTF-8", no standalone.
        def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
          validate_declaration_version(version)
          validate_declaration_encoding(encoding)
          validate_declaration_standalone(standalone)
          create_native_declaration(version, encoding, standalone)
        end

        # Validates +uri+ (and +prefix+, when one is given) and registers the
        # namespace on +element+ through the native hook.
        def create_namespace(element, prefix, uri)
          validate_prefix(prefix) if prefix
          validate_uri(uri)
          create_native_namespace(element, prefix, uri)
        end

        protected

        # --- backend hooks -------------------------------------------------
        # Each hook names the adapter and the missing method in its error so a
        # forgotten override is easy to locate.

        def create_native_element(name)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        def create_native_text(content)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        def create_native_cdata(content)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        def create_native_comment(content)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        def create_native_doctype(name, external_id, system_id)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        def create_native_processing_instruction(target, content)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        def create_native_declaration(version, encoding, standalone)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end

        def create_native_namespace(element, prefix, uri)
          raise NotImplementedError, "#{self} must implement #{__method__}"
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "oga"
4
+
5
module Moxml
  module Adapter
    module CustomizedOga
      # Oga XML declaration whose fields are taken straight from the options
      # hash; only the version gets a fallback value.
      class XmlDeclaration < ::Oga::XML::XmlDeclaration
        # Sets the three declaration fields from +options+ (:version,
        # :encoding, :standalone) without calling the parent initializer.
        def initialize(options = {})
          version, encoding, standalone =
            options.values_at(:version, :encoding, :standalone)

          @version = version || "1.0"
          # encoding is optional, but Oga sets it to UTF-8 by default
          @encoding = encoding
          @standalone = standalone
        end
      end
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # rubocop:disable Style/FrozenStringLiteralComment
2
+
3
+ require "oga"
4
+
5
+ # monkey patch the Oga generator because it's not configurable
6
+ # https://github.com/yorickpeterse/oga/blob/main/lib/oga/xml/generator.rb
7
module Moxml
  module Adapter
    module CustomizedOga
      # Oga generator subclass that overrides how elements, namespace
      # definitions, attributes, CDATA sections, processing instructions and
      # the XML declaration are written.
      class XmlGenerator < ::Oga::XML::Generator
        # Oga's attribute entity table with the apostrophe mapped to itself,
        # so apostrophes are written verbatim. Built once instead of on every
        # #encode call.
        ATTRIBUTE_ENTITIES =
          ::Oga::XML::Entities::ENCODE_ATTRIBUTE_MAPPING.merge("'" => "'").freeze

        # Terminator that may not appear inside a single CDATA section.
        CDATA_END = "]]>".freeze

        # Replacement for CDATA_END: closes the current section after "]]"
        # and opens a new one that starts with ">".
        CDATA_END_SPLIT = "]]]]><![CDATA[>".freeze

        def self_closing?(_element)
          # Always expand tags
          false
        end

        # Writes the opening tag of +element+, attributes included, to +output+.
        def on_element(element, output)
          attrs = element.attributes.each_with_object(+"") do |attr, buffer|
            buffer << " "
            on_attribute(attr, buffer)
          end

          closing_tag =
            if self_closing?(element)
              html_void_element?(element) ? ">" : " />"
            else
              ">"
            end

          output << "<#{element.expanded_name}#{attrs}#{closing_tag}"
        end

        # Writes +ns+ as xmlns="uri", or xmlns:name="uri" when it has a name.
        def on_namespace_definition(ns, output)
          name = ns.name.nil? ? "xmlns" : "xmlns:#{ns.name}"

          output << %(#{name}="#{ns.uri}")
        end

        # Attribute values containing an apostrophe are written here using
        # #encode (which leaves the apostrophe as is); everything else is
        # handled by the parent implementation.
        def on_attribute(attr, output)
          return super unless attr.value&.include?("'")

          output << %(#{attr.expanded_name}="#{encode(attr.value)}")
        end

        # Writes a CDATA node. Text containing "]]>" cannot be emitted as one
        # section, so each occurrence is split across two adjacent sections:
        # "a]]>b" becomes <![CDATA[a]]]]><![CDATA[>b]]>.
        #
        # Returns +output+ in the split case.
        def on_cdata(node, output)
          text = node.text
          return super unless text.include?(CDATA_END)

          output << "<![CDATA[#{text.gsub(CDATA_END, CDATA_END_SPLIT)}]]>"
        end

        def on_processing_instruction(node, output)
          # put the space between the name and text
          output << "<?#{node.name} #{node.text}?>"
        end

        # Lets the parent write the declaration, then removes the space before
        # the closing "?>". The pattern is anchored to the very end of the
        # buffer (\z) so nothing written earlier can be touched.
        #
        # Returns nil when nothing was replaced (String#sub! semantics).
        def on_xml_declaration(node, output)
          super
          output.sub!(/ \?>\z/, "?>")
        end

        protected

        # Entity-encodes an attribute value, keeping apostrophes.
        # Returns nil for nil input.
        def encode(input)
          # similar to ::Oga::XML::Entities.encode_attribute
          input&.gsub(
            ::Oga::XML::Entities::ENCODE_ATTRIBUTE_REGEXP,
            ATTRIBUTE_ENTITIES
          )
        end
      end
    end
  end
end
@@ -0,0 +1,314 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+ require "nokogiri"
5
+
6
module Moxml
  module Adapter
    # Adapter that implements the Moxml adapter interface on top of Nokogiri.
    # All methods are class-level and operate on native Nokogiri objects.
    class Nokogiri < Base
      class << self
        # Makes +element+ the root of +doc+.
        def set_root(doc, element)
          doc.root = element
        end

        # Parses +xml+ and hands the native result to DocumentBuilder.
        #
        # Options read here:
        #   :fragment - parse as a DocumentFragment instead of a Document
        #   :encoding - forwarded to Nokogiri::XML (document parsing only)
        #   :strict   - when truthy, parser recovery is not enabled
        #
        # Raises Moxml::ParseError (carrying line and column) when Nokogiri
        # raises a syntax error.
        def parse(xml, options = {})
          native_doc = begin
            if options[:fragment]
              ::Nokogiri::XML::DocumentFragment.parse(xml) do |config|
                configure_parser(config, options)
              end
            else
              ::Nokogiri::XML(xml, nil, options[:encoding]) do |config|
                configure_parser(config, options)
              end
            end
          rescue ::Nokogiri::XML::SyntaxError => e
            raise Moxml::ParseError.new(e.message, line: e.line, column: e.column)
          end

          DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
        end

        # Returns a new, empty Nokogiri document.
        def create_document
          ::Nokogiri::XML::Document.new
        end

        def create_fragment
          # document fragments are weird and should be used with caution:
          # https://github.com/sparklemotion/nokogiri/issues/572
          ::Nokogiri::XML::DocumentFragment.new(
            ::Nokogiri::XML::Document.new
          )
        end

        # --- native node factories -----------------------------------------
        # Each node is created against a fresh document from #create_document.

        def create_native_element(name)
          ::Nokogiri::XML::Element.new(name, create_document)
        end

        def create_native_text(content)
          ::Nokogiri::XML::Text.new(content, create_document)
        end

        def create_native_cdata(content)
          ::Nokogiri::XML::CDATA.new(create_document, content)
        end

        def create_native_comment(content)
          ::Nokogiri::XML::Comment.new(create_document, content)
        end

        def create_native_doctype(name, external_id, system_id)
          create_document.create_internal_subset(
            name, external_id, system_id
          )
        end

        def create_native_processing_instruction(target, content)
          ::Nokogiri::XML::ProcessingInstruction.new(
            create_document, target, content
          )
        end

        # The declaration is represented as an "xml" processing instruction
        # whose content is the serialized attribute list.
        def create_native_declaration(version, encoding, standalone)
          ::Nokogiri::XML::ProcessingInstruction.new(
            create_document,
            "xml",
            build_declaration_attrs(version, encoding, standalone)
          )
        end

        # Reads the double-quoted value of +attr_name+ from the declaration's
        # content. Returns nil when there is no content or no such attribute.
        def declaration_attribute(declaration, attr_name)
          content = declaration.content
          return nil unless content

          # the name is escaped so it is always matched literally
          match = content.match(/#{Regexp.escape(attr_name.to_s)}="([^"]*)"/)
          match && match[1]
        end

        # Sets (or, for a nil +value+, removes) one declaration attribute and
        # rewrites the declaration content from the resulting attribute set.
        def set_declaration_attribute(declaration, attr_name, value)
          attrs = current_declaration_attributes(declaration)
          if value.nil?
            attrs.delete(attr_name)
          else
            attrs[attr_name] = value
          end

          declaration.native_content =
            attrs.map { |k, v| %(#{k}="#{v}") }.join(" ")
        end

        def set_namespace(element, ns)
          element.namespace = ns
        end

        def namespace(element)
          element.namespace
        end

        def processing_instruction_target(node)
          node.name
        end

        def create_native_namespace(element, prefix, uri)
          element.add_namespace_definition(prefix, uri)
        end

        # Maps a native node to a Moxml node-type symbol. CDATA is tested
        # before Text; documents and fragments both map to :document.
        def node_type(node)
          case node
          when ::Nokogiri::XML::Element then :element
          when ::Nokogiri::XML::CDATA then :cdata
          when ::Nokogiri::XML::Text then :text
          when ::Nokogiri::XML::Comment then :comment
          when ::Nokogiri::XML::ProcessingInstruction then :processing_instruction
          when ::Nokogiri::XML::Document, ::Nokogiri::XML::DocumentFragment then :document
          when ::Nokogiri::XML::DTD then :doctype
          else :unknown
          end
        end

        def node_name(node)
          node.name
        end

        def set_node_name(node, name)
          node.name = name
        end

        # Children of +node+ without whitespace-only text nodes. A
        # whitespace-only text node is kept when it has no siblings.
        def children(node)
          node.children.reject do |child|
            child.text? && child.content.strip.empty? &&
              !(child.previous_sibling.nil? && child.next_sibling.nil?)
          end
        end

        # Unlinks every current child, then appends +new_children+ in order.
        def replace_children(node, new_children)
          node.children.unlink
          new_children.each { |child| add_child(node, child) }
        end

        def parent(node)
          node.parent
        end

        def next_sibling(node)
          node.next_sibling
        end

        def previous_sibling(node)
          node.previous_sibling
        end

        def document(node)
          node.document
        end

        # Root of +document+; objects that do not respond to #root yield
        # their first child instead.
        def root(document)
          document.respond_to?(:root) ? document.root : document.children.first
        end

        def attribute_element(attr)
          attr.parent
        end

        def attributes(element)
          element.attributes.values
        end

        # Name and value are both converted with #to_s before being stored.
        def set_attribute(element, name, value)
          element[name.to_s] = value.to_s
        end

        def get_attribute(element, name)
          # attributes keys don't include attribute namespaces
          element.attributes[name.to_s]
        end

        def get_attribute_value(element, name)
          # get the attribute value by its name including a namespace
          element[name.to_s]
        end

        def remove_attribute(element, name)
          element.remove_attribute(name.to_s)
        end

        # Appends +child+ to +element+. A doctype is re-created on +element+
        # as an internal subset rather than moved.
        def add_child(element, child)
          if node_type(child) == :doctype
            # avoid exceptions: cannot reparent Nokogiri::XML::DTD there
            element.create_internal_subset(
              child.name, child.external_id, child.system_id
            )
          else
            element.add_child(child)
          end
        end

        def add_previous_sibling(node, sibling)
          node.add_previous_sibling(sibling)
        end

        def add_next_sibling(node, sibling)
          node.add_next_sibling(sibling)
        end

        def remove(node)
          node.remove
        end

        def replace(node, new_node)
          node.replace(new_node)
        end

        def text_content(node)
          node.content
        end

        def set_text_content(node, content)
          node.native_content = content
        end

        def cdata_content(node)
          node.content
        end

        def set_cdata_content(node, content)
          node.content = content
        end

        def comment_content(node)
          node.content
        end

        def set_comment_content(node, content)
          node.native_content = content
        end

        def processing_instruction_content(node)
          node.content
        end

        def set_processing_instruction_content(node, content)
          node.native_content = content
        end

        def namespace_prefix(namespace)
          namespace.prefix
        end

        def namespace_uri(namespace)
          namespace.href
        end

        def namespace_definitions(node)
          node.namespace_definitions
        end

        # Evaluates +expression+ and returns the matches as an Array.
        # Raises Moxml::XPathError when Nokogiri reports an XPath syntax error.
        def xpath(node, expression, namespaces = nil)
          node.xpath(expression, namespaces).to_a
        rescue ::Nokogiri::XML::XPath::SyntaxError => e
          raise Moxml::XPathError, e.message
        end

        # Single-result variant of #xpath with the same error translation.
        def at_xpath(node, expression, namespaces = nil)
          node.at_xpath(expression, namespaces)
        rescue ::Nokogiri::XML::XPath::SyntaxError => e
          raise Moxml::XPathError, e.message
        end

        # Serializes +node+ to XML.
        #
        # Options read here:
        #   :expand_empty   - adds Nokogiri's NO_EMPTY_TAGS save option
        #   :indent         - a positive value adds FORMAT; also passed through
        #   :no_declaration - adds NO_DECLARATION
        #   :encoding       - passed through to Nokogiri
        def serialize(node, options = {})
          save_options = ::Nokogiri::XML::Node::SaveOptions::AS_XML

          # Empty elements are only written in expanded form on request
          save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS if options[:expand_empty]
          save_options |= ::Nokogiri::XML::Node::SaveOptions::FORMAT if options[:indent].to_i.positive?
          save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]

          node.to_xml(
            indent: options[:indent],
            encoding: options[:encoding],
            save_with: save_options
          )
        end

        private

        # Parser configuration shared by document and fragment parsing:
        # strict + nonet, with recovery enabled unless :strict is set.
        def configure_parser(config, options)
          config.strict.nonet
          config.recover unless options[:strict]
        end

        # Serializes the declaration fields as name="value" pairs; encoding
        # and standalone are left out when falsy.
        def build_declaration_attrs(version, encoding, standalone)
          attrs = { "version" => version }
          attrs["encoding"] = encoding if encoding
          attrs["standalone"] = standalone if standalone
          attrs.map { |k, v| %(#{k}="#{v}") }.join(" ")
        end

        # Hash of the allowed declaration attributes currently present in
        # +declaration+, in ALLOWED_ATTRIBUTES order.
        def current_declaration_attributes(declaration)
          ::Moxml::Declaration::ALLOWED_ATTRIBUTES.each_with_object({}) do |attr_name, attrs|
            value = declaration_attribute(declaration, attr_name)
            attrs[attr_name] = value unless value.nil?
          end
        end
      end
    end
  end
end