moxml 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +15 -0
  3. data/.github/workflows/release.yml +23 -0
  4. data/.gitignore +3 -0
  5. data/.rubocop.yml +2 -0
  6. data/.rubocop_todo.yml +65 -0
  7. data/.ruby-version +1 -0
  8. data/Gemfile +10 -3
  9. data/README.adoc +400 -594
  10. data/lib/moxml/adapter/base.rb +102 -0
  11. data/lib/moxml/adapter/customized_oga/xml_declaration.rb +18 -0
  12. data/lib/moxml/adapter/customized_oga/xml_generator.rb +104 -0
  13. data/lib/moxml/adapter/nokogiri.rb +314 -0
  14. data/lib/moxml/adapter/oga.rb +309 -0
  15. data/lib/moxml/adapter/ox.rb +325 -0
  16. data/lib/moxml/adapter.rb +26 -170
  17. data/lib/moxml/attribute.rb +47 -14
  18. data/lib/moxml/builder.rb +64 -0
  19. data/lib/moxml/cdata.rb +4 -26
  20. data/lib/moxml/comment.rb +6 -22
  21. data/lib/moxml/config.rb +39 -15
  22. data/lib/moxml/context.rb +29 -0
  23. data/lib/moxml/declaration.rb +16 -26
  24. data/lib/moxml/doctype.rb +9 -0
  25. data/lib/moxml/document.rb +51 -63
  26. data/lib/moxml/document_builder.rb +87 -0
  27. data/lib/moxml/element.rb +61 -99
  28. data/lib/moxml/error.rb +20 -0
  29. data/lib/moxml/namespace.rb +12 -37
  30. data/lib/moxml/node.rb +78 -58
  31. data/lib/moxml/node_set.rb +19 -222
  32. data/lib/moxml/processing_instruction.rb +6 -25
  33. data/lib/moxml/text.rb +4 -26
  34. data/lib/moxml/version.rb +1 -1
  35. data/lib/moxml/xml_utils/encoder.rb +55 -0
  36. data/lib/moxml/xml_utils.rb +80 -0
  37. data/lib/moxml.rb +33 -33
  38. data/moxml.gemspec +1 -1
  39. data/spec/moxml/adapter/nokogiri_spec.rb +14 -0
  40. data/spec/moxml/adapter/oga_spec.rb +14 -0
  41. data/spec/moxml/adapter/ox_spec.rb +49 -0
  42. data/spec/moxml/all_with_adapters_spec.rb +46 -0
  43. data/spec/moxml/config_spec.rb +55 -0
  44. data/spec/moxml/error_spec.rb +71 -0
  45. data/spec/moxml/examples/adapter_spec.rb +27 -0
  46. data/spec/moxml_spec.rb +50 -0
  47. data/spec/spec_helper.rb +32 -0
  48. data/spec/support/shared_examples/attribute.rb +165 -0
  49. data/spec/support/shared_examples/builder.rb +25 -0
  50. data/spec/support/shared_examples/cdata.rb +70 -0
  51. data/spec/support/shared_examples/comment.rb +65 -0
  52. data/spec/support/shared_examples/context.rb +35 -0
  53. data/spec/support/shared_examples/declaration.rb +93 -0
  54. data/spec/support/shared_examples/doctype.rb +25 -0
  55. data/spec/support/shared_examples/document.rb +110 -0
  56. data/spec/support/shared_examples/document_builder.rb +43 -0
  57. data/spec/support/shared_examples/edge_cases.rb +185 -0
  58. data/spec/support/shared_examples/element.rb +110 -0
  59. data/spec/support/shared_examples/examples/attribute.rb +42 -0
  60. data/spec/support/shared_examples/examples/basic_usage.rb +67 -0
  61. data/spec/support/shared_examples/examples/memory.rb +54 -0
  62. data/spec/support/shared_examples/examples/namespace.rb +65 -0
  63. data/spec/support/shared_examples/examples/readme_examples.rb +100 -0
  64. data/spec/support/shared_examples/examples/thread_safety.rb +43 -0
  65. data/spec/support/shared_examples/examples/xpath.rb +39 -0
  66. data/spec/support/shared_examples/integration.rb +135 -0
  67. data/spec/support/shared_examples/namespace.rb +96 -0
  68. data/spec/support/shared_examples/node.rb +110 -0
  69. data/spec/support/shared_examples/node_set.rb +90 -0
  70. data/spec/support/shared_examples/processing_instruction.rb +88 -0
  71. data/spec/support/shared_examples/text.rb +66 -0
  72. data/spec/support/shared_examples/xml_adapter.rb +191 -0
  73. data/spec/support/xml_matchers.rb +27 -0
  74. metadata +55 -6
  75. data/.github/workflows/main.yml +0 -27
  76. data/lib/moxml/error_handler.rb +0 -77
  77. data/lib/moxml/errors.rb +0 -169
@@ -0,0 +1,102 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../xml_utils"
4
+ require_relative "../document_builder"
5
+
6
module Moxml
  module Adapter
    # Abstract base shared by the XML backend adapters.
    #
    # Every method lives on the singleton class, so an adapter is called as
    # +SomeAdapter.create_element("a")+ without being instantiated. The public
    # +create_*+ factories run the validation / normalization helpers first
    # and then delegate to a +create_native_*+ hook. Every hook defined here
    # raises NotImplementedError.
    #
    # NOTE(review): the +validate_*+ and +normalize_xml_value+ helpers are not
    # defined in this class; presumably they are supplied by XmlUtils - confirm.
    class Base
      # include XmlUtils
      # (the instance-level mixin above is left disabled; only the singleton
      # class below includes the module)

      class << self
        include XmlUtils

        # --- hooks without a generic implementation --------------------

        # Installs +element+ as the root of +doc+.
        #
        # @raise [NotImplementedError] always, in this base class
        def set_root(doc, element)
          raise NotImplementedError
        end

        # Parses +xml+ using the given +options+.
        #
        # @raise [NotImplementedError] always, in this base class
        def parse(xml, options = {})
          raise NotImplementedError
        end

        # Creates an empty native document.
        #
        # @raise [NotImplementedError] always, in this base class
        def create_document
          raise NotImplementedError
        end

        # --- validating factories --------------------------------------

        # Validates +name+, then builds a native element with that name.
        def create_element(name)
          validate_element_name(name)
          create_native_element(name)
        end

        # Builds a native text node from the normalized +content+.
        def create_text(content)
          normalized = normalize_xml_value(content)
          create_native_text(normalized)
        end

        # Builds a native CDATA node from the normalized +content+.
        def create_cdata(content)
          normalized = normalize_xml_value(content)
          create_native_cdata(normalized)
        end

        # Validates the raw +content+, then builds a native comment from its
        # normalized form.
        def create_comment(content)
          validate_comment_content(content)
          normalized = normalize_xml_value(content)
          create_native_comment(normalized)
        end

        # Builds a native doctype; the arguments are forwarded unchanged.
        def create_doctype(name, external_id, system_id)
          create_native_doctype(name, external_id, system_id)
        end

        # Validates +target+, then builds a native processing instruction
        # whose content is the normalized +content+.
        def create_processing_instruction(target, content)
          validate_pi_target(target)
          normalized = normalize_xml_value(content)
          create_native_processing_instruction(target, normalized)
        end

        # Validates version, encoding and standalone (in that order), then
        # builds a native XML declaration.
        def create_declaration(version = "1.0", encoding = "UTF-8", standalone = nil)
          validate_declaration_version(version)
          validate_declaration_encoding(encoding)
          validate_declaration_standalone(standalone)
          create_native_declaration(version, encoding, standalone)
        end

        # Builds a native namespace on +element+. The prefix is validated
        # only when one is given; the URI is always validated.
        def create_namespace(element, prefix, uri)
          validate_prefix(prefix) if prefix
          validate_uri(uri)
          create_native_namespace(element, prefix, uri)
        end

        protected

        # --- backend hooks: each raises NotImplementedError here --------

        def create_native_element(name)
          raise NotImplementedError
        end

        def create_native_text(content)
          raise NotImplementedError
        end

        def create_native_cdata(content)
          raise NotImplementedError
        end

        def create_native_comment(content)
          raise NotImplementedError
        end

        def create_native_doctype(name, external_id, system_id)
          raise NotImplementedError
        end

        def create_native_processing_instruction(target, content)
          raise NotImplementedError
        end

        def create_native_declaration(version, encoding, standalone)
          raise NotImplementedError
        end

        def create_native_namespace(element, prefix, uri)
          raise NotImplementedError
        end
      end
    end
  end
end
@@ -0,0 +1,18 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "oga"
4
+
5
module Moxml
  module Adapter
    module CustomizedOga
      # XML declaration node that leaves the encoding unset unless the caller
      # supplies one.
      class XmlDeclaration < ::Oga::XML::XmlDeclaration
        # Sets the three declaration fields directly; the parent initializer
        # is not invoked.
        #
        # @param options [Hash] may carry +:version+, +:encoding+ and
        #   +:standalone+
        def initialize(options = {})
          version, encoding, standalone =
            options.values_at(:version, :encoding, :standalone)

          @version = version || "1.0"
          # encoding is optional, but Oga sets it to UTF-8 by default
          @encoding = encoding
          @standalone = standalone
        end
      end
    end
  end
end
@@ -0,0 +1,104 @@
1
+ # rubocop:disable Style/FrozenStringLiteralComment
2
+
3
+ require "oga"
4
+
5
+ # monkey patch the Oga generator because it's not configurable
6
+ # https://github.com/yorickpeterse/oga/blob/main/lib/oga/xml/generator.rb
7
module Moxml
  module Adapter
    module CustomizedOga
      # Oga XML generator with Moxml-specific output rules: empty elements
      # are always expanded, apostrophes in attribute values are kept
      # literal, "]]>" inside CDATA is split across two sections, processing
      # instructions get a space between target and text, and the XML
      # declaration has no space before "?>".
      class XmlGenerator < ::Oga::XML::Generator
        # Terminator of a CDATA section; it must not appear inside one.
        CDATA_END = "]]>".freeze

        # Replacement for CDATA_END inside CDATA text: closes the current
        # section after "]]" and opens a new one that starts with ">".
        CDATA_END_SPLIT = "]]]]><![CDATA[>".freeze

        # Oga's attribute entity table, except that apostrophes map to
        # themselves. Built once instead of on every #encode call.
        ATTRIBUTE_ENTITIES =
          ::Oga::XML::Entities::ENCODE_ATTRIBUTE_MAPPING.merge("'" => "'").freeze

        # @return [false] always, so that tags are always expanded
        def self_closing?(_element)
          false
        end

        # Appends the opening tag of +element+, including its attributes,
        # to +output+.
        def on_element(element, output)
          name = element.expanded_name

          # Unary plus yields an unfrozen buffer even when frozen string
          # literals are enabled; on_attribute appends to it in place.
          attrs = +""
          element.attributes.each do |attr|
            attrs << " "
            on_attribute(attr, attrs)
          end

          closing_tag = if self_closing?(element)
                          html_void_element?(element) ? ">" : " />"
                        else
                          ">"
                        end

          output << "<#{name}#{attrs}#{closing_tag}"
        end

        # Appends an +xmlns+ (default) or +xmlns:prefix+ declaration.
        def on_namespace_definition(ns, output)
          name = ns.name.nil? ? "xmlns" : "xmlns:#{ns.name}"

          output << %(#{name}="#{ns.uri}")
        end

        # Defers to Oga unless the value contains an apostrophe; in that
        # case the value is written double-quoted with apostrophes kept.
        def on_attribute(attr, output)
          return super unless attr.value&.include?("'")

          output << %(#{attr.expanded_name}="#{encode(attr.value)}")
        end

        # Appends +node+ as CDATA. Text containing "]]>" is emitted as
        # several adjacent sections, each "]]>" being split between the end
        # of one section ("]]") and the start of the next (">").
        #
        # @return [String] +output+ when the text needed escaping
        def on_cdata(node, output)
          text = node.text
          return super unless text.include?(CDATA_END)

          output << "<![CDATA[#{text.gsub(CDATA_END, CDATA_END_SPLIT)}]]>"
        end

        # Appends a processing instruction with a space between the name and
        # the text.
        def on_processing_instruction(node, output)
          output << "<?#{node.name} #{node.text}?>"
        end

        # Appends the XML declaration, then removes the space that precedes
        # the closing "?>".
        #
        # @return [String] +output+
        def on_xml_declaration(node, output)
          super
          # \z restricts the edit to the very end of the buffer, i.e. the
          # declaration just written, not any earlier line ending in " ?>".
          output.sub!(/ \?>\z/, "?>")
          output
        end

        protected

        # Entity-encodes an attribute value while keeping apostrophes;
        # similar to ::Oga::XML::Entities.encode_attribute.
        #
        # @param input [String, nil]
        # @return [String, nil] nil when +input+ is nil
        def encode(input)
          input&.gsub(
            ::Oga::XML::Entities::ENCODE_ATTRIBUTE_REGEXP,
            ATTRIBUTE_ENTITIES
          )
        end
      end
    end
  end
end
@@ -0,0 +1,314 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "base"
4
+ require "nokogiri"
5
+
6
module Moxml
  module Adapter
    # Adapter that maps the Moxml adapter interface onto Nokogiri objects.
    # All methods are defined on the singleton class.
    class Nokogiri < Base
      class << self
        # Makes +element+ the root of +doc+.
        def set_root(doc, element)
          doc.root = element
        end

        # Parses +xml+ and wraps the result with DocumentBuilder.
        #
        # @param xml [String] XML source
        # @param options [Hash] +:fragment+ parses a document fragment,
        #   +:strict+ disables recovery, +:encoding+ is passed to the
        #   document parser (it is not used for fragments)
        # @raise [Moxml::ParseError] when Nokogiri reports a syntax error
        def parse(xml, options = {})
          native_doc = begin
            if options[:fragment]
              ::Nokogiri::XML::DocumentFragment.parse(xml) do |config|
                config.strict.nonet
                config.recover unless options[:strict]
              end
            else
              ::Nokogiri::XML(xml, nil, options[:encoding]) do |config|
                config.strict.nonet
                config.recover unless options[:strict]
              end
            end
          rescue ::Nokogiri::XML::SyntaxError => e
            raise Moxml::ParseError.new(e.message, line: e.line, column: e.column)
          end

          DocumentBuilder.new(Context.new(:nokogiri)).build(native_doc)
        end

        # @return [::Nokogiri::XML::Document] a new, empty document
        def create_document
          ::Nokogiri::XML::Document.new
        end

        # @return [::Nokogiri::XML::DocumentFragment] a fragment owned by a
        #   new, empty document
        def create_fragment
          # document fragments are weird and should be used with caution:
          # https://github.com/sparklemotion/nokogiri/issues/572
          ::Nokogiri::XML::DocumentFragment.new(
            ::Nokogiri::XML::Document.new
          )
        end

        # The native factories below each attach the new node to a fresh
        # document obtained from create_document.

        def create_native_element(name)
          ::Nokogiri::XML::Element.new(name, create_document)
        end

        def create_native_text(content)
          ::Nokogiri::XML::Text.new(content, create_document)
        end

        def create_native_cdata(content)
          ::Nokogiri::XML::CDATA.new(create_document, content)
        end

        def create_native_comment(content)
          ::Nokogiri::XML::Comment.new(create_document, content)
        end

        def create_native_doctype(name, external_id, system_id)
          create_document.create_internal_subset(
            name, external_id, system_id
          )
        end

        def create_native_processing_instruction(target, content)
          ::Nokogiri::XML::ProcessingInstruction.new(
            create_document, target, content
          )
        end

        # The declaration is represented as a processing instruction named
        # "xml" whose content is the formatted pseudo-attribute list.
        def create_native_declaration(version, encoding, standalone)
          ::Nokogiri::XML::ProcessingInstruction.new(
            create_document,
            "xml",
            build_declaration_attrs(version, encoding, standalone)
          )
        end

        # Reads one pseudo-attribute from the declaration's content.
        #
        # The name is regexp-escaped and must not be the tail of a longer
        # name; the value may be double- or single-quoted.
        #
        # @param declaration [#content] declaration node
        # @param attr_name [String, Symbol] e.g. "encoding"
        # @return [String, nil] the value, or nil when absent
        def declaration_attribute(declaration, attr_name)
          content = declaration.content
          return nil unless content

          name = Regexp.escape(attr_name.to_s)
          match = content.match(
            /(?<![\w.:-])#{name}\s*=\s*(?:"([^"]*)"|'([^']*)')/
          )
          match && (match[1] || match[2])
        end

        # Sets one pseudo-attribute, or removes it when +value+ is nil, and
        # rewrites the declaration content from the resulting set.
        def set_declaration_attribute(declaration, attr_name, value)
          attrs = current_declaration_attributes(declaration)
          if value.nil?
            attrs.delete(attr_name)
          else
            attrs[attr_name] = value
          end

          declaration.native_content = format_declaration_attrs(attrs)
        end

        def set_namespace(element, ns)
          element.namespace = ns
        end

        def namespace(element)
          element.namespace
        end

        def processing_instruction_target(node)
          node.name
        end

        def create_native_namespace(element, prefix, uri)
          element.add_namespace_definition(prefix, uri)
        end

        # Maps a native node to a type symbol; +:unknown+ when no class
        # matches. Documents and fragments both map to +:document+.
        def node_type(node)
          case node
          when ::Nokogiri::XML::Element then :element
          # CDATA is tested before Text: Nokogiri's CDATA class descends
          # from Text, so the other order would report it as :text.
          when ::Nokogiri::XML::CDATA then :cdata
          when ::Nokogiri::XML::Text then :text
          when ::Nokogiri::XML::Comment then :comment
          when ::Nokogiri::XML::ProcessingInstruction then :processing_instruction
          when ::Nokogiri::XML::Document, ::Nokogiri::XML::DocumentFragment then :document
          when ::Nokogiri::XML::DTD then :doctype
          else :unknown
          end
        end

        def node_name(node)
          node.name
        end

        def set_node_name(node, name)
          node.name = name
        end

        # Children of +node+ without whitespace-only text nodes; such a node
        # is kept only when it has no siblings at all.
        def children(node)
          node.children.reject do |child|
            child.text? && child.content.strip.empty? &&
              !(child.previous_sibling.nil? && child.next_sibling.nil?)
          end
        end

        # Unlinks all current children, then adds +new_children+ in order.
        def replace_children(node, new_children)
          node.children.unlink
          new_children.each { |child| add_child(node, child) }
        end

        def parent(node)
          node.parent
        end

        def next_sibling(node)
          node.next_sibling
        end

        def previous_sibling(node)
          node.previous_sibling
        end

        def document(node)
          node.document
        end

        # Root of +document+; objects without #root yield their first child.
        def root(document)
          document.respond_to?(:root) ? document.root : document.children.first
        end

        def attribute_element(attr)
          attr.parent
        end

        def attributes(element)
          element.attributes.values
        end

        # Name and value are both converted to strings before assignment.
        def set_attribute(element, name, value)
          element[name.to_s] = value.to_s
        end

        def get_attribute(element, name)
          # attributes keys don't include attribute namespaces
          element.attributes[name.to_s]
        end

        def get_attribute_value(element, name)
          # get the attribute value by its name including a namespace
          element[name.to_s]
        end

        def remove_attribute(element, name)
          element.remove_attribute(name.to_s)
        end

        # Adds +child+ to +element+. A doctype is recreated on +element+
        # from its name and ids instead of being moved.
        def add_child(element, child)
          if node_type(child) == :doctype
            # avoid exceptions: cannot reparent Nokogiri::XML::DTD there
            element.create_internal_subset(
              child.name, child.external_id, child.system_id
            )
          else
            element.add_child(child)
          end
        end

        def add_previous_sibling(node, sibling)
          node.add_previous_sibling(sibling)
        end

        def add_next_sibling(node, sibling)
          node.add_next_sibling(sibling)
        end

        def remove(node)
          node.remove
        end

        def replace(node, new_node)
          node.replace(new_node)
        end

        def text_content(node)
          node.content
        end

        def set_text_content(node, content)
          node.native_content = content
        end

        def cdata_content(node)
          node.content
        end

        def set_cdata_content(node, content)
          node.content = content
        end

        def comment_content(node)
          node.content
        end

        def set_comment_content(node, content)
          node.native_content = content
        end

        def processing_instruction_content(node)
          node.content
        end

        def set_processing_instruction_content(node, content)
          node.native_content = content
        end

        def namespace_prefix(namespace)
          namespace.prefix
        end

        def namespace_uri(namespace)
          namespace.href
        end

        def namespace_definitions(node)
          node.namespace_definitions
        end

        # @return [Array] nodes matching +expression+
        # @raise [Moxml::XPathError] when the expression is not valid XPath
        def xpath(node, expression, namespaces = nil)
          node.xpath(expression, namespaces).to_a
        rescue ::Nokogiri::XML::XPath::SyntaxError => e
          raise Moxml::XPathError, e.message
        end

        # @return the result of Nokogiri's at_xpath for +expression+
        # @raise [Moxml::XPathError] when the expression is not valid XPath
        def at_xpath(node, expression, namespaces = nil)
          node.at_xpath(expression, namespaces)
        rescue ::Nokogiri::XML::XPath::SyntaxError => e
          raise Moxml::XPathError, e.message
        end

        # Serializes +node+ to XML.
        #
        # @param options [Hash] +:expand_empty+ adds NO_EMPTY_TAGS,
        #   a positive +:indent+ adds FORMAT, +:no_declaration+ adds
        #   NO_DECLARATION; +:indent+ and +:encoding+ are also passed on
        # @return [String]
        def serialize(node, options = {})
          save_options = ::Nokogiri::XML::Node::SaveOptions::AS_XML

          # Don't force expand empty elements if they're really empty
          save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_EMPTY_TAGS if options[:expand_empty]
          save_options |= ::Nokogiri::XML::Node::SaveOptions::FORMAT if options[:indent].to_i.positive?
          save_options |= ::Nokogiri::XML::Node::SaveOptions::NO_DECLARATION if options[:no_declaration]

          node.to_xml(
            indent: options[:indent],
            encoding: options[:encoding],
            save_with: save_options
          )
        end

        private

        # Formats the declaration pseudo-attributes; encoding and standalone
        # are left out when falsy.
        def build_declaration_attrs(version, encoding, standalone)
          attrs = { "version" => version }
          attrs["encoding"] = encoding if encoding
          attrs["standalone"] = standalone if standalone
          format_declaration_attrs(attrs)
        end

        # Renders a name => value hash as space-separated key="value" pairs.
        def format_declaration_attrs(attrs)
          attrs.map { |k, v| %(#{k}="#{v}") }.join(" ")
        end

        # Collects the allowed pseudo-attributes currently present in the
        # declaration, in ALLOWED_ATTRIBUTES order.
        def current_declaration_attributes(declaration)
          ::Moxml::Declaration::ALLOWED_ATTRIBUTES.each_with_object({}) do |attr_name, attrs|
            value = declaration_attribute(declaration, attr_name)
            attrs[attr_name] = value unless value.nil?
          end
        end
      end
    end
  end
end