moxml 0.1.0 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +15 -0
- data/.github/workflows/release.yml +23 -0
- data/.gitignore +3 -0
- data/.rubocop.yml +2 -0
- data/.rubocop_todo.yml +65 -0
- data/.ruby-version +1 -0
- data/Gemfile +10 -3
- data/README.adoc +401 -594
- data/lib/moxml/adapter/base.rb +102 -0
- data/lib/moxml/adapter/customized_oga/xml_declaration.rb +18 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +104 -0
- data/lib/moxml/adapter/nokogiri.rb +319 -0
- data/lib/moxml/adapter/oga.rb +318 -0
- data/lib/moxml/adapter/ox.rb +325 -0
- data/lib/moxml/adapter.rb +26 -170
- data/lib/moxml/attribute.rb +47 -14
- data/lib/moxml/builder.rb +64 -0
- data/lib/moxml/cdata.rb +4 -26
- data/lib/moxml/comment.rb +6 -22
- data/lib/moxml/config.rb +39 -15
- data/lib/moxml/context.rb +29 -0
- data/lib/moxml/declaration.rb +16 -26
- data/lib/moxml/doctype.rb +9 -0
- data/lib/moxml/document.rb +51 -63
- data/lib/moxml/document_builder.rb +87 -0
- data/lib/moxml/element.rb +63 -97
- data/lib/moxml/error.rb +20 -0
- data/lib/moxml/namespace.rb +12 -37
- data/lib/moxml/node.rb +78 -58
- data/lib/moxml/node_set.rb +19 -222
- data/lib/moxml/processing_instruction.rb +6 -25
- data/lib/moxml/text.rb +4 -26
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +55 -0
- data/lib/moxml/xml_utils.rb +80 -0
- data/lib/moxml.rb +33 -33
- data/moxml.gemspec +1 -1
- data/spec/moxml/adapter/nokogiri_spec.rb +14 -0
- data/spec/moxml/adapter/oga_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +49 -0
- data/spec/moxml/all_with_adapters_spec.rb +46 -0
- data/spec/moxml/config_spec.rb +55 -0
- data/spec/moxml/error_spec.rb +71 -0
- data/spec/moxml/examples/adapter_spec.rb +27 -0
- data/spec/moxml_spec.rb +50 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/shared_examples/attribute.rb +165 -0
- data/spec/support/shared_examples/builder.rb +25 -0
- data/spec/support/shared_examples/cdata.rb +70 -0
- data/spec/support/shared_examples/comment.rb +65 -0
- data/spec/support/shared_examples/context.rb +35 -0
- data/spec/support/shared_examples/declaration.rb +93 -0
- data/spec/support/shared_examples/doctype.rb +25 -0
- data/spec/support/shared_examples/document.rb +110 -0
- data/spec/support/shared_examples/document_builder.rb +43 -0
- data/spec/support/shared_examples/edge_cases.rb +185 -0
- data/spec/support/shared_examples/element.rb +130 -0
- data/spec/support/shared_examples/examples/attribute.rb +42 -0
- data/spec/support/shared_examples/examples/basic_usage.rb +67 -0
- data/spec/support/shared_examples/examples/memory.rb +54 -0
- data/spec/support/shared_examples/examples/namespace.rb +65 -0
- data/spec/support/shared_examples/examples/readme_examples.rb +100 -0
- data/spec/support/shared_examples/examples/thread_safety.rb +43 -0
- data/spec/support/shared_examples/examples/xpath.rb +39 -0
- data/spec/support/shared_examples/integration.rb +135 -0
- data/spec/support/shared_examples/namespace.rb +96 -0
- data/spec/support/shared_examples/node.rb +110 -0
- data/spec/support/shared_examples/node_set.rb +90 -0
- data/spec/support/shared_examples/processing_instruction.rb +88 -0
- data/spec/support/shared_examples/text.rb +66 -0
- data/spec/support/shared_examples/xml_adapter.rb +191 -0
- data/spec/support/xml_matchers.rb +27 -0
- metadata +55 -6
- data/.github/workflows/main.yml +0 -27
- data/lib/moxml/error_handler.rb +0 -77
- data/lib/moxml/errors.rb +0 -169
data/README.adoc
CHANGED
@@ -1,770 +1,577 @@
|
|
1
|
-
= Moxml:
|
1
|
+
= Moxml: Modern XML processing for Ruby
|
2
|
+
:toc: macro
|
3
|
+
:toclevels: 3
|
4
|
+
:toc-title: Contents
|
5
|
+
:source-highlighter: highlight.js
|
2
6
|
|
3
|
-
|
7
|
+
image:https://github.com/lutaml/moxml/workflows/rake/badge.svg["Build Status", link="https://github.com/lutaml/moxml/actions?workflow=rake"]
|
4
8
|
|
5
|
-
|
6
|
-
interface for working with XML documents, regardless of the underlying XML
|
7
|
-
library.
|
9
|
+
toc::[]
|
8
10
|
|
9
|
-
==
|
11
|
+
== Introduction and purpose
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
Moxml provides a unified, modern XML processing interface for Ruby applications.
|
14
|
+
It offers a consistent API that abstracts away the underlying XML implementation
|
15
|
+
details while maintaining high performance through efficient node mapping and
|
16
|
+
native XPath querying.
|
17
|
+
|
18
|
+
Key features:
|
15
19
|
|
16
|
-
|
20
|
+
* Intuitive, Ruby-idiomatic API for XML manipulation
|
21
|
+
* Consistent interface across different XML libraries
|
22
|
+
* Efficient node mapping for XPath queries
|
23
|
+
* Support for all XML node types and features
|
24
|
+
* Easy switching between XML processing engines
|
25
|
+
* Clean separation between interface and implementation
|
17
26
|
|
18
|
-
|
27
|
+
== Getting started
|
19
28
|
|
20
|
-
|
29
|
+
Install the gem and at least one supported XML library:
|
21
30
|
|
22
31
|
[source,ruby]
|
23
32
|
----
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
config.backend = :nokogiri # or :ox, :oga
|
28
|
-
end
|
33
|
+
# In your Gemfile
|
34
|
+
gem 'moxml'
|
35
|
+
gem 'nokogiri' # Or 'ox' or 'oga'
|
29
36
|
----
|
30
37
|
|
31
|
-
===
|
38
|
+
=== Basic document creation
|
32
39
|
|
33
40
|
[source,ruby]
|
34
41
|
----
|
35
|
-
|
36
|
-
doc = Moxml::Document.new
|
37
|
-
|
38
|
-
# Parse from string
|
39
|
-
doc = Moxml::Document.parse("<root><child>content</child></root>")
|
42
|
+
require 'moxml'
|
40
43
|
|
41
|
-
#
|
42
|
-
doc = Moxml
|
43
|
-
----
|
44
|
+
# Create a new XML document
|
45
|
+
doc = Moxml.new.create_document
|
44
46
|
|
45
|
-
|
47
|
+
# Add XML declaration
|
48
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
46
49
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
doc = Moxml::Document.new
|
51
|
-
root = doc.create_element('root')
|
50
|
+
# Create root element with namespace
|
51
|
+
root = doc.create_element('book')
|
52
|
+
root.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
52
53
|
doc.add_child(root)
|
53
54
|
|
54
|
-
#
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
doc = Moxml::Document.parse(xml_string, encoding: 'UTF-8')
|
55
|
+
# Add content
|
56
|
+
title = doc.create_element('dc:title')
|
57
|
+
title.text = 'XML Processing with Ruby'
|
58
|
+
root.add_child(title)
|
59
59
|
|
60
|
-
#
|
61
|
-
doc
|
62
|
-
encoding: 'UTF-8',
|
63
|
-
strict: true
|
64
|
-
})
|
60
|
+
# Output formatted XML
|
61
|
+
puts doc.to_xml(indent: 2)
|
65
62
|
----
|
66
63
|
|
67
|
-
|
64
|
+
== Working with documents
|
68
65
|
|
69
|
-
|
70
|
-
----
|
71
|
-
# Working with namespaces
|
72
|
-
doc = Moxml::Document.new
|
73
|
-
root = doc.create_element('root')
|
74
|
-
root['xmlns:custom'] = 'http://example.com/ns'
|
75
|
-
child = doc.create_element('custom:element')
|
76
|
-
root.add_child(child)
|
77
|
-
|
78
|
-
# Creating structured data
|
79
|
-
person = doc.create_element('person')
|
80
|
-
person['id'] = '123'
|
81
|
-
name = doc.create_element('name')
|
82
|
-
name.add_child(doc.create_text('John Doe'))
|
83
|
-
person.add_child(name)
|
84
|
-
|
85
|
-
# Working with attributes
|
86
|
-
element = doc.create_element('div')
|
87
|
-
element['class'] = 'container'
|
88
|
-
element['data-id'] = '123'
|
89
|
-
element['style'] = 'color: blue'
|
90
|
-
|
91
|
-
# Handling special characters
|
92
|
-
text = doc.create_text('Special chars: < > & " \'')
|
93
|
-
cdata = doc.create_cdata('<script>alert("Hello!");</script>')
|
94
|
-
|
95
|
-
# Processing instructions
|
96
|
-
pi = doc.create_processing_instruction('xml-stylesheet',
|
97
|
-
'type="text/xsl" href="style.xsl"')
|
98
|
-
doc.add_child(pi)
|
99
|
-
----
|
66
|
+
=== Using the builder pattern
|
100
67
|
|
101
|
-
|
68
|
+
The builder pattern provides a clean DSL for creating XML documents:
|
102
69
|
|
103
70
|
[source,ruby]
|
104
71
|
----
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
# Add attributes
|
109
|
-
element['class'] = 'content'
|
110
|
-
|
111
|
-
# Access attributes
|
112
|
-
class_attr = element['class']
|
113
|
-
|
114
|
-
# Add child elements
|
115
|
-
child = element.create_element('child')
|
116
|
-
element.add_child(child)
|
72
|
+
doc = Moxml.new.build do
|
73
|
+
declaration version: "1.0", encoding: "UTF-8"
|
117
74
|
|
118
|
-
|
119
|
-
|
75
|
+
element 'library', xmlns: 'http://example.org/library' do
|
76
|
+
element 'book' do
|
77
|
+
element 'title' do
|
78
|
+
text 'Ruby Programming'
|
79
|
+
end
|
120
80
|
|
121
|
-
|
122
|
-
text
|
123
|
-
|
81
|
+
element 'author' do
|
82
|
+
text 'Jane Smith'
|
83
|
+
end
|
124
84
|
|
125
|
-
|
126
|
-
element
|
127
|
-
.add_child(doc.create_element('child'))
|
128
|
-
.add_child(doc.create_text('content'))
|
129
|
-
['class'] = 'new-class'
|
85
|
+
comment 'Publication details'
|
86
|
+
element 'published', year: '2024'
|
130
87
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
.add_child(doc.create_text('Hello'))
|
136
|
-
div.add_child(doc.create_element('br'))
|
137
|
-
div.add_child(doc.create_text('World'))
|
88
|
+
cdata '<custom>metadata</custom>'
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
138
92
|
----
|
139
93
|
|
140
|
-
===
|
94
|
+
=== Direct document manipulation
|
141
95
|
|
142
96
|
[source,ruby]
|
143
97
|
----
|
144
|
-
|
145
|
-
plain_text = Moxml::Text.new("Simple text")
|
146
|
-
multiline_text = Moxml::Text.new("Line 1\nLine 2")
|
147
|
-
special_chars = Moxml::Text.new("Special: & < > \" '")
|
148
|
-
|
149
|
-
# CDATA sections for different content types
|
150
|
-
script_cdata = Moxml::Cdata.new("function() { alert('Hello!'); }")
|
151
|
-
xml_cdata = Moxml::Cdata.new("<data><item>value</item></data>")
|
152
|
-
mixed_cdata = Moxml::Cdata.new("Text with ]]> characters")
|
153
|
-
|
154
|
-
# Comments for documentation
|
155
|
-
todo_comment = Moxml::Comment.new("TODO: Add validation")
|
156
|
-
section_comment = Moxml::Comment.new("----- Section Break -----")
|
157
|
-
debug_comment = Moxml::Comment.new("DEBUG: Remove in production")
|
158
|
-
|
159
|
-
# Processing instructions for various uses
|
160
|
-
style_pi = Moxml::ProcessingInstruction.new(
|
161
|
-
"xml-stylesheet",
|
162
|
-
'type="text/css" href="style.css"'
|
163
|
-
)
|
164
|
-
php_pi = Moxml::ProcessingInstruction.new(
|
165
|
-
"php",
|
166
|
-
'echo "<?php echo $var; ?>>";'
|
167
|
-
)
|
168
|
-
custom_pi = Moxml::ProcessingInstruction.new(
|
169
|
-
"custom-processor",
|
170
|
-
'param1="value1" param2="value2"'
|
171
|
-
)
|
172
|
-
----
|
173
|
-
|
174
|
-
=== Element manipulation examples
|
98
|
+
doc = Moxml.new.create_document
|
175
99
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
root = doc.create_element('
|
100
|
+
# Add declaration
|
101
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
102
|
+
|
103
|
+
# Create root with namespace
|
104
|
+
root = doc.create_element('library')
|
105
|
+
root.add_namespace(nil, 'http://example.org/library')
|
181
106
|
doc.add_child(root)
|
182
107
|
|
183
|
-
#
|
184
|
-
|
185
|
-
|
108
|
+
# Add elements with attributes
|
109
|
+
book = doc.create_element('book')
|
110
|
+
book['id'] = 'b1'
|
111
|
+
root.add_child(book)
|
186
112
|
|
113
|
+
# Add mixed content
|
114
|
+
book.add_child(doc.create_comment('Book details'))
|
187
115
|
title = doc.create_element('title')
|
188
|
-
title.
|
189
|
-
|
190
|
-
|
191
|
-
meta = doc.create_element('meta')
|
192
|
-
meta['charset'] = 'UTF-8'
|
193
|
-
head.add_child(meta)
|
194
|
-
|
195
|
-
# Create body section
|
196
|
-
body = doc.create_element('body')
|
197
|
-
root.add_child(body)
|
198
|
-
|
199
|
-
div = doc.create_element('div')
|
200
|
-
div['class'] = 'container'
|
201
|
-
body.add_child(div)
|
202
|
-
|
203
|
-
# Add multiple paragraphs
|
204
|
-
3.times do |i|
|
205
|
-
p = doc.create_element('p')
|
206
|
-
p.add_child(doc.create_text("Paragraph #{i + 1}"))
|
207
|
-
div.add_child(p)
|
208
|
-
end
|
209
|
-
|
210
|
-
# Working with lists
|
211
|
-
ul = doc.create_element('ul')
|
212
|
-
div.add_child(ul)
|
213
|
-
|
214
|
-
['Item 1', 'Item 2', 'Item 3'].each do |text|
|
215
|
-
li = doc.create_element('li')
|
216
|
-
li.add_child(doc.create_text(text))
|
217
|
-
ul.add_child(li)
|
218
|
-
end
|
219
|
-
|
220
|
-
# Adding link element
|
221
|
-
a = doc.create_element('a')
|
222
|
-
a['href'] = 'https://example.com'
|
223
|
-
a.add_child(doc.create_text('Visit Example'))
|
224
|
-
div.add_child(a)
|
116
|
+
title.text = 'Ruby Programming'
|
117
|
+
book.add_child(title)
|
225
118
|
----
|
226
119
|
|
227
|
-
|
120
|
+
== XML objects and their methods
|
228
121
|
|
229
|
-
|
230
|
-
----
|
231
|
-
# Cloning nodes
|
232
|
-
original = doc.create_element('div')
|
233
|
-
original['id'] = 'original'
|
234
|
-
clone = original.clone
|
235
|
-
|
236
|
-
# Moving nodes
|
237
|
-
target = doc.create_element('target')
|
238
|
-
source = doc.create_element('source')
|
239
|
-
source.add_child(doc.create_text('Content'))
|
240
|
-
target.add_child(source)
|
241
|
-
|
242
|
-
# Replacing nodes
|
243
|
-
old_node = doc.at_xpath('//old')
|
244
|
-
new_node = doc.create_element('new')
|
245
|
-
old_node.replace(new_node)
|
246
|
-
|
247
|
-
# Inserting before/after
|
248
|
-
reference = doc.create_element('reference')
|
249
|
-
before = doc.create_element('before')
|
250
|
-
after = doc.create_element('after')
|
251
|
-
reference.add_previous_sibling(before)
|
252
|
-
reference.add_next_sibling(after)
|
253
|
-
|
254
|
-
# Conditional manipulation
|
255
|
-
element = doc.at_xpath('//conditional')
|
256
|
-
if element['flag'] == 'true'
|
257
|
-
element.add_child(doc.create_text('Flag is true'))
|
258
|
-
else
|
259
|
-
element.remove
|
260
|
-
end
|
261
|
-
----
|
122
|
+
=== Document object
|
262
123
|
|
263
|
-
|
124
|
+
The Document object represents an XML document and serves as the root container
|
125
|
+
for all XML nodes.
|
264
126
|
|
265
127
|
[source,ruby]
|
266
128
|
----
|
267
|
-
# Creating
|
268
|
-
doc = Moxml
|
269
|
-
|
270
|
-
root['xmlns'] = 'http://example.com/default'
|
271
|
-
root['xmlns:custom'] = 'http://example.com/custom'
|
272
|
-
doc.add_child(root)
|
129
|
+
# Creating a document
|
130
|
+
doc = Moxml.new.create_document
|
131
|
+
doc = Moxml.new.parse(xml_string)
|
273
132
|
|
274
|
-
#
|
275
|
-
|
276
|
-
|
133
|
+
# Document properties and methods
|
134
|
+
doc.encoding # Get document encoding
|
135
|
+
doc.encoding = "UTF-8" # Set document encoding
|
136
|
+
doc.version # Get XML version
|
137
|
+
doc.version = "1.1" # Set XML version
|
138
|
+
doc.standalone # Get standalone declaration
|
139
|
+
doc.standalone = "yes" # Set standalone declaration
|
277
140
|
|
278
|
-
|
279
|
-
root
|
141
|
+
# Document structure
|
142
|
+
doc.root # Get root element
|
143
|
+
doc.children # Get all top-level nodes
|
144
|
+
doc.add_child(node) # Add a child node
|
145
|
+
doc.remove_child(node) # Remove a child node
|
280
146
|
|
281
|
-
#
|
282
|
-
|
147
|
+
# Node creation methods
|
148
|
+
doc.create_element(name) # Create new element
|
149
|
+
doc.create_text(content) # Create text node
|
150
|
+
doc.create_cdata(content) # Create CDATA section
|
151
|
+
doc.create_comment(content) # Create comment
|
152
|
+
doc.create_processing_instruction(target, content) # Create PI
|
283
153
|
|
284
|
-
#
|
285
|
-
|
286
|
-
|
154
|
+
# Document querying
|
155
|
+
doc.xpath(expression) # Find nodes by XPath
|
156
|
+
doc.at_xpath(expression) # Find first node by XPath
|
157
|
+
|
158
|
+
# Serialization
|
159
|
+
doc.to_xml(options) # Convert to XML string
|
287
160
|
----
|
288
161
|
|
289
|
-
===
|
162
|
+
=== Element object
|
163
|
+
|
164
|
+
Elements are the primary structural components of an XML document, representing
|
165
|
+
tags with attributes and content.
|
290
166
|
|
291
167
|
[source,ruby]
|
292
168
|
----
|
293
|
-
#
|
294
|
-
|
169
|
+
# Element properties
|
170
|
+
element.name # Get element name
|
171
|
+
element.name = "new_name" # Set element name
|
172
|
+
element.text # Get text content
|
173
|
+
element.text = "content" # Set text content
|
174
|
+
element.inner_text # Get text content for current node only
|
175
|
+
element.inner_html # Get inner XML content
|
176
|
+
element.inner_html = xml # Set inner XML content
|
177
|
+
|
178
|
+
# Attributes
|
179
|
+
element[name] # Get attribute value
|
180
|
+
element[name] = value # Set attribute value
|
181
|
+
element.attributes # Get all attributes
|
182
|
+
element.remove_attribute(name) # Remove attribute
|
183
|
+
|
184
|
+
# Namespace handling
|
185
|
+
element.namespace # Get element's namespace
|
186
|
+
element.namespace = ns # Set element's namespace
|
187
|
+
element.add_namespace(prefix, uri) # Add new namespace
|
188
|
+
element.namespaces # Get all namespace definitions
|
189
|
+
|
190
|
+
# Node structure
|
191
|
+
element.parent # Get parent node
|
192
|
+
element.children # Get child nodes
|
193
|
+
element.add_child(node) # Add child node
|
194
|
+
element.remove_child(node) # Remove child node
|
195
|
+
element.add_previous_sibling(node) # Add sibling before
|
196
|
+
element.add_next_sibling(node) # Add sibling after
|
197
|
+
element.replace(node) # Replace with another node
|
198
|
+
element.remove # Remove from document
|
199
|
+
|
200
|
+
# Node type checking
|
201
|
+
element.element? # Returns true
|
202
|
+
element.text? # Returns false
|
203
|
+
element.cdata? # Returns false
|
204
|
+
element.comment? # Returns false
|
205
|
+
element.processing_instruction? # Returns false
|
206
|
+
|
207
|
+
# Node querying
|
208
|
+
element.xpath(expression) # Find nodes by XPath
|
209
|
+
element.at_xpath(expression) # Find first node by XPath
|
210
|
+
----
|
211
|
+
|
212
|
+
=== Text object
|
213
|
+
|
214
|
+
Text nodes represent character data in the XML document.
|
295
215
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
)
|
216
|
+
[source,ruby]
|
217
|
+
----
|
218
|
+
# Creating text nodes
|
219
|
+
text = doc.create_text("content")
|
301
220
|
|
302
|
-
#
|
303
|
-
|
304
|
-
|
305
|
-
encoding: 'UTF-8',
|
306
|
-
standalone: 'yes'
|
307
|
-
)
|
221
|
+
# Text properties
|
222
|
+
text.content # Get text content
|
223
|
+
text.content = "new" # Set text content
|
308
224
|
|
309
|
-
#
|
310
|
-
|
311
|
-
indent: 0,
|
312
|
-
pretty: false,
|
313
|
-
xml_declaration: false
|
314
|
-
)
|
225
|
+
# Node type checking
|
226
|
+
text.text? # Returns true
|
315
227
|
|
316
|
-
#
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
xml_declaration: true
|
321
|
-
)
|
228
|
+
# Structure
|
229
|
+
text.parent # Get parent node
|
230
|
+
text.remove # Remove from document
|
231
|
+
text.replace(node) # Replace with another node
|
322
232
|
----
|
323
233
|
|
324
|
-
|
234
|
+
=== CDATA object
|
325
235
|
|
326
|
-
|
236
|
+
CDATA sections contain text that should not be parsed as markup.
|
327
237
|
|
328
238
|
[source,ruby]
|
329
239
|
----
|
330
|
-
#
|
331
|
-
|
332
|
-
begin
|
333
|
-
# Process document
|
334
|
-
result = process_document(doc)
|
335
|
-
ensure
|
336
|
-
# Clear references
|
337
|
-
doc = nil
|
338
|
-
GC.start
|
339
|
-
end
|
240
|
+
# Creating CDATA sections
|
241
|
+
cdata = doc.create_cdata("<raw>content</raw>")
|
340
242
|
|
341
|
-
#
|
342
|
-
|
343
|
-
|
344
|
-
process_node(node)
|
345
|
-
# Clear reference
|
346
|
-
node = nil
|
347
|
-
end
|
243
|
+
# CDATA properties
|
244
|
+
cdata.content # Get CDATA content
|
245
|
+
cdata.content = "new" # Set CDATA content
|
348
246
|
|
349
|
-
#
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
nodeset = nil
|
357
|
-
GC.start
|
358
|
-
end
|
247
|
+
# Node type checking
|
248
|
+
cdata.cdata? # Returns true
|
249
|
+
|
250
|
+
# Structure
|
251
|
+
cdata.parent # Get parent node
|
252
|
+
cdata.remove # Remove from document
|
253
|
+
cdata.replace(node) # Replace with another node
|
359
254
|
----
|
360
255
|
|
361
|
-
===
|
256
|
+
=== Comment object
|
257
|
+
|
258
|
+
Comments contain human-readable notes in the XML document.
|
362
259
|
|
363
260
|
[source,ruby]
|
364
261
|
----
|
365
|
-
#
|
366
|
-
|
367
|
-
# Use native CSS selectors
|
368
|
-
nodes = doc.native.css('complex > selector')
|
369
|
-
nodes.each do |native_node|
|
370
|
-
node = Moxml::Node.wrap(native_node)
|
371
|
-
# Process node
|
372
|
-
end
|
373
|
-
|
374
|
-
# Use native XPath
|
375
|
-
results = doc.native.xpath('//complex/xpath/expression')
|
376
|
-
end
|
262
|
+
# Creating comments
|
263
|
+
comment = doc.create_comment("Note")
|
377
264
|
|
378
|
-
#
|
379
|
-
|
380
|
-
|
381
|
-
doc = Moxml::Document.parse(xml, {
|
382
|
-
mode: :generic,
|
383
|
-
effort: :tolerant,
|
384
|
-
smart: true
|
385
|
-
})
|
386
|
-
|
387
|
-
# Direct element creation
|
388
|
-
element = Ox::Element.new('name')
|
389
|
-
wrapped = Moxml::Element.new(element)
|
390
|
-
end
|
265
|
+
# Comment properties
|
266
|
+
comment.content # Get comment content
|
267
|
+
comment.content = "new" # Set comment content
|
391
268
|
|
392
|
-
#
|
393
|
-
|
394
|
-
# Use native parsing features
|
395
|
-
doc = Moxml::Document.parse(xml, {
|
396
|
-
encoding: 'UTF-8',
|
397
|
-
strict: true
|
398
|
-
})
|
269
|
+
# Node type checking
|
270
|
+
comment.comment? # Returns true
|
399
271
|
|
400
|
-
|
401
|
-
|
402
|
-
|
272
|
+
# Structure
|
273
|
+
comment.parent # Get parent node
|
274
|
+
comment.remove # Remove from document
|
275
|
+
comment.replace(node) # Replace with another node
|
403
276
|
----
|
404
277
|
|
405
|
-
===
|
278
|
+
=== Processing instruction object
|
279
|
+
|
280
|
+
Processing instructions provide instructions to applications processing the XML.
|
406
281
|
|
407
282
|
[source,ruby]
|
408
283
|
----
|
409
|
-
#
|
410
|
-
|
284
|
+
# Creating processing instructions
|
285
|
+
pi = doc.create_processing_instruction("xml-stylesheet",
|
286
|
+
'type="text/xsl" href="style.xsl"')
|
411
287
|
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
288
|
+
# PI properties
|
289
|
+
pi.target # Get PI target
|
290
|
+
pi.target = "new" # Set PI target
|
291
|
+
pi.content # Get PI content
|
292
|
+
pi.content = "new" # Set PI content
|
416
293
|
|
417
|
-
|
418
|
-
|
419
|
-
doc = Moxml::Document.parse(xml_string)
|
420
|
-
# Process document
|
421
|
-
result = doc.to_xml
|
422
|
-
doc = nil
|
423
|
-
result
|
424
|
-
end
|
425
|
-
end
|
426
|
-
end
|
294
|
+
# Node type checking
|
295
|
+
pi.processing_instruction? # Returns true
|
427
296
|
|
428
|
-
#
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
doc = Moxml::Document.parse(xml)
|
433
|
-
# Process document
|
434
|
-
doc = nil
|
435
|
-
end
|
436
|
-
end
|
437
|
-
threads.each(&:join)
|
438
|
-
end
|
439
|
-
|
440
|
-
# Thread-local document storage
|
441
|
-
Thread.new do
|
442
|
-
Thread.current[:document] = Moxml::Document.new
|
443
|
-
# Process document
|
444
|
-
ensure
|
445
|
-
Thread.current[:document] = nil
|
446
|
-
end
|
297
|
+
# Structure
|
298
|
+
pi.parent # Get parent node
|
299
|
+
pi.remove # Remove from document
|
300
|
+
pi.replace(node) # Replace with another node
|
447
301
|
----
|
448
302
|
|
449
|
-
|
303
|
+
=== Attribute object
|
450
304
|
|
451
|
-
|
452
|
-
|
453
|
-
==== Parsing errors
|
305
|
+
Attributes represent name-value pairs on elements.
|
454
306
|
|
455
307
|
[source,ruby]
|
456
308
|
----
|
457
|
-
#
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
# Attempt recovery
|
463
|
-
xml_string = cleanup_xml(xml_string)
|
464
|
-
retry
|
465
|
-
end
|
309
|
+
# Attribute properties
|
310
|
+
attr.name # Get attribute name
|
311
|
+
attr.name = "new" # Set attribute name
|
312
|
+
attr.value # Get attribute value
|
313
|
+
attr.value = "new" # Set attribute value
|
466
314
|
|
467
|
-
#
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
detected_encoding = detect_encoding(xml_string)
|
474
|
-
retry if detected_encoding
|
475
|
-
end
|
476
|
-
raise
|
477
|
-
end
|
315
|
+
# Namespace handling
|
316
|
+
attr.namespace # Get attribute's namespace
|
317
|
+
attr.namespace = ns # Set attribute's namespace
|
318
|
+
|
319
|
+
# Node type checking
|
320
|
+
attr.attribute? # Returns true
|
478
321
|
----
|
479
322
|
|
480
|
-
|
323
|
+
=== Namespace object
|
324
|
+
|
325
|
+
Namespace objects represent the XML namespace declarations (a prefix bound to a URI) used in the document.
|
481
326
|
|
482
327
|
[source,ruby]
|
483
328
|
----
|
484
|
-
#
|
485
|
-
|
486
|
-
|
487
|
-
File.open(path) do |file|
|
488
|
-
doc = Moxml::Document.parse(file)
|
489
|
-
doc.xpath('//chunk').each do |chunk|
|
490
|
-
process_chunk(chunk)
|
491
|
-
chunk = nil
|
492
|
-
end
|
493
|
-
doc = nil
|
494
|
-
end
|
495
|
-
GC.start
|
496
|
-
end
|
329
|
+
# Namespace properties
|
330
|
+
ns.prefix # Get namespace prefix
|
331
|
+
ns.uri # Get namespace URI
|
497
332
|
|
498
|
-
#
|
499
|
-
|
333
|
+
# Formatting
|
334
|
+
ns.to_s # Format as xmlns declaration
|
500
335
|
|
501
|
-
|
502
|
-
|
503
|
-
|
336
|
+
# Node type checking
|
337
|
+
ns.namespace? # Returns true
|
338
|
+
----
|
504
339
|
|
505
|
-
|
506
|
-
result = process_document(doc)
|
507
|
-
doc = nil
|
508
|
-
GC.start
|
340
|
+
=== Node traversal and inspection
|
509
341
|
|
510
|
-
|
511
|
-
puts "Memory usage: #{final_memory - initial_memory}MB"
|
342
|
+
Each node type provides methods for traversing the document structure:
|
512
343
|
|
513
|
-
|
514
|
-
end
|
344
|
+
[source,ruby]
|
515
345
|
----
|
346
|
+
node.parent # Get parent node
|
347
|
+
node.children # Get child nodes
|
348
|
+
node.next_sibling # Get next sibling
|
349
|
+
node.previous_sibling # Get previous sibling
|
350
|
+
node.ancestors # Get all ancestor nodes
|
351
|
+
node.descendants # Get all descendant nodes
|
516
352
|
|
517
|
-
|
353
|
+
# Type checking
|
354
|
+
node.element? # Is it an element?
|
355
|
+
node.text? # Is it a text node?
|
356
|
+
node.cdata? # Is it a CDATA section?
|
357
|
+
node.comment? # Is it a comment?
|
358
|
+
node.processing_instruction? # Is it a PI?
|
359
|
+
node.attribute? # Is it an attribute?
|
360
|
+
node.namespace? # Is it a namespace?
|
518
361
|
|
519
|
-
|
362
|
+
# Node information
|
363
|
+
node.document # Get owning document
|
364
|
+
node.path # Get XPath to node
|
365
|
+
node.line_number # Get source line number (if available)
|
520
366
|
----
|
521
|
-
# Handle backend limitations
|
522
|
-
def safe_xpath(doc, xpath)
|
523
|
-
case Moxml.config.backend
|
524
|
-
when :nokogiri
|
525
|
-
doc.xpath(xpath)
|
526
|
-
when :ox
|
527
|
-
# Ox has limited XPath support
|
528
|
-
fallback_xpath_search(doc, xpath)
|
529
|
-
when :oga
|
530
|
-
# Handle Oga-specific XPath syntax
|
531
|
-
modified_xpath = adjust_xpath_for_oga(xpath)
|
532
|
-
doc.xpath(modified_xpath)
|
533
|
-
end
|
534
|
-
end
|
535
367
|
|
536
|
-
|
537
|
-
def with_backend(backend)
|
538
|
-
original_backend = Moxml.config.backend
|
539
|
-
Moxml.config.backend = backend
|
540
|
-
yield
|
541
|
-
ensure
|
542
|
-
Moxml.config.backend = original_backend
|
543
|
-
end
|
544
|
-
----
|
368
|
+
== Advanced features
|
545
369
|
|
546
|
-
===
|
370
|
+
=== XPath querying and node mapping
|
547
371
|
|
548
|
-
|
372
|
+
Moxml provides efficient XPath querying by leveraging the native XML library's
|
373
|
+
implementation while maintaining consistent node mapping:
|
549
374
|
|
550
375
|
[source,ruby]
|
551
376
|
----
|
552
|
-
#
|
553
|
-
|
554
|
-
|
555
|
-
root = doc.create_element('root')
|
556
|
-
doc.add_child(root)
|
557
|
-
|
558
|
-
# Pre-allocate elements
|
559
|
-
elements = Array.new(1000) do |i|
|
560
|
-
elem = doc.create_element('item')
|
561
|
-
elem['id'] = i.to_s
|
562
|
-
elem
|
563
|
-
end
|
377
|
+
# Find all book elements
|
378
|
+
books = doc.xpath('//book')
|
379
|
+
# Returns Moxml::Element objects mapped to native nodes
|
564
380
|
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
381
|
+
# Find with namespaces
|
382
|
+
titles = doc.xpath('//dc:title',
|
383
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
384
|
+
|
385
|
+
# Find first matching node
|
386
|
+
first_book = doc.at_xpath('//book')
|
569
387
|
|
570
|
-
|
388
|
+
# Chain queries
|
389
|
+
doc.xpath('//book').each do |book|
|
390
|
+
# Each book is a mapped Moxml::Element
|
391
|
+
title = book.at_xpath('.//title')
|
392
|
+
puts "#{book['id']}: #{title.text}"
|
571
393
|
end
|
394
|
+
----
|
572
395
|
|
573
|
-
|
574
|
-
def process_large_xml(xml_string)
|
575
|
-
result = []
|
576
|
-
doc = Moxml::Document.parse(xml_string)
|
396
|
+
=== Namespace handling
|
577
397
|
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
end
|
398
|
+
[source,ruby]
|
399
|
+
----
|
400
|
+
# Add namespace to element
|
401
|
+
element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
583
402
|
|
584
|
-
|
585
|
-
|
403
|
+
# Create element in namespace
|
404
|
+
title = doc.create_element('dc:title')
|
405
|
+
title.text = 'Document Title'
|
586
406
|
|
587
|
-
|
588
|
-
|
407
|
+
# Query with namespaces
|
408
|
+
doc.xpath('//dc:title',
|
409
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
589
410
|
----
|
590
411
|
|
591
|
-
|
412
|
+
=== Accessing native implementation
|
413
|
+
|
414
|
+
While not typically needed, you can access the underlying XML library's nodes:
|
592
415
|
|
593
416
|
[source,ruby]
|
594
417
|
----
|
595
|
-
#
|
596
|
-
|
597
|
-
# Cache frequently used nodes
|
598
|
-
@header_nodes ||= doc.xpath('//header').to_a
|
418
|
+
# Get native node
|
419
|
+
native_node = element.native
|
599
420
|
|
600
|
-
|
601
|
-
|
421
|
+
# Get adapter being used
|
422
|
+
adapter = element.context.config.adapter
|
602
423
|
|
603
|
-
|
604
|
-
|
605
|
-
|
424
|
+
# Create from native node
|
425
|
+
element = Moxml::Element.new(native_node, context)
|
426
|
+
----
|
606
427
|
|
607
|
-
|
608
|
-
def efficient_attribute_handling(element)
|
609
|
-
# Cache attribute values
|
610
|
-
@cached_attrs ||= element.attributes
|
428
|
+
== Error handling
|
611
429
|
|
612
|
-
|
613
|
-
|
430
|
+
Moxml provides specific error classes for different types of errors that may
|
431
|
+
occur during XML processing:
|
614
432
|
|
615
|
-
|
616
|
-
|
617
|
-
|
433
|
+
[source,ruby]
|
434
|
+
----
|
435
|
+
begin
|
436
|
+
doc = context.parse(xml_string)
|
437
|
+
rescue Moxml::ParseError => e
|
438
|
+
# Handles XML parsing errors
|
439
|
+
puts "Parse error at line #{e.line}, column #{e.column}"
|
440
|
+
puts "Message: #{e.message}"
|
441
|
+
rescue Moxml::ValidationError => e
|
442
|
+
# Handles XML validation errors
|
443
|
+
puts "Validation error: #{e.message}"
|
444
|
+
rescue Moxml::XPathError => e
|
445
|
+
# Handles XPath expression errors
|
446
|
+
puts "XPath error: #{e.message}"
|
447
|
+
rescue Moxml::Error => e
|
448
|
+
# Handles other Moxml-specific errors
|
449
|
+
puts "Error: #{e.message}"
|
618
450
|
end
|
619
451
|
----
|
620
452
|
|
621
|
-
|
453
|
+
== Configuration
|
454
|
+
|
455
|
+
Moxml can be configured globally or per instance:
|
622
456
|
|
623
457
|
[source,ruby]
|
624
458
|
----
|
625
|
-
#
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
indent: 2,
|
637
|
-
pretty: true,
|
638
|
-
xml_declaration: true
|
639
|
-
)
|
640
|
-
|
641
|
-
# Stream large documents
|
642
|
-
File.open('large.xml', 'w') do |file|
|
643
|
-
doc.write_to(file, indent: 2)
|
644
|
-
end
|
459
|
+
# Global configuration
|
460
|
+
Moxml.configure do |config|
|
461
|
+
config.default_adapter = :nokogiri
|
462
|
+
config.strict = true
|
463
|
+
config.encoding = 'UTF-8'
|
464
|
+
end
|
465
|
+
|
466
|
+
# Instance configuration
|
467
|
+
moxml = Moxml.new do |config|
|
468
|
+
config.adapter = :ox
|
469
|
+
config.strict = false
|
645
470
|
end
|
646
471
|
----
|
647
472
|
|
648
|
-
|
473
|
+
== Thread safety
|
649
474
|
|
650
|
-
|
475
|
+
Moxml is thread-safe when used properly. Each instance maintains its own state
|
476
|
+
and can be used safely in concurrent operations:
|
651
477
|
|
652
478
|
[source,ruby]
|
653
479
|
----
|
654
|
-
|
655
|
-
def
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
if node.respond_to?(:attributes)
|
660
|
-
node.attributes.each do |name, attr|
|
661
|
-
puts "#{indent} @#{name}=#{attr.value.inspect}"
|
662
|
-
end
|
480
|
+
class XmlProcessor
|
481
|
+
def initialize
|
482
|
+
@mutex = Mutex.new
|
483
|
+
@context = Moxml.new
|
663
484
|
end
|
664
485
|
|
665
|
-
|
666
|
-
|
486
|
+
def process(xml)
|
487
|
+
@mutex.synchronize do
|
488
|
+
doc = @context.parse(xml)
|
489
|
+
# Modify document
|
490
|
+
doc.to_xml
|
491
|
+
end
|
667
492
|
end
|
668
493
|
end
|
494
|
+
----
|
669
495
|
|
670
|
-
|
671
|
-
def debug_node_operations
|
672
|
-
nodes_created = 0
|
673
|
-
nodes_removed = 0
|
496
|
+
== Performance considerations
|
674
497
|
|
675
|
-
|
676
|
-
ensure
|
677
|
-
puts "Nodes created: #{nodes_created}"
|
678
|
-
puts "Nodes removed: #{nodes_removed}"
|
679
|
-
end
|
680
|
-
----
|
498
|
+
=== Memory management
|
681
499
|
|
682
|
-
|
500
|
+
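Moxml maintains a node registry to ensure consistent object mapping. Drop references to a document once processing is finished so that the document and its registry can be garbage collected: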
|
683
501
|
|
684
502
|
[source,ruby]
|
685
503
|
----
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
element = doc.create_element('test')
|
692
|
-
doc.add_child(element)
|
504
|
+
doc = context.parse(large_xml)
|
505
|
+
# Process document
|
506
|
+
doc = nil # Allow garbage collection of document and registry
|
507
|
+
GC.start # Force garbage collection if needed
|
508
|
+
----
|
693
509
|
|
694
|
-
|
695
|
-
raise "Node creation failed" unless doc.root
|
696
|
-
raise "Node type wrong" unless doc.root.is_a?(Moxml::Element)
|
510
|
+
=== Efficient querying
|
697
511
|
|
698
|
-
|
699
|
-
xml = doc.to_xml
|
700
|
-
raise "Serialization failed" unless xml.include?('<test/>')
|
512
|
+
Use specific XPath expressions for better performance:
|
701
513
|
|
702
|
-
|
703
|
-
rescue => e
|
704
|
-
puts "Backend verification failed: #{e.message}"
|
705
|
-
end
|
514
|
+
[source,ruby]
|
706
515
|
----
|
516
|
+
# More efficient - specific path
|
517
|
+
doc.xpath('//book/title')
|
707
518
|
|
708
|
-
|
519
|
+
# Less efficient - requires a full document scan
|
520
|
+
doc.xpath('//title')
|
709
521
|
|
710
|
-
|
522
|
+
# Most efficient - direct child access
|
523
|
+
root.xpath('./title')
|
524
|
+
----
|
711
525
|
|
712
|
-
|
713
|
-
* `Moxml::ParseError` - XML parsing errors
|
714
|
-
* `Moxml::ArgumentError` - Invalid argument errors
|
526
|
+
== Best practices
|
715
527
|
|
716
|
-
===
|
528
|
+
=== Document creation
|
717
529
|
|
718
530
|
[source,ruby]
|
719
531
|
----
|
720
|
-
#
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
532
|
+
# Preferred - using builder pattern
|
533
|
+
doc = Moxml.new.build do
|
534
|
+
declaration version: "1.0", encoding: "UTF-8"
|
535
|
+
element 'root' do
|
536
|
+
element 'child' do
|
537
|
+
text 'content'
|
538
|
+
end
|
539
|
+
end
|
727
540
|
end
|
728
541
|
|
729
|
-
#
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
end
|
542
|
+
# Alternative - direct manipulation
|
543
|
+
doc = Moxml.new.create_document
|
544
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
545
|
+
root = doc.create_element('root')
|
546
|
+
doc.add_child(root)
|
547
|
+
----
|
736
548
|
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
549
|
+
=== Node manipulation
|
550
|
+
|
551
|
+
[source,ruby]
|
552
|
+
----
|
553
|
+
# Preferred - chainable operations
|
554
|
+
element
|
555
|
+
.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
556
|
+
.add_child(doc.create_text('content'))
|
557
|
+
|
558
|
+
# Preferred - clear node type checking
|
559
|
+
if node.element?
|
560
|
+
node.add_child(doc.create_text('content'))
|
749
561
|
end
|
750
562
|
----
|
751
563
|
|
752
564
|
== Contributing
|
753
565
|
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
566
|
+
1. Fork the repository
|
567
|
+
2. Create your feature branch (`git checkout -b feature/my-new-feature`)
|
568
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
569
|
+
4. Push to the branch (`git push origin feature/my-new-feature`)
|
570
|
+
5. Create a new Pull Request
|
758
571
|
|
759
|
-
|
760
|
-
* Add tests for new features
|
761
|
-
* Update documentation
|
762
|
-
* Ensure backwards compatibility
|
763
|
-
* Consider performance implications
|
764
|
-
* Test with all supported backends
|
572
|
+
== License
|
765
573
|
|
766
|
-
|
574
|
+
Copyright (c) 2024 Ribose Inc.
|
767
575
|
|
768
|
-
|
576
|
+
This project is licensed under the BSD-2-Clause License. See the LICENSE file for details.
|
769
577
|
|
770
|
-
The gem is available as open source under the terms of the BSD-2-Clause License.
|