moxml 0.1.0 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +15 -0
- data/.github/workflows/release.yml +23 -0
- data/.gitignore +3 -0
- data/.rubocop.yml +2 -0
- data/.rubocop_todo.yml +65 -0
- data/.ruby-version +1 -0
- data/Gemfile +10 -3
- data/README.adoc +401 -594
- data/lib/moxml/adapter/base.rb +102 -0
- data/lib/moxml/adapter/customized_oga/xml_declaration.rb +18 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +104 -0
- data/lib/moxml/adapter/nokogiri.rb +319 -0
- data/lib/moxml/adapter/oga.rb +318 -0
- data/lib/moxml/adapter/ox.rb +325 -0
- data/lib/moxml/adapter.rb +26 -170
- data/lib/moxml/attribute.rb +47 -14
- data/lib/moxml/builder.rb +64 -0
- data/lib/moxml/cdata.rb +4 -26
- data/lib/moxml/comment.rb +6 -22
- data/lib/moxml/config.rb +39 -15
- data/lib/moxml/context.rb +29 -0
- data/lib/moxml/declaration.rb +16 -26
- data/lib/moxml/doctype.rb +9 -0
- data/lib/moxml/document.rb +51 -63
- data/lib/moxml/document_builder.rb +87 -0
- data/lib/moxml/element.rb +63 -97
- data/lib/moxml/error.rb +20 -0
- data/lib/moxml/namespace.rb +12 -37
- data/lib/moxml/node.rb +78 -58
- data/lib/moxml/node_set.rb +19 -222
- data/lib/moxml/processing_instruction.rb +6 -25
- data/lib/moxml/text.rb +4 -26
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +55 -0
- data/lib/moxml/xml_utils.rb +80 -0
- data/lib/moxml.rb +33 -33
- data/moxml.gemspec +1 -1
- data/spec/moxml/adapter/nokogiri_spec.rb +14 -0
- data/spec/moxml/adapter/oga_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +49 -0
- data/spec/moxml/all_with_adapters_spec.rb +46 -0
- data/spec/moxml/config_spec.rb +55 -0
- data/spec/moxml/error_spec.rb +71 -0
- data/spec/moxml/examples/adapter_spec.rb +27 -0
- data/spec/moxml_spec.rb +50 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/shared_examples/attribute.rb +165 -0
- data/spec/support/shared_examples/builder.rb +25 -0
- data/spec/support/shared_examples/cdata.rb +70 -0
- data/spec/support/shared_examples/comment.rb +65 -0
- data/spec/support/shared_examples/context.rb +35 -0
- data/spec/support/shared_examples/declaration.rb +93 -0
- data/spec/support/shared_examples/doctype.rb +25 -0
- data/spec/support/shared_examples/document.rb +110 -0
- data/spec/support/shared_examples/document_builder.rb +43 -0
- data/spec/support/shared_examples/edge_cases.rb +185 -0
- data/spec/support/shared_examples/element.rb +130 -0
- data/spec/support/shared_examples/examples/attribute.rb +42 -0
- data/spec/support/shared_examples/examples/basic_usage.rb +67 -0
- data/spec/support/shared_examples/examples/memory.rb +54 -0
- data/spec/support/shared_examples/examples/namespace.rb +65 -0
- data/spec/support/shared_examples/examples/readme_examples.rb +100 -0
- data/spec/support/shared_examples/examples/thread_safety.rb +43 -0
- data/spec/support/shared_examples/examples/xpath.rb +39 -0
- data/spec/support/shared_examples/integration.rb +135 -0
- data/spec/support/shared_examples/namespace.rb +96 -0
- data/spec/support/shared_examples/node.rb +110 -0
- data/spec/support/shared_examples/node_set.rb +90 -0
- data/spec/support/shared_examples/processing_instruction.rb +88 -0
- data/spec/support/shared_examples/text.rb +66 -0
- data/spec/support/shared_examples/xml_adapter.rb +191 -0
- data/spec/support/xml_matchers.rb +27 -0
- metadata +55 -6
- data/.github/workflows/main.yml +0 -27
- data/lib/moxml/error_handler.rb +0 -77
- data/lib/moxml/errors.rb +0 -169
data/README.adoc
CHANGED
@@ -1,770 +1,577 @@
|
|
1
|
-
= Moxml:
|
1
|
+
= Moxml: Modern XML processing for Ruby
|
2
|
+
:toc: macro
|
3
|
+
:toclevels: 3
|
4
|
+
:toc-title: Contents
|
5
|
+
:source-highlighter: highlight.js
|
2
6
|
|
3
|
-
|
7
|
+
image:https://github.com/lutaml/moxml/workflows/rake/badge.svg["Build Status", link="https://github.com/lutaml/moxml/actions?workflow=rake"]
|
4
8
|
|
5
|
-
|
6
|
-
interface for working with XML documents, regardless of the underlying XML
|
7
|
-
library.
|
9
|
+
toc::[]
|
8
10
|
|
9
|
-
==
|
11
|
+
== Introduction and purpose
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
Moxml provides a unified, modern XML processing interface for Ruby applications.
|
14
|
+
It offers a consistent API that abstracts away the underlying XML implementation
|
15
|
+
details while maintaining high performance through efficient node mapping and
|
16
|
+
native XPath querying.
|
17
|
+
|
18
|
+
Key features:
|
15
19
|
|
16
|
-
|
20
|
+
* Intuitive, Ruby-idiomatic API for XML manipulation
|
21
|
+
* Consistent interface across different XML libraries
|
22
|
+
* Efficient node mapping for XPath queries
|
23
|
+
* Support for all XML node types and features
|
24
|
+
* Easy switching between XML processing engines
|
25
|
+
* Clean separation between interface and implementation
|
17
26
|
|
18
|
-
|
27
|
+
== Getting started
|
19
28
|
|
20
|
-
|
29
|
+
Install the gem and at least one supported XML library:
|
21
30
|
|
22
31
|
[source,ruby]
|
23
32
|
----
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
config.backend = :nokogiri # or :ox, :oga
|
28
|
-
end
|
33
|
+
# In your Gemfile
|
34
|
+
gem 'moxml'
|
35
|
+
gem 'nokogiri' # Or 'ox' or 'oga'
|
29
36
|
----
|
30
37
|
|
31
|
-
===
|
38
|
+
=== Basic document creation
|
32
39
|
|
33
40
|
[source,ruby]
|
34
41
|
----
|
35
|
-
|
36
|
-
doc = Moxml::Document.new
|
37
|
-
|
38
|
-
# Parse from string
|
39
|
-
doc = Moxml::Document.parse("<root><child>content</child></root>")
|
42
|
+
require 'moxml'
|
40
43
|
|
41
|
-
#
|
42
|
-
doc = Moxml
|
43
|
-
----
|
44
|
+
# Create a new XML document
|
45
|
+
doc = Moxml.new.create_document
|
44
46
|
|
45
|
-
|
47
|
+
# Add XML declaration
|
48
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
46
49
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
doc = Moxml::Document.new
|
51
|
-
root = doc.create_element('root')
|
50
|
+
# Create root element with namespace
|
51
|
+
root = doc.create_element('book')
|
52
|
+
root.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
52
53
|
doc.add_child(root)
|
53
54
|
|
54
|
-
#
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
doc = Moxml::Document.parse(xml_string, encoding: 'UTF-8')
|
55
|
+
# Add content
|
56
|
+
title = doc.create_element('dc:title')
|
57
|
+
title.text = 'XML Processing with Ruby'
|
58
|
+
root.add_child(title)
|
59
59
|
|
60
|
-
#
|
61
|
-
doc
|
62
|
-
encoding: 'UTF-8',
|
63
|
-
strict: true
|
64
|
-
})
|
60
|
+
# Output formatted XML
|
61
|
+
puts doc.to_xml(indent: 2)
|
65
62
|
----
|
66
63
|
|
67
|
-
|
64
|
+
== Working with documents
|
68
65
|
|
69
|
-
|
70
|
-
----
|
71
|
-
# Working with namespaces
|
72
|
-
doc = Moxml::Document.new
|
73
|
-
root = doc.create_element('root')
|
74
|
-
root['xmlns:custom'] = 'http://example.com/ns'
|
75
|
-
child = doc.create_element('custom:element')
|
76
|
-
root.add_child(child)
|
77
|
-
|
78
|
-
# Creating structured data
|
79
|
-
person = doc.create_element('person')
|
80
|
-
person['id'] = '123'
|
81
|
-
name = doc.create_element('name')
|
82
|
-
name.add_child(doc.create_text('John Doe'))
|
83
|
-
person.add_child(name)
|
84
|
-
|
85
|
-
# Working with attributes
|
86
|
-
element = doc.create_element('div')
|
87
|
-
element['class'] = 'container'
|
88
|
-
element['data-id'] = '123'
|
89
|
-
element['style'] = 'color: blue'
|
90
|
-
|
91
|
-
# Handling special characters
|
92
|
-
text = doc.create_text('Special chars: < > & " \'')
|
93
|
-
cdata = doc.create_cdata('<script>alert("Hello!");</script>')
|
94
|
-
|
95
|
-
# Processing instructions
|
96
|
-
pi = doc.create_processing_instruction('xml-stylesheet',
|
97
|
-
'type="text/xsl" href="style.xsl"')
|
98
|
-
doc.add_child(pi)
|
99
|
-
----
|
66
|
+
=== Using the builder pattern
|
100
67
|
|
101
|
-
|
68
|
+
The builder pattern provides a clean DSL for creating XML documents:
|
102
69
|
|
103
70
|
[source,ruby]
|
104
71
|
----
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
# Add attributes
|
109
|
-
element['class'] = 'content'
|
110
|
-
|
111
|
-
# Access attributes
|
112
|
-
class_attr = element['class']
|
113
|
-
|
114
|
-
# Add child elements
|
115
|
-
child = element.create_element('child')
|
116
|
-
element.add_child(child)
|
72
|
+
doc = Moxml.new.build do
|
73
|
+
declaration version: "1.0", encoding: "UTF-8"
|
117
74
|
|
118
|
-
|
119
|
-
|
75
|
+
element 'library', xmlns: 'http://example.org/library' do
|
76
|
+
element 'book' do
|
77
|
+
element 'title' do
|
78
|
+
text 'Ruby Programming'
|
79
|
+
end
|
120
80
|
|
121
|
-
|
122
|
-
text
|
123
|
-
|
81
|
+
element 'author' do
|
82
|
+
text 'Jane Smith'
|
83
|
+
end
|
124
84
|
|
125
|
-
|
126
|
-
element
|
127
|
-
.add_child(doc.create_element('child'))
|
128
|
-
.add_child(doc.create_text('content'))
|
129
|
-
['class'] = 'new-class'
|
85
|
+
comment 'Publication details'
|
86
|
+
element 'published', year: '2024'
|
130
87
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
.add_child(doc.create_text('Hello'))
|
136
|
-
div.add_child(doc.create_element('br'))
|
137
|
-
div.add_child(doc.create_text('World'))
|
88
|
+
cdata '<custom>metadata</custom>'
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
138
92
|
----
|
139
93
|
|
140
|
-
===
|
94
|
+
=== Direct document manipulation
|
141
95
|
|
142
96
|
[source,ruby]
|
143
97
|
----
|
144
|
-
|
145
|
-
plain_text = Moxml::Text.new("Simple text")
|
146
|
-
multiline_text = Moxml::Text.new("Line 1\nLine 2")
|
147
|
-
special_chars = Moxml::Text.new("Special: & < > \" '")
|
148
|
-
|
149
|
-
# CDATA sections for different content types
|
150
|
-
script_cdata = Moxml::Cdata.new("function() { alert('Hello!'); }")
|
151
|
-
xml_cdata = Moxml::Cdata.new("<data><item>value</item></data>")
|
152
|
-
mixed_cdata = Moxml::Cdata.new("Text with ]]> characters")
|
153
|
-
|
154
|
-
# Comments for documentation
|
155
|
-
todo_comment = Moxml::Comment.new("TODO: Add validation")
|
156
|
-
section_comment = Moxml::Comment.new("----- Section Break -----")
|
157
|
-
debug_comment = Moxml::Comment.new("DEBUG: Remove in production")
|
158
|
-
|
159
|
-
# Processing instructions for various uses
|
160
|
-
style_pi = Moxml::ProcessingInstruction.new(
|
161
|
-
"xml-stylesheet",
|
162
|
-
'type="text/css" href="style.css"'
|
163
|
-
)
|
164
|
-
php_pi = Moxml::ProcessingInstruction.new(
|
165
|
-
"php",
|
166
|
-
'echo "<?php echo $var; ?>>";'
|
167
|
-
)
|
168
|
-
custom_pi = Moxml::ProcessingInstruction.new(
|
169
|
-
"custom-processor",
|
170
|
-
'param1="value1" param2="value2"'
|
171
|
-
)
|
172
|
-
----
|
173
|
-
|
174
|
-
=== Element manipulation examples
|
98
|
+
doc = Moxml.new.create_document
|
175
99
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
root = doc.create_element('
|
100
|
+
# Add declaration
|
101
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
102
|
+
|
103
|
+
# Create root with namespace
|
104
|
+
root = doc.create_element('library')
|
105
|
+
root.add_namespace(nil, 'http://example.org/library')
|
181
106
|
doc.add_child(root)
|
182
107
|
|
183
|
-
#
|
184
|
-
|
185
|
-
|
108
|
+
# Add elements with attributes
|
109
|
+
book = doc.create_element('book')
|
110
|
+
book['id'] = 'b1'
|
111
|
+
root.add_child(book)
|
186
112
|
|
113
|
+
# Add mixed content
|
114
|
+
book.add_child(doc.create_comment('Book details'))
|
187
115
|
title = doc.create_element('title')
|
188
|
-
title.
|
189
|
-
|
190
|
-
|
191
|
-
meta = doc.create_element('meta')
|
192
|
-
meta['charset'] = 'UTF-8'
|
193
|
-
head.add_child(meta)
|
194
|
-
|
195
|
-
# Create body section
|
196
|
-
body = doc.create_element('body')
|
197
|
-
root.add_child(body)
|
198
|
-
|
199
|
-
div = doc.create_element('div')
|
200
|
-
div['class'] = 'container'
|
201
|
-
body.add_child(div)
|
202
|
-
|
203
|
-
# Add multiple paragraphs
|
204
|
-
3.times do |i|
|
205
|
-
p = doc.create_element('p')
|
206
|
-
p.add_child(doc.create_text("Paragraph #{i + 1}"))
|
207
|
-
div.add_child(p)
|
208
|
-
end
|
209
|
-
|
210
|
-
# Working with lists
|
211
|
-
ul = doc.create_element('ul')
|
212
|
-
div.add_child(ul)
|
213
|
-
|
214
|
-
['Item 1', 'Item 2', 'Item 3'].each do |text|
|
215
|
-
li = doc.create_element('li')
|
216
|
-
li.add_child(doc.create_text(text))
|
217
|
-
ul.add_child(li)
|
218
|
-
end
|
219
|
-
|
220
|
-
# Adding link element
|
221
|
-
a = doc.create_element('a')
|
222
|
-
a['href'] = 'https://example.com'
|
223
|
-
a.add_child(doc.create_text('Visit Example'))
|
224
|
-
div.add_child(a)
|
116
|
+
title.text = 'Ruby Programming'
|
117
|
+
book.add_child(title)
|
225
118
|
----
|
226
119
|
|
227
|
-
|
120
|
+
== XML objects and their methods
|
228
121
|
|
229
|
-
|
230
|
-
----
|
231
|
-
# Cloning nodes
|
232
|
-
original = doc.create_element('div')
|
233
|
-
original['id'] = 'original'
|
234
|
-
clone = original.clone
|
235
|
-
|
236
|
-
# Moving nodes
|
237
|
-
target = doc.create_element('target')
|
238
|
-
source = doc.create_element('source')
|
239
|
-
source.add_child(doc.create_text('Content'))
|
240
|
-
target.add_child(source)
|
241
|
-
|
242
|
-
# Replacing nodes
|
243
|
-
old_node = doc.at_xpath('//old')
|
244
|
-
new_node = doc.create_element('new')
|
245
|
-
old_node.replace(new_node)
|
246
|
-
|
247
|
-
# Inserting before/after
|
248
|
-
reference = doc.create_element('reference')
|
249
|
-
before = doc.create_element('before')
|
250
|
-
after = doc.create_element('after')
|
251
|
-
reference.add_previous_sibling(before)
|
252
|
-
reference.add_next_sibling(after)
|
253
|
-
|
254
|
-
# Conditional manipulation
|
255
|
-
element = doc.at_xpath('//conditional')
|
256
|
-
if element['flag'] == 'true'
|
257
|
-
element.add_child(doc.create_text('Flag is true'))
|
258
|
-
else
|
259
|
-
element.remove
|
260
|
-
end
|
261
|
-
----
|
122
|
+
=== Document object
|
262
123
|
|
263
|
-
|
124
|
+
The Document object represents an XML document and serves as the root container
|
125
|
+
for all XML nodes.
|
264
126
|
|
265
127
|
[source,ruby]
|
266
128
|
----
|
267
|
-
# Creating
|
268
|
-
doc = Moxml
|
269
|
-
|
270
|
-
root['xmlns'] = 'http://example.com/default'
|
271
|
-
root['xmlns:custom'] = 'http://example.com/custom'
|
272
|
-
doc.add_child(root)
|
129
|
+
# Creating a document
|
130
|
+
doc = Moxml.new.create_document
|
131
|
+
doc = Moxml.new.parse(xml_string)
|
273
132
|
|
274
|
-
#
|
275
|
-
|
276
|
-
|
133
|
+
# Document properties and methods
|
134
|
+
doc.encoding # Get document encoding
|
135
|
+
doc.encoding = "UTF-8" # Set document encoding
|
136
|
+
doc.version # Get XML version
|
137
|
+
doc.version = "1.1" # Set XML version
|
138
|
+
doc.standalone # Get standalone declaration
|
139
|
+
doc.standalone = "yes" # Set standalone declaration
|
277
140
|
|
278
|
-
|
279
|
-
root
|
141
|
+
# Document structure
|
142
|
+
doc.root # Get root element
|
143
|
+
doc.children # Get all top-level nodes
|
144
|
+
doc.add_child(node) # Add a child node
|
145
|
+
doc.remove_child(node) # Remove a child node
|
280
146
|
|
281
|
-
#
|
282
|
-
|
147
|
+
# Node creation methods
|
148
|
+
doc.create_element(name) # Create new element
|
149
|
+
doc.create_text(content) # Create text node
|
150
|
+
doc.create_cdata(content) # Create CDATA section
|
151
|
+
doc.create_comment(content) # Create comment
|
152
|
+
doc.create_processing_instruction(target, content) # Create PI
|
283
153
|
|
284
|
-
#
|
285
|
-
|
286
|
-
|
154
|
+
# Document querying
|
155
|
+
doc.xpath(expression) # Find nodes by XPath
|
156
|
+
doc.at_xpath(expression) # Find first node by XPath
|
157
|
+
|
158
|
+
# Serialization
|
159
|
+
doc.to_xml(options) # Convert to XML string
|
287
160
|
----
|
288
161
|
|
289
|
-
===
|
162
|
+
=== Element object
|
163
|
+
|
164
|
+
Elements are the primary structural components of an XML document, representing
|
165
|
+
tags with attributes and content.
|
290
166
|
|
291
167
|
[source,ruby]
|
292
168
|
----
|
293
|
-
#
|
294
|
-
|
169
|
+
# Element properties
|
170
|
+
element.name # Get element name
|
171
|
+
element.name = "new_name" # Set element name
|
172
|
+
element.text # Get text content
|
173
|
+
element.text = "content" # Set text content
|
174
|
+
element.inner_text # Get text content for current node only
|
175
|
+
element.inner_html # Get inner XML content
|
176
|
+
element.inner_html = xml # Set inner XML content
|
177
|
+
|
178
|
+
# Attributes
|
179
|
+
element[name] # Get attribute value
|
180
|
+
element[name] = value # Set attribute value
|
181
|
+
element.attributes # Get all attributes
|
182
|
+
element.remove_attribute(name) # Remove attribute
|
183
|
+
|
184
|
+
# Namespace handling
|
185
|
+
element.namespace # Get element's namespace
|
186
|
+
element.namespace = ns # Set element's namespace
|
187
|
+
element.add_namespace(prefix, uri) # Add new namespace
|
188
|
+
element.namespaces # Get all namespace definitions
|
189
|
+
|
190
|
+
# Node structure
|
191
|
+
element.parent # Get parent node
|
192
|
+
element.children # Get child nodes
|
193
|
+
element.add_child(node) # Add child node
|
194
|
+
element.remove_child(node) # Remove child node
|
195
|
+
element.add_previous_sibling(node) # Add sibling before
|
196
|
+
element.add_next_sibling(node) # Add sibling after
|
197
|
+
element.replace(node) # Replace with another node
|
198
|
+
element.remove # Remove from document
|
199
|
+
|
200
|
+
# Node type checking
|
201
|
+
element.element? # Returns true
|
202
|
+
element.text? # Returns false
|
203
|
+
element.cdata? # Returns false
|
204
|
+
element.comment? # Returns false
|
205
|
+
element.processing_instruction? # Returns false
|
206
|
+
|
207
|
+
# Node querying
|
208
|
+
element.xpath(expression) # Find nodes by XPath
|
209
|
+
element.at_xpath(expression) # Find first node by XPath
|
210
|
+
----
|
211
|
+
|
212
|
+
=== Text object
|
213
|
+
|
214
|
+
Text nodes represent character data in the XML document.
|
295
215
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
)
|
216
|
+
[source,ruby]
|
217
|
+
----
|
218
|
+
# Creating text nodes
|
219
|
+
text = doc.create_text("content")
|
301
220
|
|
302
|
-
#
|
303
|
-
|
304
|
-
|
305
|
-
encoding: 'UTF-8',
|
306
|
-
standalone: 'yes'
|
307
|
-
)
|
221
|
+
# Text properties
|
222
|
+
text.content # Get text content
|
223
|
+
text.content = "new" # Set text content
|
308
224
|
|
309
|
-
#
|
310
|
-
|
311
|
-
indent: 0,
|
312
|
-
pretty: false,
|
313
|
-
xml_declaration: false
|
314
|
-
)
|
225
|
+
# Node type checking
|
226
|
+
text.text? # Returns true
|
315
227
|
|
316
|
-
#
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
xml_declaration: true
|
321
|
-
)
|
228
|
+
# Structure
|
229
|
+
text.parent # Get parent node
|
230
|
+
text.remove # Remove from document
|
231
|
+
text.replace(node) # Replace with another node
|
322
232
|
----
|
323
233
|
|
324
|
-
|
234
|
+
=== CDATA object
|
325
235
|
|
326
|
-
|
236
|
+
CDATA sections contain text that should not be parsed as markup.
|
327
237
|
|
328
238
|
[source,ruby]
|
329
239
|
----
|
330
|
-
#
|
331
|
-
|
332
|
-
begin
|
333
|
-
# Process document
|
334
|
-
result = process_document(doc)
|
335
|
-
ensure
|
336
|
-
# Clear references
|
337
|
-
doc = nil
|
338
|
-
GC.start
|
339
|
-
end
|
240
|
+
# Creating CDATA sections
|
241
|
+
cdata = doc.create_cdata("<raw>content</raw>")
|
340
242
|
|
341
|
-
#
|
342
|
-
|
343
|
-
|
344
|
-
process_node(node)
|
345
|
-
# Clear reference
|
346
|
-
node = nil
|
347
|
-
end
|
243
|
+
# CDATA properties
|
244
|
+
cdata.content # Get CDATA content
|
245
|
+
cdata.content = "new" # Set CDATA content
|
348
246
|
|
349
|
-
#
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
nodeset = nil
|
357
|
-
GC.start
|
358
|
-
end
|
247
|
+
# Node type checking
|
248
|
+
cdata.cdata? # Returns true
|
249
|
+
|
250
|
+
# Structure
|
251
|
+
cdata.parent # Get parent node
|
252
|
+
cdata.remove # Remove from document
|
253
|
+
cdata.replace(node) # Replace with another node
|
359
254
|
----
|
360
255
|
|
361
|
-
===
|
256
|
+
=== Comment object
|
257
|
+
|
258
|
+
Comments contain human-readable notes in the XML document.
|
362
259
|
|
363
260
|
[source,ruby]
|
364
261
|
----
|
365
|
-
#
|
366
|
-
|
367
|
-
# Use native CSS selectors
|
368
|
-
nodes = doc.native.css('complex > selector')
|
369
|
-
nodes.each do |native_node|
|
370
|
-
node = Moxml::Node.wrap(native_node)
|
371
|
-
# Process node
|
372
|
-
end
|
373
|
-
|
374
|
-
# Use native XPath
|
375
|
-
results = doc.native.xpath('//complex/xpath/expression')
|
376
|
-
end
|
262
|
+
# Creating comments
|
263
|
+
comment = doc.create_comment("Note")
|
377
264
|
|
378
|
-
#
|
379
|
-
|
380
|
-
|
381
|
-
doc = Moxml::Document.parse(xml, {
|
382
|
-
mode: :generic,
|
383
|
-
effort: :tolerant,
|
384
|
-
smart: true
|
385
|
-
})
|
386
|
-
|
387
|
-
# Direct element creation
|
388
|
-
element = Ox::Element.new('name')
|
389
|
-
wrapped = Moxml::Element.new(element)
|
390
|
-
end
|
265
|
+
# Comment properties
|
266
|
+
comment.content # Get comment content
|
267
|
+
comment.content = "new" # Set comment content
|
391
268
|
|
392
|
-
#
|
393
|
-
|
394
|
-
# Use native parsing features
|
395
|
-
doc = Moxml::Document.parse(xml, {
|
396
|
-
encoding: 'UTF-8',
|
397
|
-
strict: true
|
398
|
-
})
|
269
|
+
# Node type checking
|
270
|
+
comment.comment? # Returns true
|
399
271
|
|
400
|
-
|
401
|
-
|
402
|
-
|
272
|
+
# Structure
|
273
|
+
comment.parent # Get parent node
|
274
|
+
comment.remove # Remove from document
|
275
|
+
comment.replace(node) # Replace with another node
|
403
276
|
----
|
404
277
|
|
405
|
-
===
|
278
|
+
=== Processing instruction object
|
279
|
+
|
280
|
+
Processing instructions provide instructions to applications processing the XML.
|
406
281
|
|
407
282
|
[source,ruby]
|
408
283
|
----
|
409
|
-
#
|
410
|
-
|
284
|
+
# Creating processing instructions
|
285
|
+
pi = doc.create_processing_instruction("xml-stylesheet",
|
286
|
+
'type="text/xsl" href="style.xsl"')
|
411
287
|
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
288
|
+
# PI properties
|
289
|
+
pi.target # Get PI target
|
290
|
+
pi.target = "new" # Set PI target
|
291
|
+
pi.content # Get PI content
|
292
|
+
pi.content = "new" # Set PI content
|
416
293
|
|
417
|
-
|
418
|
-
|
419
|
-
doc = Moxml::Document.parse(xml_string)
|
420
|
-
# Process document
|
421
|
-
result = doc.to_xml
|
422
|
-
doc = nil
|
423
|
-
result
|
424
|
-
end
|
425
|
-
end
|
426
|
-
end
|
294
|
+
# Node type checking
|
295
|
+
pi.processing_instruction? # Returns true
|
427
296
|
|
428
|
-
#
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
doc = Moxml::Document.parse(xml)
|
433
|
-
# Process document
|
434
|
-
doc = nil
|
435
|
-
end
|
436
|
-
end
|
437
|
-
threads.each(&:join)
|
438
|
-
end
|
439
|
-
|
440
|
-
# Thread-local document storage
|
441
|
-
Thread.new do
|
442
|
-
Thread.current[:document] = Moxml::Document.new
|
443
|
-
# Process document
|
444
|
-
ensure
|
445
|
-
Thread.current[:document] = nil
|
446
|
-
end
|
297
|
+
# Structure
|
298
|
+
pi.parent # Get parent node
|
299
|
+
pi.remove # Remove from document
|
300
|
+
pi.replace(node) # Replace with another node
|
447
301
|
----
|
448
302
|
|
449
|
-
|
303
|
+
=== Attribute object
|
450
304
|
|
451
|
-
|
452
|
-
|
453
|
-
==== Parsing errors
|
305
|
+
Attributes represent name-value pairs on elements.
|
454
306
|
|
455
307
|
[source,ruby]
|
456
308
|
----
|
457
|
-
#
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
# Attempt recovery
|
463
|
-
xml_string = cleanup_xml(xml_string)
|
464
|
-
retry
|
465
|
-
end
|
309
|
+
# Attribute properties
|
310
|
+
attr.name # Get attribute name
|
311
|
+
attr.name = "new" # Set attribute name
|
312
|
+
attr.value # Get attribute value
|
313
|
+
attr.value = "new" # Set attribute value
|
466
314
|
|
467
|
-
#
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
detected_encoding = detect_encoding(xml_string)
|
474
|
-
retry if detected_encoding
|
475
|
-
end
|
476
|
-
raise
|
477
|
-
end
|
315
|
+
# Namespace handling
|
316
|
+
attr.namespace # Get attribute's namespace
|
317
|
+
attr.namespace = ns # Set attribute's namespace
|
318
|
+
|
319
|
+
# Node type checking
|
320
|
+
attr.attribute? # Returns true
|
478
321
|
----
|
479
322
|
|
480
|
-
|
323
|
+
=== Namespace object
|
324
|
+
|
325
|
+
Namespaces define XML namespaces used in the document.
|
481
326
|
|
482
327
|
[source,ruby]
|
483
328
|
----
|
484
|
-
#
|
485
|
-
|
486
|
-
|
487
|
-
File.open(path) do |file|
|
488
|
-
doc = Moxml::Document.parse(file)
|
489
|
-
doc.xpath('//chunk').each do |chunk|
|
490
|
-
process_chunk(chunk)
|
491
|
-
chunk = nil
|
492
|
-
end
|
493
|
-
doc = nil
|
494
|
-
end
|
495
|
-
GC.start
|
496
|
-
end
|
329
|
+
# Namespace properties
|
330
|
+
ns.prefix # Get namespace prefix
|
331
|
+
ns.uri # Get namespace URI
|
497
332
|
|
498
|
-
#
|
499
|
-
|
333
|
+
# Formatting
|
334
|
+
ns.to_s # Format as xmlns declaration
|
500
335
|
|
501
|
-
|
502
|
-
|
503
|
-
|
336
|
+
# Node type checking
|
337
|
+
ns.namespace? # Returns true
|
338
|
+
----
|
504
339
|
|
505
|
-
|
506
|
-
result = process_document(doc)
|
507
|
-
doc = nil
|
508
|
-
GC.start
|
340
|
+
=== Node traversal and inspection
|
509
341
|
|
510
|
-
|
511
|
-
puts "Memory usage: #{final_memory - initial_memory}MB"
|
342
|
+
Each node type provides methods for traversing the document structure:
|
512
343
|
|
513
|
-
|
514
|
-
end
|
344
|
+
[source,ruby]
|
515
345
|
----
|
346
|
+
node.parent # Get parent node
|
347
|
+
node.children # Get child nodes
|
348
|
+
node.next_sibling # Get next sibling
|
349
|
+
node.previous_sibling # Get previous sibling
|
350
|
+
node.ancestors # Get all ancestor nodes
|
351
|
+
node.descendants # Get all descendant nodes
|
516
352
|
|
517
|
-
|
353
|
+
# Type checking
|
354
|
+
node.element? # Is it an element?
|
355
|
+
node.text? # Is it a text node?
|
356
|
+
node.cdata? # Is it a CDATA section?
|
357
|
+
node.comment? # Is it a comment?
|
358
|
+
node.processing_instruction? # Is it a PI?
|
359
|
+
node.attribute? # Is it an attribute?
|
360
|
+
node.namespace? # Is it a namespace?
|
518
361
|
|
519
|
-
|
362
|
+
# Node information
|
363
|
+
node.document # Get owning document
|
364
|
+
node.path # Get XPath to node
|
365
|
+
node.line_number # Get source line number (if available)
|
520
366
|
----
|
521
|
-
# Handle backend limitations
|
522
|
-
def safe_xpath(doc, xpath)
|
523
|
-
case Moxml.config.backend
|
524
|
-
when :nokogiri
|
525
|
-
doc.xpath(xpath)
|
526
|
-
when :ox
|
527
|
-
# Ox has limited XPath support
|
528
|
-
fallback_xpath_search(doc, xpath)
|
529
|
-
when :oga
|
530
|
-
# Handle Oga-specific XPath syntax
|
531
|
-
modified_xpath = adjust_xpath_for_oga(xpath)
|
532
|
-
doc.xpath(modified_xpath)
|
533
|
-
end
|
534
|
-
end
|
535
367
|
|
536
|
-
|
537
|
-
def with_backend(backend)
|
538
|
-
original_backend = Moxml.config.backend
|
539
|
-
Moxml.config.backend = backend
|
540
|
-
yield
|
541
|
-
ensure
|
542
|
-
Moxml.config.backend = original_backend
|
543
|
-
end
|
544
|
-
----
|
368
|
+
== Advanced features
|
545
369
|
|
546
|
-
===
|
370
|
+
=== XPath querying and node mapping
|
547
371
|
|
548
|
-
|
372
|
+
Moxml provides efficient XPath querying by leveraging the native XML library's
|
373
|
+
implementation while maintaining consistent node mapping:
|
549
374
|
|
550
375
|
[source,ruby]
|
551
376
|
----
|
552
|
-
#
|
553
|
-
|
554
|
-
|
555
|
-
root = doc.create_element('root')
|
556
|
-
doc.add_child(root)
|
557
|
-
|
558
|
-
# Pre-allocate elements
|
559
|
-
elements = Array.new(1000) do |i|
|
560
|
-
elem = doc.create_element('item')
|
561
|
-
elem['id'] = i.to_s
|
562
|
-
elem
|
563
|
-
end
|
377
|
+
# Find all book elements
|
378
|
+
books = doc.xpath('//book')
|
379
|
+
# Returns Moxml::Element objects mapped to native nodes
|
564
380
|
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
381
|
+
# Find with namespaces
|
382
|
+
titles = doc.xpath('//dc:title',
|
383
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
384
|
+
|
385
|
+
# Find first matching node
|
386
|
+
first_book = doc.at_xpath('//book')
|
569
387
|
|
570
|
-
|
388
|
+
# Chain queries
|
389
|
+
doc.xpath('//book').each do |book|
|
390
|
+
# Each book is a mapped Moxml::Element
|
391
|
+
title = book.at_xpath('.//title')
|
392
|
+
puts "#{book['id']}: #{title.text}"
|
571
393
|
end
|
394
|
+
----
|
572
395
|
|
573
|
-
|
574
|
-
def process_large_xml(xml_string)
|
575
|
-
result = []
|
576
|
-
doc = Moxml::Document.parse(xml_string)
|
396
|
+
=== Namespace handling
|
577
397
|
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
end
|
398
|
+
[source,ruby]
|
399
|
+
----
|
400
|
+
# Add namespace to element
|
401
|
+
element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
583
402
|
|
584
|
-
|
585
|
-
|
403
|
+
# Create element in namespace
|
404
|
+
title = doc.create_element('dc:title')
|
405
|
+
title.text = 'Document Title'
|
586
406
|
|
587
|
-
|
588
|
-
|
407
|
+
# Query with namespaces
|
408
|
+
doc.xpath('//dc:title',
|
409
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
589
410
|
----
|
590
411
|
|
591
|
-
|
412
|
+
=== Accessing native implementation
|
413
|
+
|
414
|
+
While not typically needed, you can access the underlying XML library's nodes:
|
592
415
|
|
593
416
|
[source,ruby]
|
594
417
|
----
|
595
|
-
#
|
596
|
-
|
597
|
-
# Cache frequently used nodes
|
598
|
-
@header_nodes ||= doc.xpath('//header').to_a
|
418
|
+
# Get native node
|
419
|
+
native_node = element.native
|
599
420
|
|
600
|
-
|
601
|
-
|
421
|
+
# Get adapter being used
|
422
|
+
adapter = element.context.config.adapter
|
602
423
|
|
603
|
-
|
604
|
-
|
605
|
-
|
424
|
+
# Create from native node
|
425
|
+
element = Moxml::Element.new(native_node, context)
|
426
|
+
----
|
606
427
|
|
607
|
-
|
608
|
-
def efficient_attribute_handling(element)
|
609
|
-
# Cache attribute values
|
610
|
-
@cached_attrs ||= element.attributes
|
428
|
+
== Error handling
|
611
429
|
|
612
|
-
|
613
|
-
|
430
|
+
Moxml provides specific error classes for different types of errors that may
|
431
|
+
occur during XML processing:
|
614
432
|
|
615
|
-
|
616
|
-
|
617
|
-
|
433
|
+
[source,ruby]
|
434
|
+
----
|
435
|
+
begin
|
436
|
+
doc = context.parse(xml_string)
|
437
|
+
rescue Moxml::ParseError => e
|
438
|
+
# Handles XML parsing errors
|
439
|
+
puts "Parse error at line #{e.line}, column #{e.column}"
|
440
|
+
puts "Message: #{e.message}"
|
441
|
+
rescue Moxml::ValidationError => e
|
442
|
+
# Handles XML validation errors
|
443
|
+
puts "Validation error: #{e.message}"
|
444
|
+
rescue Moxml::XPathError => e
|
445
|
+
# Handles XPath expression errors
|
446
|
+
puts "XPath error: #{e.message}"
|
447
|
+
rescue Moxml::Error => e
|
448
|
+
# Handles other Moxml-specific errors
|
449
|
+
puts "Error: #{e.message}"
|
618
450
|
end
|
619
451
|
----
|
620
452
|
|
621
|
-
|
453
|
+
== Configuration
|
454
|
+
|
455
|
+
Moxml can be configured globally or per instance:
|
622
456
|
|
623
457
|
[source,ruby]
|
624
458
|
----
|
625
|
-
#
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
indent: 2,
|
637
|
-
pretty: true,
|
638
|
-
xml_declaration: true
|
639
|
-
)
|
640
|
-
|
641
|
-
# Stream large documents
|
642
|
-
File.open('large.xml', 'w') do |file|
|
643
|
-
doc.write_to(file, indent: 2)
|
644
|
-
end
|
459
|
+
# Global configuration
|
460
|
+
Moxml.configure do |config|
|
461
|
+
config.default_adapter = :nokogiri
|
462
|
+
config.strict = true
|
463
|
+
config.encoding = 'UTF-8'
|
464
|
+
end
|
465
|
+
|
466
|
+
# Instance configuration
|
467
|
+
moxml = Moxml.new do |config|
|
468
|
+
config.adapter = :ox
|
469
|
+
config.strict = false
|
645
470
|
end
|
646
471
|
----
|
647
472
|
|
648
|
-
|
473
|
+
== Thread safety
|
649
474
|
|
650
|
-
|
475
|
+
Moxml is thread-safe when used properly. Each instance maintains its own state
|
476
|
+
and can be used safely in concurrent operations:
|
651
477
|
|
652
478
|
[source,ruby]
|
653
479
|
----
|
654
|
-
|
655
|
-
def
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
if node.respond_to?(:attributes)
|
660
|
-
node.attributes.each do |name, attr|
|
661
|
-
puts "#{indent} @#{name}=#{attr.value.inspect}"
|
662
|
-
end
|
480
|
+
class XmlProcessor
|
481
|
+
def initialize
|
482
|
+
@mutex = Mutex.new
|
483
|
+
@context = Moxml.new
|
663
484
|
end
|
664
485
|
|
665
|
-
|
666
|
-
|
486
|
+
def process(xml)
|
487
|
+
@mutex.synchronize do
|
488
|
+
doc = @context.parse(xml)
|
489
|
+
# Modify document
|
490
|
+
doc.to_xml
|
491
|
+
end
|
667
492
|
end
|
668
493
|
end
|
494
|
+
----
|
669
495
|
|
670
|
-
|
671
|
-
def debug_node_operations
|
672
|
-
nodes_created = 0
|
673
|
-
nodes_removed = 0
|
496
|
+
== Performance considerations
|
674
497
|
|
675
|
-
|
676
|
-
ensure
|
677
|
-
puts "Nodes created: #{nodes_created}"
|
678
|
-
puts "Nodes removed: #{nodes_removed}"
|
679
|
-
end
|
680
|
-
----
|
498
|
+
=== Memory management
|
681
499
|
|
682
|
-
|
500
|
+
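Moxml maintains a node registry to ensure consistent object mapping. When you are done with a large document, release your references to it so that the document and its registry can be garbage-collected: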
|
683
501
|
|
684
502
|
[source,ruby]
|
685
503
|
----
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
element = doc.create_element('test')
|
692
|
-
doc.add_child(element)
|
504
|
+
doc = context.parse(large_xml)
|
505
|
+
# Process document
|
506
|
+
doc = nil # Allow garbage collection of document and registry
|
507
|
+
GC.start # Force garbage collection if needed
|
508
|
+
----
|
693
509
|
|
694
|
-
|
695
|
-
raise "Node creation failed" unless doc.root
|
696
|
-
raise "Node type wrong" unless doc.root.is_a?(Moxml::Element)
|
510
|
+
=== Efficient querying
|
697
511
|
|
698
|
-
|
699
|
-
xml = doc.to_xml
|
700
|
-
raise "Serialization failed" unless xml.include?('<test/>')
|
512
|
+
Use specific XPath expressions for better performance:
|
701
513
|
|
702
|
-
|
703
|
-
rescue => e
|
704
|
-
puts "Backend verification failed: #{e.message}"
|
705
|
-
end
|
514
|
+
[source,ruby]
|
706
515
|
----
|
516
|
+
# More efficient - specific path
|
517
|
+
doc.xpath('//book/title')
|
707
518
|
|
708
|
-
|
519
|
+
# Less efficient - requires full document scan
|
520
|
+
doc.xpath('//title')
|
709
521
|
|
710
|
-
|
522
|
+
# Most efficient - direct child access
|
523
|
+
root.xpath('./title')
|
524
|
+
----
|
711
525
|
|
712
|
-
|
713
|
-
* `Moxml::ParseError` - XML parsing errors
|
714
|
-
* `Moxml::ArgumentError` - Invalid argument errors
|
526
|
+
== Best practices
|
715
527
|
|
716
|
-
===
|
528
|
+
=== Document creation
|
717
529
|
|
718
530
|
[source,ruby]
|
719
531
|
----
|
720
|
-
#
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
532
|
+
# Preferred - using builder pattern
|
533
|
+
doc = Moxml.new.build do
|
534
|
+
declaration version: "1.0", encoding: "UTF-8"
|
535
|
+
element 'root' do
|
536
|
+
element 'child' do
|
537
|
+
text 'content'
|
538
|
+
end
|
539
|
+
end
|
727
540
|
end
|
728
541
|
|
729
|
-
#
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
end
|
542
|
+
# Alternative - direct manipulation
|
543
|
+
doc = Moxml.new.create_document
|
544
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
545
|
+
root = doc.create_element('root')
|
546
|
+
doc.add_child(root)
|
547
|
+
----
|
736
548
|
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
549
|
+
=== Node manipulation
|
550
|
+
|
551
|
+
[source,ruby]
|
552
|
+
----
|
553
|
+
# Preferred - chainable operations
|
554
|
+
element
|
555
|
+
.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
556
|
+
.add_child(doc.create_text('content'))
|
557
|
+
|
558
|
+
# Preferred - clear node type checking
|
559
|
+
if node.element?
|
560
|
+
node.add_child(doc.create_text('content'))
|
749
561
|
end
|
750
562
|
----
|
751
563
|
|
752
564
|
== Contributing
|
753
565
|
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
566
|
+
1. Fork the repository
|
567
|
+
2. Create your feature branch (`git checkout -b feature/my-new-feature`)
|
568
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
569
|
+
4. Push to the branch (`git push origin feature/my-new-feature`)
|
570
|
+
5. Create a new Pull Request
|
758
571
|
|
759
|
-
|
760
|
-
* Add tests for new features
|
761
|
-
* Update documentation
|
762
|
-
* Ensure backwards compatibility
|
763
|
-
* Consider performance implications
|
764
|
-
* Test with all supported backends
|
572
|
+
== License
|
765
573
|
|
766
|
-
|
574
|
+
Copyright (c) 2024 Ribose Inc.
|
767
575
|
|
768
|
-
|
576
|
+
This project is licensed under the BSD-2-Clause License. See the LICENSE file for details.
|
769
577
|
|
770
|
-
The gem is available as open source under the terms of the BSD-2-Clause License.
|