moxml 0.1.0 → 0.1.1
This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +15 -0
- data/.github/workflows/release.yml +23 -0
- data/.gitignore +3 -0
- data/.rubocop.yml +2 -0
- data/.rubocop_todo.yml +65 -0
- data/.ruby-version +1 -0
- data/Gemfile +10 -3
- data/README.adoc +400 -594
- data/lib/moxml/adapter/base.rb +102 -0
- data/lib/moxml/adapter/customized_oga/xml_declaration.rb +18 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +104 -0
- data/lib/moxml/adapter/nokogiri.rb +314 -0
- data/lib/moxml/adapter/oga.rb +309 -0
- data/lib/moxml/adapter/ox.rb +325 -0
- data/lib/moxml/adapter.rb +26 -170
- data/lib/moxml/attribute.rb +47 -14
- data/lib/moxml/builder.rb +64 -0
- data/lib/moxml/cdata.rb +4 -26
- data/lib/moxml/comment.rb +6 -22
- data/lib/moxml/config.rb +39 -15
- data/lib/moxml/context.rb +29 -0
- data/lib/moxml/declaration.rb +16 -26
- data/lib/moxml/doctype.rb +9 -0
- data/lib/moxml/document.rb +51 -63
- data/lib/moxml/document_builder.rb +87 -0
- data/lib/moxml/element.rb +61 -99
- data/lib/moxml/error.rb +20 -0
- data/lib/moxml/namespace.rb +12 -37
- data/lib/moxml/node.rb +78 -58
- data/lib/moxml/node_set.rb +19 -222
- data/lib/moxml/processing_instruction.rb +6 -25
- data/lib/moxml/text.rb +4 -26
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +55 -0
- data/lib/moxml/xml_utils.rb +80 -0
- data/lib/moxml.rb +33 -33
- data/moxml.gemspec +1 -1
- data/spec/moxml/adapter/nokogiri_spec.rb +14 -0
- data/spec/moxml/adapter/oga_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +49 -0
- data/spec/moxml/all_with_adapters_spec.rb +46 -0
- data/spec/moxml/config_spec.rb +55 -0
- data/spec/moxml/error_spec.rb +71 -0
- data/spec/moxml/examples/adapter_spec.rb +27 -0
- data/spec/moxml_spec.rb +50 -0
- data/spec/spec_helper.rb +32 -0
- data/spec/support/shared_examples/attribute.rb +165 -0
- data/spec/support/shared_examples/builder.rb +25 -0
- data/spec/support/shared_examples/cdata.rb +70 -0
- data/spec/support/shared_examples/comment.rb +65 -0
- data/spec/support/shared_examples/context.rb +35 -0
- data/spec/support/shared_examples/declaration.rb +93 -0
- data/spec/support/shared_examples/doctype.rb +25 -0
- data/spec/support/shared_examples/document.rb +110 -0
- data/spec/support/shared_examples/document_builder.rb +43 -0
- data/spec/support/shared_examples/edge_cases.rb +185 -0
- data/spec/support/shared_examples/element.rb +110 -0
- data/spec/support/shared_examples/examples/attribute.rb +42 -0
- data/spec/support/shared_examples/examples/basic_usage.rb +67 -0
- data/spec/support/shared_examples/examples/memory.rb +54 -0
- data/spec/support/shared_examples/examples/namespace.rb +65 -0
- data/spec/support/shared_examples/examples/readme_examples.rb +100 -0
- data/spec/support/shared_examples/examples/thread_safety.rb +43 -0
- data/spec/support/shared_examples/examples/xpath.rb +39 -0
- data/spec/support/shared_examples/integration.rb +135 -0
- data/spec/support/shared_examples/namespace.rb +96 -0
- data/spec/support/shared_examples/node.rb +110 -0
- data/spec/support/shared_examples/node_set.rb +90 -0
- data/spec/support/shared_examples/processing_instruction.rb +88 -0
- data/spec/support/shared_examples/text.rb +66 -0
- data/spec/support/shared_examples/xml_adapter.rb +191 -0
- data/spec/support/xml_matchers.rb +27 -0
- metadata +55 -6
- data/.github/workflows/main.yml +0 -27
- data/lib/moxml/error_handler.rb +0 -77
- data/lib/moxml/errors.rb +0 -169
data/README.adoc
CHANGED
@@ -1,770 +1,576 @@
|
|
1
|
-
= Moxml:
|
1
|
+
= Moxml: Modern XML processing for Ruby
|
2
|
+
:toc: macro
|
3
|
+
:toclevels: 3
|
4
|
+
:toc-title: Contents
|
5
|
+
:source-highlighter: highlight.js
|
2
6
|
|
3
|
-
|
7
|
+
image:https://github.com/lutaml/moxml/workflows/rake/badge.svg["Build Status", link="https://github.com/lutaml/moxml/actions?workflow=rake"]
|
4
8
|
|
5
|
-
|
6
|
-
interface for working with XML documents, regardless of the underlying XML
|
7
|
-
library.
|
9
|
+
toc::[]
|
8
10
|
|
9
|
-
==
|
11
|
+
== Introduction and purpose
|
10
12
|
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
Moxml provides a unified, modern XML processing interface for Ruby applications.
|
14
|
+
It offers a consistent API that abstracts away the underlying XML implementation
|
15
|
+
details while maintaining high performance through efficient node mapping and
|
16
|
+
native XPath querying.
|
17
|
+
|
18
|
+
Key features:
|
15
19
|
|
16
|
-
|
20
|
+
* Intuitive, Ruby-idiomatic API for XML manipulation
|
21
|
+
* Consistent interface across different XML libraries
|
22
|
+
* Efficient node mapping for XPath queries
|
23
|
+
* Support for all XML node types and features
|
24
|
+
* Easy switching between XML processing engines
|
25
|
+
* Clean separation between interface and implementation
|
17
26
|
|
18
|
-
|
27
|
+
== Getting started
|
19
28
|
|
20
|
-
|
29
|
+
Install the gem and at least one supported XML library:
|
21
30
|
|
22
31
|
[source,ruby]
|
23
32
|
----
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
config.backend = :nokogiri # or :ox, :oga
|
28
|
-
end
|
33
|
+
# In your Gemfile
|
34
|
+
gem 'moxml'
|
35
|
+
gem 'nokogiri' # Or 'ox' or 'oga'
|
29
36
|
----
|
30
37
|
|
31
|
-
===
|
38
|
+
=== Basic document creation
|
32
39
|
|
33
40
|
[source,ruby]
|
34
41
|
----
|
35
|
-
|
36
|
-
doc = Moxml::Document.new
|
37
|
-
|
38
|
-
# Parse from string
|
39
|
-
doc = Moxml::Document.parse("<root><child>content</child></root>")
|
42
|
+
require 'moxml'
|
40
43
|
|
41
|
-
#
|
42
|
-
doc = Moxml
|
43
|
-
----
|
44
|
+
# Create a new XML document
|
45
|
+
doc = Moxml.new.create_document
|
44
46
|
|
45
|
-
|
47
|
+
# Add XML declaration
|
48
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
46
49
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
doc = Moxml::Document.new
|
51
|
-
root = doc.create_element('root')
|
50
|
+
# Create root element with namespace
|
51
|
+
root = doc.create_element('book')
|
52
|
+
root.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
52
53
|
doc.add_child(root)
|
53
54
|
|
54
|
-
#
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
doc = Moxml::Document.parse(xml_string, encoding: 'UTF-8')
|
55
|
+
# Add content
|
56
|
+
title = doc.create_element('dc:title')
|
57
|
+
title.text = 'XML Processing with Ruby'
|
58
|
+
root.add_child(title)
|
59
59
|
|
60
|
-
#
|
61
|
-
doc
|
62
|
-
encoding: 'UTF-8',
|
63
|
-
strict: true
|
64
|
-
})
|
60
|
+
# Output formatted XML
|
61
|
+
puts doc.to_xml(indent: 2)
|
65
62
|
----
|
66
63
|
|
67
|
-
|
64
|
+
== Working with documents
|
68
65
|
|
69
|
-
|
70
|
-
----
|
71
|
-
# Working with namespaces
|
72
|
-
doc = Moxml::Document.new
|
73
|
-
root = doc.create_element('root')
|
74
|
-
root['xmlns:custom'] = 'http://example.com/ns'
|
75
|
-
child = doc.create_element('custom:element')
|
76
|
-
root.add_child(child)
|
77
|
-
|
78
|
-
# Creating structured data
|
79
|
-
person = doc.create_element('person')
|
80
|
-
person['id'] = '123'
|
81
|
-
name = doc.create_element('name')
|
82
|
-
name.add_child(doc.create_text('John Doe'))
|
83
|
-
person.add_child(name)
|
84
|
-
|
85
|
-
# Working with attributes
|
86
|
-
element = doc.create_element('div')
|
87
|
-
element['class'] = 'container'
|
88
|
-
element['data-id'] = '123'
|
89
|
-
element['style'] = 'color: blue'
|
90
|
-
|
91
|
-
# Handling special characters
|
92
|
-
text = doc.create_text('Special chars: < > & " \'')
|
93
|
-
cdata = doc.create_cdata('<script>alert("Hello!");</script>')
|
94
|
-
|
95
|
-
# Processing instructions
|
96
|
-
pi = doc.create_processing_instruction('xml-stylesheet',
|
97
|
-
'type="text/xsl" href="style.xsl"')
|
98
|
-
doc.add_child(pi)
|
99
|
-
----
|
66
|
+
=== Using the builder pattern
|
100
67
|
|
101
|
-
|
68
|
+
The builder pattern provides a clean DSL for creating XML documents:
|
102
69
|
|
103
70
|
[source,ruby]
|
104
71
|
----
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
# Add attributes
|
109
|
-
element['class'] = 'content'
|
110
|
-
|
111
|
-
# Access attributes
|
112
|
-
class_attr = element['class']
|
113
|
-
|
114
|
-
# Add child elements
|
115
|
-
child = element.create_element('child')
|
116
|
-
element.add_child(child)
|
72
|
+
doc = Moxml.new.build do
|
73
|
+
declaration version: "1.0", encoding: "UTF-8"
|
117
74
|
|
118
|
-
|
119
|
-
|
75
|
+
element 'library', xmlns: 'http://example.org/library' do
|
76
|
+
element 'book' do
|
77
|
+
element 'title' do
|
78
|
+
text 'Ruby Programming'
|
79
|
+
end
|
120
80
|
|
121
|
-
|
122
|
-
text
|
123
|
-
|
81
|
+
element 'author' do
|
82
|
+
text 'Jane Smith'
|
83
|
+
end
|
124
84
|
|
125
|
-
|
126
|
-
element
|
127
|
-
.add_child(doc.create_element('child'))
|
128
|
-
.add_child(doc.create_text('content'))
|
129
|
-
['class'] = 'new-class'
|
85
|
+
comment 'Publication details'
|
86
|
+
element 'published', year: '2024'
|
130
87
|
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
.add_child(doc.create_text('Hello'))
|
136
|
-
div.add_child(doc.create_element('br'))
|
137
|
-
div.add_child(doc.create_text('World'))
|
88
|
+
cdata '<custom>metadata</custom>'
|
89
|
+
end
|
90
|
+
end
|
91
|
+
end
|
138
92
|
----
|
139
93
|
|
140
|
-
===
|
94
|
+
=== Direct document manipulation
|
141
95
|
|
142
96
|
[source,ruby]
|
143
97
|
----
|
144
|
-
|
145
|
-
plain_text = Moxml::Text.new("Simple text")
|
146
|
-
multiline_text = Moxml::Text.new("Line 1\nLine 2")
|
147
|
-
special_chars = Moxml::Text.new("Special: & < > \" '")
|
148
|
-
|
149
|
-
# CDATA sections for different content types
|
150
|
-
script_cdata = Moxml::Cdata.new("function() { alert('Hello!'); }")
|
151
|
-
xml_cdata = Moxml::Cdata.new("<data><item>value</item></data>")
|
152
|
-
mixed_cdata = Moxml::Cdata.new("Text with ]]> characters")
|
153
|
-
|
154
|
-
# Comments for documentation
|
155
|
-
todo_comment = Moxml::Comment.new("TODO: Add validation")
|
156
|
-
section_comment = Moxml::Comment.new("----- Section Break -----")
|
157
|
-
debug_comment = Moxml::Comment.new("DEBUG: Remove in production")
|
158
|
-
|
159
|
-
# Processing instructions for various uses
|
160
|
-
style_pi = Moxml::ProcessingInstruction.new(
|
161
|
-
"xml-stylesheet",
|
162
|
-
'type="text/css" href="style.css"'
|
163
|
-
)
|
164
|
-
php_pi = Moxml::ProcessingInstruction.new(
|
165
|
-
"php",
|
166
|
-
'echo "<?php echo $var; ?>>";'
|
167
|
-
)
|
168
|
-
custom_pi = Moxml::ProcessingInstruction.new(
|
169
|
-
"custom-processor",
|
170
|
-
'param1="value1" param2="value2"'
|
171
|
-
)
|
172
|
-
----
|
173
|
-
|
174
|
-
=== Element manipulation examples
|
98
|
+
doc = Moxml.new.create_document
|
175
99
|
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
root = doc.create_element('
|
100
|
+
# Add declaration
|
101
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
102
|
+
|
103
|
+
# Create root with namespace
|
104
|
+
root = doc.create_element('library')
|
105
|
+
root.add_namespace(nil, 'http://example.org/library')
|
181
106
|
doc.add_child(root)
|
182
107
|
|
183
|
-
#
|
184
|
-
|
185
|
-
|
108
|
+
# Add elements with attributes
|
109
|
+
book = doc.create_element('book')
|
110
|
+
book['id'] = 'b1'
|
111
|
+
root.add_child(book)
|
186
112
|
|
113
|
+
# Add mixed content
|
114
|
+
book.add_child(doc.create_comment('Book details'))
|
187
115
|
title = doc.create_element('title')
|
188
|
-
title.
|
189
|
-
|
190
|
-
|
191
|
-
meta = doc.create_element('meta')
|
192
|
-
meta['charset'] = 'UTF-8'
|
193
|
-
head.add_child(meta)
|
194
|
-
|
195
|
-
# Create body section
|
196
|
-
body = doc.create_element('body')
|
197
|
-
root.add_child(body)
|
198
|
-
|
199
|
-
div = doc.create_element('div')
|
200
|
-
div['class'] = 'container'
|
201
|
-
body.add_child(div)
|
202
|
-
|
203
|
-
# Add multiple paragraphs
|
204
|
-
3.times do |i|
|
205
|
-
p = doc.create_element('p')
|
206
|
-
p.add_child(doc.create_text("Paragraph #{i + 1}"))
|
207
|
-
div.add_child(p)
|
208
|
-
end
|
209
|
-
|
210
|
-
# Working with lists
|
211
|
-
ul = doc.create_element('ul')
|
212
|
-
div.add_child(ul)
|
213
|
-
|
214
|
-
['Item 1', 'Item 2', 'Item 3'].each do |text|
|
215
|
-
li = doc.create_element('li')
|
216
|
-
li.add_child(doc.create_text(text))
|
217
|
-
ul.add_child(li)
|
218
|
-
end
|
219
|
-
|
220
|
-
# Adding link element
|
221
|
-
a = doc.create_element('a')
|
222
|
-
a['href'] = 'https://example.com'
|
223
|
-
a.add_child(doc.create_text('Visit Example'))
|
224
|
-
div.add_child(a)
|
116
|
+
title.text = 'Ruby Programming'
|
117
|
+
book.add_child(title)
|
225
118
|
----
|
226
119
|
|
227
|
-
|
120
|
+
== XML objects and their methods
|
228
121
|
|
229
|
-
|
230
|
-
----
|
231
|
-
# Cloning nodes
|
232
|
-
original = doc.create_element('div')
|
233
|
-
original['id'] = 'original'
|
234
|
-
clone = original.clone
|
235
|
-
|
236
|
-
# Moving nodes
|
237
|
-
target = doc.create_element('target')
|
238
|
-
source = doc.create_element('source')
|
239
|
-
source.add_child(doc.create_text('Content'))
|
240
|
-
target.add_child(source)
|
241
|
-
|
242
|
-
# Replacing nodes
|
243
|
-
old_node = doc.at_xpath('//old')
|
244
|
-
new_node = doc.create_element('new')
|
245
|
-
old_node.replace(new_node)
|
246
|
-
|
247
|
-
# Inserting before/after
|
248
|
-
reference = doc.create_element('reference')
|
249
|
-
before = doc.create_element('before')
|
250
|
-
after = doc.create_element('after')
|
251
|
-
reference.add_previous_sibling(before)
|
252
|
-
reference.add_next_sibling(after)
|
253
|
-
|
254
|
-
# Conditional manipulation
|
255
|
-
element = doc.at_xpath('//conditional')
|
256
|
-
if element['flag'] == 'true'
|
257
|
-
element.add_child(doc.create_text('Flag is true'))
|
258
|
-
else
|
259
|
-
element.remove
|
260
|
-
end
|
261
|
-
----
|
122
|
+
=== Document object
|
262
123
|
|
263
|
-
|
124
|
+
The Document object represents an XML document and serves as the root container
|
125
|
+
for all XML nodes.
|
264
126
|
|
265
127
|
[source,ruby]
|
266
128
|
----
|
267
|
-
# Creating
|
268
|
-
doc = Moxml
|
269
|
-
|
270
|
-
root['xmlns'] = 'http://example.com/default'
|
271
|
-
root['xmlns:custom'] = 'http://example.com/custom'
|
272
|
-
doc.add_child(root)
|
129
|
+
# Creating a document
|
130
|
+
doc = Moxml.new.create_document
|
131
|
+
doc = Moxml.new.parse(xml_string)
|
273
132
|
|
274
|
-
#
|
275
|
-
|
276
|
-
|
133
|
+
# Document properties and methods
|
134
|
+
doc.encoding # Get document encoding
|
135
|
+
doc.encoding = "UTF-8" # Set document encoding
|
136
|
+
doc.version # Get XML version
|
137
|
+
doc.version = "1.1" # Set XML version
|
138
|
+
doc.standalone # Get standalone declaration
|
139
|
+
doc.standalone = "yes" # Set standalone declaration
|
277
140
|
|
278
|
-
|
279
|
-
root
|
141
|
+
# Document structure
|
142
|
+
doc.root # Get root element
|
143
|
+
doc.children # Get all top-level nodes
|
144
|
+
doc.add_child(node) # Add a child node
|
145
|
+
doc.remove_child(node) # Remove a child node
|
280
146
|
|
281
|
-
#
|
282
|
-
|
147
|
+
# Node creation methods
|
148
|
+
doc.create_element(name) # Create new element
|
149
|
+
doc.create_text(content) # Create text node
|
150
|
+
doc.create_cdata(content) # Create CDATA section
|
151
|
+
doc.create_comment(content) # Create comment
|
152
|
+
doc.create_processing_instruction(target, content) # Create PI
|
283
153
|
|
284
|
-
#
|
285
|
-
|
286
|
-
|
154
|
+
# Document querying
|
155
|
+
doc.xpath(expression) # Find nodes by XPath
|
156
|
+
doc.at_xpath(expression) # Find first node by XPath
|
157
|
+
|
158
|
+
# Serialization
|
159
|
+
doc.to_xml(options) # Convert to XML string
|
287
160
|
----
|
288
161
|
|
289
|
-
===
|
162
|
+
=== Element object
|
163
|
+
|
164
|
+
Elements are the primary structural components of an XML document, representing
|
165
|
+
tags with attributes and content.
|
290
166
|
|
291
167
|
[source,ruby]
|
292
168
|
----
|
293
|
-
#
|
294
|
-
|
169
|
+
# Element properties
|
170
|
+
element.name # Get element name
|
171
|
+
element.name = "new_name" # Set element name
|
172
|
+
element.text # Get text content
|
173
|
+
element.text = "content" # Set text content
|
174
|
+
element.inner_html # Get inner XML content
|
175
|
+
element.inner_html = xml # Set inner XML content
|
176
|
+
|
177
|
+
# Attributes
|
178
|
+
element[name] # Get attribute value
|
179
|
+
element[name] = value # Set attribute value
|
180
|
+
element.attributes # Get all attributes
|
181
|
+
element.remove_attribute(name) # Remove attribute
|
182
|
+
|
183
|
+
# Namespace handling
|
184
|
+
element.namespace # Get element's namespace
|
185
|
+
element.namespace = ns # Set element's namespace
|
186
|
+
element.add_namespace(prefix, uri) # Add new namespace
|
187
|
+
element.namespaces # Get all namespace definitions
|
188
|
+
|
189
|
+
# Node structure
|
190
|
+
element.parent # Get parent node
|
191
|
+
element.children # Get child nodes
|
192
|
+
element.add_child(node) # Add child node
|
193
|
+
element.remove_child(node) # Remove child node
|
194
|
+
element.add_previous_sibling(node) # Add sibling before
|
195
|
+
element.add_next_sibling(node) # Add sibling after
|
196
|
+
element.replace(node) # Replace with another node
|
197
|
+
element.remove # Remove from document
|
198
|
+
|
199
|
+
# Node type checking
|
200
|
+
element.element? # Returns true
|
201
|
+
element.text? # Returns false
|
202
|
+
element.cdata? # Returns false
|
203
|
+
element.comment? # Returns false
|
204
|
+
element.processing_instruction? # Returns false
|
205
|
+
|
206
|
+
# Node querying
|
207
|
+
element.xpath(expression) # Find nodes by XPath
|
208
|
+
element.at_xpath(expression) # Find first node by XPath
|
209
|
+
----
|
210
|
+
|
211
|
+
=== Text object
|
212
|
+
|
213
|
+
Text nodes represent character data in the XML document.
|
295
214
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
)
|
215
|
+
[source,ruby]
|
216
|
+
----
|
217
|
+
# Creating text nodes
|
218
|
+
text = doc.create_text("content")
|
301
219
|
|
302
|
-
#
|
303
|
-
|
304
|
-
|
305
|
-
encoding: 'UTF-8',
|
306
|
-
standalone: 'yes'
|
307
|
-
)
|
220
|
+
# Text properties
|
221
|
+
text.content # Get text content
|
222
|
+
text.content = "new" # Set text content
|
308
223
|
|
309
|
-
#
|
310
|
-
|
311
|
-
indent: 0,
|
312
|
-
pretty: false,
|
313
|
-
xml_declaration: false
|
314
|
-
)
|
224
|
+
# Node type checking
|
225
|
+
text.text? # Returns true
|
315
226
|
|
316
|
-
#
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
xml_declaration: true
|
321
|
-
)
|
227
|
+
# Structure
|
228
|
+
text.parent # Get parent node
|
229
|
+
text.remove # Remove from document
|
230
|
+
text.replace(node) # Replace with another node
|
322
231
|
----
|
323
232
|
|
324
|
-
|
233
|
+
=== CDATA object
|
325
234
|
|
326
|
-
|
235
|
+
CDATA sections contain text that should not be parsed as markup.
|
327
236
|
|
328
237
|
[source,ruby]
|
329
238
|
----
|
330
|
-
#
|
331
|
-
|
332
|
-
begin
|
333
|
-
# Process document
|
334
|
-
result = process_document(doc)
|
335
|
-
ensure
|
336
|
-
# Clear references
|
337
|
-
doc = nil
|
338
|
-
GC.start
|
339
|
-
end
|
239
|
+
# Creating CDATA sections
|
240
|
+
cdata = doc.create_cdata("<raw>content</raw>")
|
340
241
|
|
341
|
-
#
|
342
|
-
|
343
|
-
|
344
|
-
process_node(node)
|
345
|
-
# Clear reference
|
346
|
-
node = nil
|
347
|
-
end
|
242
|
+
# CDATA properties
|
243
|
+
cdata.content # Get CDATA content
|
244
|
+
cdata.content = "new" # Set CDATA content
|
348
245
|
|
349
|
-
#
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
354
|
-
|
355
|
-
|
356
|
-
nodeset = nil
|
357
|
-
GC.start
|
358
|
-
end
|
246
|
+
# Node type checking
|
247
|
+
cdata.cdata? # Returns true
|
248
|
+
|
249
|
+
# Structure
|
250
|
+
cdata.parent # Get parent node
|
251
|
+
cdata.remove # Remove from document
|
252
|
+
cdata.replace(node) # Replace with another node
|
359
253
|
----
|
360
254
|
|
361
|
-
===
|
255
|
+
=== Comment object
|
256
|
+
|
257
|
+
Comments contain human-readable notes in the XML document.
|
362
258
|
|
363
259
|
[source,ruby]
|
364
260
|
----
|
365
|
-
#
|
366
|
-
|
367
|
-
# Use native CSS selectors
|
368
|
-
nodes = doc.native.css('complex > selector')
|
369
|
-
nodes.each do |native_node|
|
370
|
-
node = Moxml::Node.wrap(native_node)
|
371
|
-
# Process node
|
372
|
-
end
|
373
|
-
|
374
|
-
# Use native XPath
|
375
|
-
results = doc.native.xpath('//complex/xpath/expression')
|
376
|
-
end
|
261
|
+
# Creating comments
|
262
|
+
comment = doc.create_comment("Note")
|
377
263
|
|
378
|
-
#
|
379
|
-
|
380
|
-
|
381
|
-
doc = Moxml::Document.parse(xml, {
|
382
|
-
mode: :generic,
|
383
|
-
effort: :tolerant,
|
384
|
-
smart: true
|
385
|
-
})
|
386
|
-
|
387
|
-
# Direct element creation
|
388
|
-
element = Ox::Element.new('name')
|
389
|
-
wrapped = Moxml::Element.new(element)
|
390
|
-
end
|
264
|
+
# Comment properties
|
265
|
+
comment.content # Get comment content
|
266
|
+
comment.content = "new" # Set comment content
|
391
267
|
|
392
|
-
#
|
393
|
-
|
394
|
-
# Use native parsing features
|
395
|
-
doc = Moxml::Document.parse(xml, {
|
396
|
-
encoding: 'UTF-8',
|
397
|
-
strict: true
|
398
|
-
})
|
268
|
+
# Node type checking
|
269
|
+
comment.comment? # Returns true
|
399
270
|
|
400
|
-
|
401
|
-
|
402
|
-
|
271
|
+
# Structure
|
272
|
+
comment.parent # Get parent node
|
273
|
+
comment.remove # Remove from document
|
274
|
+
comment.replace(node) # Replace with another node
|
403
275
|
----
|
404
276
|
|
405
|
-
===
|
277
|
+
=== Processing instruction object
|
278
|
+
|
279
|
+
Processing instructions provide instructions to applications processing the XML.
|
406
280
|
|
407
281
|
[source,ruby]
|
408
282
|
----
|
409
|
-
#
|
410
|
-
|
283
|
+
# Creating processing instructions
|
284
|
+
pi = doc.create_processing_instruction("xml-stylesheet",
|
285
|
+
'type="text/xsl" href="style.xsl"')
|
411
286
|
|
412
|
-
|
413
|
-
|
414
|
-
|
415
|
-
|
287
|
+
# PI properties
|
288
|
+
pi.target # Get PI target
|
289
|
+
pi.target = "new" # Set PI target
|
290
|
+
pi.content # Get PI content
|
291
|
+
pi.content = "new" # Set PI content
|
416
292
|
|
417
|
-
|
418
|
-
|
419
|
-
doc = Moxml::Document.parse(xml_string)
|
420
|
-
# Process document
|
421
|
-
result = doc.to_xml
|
422
|
-
doc = nil
|
423
|
-
result
|
424
|
-
end
|
425
|
-
end
|
426
|
-
end
|
293
|
+
# Node type checking
|
294
|
+
pi.processing_instruction? # Returns true
|
427
295
|
|
428
|
-
#
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
doc = Moxml::Document.parse(xml)
|
433
|
-
# Process document
|
434
|
-
doc = nil
|
435
|
-
end
|
436
|
-
end
|
437
|
-
threads.each(&:join)
|
438
|
-
end
|
439
|
-
|
440
|
-
# Thread-local document storage
|
441
|
-
Thread.new do
|
442
|
-
Thread.current[:document] = Moxml::Document.new
|
443
|
-
# Process document
|
444
|
-
ensure
|
445
|
-
Thread.current[:document] = nil
|
446
|
-
end
|
296
|
+
# Structure
|
297
|
+
pi.parent # Get parent node
|
298
|
+
pi.remove # Remove from document
|
299
|
+
pi.replace(node) # Replace with another node
|
447
300
|
----
|
448
301
|
|
449
|
-
|
302
|
+
=== Attribute object
|
450
303
|
|
451
|
-
|
452
|
-
|
453
|
-
==== Parsing errors
|
304
|
+
Attributes represent name-value pairs on elements.
|
454
305
|
|
455
306
|
[source,ruby]
|
456
307
|
----
|
457
|
-
#
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
# Attempt recovery
|
463
|
-
xml_string = cleanup_xml(xml_string)
|
464
|
-
retry
|
465
|
-
end
|
308
|
+
# Attribute properties
|
309
|
+
attr.name # Get attribute name
|
310
|
+
attr.name = "new" # Set attribute name
|
311
|
+
attr.value # Get attribute value
|
312
|
+
attr.value = "new" # Set attribute value
|
466
313
|
|
467
|
-
#
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
detected_encoding = detect_encoding(xml_string)
|
474
|
-
retry if detected_encoding
|
475
|
-
end
|
476
|
-
raise
|
477
|
-
end
|
314
|
+
# Namespace handling
|
315
|
+
attr.namespace # Get attribute's namespace
|
316
|
+
attr.namespace = ns # Set attribute's namespace
|
317
|
+
|
318
|
+
# Node type checking
|
319
|
+
attr.attribute? # Returns true
|
478
320
|
----
|
479
321
|
|
480
|
-
|
322
|
+
=== Namespace object
|
323
|
+
|
324
|
+
Namespaces define XML namespaces used in the document.
|
481
325
|
|
482
326
|
[source,ruby]
|
483
327
|
----
|
484
|
-
#
|
485
|
-
|
486
|
-
|
487
|
-
File.open(path) do |file|
|
488
|
-
doc = Moxml::Document.parse(file)
|
489
|
-
doc.xpath('//chunk').each do |chunk|
|
490
|
-
process_chunk(chunk)
|
491
|
-
chunk = nil
|
492
|
-
end
|
493
|
-
doc = nil
|
494
|
-
end
|
495
|
-
GC.start
|
496
|
-
end
|
328
|
+
# Namespace properties
|
329
|
+
ns.prefix # Get namespace prefix
|
330
|
+
ns.uri # Get namespace URI
|
497
331
|
|
498
|
-
#
|
499
|
-
|
332
|
+
# Formatting
|
333
|
+
ns.to_s # Format as xmlns declaration
|
500
334
|
|
501
|
-
|
502
|
-
|
503
|
-
|
335
|
+
# Node type checking
|
336
|
+
ns.namespace? # Returns true
|
337
|
+
----
|
504
338
|
|
505
|
-
|
506
|
-
result = process_document(doc)
|
507
|
-
doc = nil
|
508
|
-
GC.start
|
339
|
+
=== Node traversal and inspection
|
509
340
|
|
510
|
-
|
511
|
-
puts "Memory usage: #{final_memory - initial_memory}MB"
|
341
|
+
Each node type provides methods for traversing the document structure:
|
512
342
|
|
513
|
-
|
514
|
-
end
|
343
|
+
[source,ruby]
|
515
344
|
----
|
345
|
+
node.parent # Get parent node
|
346
|
+
node.children # Get child nodes
|
347
|
+
node.next_sibling # Get next sibling
|
348
|
+
node.previous_sibling # Get previous sibling
|
349
|
+
node.ancestors # Get all ancestor nodes
|
350
|
+
node.descendants # Get all descendant nodes
|
516
351
|
|
517
|
-
|
352
|
+
# Type checking
|
353
|
+
node.element? # Is it an element?
|
354
|
+
node.text? # Is it a text node?
|
355
|
+
node.cdata? # Is it a CDATA section?
|
356
|
+
node.comment? # Is it a comment?
|
357
|
+
node.processing_instruction? # Is it a PI?
|
358
|
+
node.attribute? # Is it an attribute?
|
359
|
+
node.namespace? # Is it a namespace?
|
518
360
|
|
519
|
-
|
361
|
+
# Node information
|
362
|
+
node.document # Get owning document
|
363
|
+
node.path # Get XPath to node
|
364
|
+
node.line_number # Get source line number (if available)
|
520
365
|
----
|
521
|
-
# Handle backend limitations
|
522
|
-
def safe_xpath(doc, xpath)
|
523
|
-
case Moxml.config.backend
|
524
|
-
when :nokogiri
|
525
|
-
doc.xpath(xpath)
|
526
|
-
when :ox
|
527
|
-
# Ox has limited XPath support
|
528
|
-
fallback_xpath_search(doc, xpath)
|
529
|
-
when :oga
|
530
|
-
# Handle Oga-specific XPath syntax
|
531
|
-
modified_xpath = adjust_xpath_for_oga(xpath)
|
532
|
-
doc.xpath(modified_xpath)
|
533
|
-
end
|
534
|
-
end
|
535
366
|
|
536
|
-
|
537
|
-
def with_backend(backend)
|
538
|
-
original_backend = Moxml.config.backend
|
539
|
-
Moxml.config.backend = backend
|
540
|
-
yield
|
541
|
-
ensure
|
542
|
-
Moxml.config.backend = original_backend
|
543
|
-
end
|
544
|
-
----
|
367
|
+
== Advanced features
|
545
368
|
|
546
|
-
===
|
369
|
+
=== XPath querying and node mapping
|
547
370
|
|
548
|
-
|
371
|
+
Moxml provides efficient XPath querying by leveraging the native XML library's
|
372
|
+
implementation while maintaining consistent node mapping:
|
549
373
|
|
550
374
|
[source,ruby]
|
551
375
|
----
|
552
|
-
#
|
553
|
-
|
554
|
-
|
555
|
-
root = doc.create_element('root')
|
556
|
-
doc.add_child(root)
|
557
|
-
|
558
|
-
# Pre-allocate elements
|
559
|
-
elements = Array.new(1000) do |i|
|
560
|
-
elem = doc.create_element('item')
|
561
|
-
elem['id'] = i.to_s
|
562
|
-
elem
|
563
|
-
end
|
376
|
+
# Find all book elements
|
377
|
+
books = doc.xpath('//book')
|
378
|
+
# Returns Moxml::Element objects mapped to native nodes
|
564
379
|
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
380
|
+
# Find with namespaces
|
381
|
+
titles = doc.xpath('//dc:title',
|
382
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
383
|
+
|
384
|
+
# Find first matching node
|
385
|
+
first_book = doc.at_xpath('//book')
|
569
386
|
|
570
|
-
|
387
|
+
# Chain queries
|
388
|
+
doc.xpath('//book').each do |book|
|
389
|
+
# Each book is a mapped Moxml::Element
|
390
|
+
title = book.at_xpath('.//title')
|
391
|
+
puts "#{book['id']}: #{title.text}"
|
571
392
|
end
|
393
|
+
----
|
572
394
|
|
573
|
-
|
574
|
-
def process_large_xml(xml_string)
|
575
|
-
result = []
|
576
|
-
doc = Moxml::Document.parse(xml_string)
|
395
|
+
=== Namespace handling
|
577
396
|
|
578
|
-
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
end
|
397
|
+
[source,ruby]
|
398
|
+
----
|
399
|
+
# Add namespace to element
|
400
|
+
element.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
583
401
|
|
584
|
-
|
585
|
-
|
402
|
+
# Create element in namespace
|
403
|
+
title = doc.create_element('dc:title')
|
404
|
+
title.text = 'Document Title'
|
586
405
|
|
587
|
-
|
588
|
-
|
406
|
+
# Query with namespaces
|
407
|
+
doc.xpath('//dc:title',
|
408
|
+
'dc' => 'http://purl.org/dc/elements/1.1/')
|
589
409
|
----
|
590
410
|
|
591
|
-
|
411
|
+
=== Accessing native implementation
|
412
|
+
|
413
|
+
While not typically needed, you can access the underlying XML library's nodes:
|
592
414
|
|
593
415
|
[source,ruby]
|
594
416
|
----
|
595
|
-
#
|
596
|
-
|
597
|
-
# Cache frequently used nodes
|
598
|
-
@header_nodes ||= doc.xpath('//header').to_a
|
417
|
+
# Get native node
|
418
|
+
native_node = element.native
|
599
419
|
|
600
|
-
|
601
|
-
|
420
|
+
# Get adapter being used
|
421
|
+
adapter = element.context.config.adapter
|
602
422
|
|
603
|
-
|
604
|
-
|
605
|
-
|
423
|
+
# Create from native node
|
424
|
+
element = Moxml::Element.new(native_node, context)
|
425
|
+
----
|
606
426
|
|
607
|
-
|
608
|
-
def efficient_attribute_handling(element)
|
609
|
-
# Cache attribute values
|
610
|
-
@cached_attrs ||= element.attributes
|
427
|
+
== Error handling
|
611
428
|
|
612
|
-
|
613
|
-
|
429
|
+
Moxml provides specific error classes for different types of errors that may
|
430
|
+
occur during XML processing:
|
614
431
|
|
615
|
-
|
616
|
-
|
617
|
-
|
432
|
+
[source,ruby]
|
433
|
+
----
|
434
|
+
begin
|
435
|
+
doc = context.parse(xml_string)
|
436
|
+
rescue Moxml::ParseError => e
|
437
|
+
# Handles XML parsing errors
|
438
|
+
puts "Parse error at line #{e.line}, column #{e.column}"
|
439
|
+
puts "Message: #{e.message}"
|
440
|
+
rescue Moxml::ValidationError => e
|
441
|
+
# Handles XML validation errors
|
442
|
+
puts "Validation error: #{e.message}"
|
443
|
+
rescue Moxml::XPathError => e
|
444
|
+
# Handles XPath expression errors
|
445
|
+
puts "XPath error: #{e.message}"
|
446
|
+
rescue Moxml::Error => e
|
447
|
+
# Handles other Moxml-specific errors
|
448
|
+
puts "Error: #{e.message}"
|
618
449
|
end
|
619
450
|
----
|
620
451
|
|
621
|
-
|
452
|
+
== Configuration
|
453
|
+
|
454
|
+
Moxml can be configured globally or per instance:
|
622
455
|
|
623
456
|
[source,ruby]
|
624
457
|
----
|
625
|
-
#
|
626
|
-
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
indent: 2,
|
637
|
-
pretty: true,
|
638
|
-
xml_declaration: true
|
639
|
-
)
|
640
|
-
|
641
|
-
# Stream large documents
|
642
|
-
File.open('large.xml', 'w') do |file|
|
643
|
-
doc.write_to(file, indent: 2)
|
644
|
-
end
|
458
|
+
# Global configuration
|
459
|
+
Moxml.configure do |config|
|
460
|
+
config.default_adapter = :nokogiri
|
461
|
+
config.strict = true
|
462
|
+
config.encoding = 'UTF-8'
|
463
|
+
end
|
464
|
+
|
465
|
+
# Instance configuration
|
466
|
+
moxml = Moxml.new do |config|
|
467
|
+
config.adapter = :ox
|
468
|
+
config.strict = false
|
645
469
|
end
|
646
470
|
----
|
647
471
|
|
648
|
-
|
472
|
+
== Thread safety
|
649
473
|
|
650
|
-
|
474
|
+
Moxml is thread-safe when used properly. Each instance maintains its own state
|
475
|
+
and can be used safely in concurrent operations when access to a shared instance is synchronized:
|
651
476
|
|
652
477
|
[source,ruby]
|
653
478
|
----
|
654
|
-
|
655
|
-
def
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
if node.respond_to?(:attributes)
|
660
|
-
node.attributes.each do |name, attr|
|
661
|
-
puts "#{indent} @#{name}=#{attr.value.inspect}"
|
662
|
-
end
|
479
|
+
class XmlProcessor
|
480
|
+
def initialize
|
481
|
+
@mutex = Mutex.new
|
482
|
+
@context = Moxml.new
|
663
483
|
end
|
664
484
|
|
665
|
-
|
666
|
-
|
485
|
+
def process(xml)
|
486
|
+
@mutex.synchronize do
|
487
|
+
doc = @context.parse(xml)
|
488
|
+
# Modify document
|
489
|
+
doc.to_xml
|
490
|
+
end
|
667
491
|
end
|
668
492
|
end
|
493
|
+
----
|
669
494
|
|
670
|
-
|
671
|
-
def debug_node_operations
|
672
|
-
nodes_created = 0
|
673
|
-
nodes_removed = 0
|
495
|
+
== Performance considerations
|
674
496
|
|
675
|
-
|
676
|
-
ensure
|
677
|
-
puts "Nodes created: #{nodes_created}"
|
678
|
-
puts "Nodes removed: #{nodes_removed}"
|
679
|
-
end
|
680
|
-
----
|
497
|
+
=== Memory management
|
681
498
|
|
682
|
-
|
499
|
+
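Moxml maintains a node registry to ensure consistent object mapping. Release references to a document once processing is finished so that the document and its registry can be garbage collected: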
|
683
500
|
|
684
501
|
[source,ruby]
|
685
502
|
----
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
element = doc.create_element('test')
|
692
|
-
doc.add_child(element)
|
503
|
+
doc = context.parse(large_xml)
|
504
|
+
# Process document
|
505
|
+
doc = nil # Allow garbage collection of document and registry
|
506
|
+
GC.start # Force garbage collection if needed
|
507
|
+
----
|
693
508
|
|
694
|
-
|
695
|
-
raise "Node creation failed" unless doc.root
|
696
|
-
raise "Node type wrong" unless doc.root.is_a?(Moxml::Element)
|
509
|
+
=== Efficient querying
|
697
510
|
|
698
|
-
|
699
|
-
xml = doc.to_xml
|
700
|
-
raise "Serialization failed" unless xml.include?('<test/>')
|
511
|
+
Use specific XPath expressions for better performance:
|
701
512
|
|
702
|
-
|
703
|
-
rescue => e
|
704
|
-
puts "Backend verification failed: #{e.message}"
|
705
|
-
end
|
513
|
+
[source,ruby]
|
706
514
|
----
|
515
|
+
# More efficient - specific path
|
516
|
+
doc.xpath('//book/title')
|
707
517
|
|
708
|
-
|
518
|
+
# Less efficient - matches title elements at any depth, so the full document is scanned
|
519
|
+
doc.xpath('//title')
|
709
520
|
|
710
|
-
|
521
|
+
# Most efficient - direct child access
|
522
|
+
root.xpath('./title')
|
523
|
+
----
|
711
524
|
|
712
|
-
|
713
|
-
* `Moxml::ParseError` - XML parsing errors
|
714
|
-
* `Moxml::ArgumentError` - Invalid argument errors
|
525
|
+
== Best practices
|
715
526
|
|
716
|
-
===
|
527
|
+
=== Document creation
|
717
528
|
|
718
529
|
[source,ruby]
|
719
530
|
----
|
720
|
-
#
|
721
|
-
|
722
|
-
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
531
|
+
# Preferred - using builder pattern
|
532
|
+
doc = Moxml.new.build do
|
533
|
+
declaration version: "1.0", encoding: "UTF-8"
|
534
|
+
element 'root' do
|
535
|
+
element 'child' do
|
536
|
+
text 'content'
|
537
|
+
end
|
538
|
+
end
|
727
539
|
end
|
728
540
|
|
729
|
-
#
|
730
|
-
|
731
|
-
|
732
|
-
|
733
|
-
|
734
|
-
|
735
|
-
end
|
541
|
+
# Alternative - direct manipulation
|
542
|
+
doc = Moxml.new.create_document
|
543
|
+
doc.add_declaration(version: "1.0", encoding: "UTF-8")
|
544
|
+
root = doc.create_element('root')
|
545
|
+
doc.add_child(root)
|
546
|
+
----
|
736
547
|
|
737
|
-
|
738
|
-
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
548
|
+
=== Node manipulation
|
549
|
+
|
550
|
+
[source,ruby]
|
551
|
+
----
|
552
|
+
# Preferred - chainable operations
|
553
|
+
element
|
554
|
+
.add_namespace('dc', 'http://purl.org/dc/elements/1.1/')
|
555
|
+
.add_child(doc.create_text('content'))
|
556
|
+
|
557
|
+
# Preferred - clear node type checking
|
558
|
+
if node.element?
|
559
|
+
node.add_child(doc.create_text('content'))
|
749
560
|
end
|
750
561
|
----
|
751
562
|
|
752
563
|
== Contributing
|
753
564
|
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
565
|
+
1. Fork the repository
|
566
|
+
2. Create your feature branch (`git checkout -b feature/my-new-feature`)
|
567
|
+
3. Commit your changes (`git commit -am 'Add some feature'`)
|
568
|
+
4. Push to the branch (`git push origin feature/my-new-feature`)
|
569
|
+
5. Create a new Pull Request
|
758
570
|
|
759
|
-
|
760
|
-
* Add tests for new features
|
761
|
-
* Update documentation
|
762
|
-
* Ensure backwards compatibility
|
763
|
-
* Consider performance implications
|
764
|
-
* Test with all supported backends
|
571
|
+
== License
|
765
572
|
|
766
|
-
|
573
|
+
Copyright (c) 2024 Ribose Inc.
|
767
574
|
|
768
|
-
|
575
|
+
This project is licensed under the BSD-2-Clause License. See the LICENSE file for details.
|
769
576
|
|
770
|
-
The gem is available as open source under the terms of the BSD-2-Clause License.
|