moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,292 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Modifying XML
|
|
3
|
+
nav_order: 3
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
== Modifying XML
|
|
7
|
+
|
|
8
|
+
=== Purpose
|
|
9
|
+
|
|
10
|
+
Learn how to modify existing XML documents by adding, updating, and removing
|
|
11
|
+
elements, attributes, and content.
|
|
12
|
+
|
|
13
|
+
=== Modifying element content
|
|
14
|
+
|
|
15
|
+
Update text content of elements:
|
|
16
|
+
|
|
17
|
+
[source,ruby]
|
|
18
|
+
----
|
|
19
|
+
xml = '<library><book><title>Old Title</title></book></library>'
|
|
20
|
+
doc = Moxml.new.parse(xml)
|
|
21
|
+
|
|
22
|
+
# Find and update
|
|
23
|
+
title = doc.at_xpath('//title')
|
|
24
|
+
title.text = 'New Title'
|
|
25
|
+
|
|
26
|
+
puts doc.to_xml
|
|
27
|
+
# => <library><book><title>New Title</title></book></library>
|
|
28
|
+
----
|
|
29
|
+
|
|
30
|
+
=== Adding elements
|
|
31
|
+
|
|
32
|
+
Add new elements to existing documents:
|
|
33
|
+
|
|
34
|
+
[source,ruby]
|
|
35
|
+
----
|
|
36
|
+
xml = '<library><book id="1"><title>Ruby Basics</title></book></library>'
|
|
37
|
+
doc = Moxml.new.parse(xml)
|
|
38
|
+
|
|
39
|
+
book = doc.at_xpath('//book[@id="1"]')
|
|
40
|
+
|
|
41
|
+
# Add author element
|
|
42
|
+
author = doc.create_element('author')
|
|
43
|
+
author.text = 'Jane Smith'
|
|
44
|
+
book.add_child(author)
|
|
45
|
+
|
|
46
|
+
# Add price element
|
|
47
|
+
price = doc.create_element('price')
|
|
48
|
+
price.text = '29.99'
|
|
49
|
+
price['currency'] = 'USD'
|
|
50
|
+
book.add_child(price)
|
|
51
|
+
|
|
52
|
+
# Add ISBN element
|
|
53
|
+
isbn = doc.create_element('isbn')
|
|
54
|
+
isbn.text = '978-0-123456-78-9'
|
|
55
|
+
book.add_child(isbn)
|
|
56
|
+
|
|
57
|
+
puts doc.to_xml(indent: 2)
|
|
58
|
+
----
|
|
59
|
+
|
|
60
|
+
=== Removing elements
|
|
61
|
+
|
|
62
|
+
Remove elements from documents:
|
|
63
|
+
|
|
64
|
+
[source,ruby]
|
|
65
|
+
----
|
|
66
|
+
xml = <<~XML
|
|
67
|
+
<library>
|
|
68
|
+
<book id="1">
|
|
69
|
+
<title>Ruby Basics</title>
|
|
70
|
+
<author>Jane Smith</author>
|
|
71
|
+
<draft>true</draft>
|
|
72
|
+
</book>
|
|
73
|
+
</library>
|
|
74
|
+
XML
|
|
75
|
+
|
|
76
|
+
doc = Moxml.new.parse(xml)
|
|
77
|
+
|
|
78
|
+
# Find and remove element
|
|
79
|
+
draft = doc.at_xpath('//draft')
|
|
80
|
+
draft.remove
|
|
81
|
+
|
|
82
|
+
# Remove by parent
|
|
83
|
+
book = doc.at_xpath('//book')
|
|
84
|
+
author = book.at_xpath('.//author')
|
|
85
|
+
book.remove_child(author)
|
|
86
|
+
|
|
87
|
+
puts doc.to_xml
|
|
88
|
+
----
|
|
89
|
+
|
|
90
|
+
=== Modifying attributes
|
|
91
|
+
|
|
92
|
+
Update, add, and remove attributes:
|
|
93
|
+
|
|
94
|
+
[source,ruby]
|
|
95
|
+
----
|
|
96
|
+
xml = '<book id="1" status="draft">Ruby Basics</book>'
|
|
97
|
+
doc = Moxml.new.parse(xml)
|
|
98
|
+
|
|
99
|
+
book = doc.root
|
|
100
|
+
|
|
101
|
+
# Update existing attribute
|
|
102
|
+
book['id'] = '100'
|
|
103
|
+
|
|
104
|
+
# Add new attribute
|
|
105
|
+
book['edition'] = '2nd'
|
|
106
|
+
book['category'] = 'programming'
|
|
107
|
+
|
|
108
|
+
# Remove attribute
|
|
109
|
+
book.remove_attribute('status')
|
|
110
|
+
|
|
111
|
+
# Get all attributes
|
|
112
|
+
book.attributes.each do |attr|
|
|
113
|
+
puts "#{attr.name}=#{attr.value}"
|
|
114
|
+
end
|
|
115
|
+
# => id=100
|
|
116
|
+
# => edition=2nd
|
|
117
|
+
# => category=programming
|
|
118
|
+
----
|
|
119
|
+
|
|
120
|
+
=== Replacing nodes
|
|
121
|
+
|
|
122
|
+
Replace elements with new content:
|
|
123
|
+
|
|
124
|
+
[source,ruby]
|
|
125
|
+
----
|
|
126
|
+
xml = '<book><title>Old Title</title><author>Old Author</author></book>'
|
|
127
|
+
doc = Moxml.new.parse(xml)
|
|
128
|
+
|
|
129
|
+
# Replace title element
|
|
130
|
+
old_title = doc.at_xpath('//title')
|
|
131
|
+
new_title = doc.create_element('title')
|
|
132
|
+
new_title.text = 'New Title'
|
|
133
|
+
new_title['lang'] = 'en'
|
|
134
|
+
|
|
135
|
+
old_title.replace(new_title)
|
|
136
|
+
|
|
137
|
+
# Replace text node
|
|
138
|
+
author = doc.at_xpath('//author')
|
|
139
|
+
author.children.first.replace(doc.create_text('New Author'))
|
|
140
|
+
|
|
141
|
+
puts doc.to_xml
|
|
142
|
+
----
|
|
143
|
+
|
|
144
|
+
=== Adding siblings
|
|
145
|
+
|
|
146
|
+
Insert elements relative to existing nodes:
|
|
147
|
+
|
|
148
|
+
[source,ruby]
|
|
149
|
+
----
|
|
150
|
+
xml = <<~XML
|
|
151
|
+
<book>
|
|
152
|
+
<title>Ruby Programming</title>
|
|
153
|
+
<price>29.99</price>
|
|
154
|
+
</book>
|
|
155
|
+
XML
|
|
156
|
+
|
|
157
|
+
doc = Moxml.new.parse(xml)
|
|
158
|
+
|
|
159
|
+
# Add before price
|
|
160
|
+
price = doc.at_xpath('//price')
|
|
161
|
+
author = doc.create_element('author')
|
|
162
|
+
author.text = 'Jane Smith'
|
|
163
|
+
price.add_previous_sibling(author)
|
|
164
|
+
|
|
165
|
+
# Add after title
|
|
166
|
+
title = doc.at_xpath('//title')
|
|
167
|
+
subtitle = doc.create_element('subtitle')
|
|
168
|
+
subtitle.text = 'A Comprehensive Guide'
|
|
169
|
+
title.add_next_sibling(subtitle)
|
|
170
|
+
|
|
171
|
+
puts doc.to_xml(indent: 2)
|
|
172
|
+
----
|
|
173
|
+
|
|
174
|
+
Output:
|
|
175
|
+
|
|
176
|
+
[source,xml]
|
|
177
|
+
----
|
|
178
|
+
<book>
|
|
179
|
+
<title>Ruby Programming</title>
|
|
180
|
+
<subtitle>A Comprehensive Guide</subtitle>
|
|
181
|
+
<author>Jane Smith</author>
|
|
182
|
+
<price>29.99</price>
|
|
183
|
+
</book>
|
|
184
|
+
----
|
|
185
|
+
|
|
186
|
+
=== Batch modifications
|
|
187
|
+
|
|
188
|
+
Update multiple elements at once:
|
|
189
|
+
|
|
190
|
+
[source,ruby]
|
|
191
|
+
----
|
|
192
|
+
xml = <<~XML
|
|
193
|
+
<library>
|
|
194
|
+
<book><price currency="USD">29.99</price></book>
|
|
195
|
+
<book><price currency="USD">39.99</price></book>
|
|
196
|
+
<book><price currency="USD">19.99</price></book>
|
|
197
|
+
</library>
|
|
198
|
+
XML
|
|
199
|
+
|
|
200
|
+
doc = Moxml.new.parse(xml)
|
|
201
|
+
|
|
202
|
+
# Apply 10% discount to all books
|
|
203
|
+
doc.xpath('//price').each do |price|
|
|
204
|
+
current = price.text.to_f
|
|
205
|
+
discounted = (current * 0.9).round(2)
|
|
206
|
+
price.text = discounted.to_s
|
|
207
|
+
price['original'] = current.to_s
|
|
208
|
+
end
|
|
209
|
+
|
|
210
|
+
puts doc.to_xml(indent: 2)
|
|
211
|
+
----
|
|
212
|
+
|
|
213
|
+
=== Preserving structure
|
|
214
|
+
|
|
215
|
+
Maintain document structure during modifications:
|
|
216
|
+
|
|
217
|
+
[source,ruby]
|
|
218
|
+
----
|
|
219
|
+
xml = <<~XML
|
|
220
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
221
|
+
<library>
|
|
222
|
+
<book id="1">Ruby Basics</book>
|
|
223
|
+
</library>
|
|
224
|
+
XML
|
|
225
|
+
|
|
226
|
+
doc = Moxml.new.parse(xml)
|
|
227
|
+
|
|
228
|
+
# Modifications preserve declaration
|
|
229
|
+
book = doc.at_xpath('//book')
|
|
230
|
+
book['edition'] = '2nd'
|
|
231
|
+
|
|
232
|
+
# Original structure maintained
|
|
233
|
+
puts doc.to_xml(indent: 2)
|
|
234
|
+
# => Still has <?xml ... ?> declaration
|
|
235
|
+
----
|
|
236
|
+
|
|
237
|
+
=== Common modification patterns
|
|
238
|
+
|
|
239
|
+
==== Update or create pattern
|
|
240
|
+
|
|
241
|
+
[source,ruby]
|
|
242
|
+
----
|
|
243
|
+
def ensure_element(parent, name, text)
|
|
244
|
+
elem = parent.at_xpath(".//#{name}")
|
|
245
|
+
|
|
246
|
+
if elem
|
|
247
|
+
# Update existing
|
|
248
|
+
elem.text = text
|
|
249
|
+
else
|
|
250
|
+
# Create new
|
|
251
|
+
elem = parent.document.create_element(name)
|
|
252
|
+
elem.text = text
|
|
253
|
+
parent.add_child(elem)
|
|
254
|
+
end
|
|
255
|
+
|
|
256
|
+
elem
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
book = doc.at_xpath('//book')
|
|
260
|
+
ensure_element(book, 'author', 'Jane Smith')
|
|
261
|
+
ensure_element(book, 'price', '29.99')
|
|
262
|
+
----
|
|
263
|
+
|
|
264
|
+
==== Conditional modification
|
|
265
|
+
|
|
266
|
+
[source,ruby]
|
|
267
|
+
----
|
|
268
|
+
doc.xpath('//book').each do |book|
|
|
269
|
+
price = book.at_xpath('.//price')
|
|
270
|
+
next unless price
|
|
271
|
+
|
|
272
|
+
# Add discount for expensive books
|
|
273
|
+
if price.text.to_f > 30
|
|
274
|
+
book['discount'] = '10%'
|
|
275
|
+
price.text = (price.text.to_f * 0.9).round(2).to_s
|
|
276
|
+
end
|
|
277
|
+
end
|
|
278
|
+
----
|
|
279
|
+
|
|
280
|
+
=== Best practices
|
|
281
|
+
|
|
282
|
+
. **Find before modifying** - always locate elements first
|
|
283
|
+
. **Check element exists** before calling methods on it
|
|
284
|
+
. **Use transactions** for complex modifications if needed
|
|
285
|
+
. **Validate structure** after major changes
|
|
286
|
+
. **Preserve document metadata** (declarations, encoding)
|
|
287
|
+
|
|
288
|
+
=== See also
|
|
289
|
+
|
|
290
|
+
* link:../tutorials/builder-pattern[Builder pattern] - Build from scratch
|
|
291
|
+
* link:../tutorials/basic-usage[Basic usage] - Fundamentals
|
|
292
|
+
* link:../references/element-api[Element API] - Complete method reference
|
|
@@ -0,0 +1,230 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Parsing XML
|
|
3
|
+
nav_order: 2
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
== Parsing XML
|
|
7
|
+
|
|
8
|
+
=== Purpose
|
|
9
|
+
|
|
10
|
+
Learn how to parse XML from various sources including strings, files, and IO
|
|
11
|
+
streams using different Moxml adapters.
|
|
12
|
+
|
|
13
|
+
=== Basic string parsing
|
|
14
|
+
|
|
15
|
+
Parse XML from a string:
|
|
16
|
+
|
|
17
|
+
[source,ruby]
|
|
18
|
+
----
|
|
19
|
+
require 'moxml'
|
|
20
|
+
|
|
21
|
+
xml_string = '<root><child>content</child></root>'
|
|
22
|
+
|
|
23
|
+
# Parse with default adapter
|
|
24
|
+
doc = Moxml.new.parse(xml_string)
|
|
25
|
+
|
|
26
|
+
# Access parsed content
|
|
27
|
+
puts doc.root.name # => "root"
|
|
28
|
+
puts doc.root.children.first.text # => "content"
|
|
29
|
+
----
|
|
30
|
+
|
|
31
|
+
=== Parsing from files
|
|
32
|
+
|
|
33
|
+
Read and parse XML files:
|
|
34
|
+
|
|
35
|
+
[source,ruby]
|
|
36
|
+
----
|
|
37
|
+
# Read file first, then parse
|
|
38
|
+
xml_content = File.read('document.xml')
|
|
39
|
+
doc = Moxml.new.parse(xml_content)
|
|
40
|
+
|
|
41
|
+
# Or use File.open with read
|
|
42
|
+
File.open('document.xml') do |file|
|
|
43
|
+
doc = Moxml.new.parse(file.read)
|
|
44
|
+
# Process document
|
|
45
|
+
end
|
|
46
|
+
----
|
|
47
|
+
|
|
48
|
+
=== Parsing with options
|
|
49
|
+
|
|
50
|
+
Control parsing behavior:
|
|
51
|
+
|
|
52
|
+
[source,ruby]
|
|
53
|
+
----
|
|
54
|
+
# Strict parsing (raises errors on malformed XML)
|
|
55
|
+
doc = Moxml.new.parse(xml, strict: true)
|
|
56
|
+
|
|
57
|
+
# With specific encoding
|
|
58
|
+
doc = Moxml.new.parse(xml, encoding: 'ISO-8859-1')
|
|
59
|
+
|
|
60
|
+
# Relaxed parsing (attempts to handle malformed XML)
|
|
61
|
+
doc = Moxml.new.parse(possibly_invalid_xml, strict: false)
|
|
62
|
+
----
|
|
63
|
+
|
|
64
|
+
=== Handling parse errors
|
|
65
|
+
|
|
66
|
+
Catch and handle parsing errors:
|
|
67
|
+
|
|
68
|
+
[source,ruby]
|
|
69
|
+
----
|
|
70
|
+
xml = '<root><unclosed>'
|
|
71
|
+
|
|
72
|
+
begin
|
|
73
|
+
doc = Moxml.new.parse(xml, strict: true)
|
|
74
|
+
rescue Moxml::ParseError => e
|
|
75
|
+
puts "Parse failed at line #{e.line}, column #{e.column}"
|
|
76
|
+
puts "Error: #{e.message}"
|
|
77
|
+
puts e.to_s # Includes helpful hints
|
|
78
|
+
end
|
|
79
|
+
----
|
|
80
|
+
|
|
81
|
+
=== Parsing large documents
|
|
82
|
+
|
|
83
|
+
Handle large XML files efficiently:
|
|
84
|
+
|
|
85
|
+
[source,ruby]
|
|
86
|
+
----
|
|
87
|
+
# For large files, consider memory usage
|
|
88
|
+
large_xml = File.read('large_document.xml')
|
|
89
|
+
|
|
90
|
+
context = Moxml.new
|
|
91
|
+
# Choose appropriate adapter for size
|
|
92
|
+
context.config.adapter = :ox # Fast for large files
|
|
93
|
+
|
|
94
|
+
doc = context.parse(large_xml)
|
|
95
|
+
|
|
96
|
+
# Process in chunks if possible
|
|
97
|
+
doc.xpath('//record').each_slice(1000) do |records|
|
|
98
|
+
process_batch(records)
|
|
99
|
+
end
|
|
100
|
+
----
|
|
101
|
+
|
|
102
|
+
=== Parsing with different adapters
|
|
103
|
+
|
|
104
|
+
Each adapter may handle edge cases differently:
|
|
105
|
+
|
|
106
|
+
[source,ruby]
|
|
107
|
+
----
|
|
108
|
+
xml_with_namespaces = <<~XML
|
|
109
|
+
<library xmlns="http://example.org">
|
|
110
|
+
<book>Title</book>
|
|
111
|
+
</library>
|
|
112
|
+
XML
|
|
113
|
+
|
|
114
|
+
# Parse with Nokogiri (full namespace support)
|
|
115
|
+
context_nokogiri = Moxml.new
|
|
116
|
+
context_nokogiri.config.adapter = :nokogiri
|
|
117
|
+
doc = context_nokogiri.parse(xml_with_namespaces)
|
|
118
|
+
|
|
119
|
+
# Parse with REXML (limited namespace XPath)
|
|
120
|
+
context_rexml = Moxml.new
|
|
121
|
+
context_rexml.config.adapter = :rexml
|
|
122
|
+
doc = context_rexml.parse(xml_with_namespaces)
|
|
123
|
+
# Namespace preserved but XPath queries limited
|
|
124
|
+
----
|
|
125
|
+
|
|
126
|
+
=== Common parse patterns
|
|
127
|
+
|
|
128
|
+
==== Parse and extract data
|
|
129
|
+
|
|
130
|
+
[source,ruby]
|
|
131
|
+
----
|
|
132
|
+
xml = <<~XML
|
|
133
|
+
<products>
|
|
134
|
+
<product id="1">
|
|
135
|
+
<name>Widget A</name>
|
|
136
|
+
<price>9.99</price>
|
|
137
|
+
</product>
|
|
138
|
+
<product id="2">
|
|
139
|
+
<name>Widget B</name>
|
|
140
|
+
<price>14.99</price>
|
|
141
|
+
</product>
|
|
142
|
+
</products>
|
|
143
|
+
XML
|
|
144
|
+
|
|
145
|
+
doc = Moxml.new.parse(xml)
|
|
146
|
+
|
|
147
|
+
# Extract data into Ruby structures
|
|
148
|
+
products = doc.xpath('//product').map do |prod|
|
|
149
|
+
{
|
|
150
|
+
id: prod['id'],
|
|
151
|
+
name: prod.at_xpath('.//name').text,
|
|
152
|
+
price: prod.at_xpath('.//price').text.to_f
|
|
153
|
+
}
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
products.each { |p| puts "#{p[:name]}: $#{p[:price]}" }
|
|
157
|
+
----
|
|
158
|
+
|
|
159
|
+
==== Parse and validate
|
|
160
|
+
|
|
161
|
+
[source,ruby]
|
|
162
|
+
----
|
|
163
|
+
doc = Moxml.new.parse(xml)
|
|
164
|
+
|
|
165
|
+
# Validate required elements exist
|
|
166
|
+
required_elements = ['title', 'author', 'price']
|
|
167
|
+
|
|
168
|
+
required_elements.each do |elem|
|
|
169
|
+
unless doc.at_xpath("//#{elem}")
|
|
170
|
+
raise "Missing required element: #{elem}"
|
|
171
|
+
end
|
|
172
|
+
end
|
|
173
|
+
----
|
|
174
|
+
|
|
175
|
+
=== Troubleshooting
|
|
176
|
+
|
|
177
|
+
**Encoding issues:**
|
|
178
|
+
|
|
179
|
+
[source,ruby]
|
|
180
|
+
----
|
|
181
|
+
# Specify encoding explicitly
|
|
182
|
+
doc = Moxml.new.parse(xml, encoding: 'UTF-8')
|
|
183
|
+
|
|
184
|
+
# Or let adapter auto-detect
|
|
185
|
+
doc = Moxml.new.parse(xml) # Usually works
|
|
186
|
+
----
|
|
187
|
+
|
|
188
|
+
**Malformed XML:**
|
|
189
|
+
|
|
190
|
+
[source,ruby]
|
|
191
|
+
----
|
|
192
|
+
# Use relaxed parsing
|
|
193
|
+
doc = Moxml.new.parse(possibly_broken_xml, strict: false)
|
|
194
|
+
|
|
195
|
+
# Check what was parsed
|
|
196
|
+
puts doc.root.name
|
|
197
|
+
puts doc.to_xml # See what was actually parsed
|
|
198
|
+
----
|
|
199
|
+
|
|
200
|
+
**Empty or whitespace:**
|
|
201
|
+
|
|
202
|
+
[source,ruby]
|
|
203
|
+
----
|
|
204
|
+
xml = " \n <root/> \n "
|
|
205
|
+
|
|
206
|
+
# Whitespace is handled automatically
|
|
207
|
+
doc = Moxml.new.parse(xml)
|
|
208
|
+
puts doc.root.name # => "root"
|
|
209
|
+
----
|
|
210
|
+
|
|
211
|
+
=== Best practices
|
|
212
|
+
|
|
213
|
+
. **Always use strict mode in production** for data integrity
|
|
214
|
+
. **Specify encoding** when working with non-UTF-8 documents
|
|
215
|
+
. **Handle parse errors** gracefully with appropriate error messages
|
|
216
|
+
. **Choose the right adapter** based on document size and complexity
|
|
217
|
+
. **Validate critical elements** after parsing
|
|
218
|
+
|
|
219
|
+
=== Next steps
|
|
220
|
+
|
|
221
|
+
* link:../tutorials/xpath-queries[XPath queries tutorial] - Learn advanced querying
|
|
222
|
+
* link:modifying-xml[Modifying XML] - Element manipulation
|
|
223
|
+
* link:../pages/error-handling[Error handling guide] - Comprehensive error
|
|
224
|
+
management
|
|
225
|
+
|
|
226
|
+
=== See also
|
|
227
|
+
|
|
228
|
+
* link:../pages/adapters/[Adapters] - Choose the right adapter for parsing
|
|
229
|
+
* link:../references/document-api[Document API] - Complete parsing reference
|
|
230
|
+
* link:../pages/compatibility[Compatibility] - Adapter differences
|