moxml 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +224 -43
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +51 -0
  21. data/docs/_guides/modifying-xml.adoc +292 -0
  22. data/docs/_guides/parsing-xml.adoc +230 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_guides/xml-declaration.adoc +450 -0
  26. data/docs/_pages/adapter-compatibility.adoc +369 -0
  27. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  28. data/docs/_pages/adapters/index.adoc +97 -0
  29. data/docs/_pages/adapters/libxml.adoc +285 -0
  30. data/docs/_pages/adapters/nokogiri.adoc +251 -0
  31. data/docs/_pages/adapters/oga.adoc +291 -0
  32. data/docs/_pages/adapters/ox.adoc +56 -0
  33. data/docs/_pages/adapters/rexml.adoc +292 -0
  34. data/docs/_pages/best-practices.adoc +429 -0
  35. data/docs/_pages/compatibility.adoc +467 -0
  36. data/docs/_pages/configuration.adoc +250 -0
  37. data/docs/_pages/error-handling.adoc +349 -0
  38. data/docs/_pages/headed-ox-limitations.adoc +574 -0
  39. data/docs/_pages/headed-ox.adoc +1025 -0
  40. data/docs/_pages/index.adoc +35 -0
  41. data/docs/_pages/installation.adoc +140 -0
  42. data/docs/_pages/node-api-reference.adoc +49 -0
  43. data/docs/_pages/performance.adoc +35 -0
  44. data/docs/_pages/quick-start.adoc +243 -0
  45. data/docs/_pages/thread-safety.adoc +28 -0
  46. data/docs/_references/document-api.adoc +407 -0
  47. data/docs/_references/index.adoc +48 -0
  48. data/docs/_tutorials/basic-usage.adoc +267 -0
  49. data/docs/_tutorials/builder-pattern.adoc +342 -0
  50. data/docs/_tutorials/index.adoc +33 -0
  51. data/docs/_tutorials/namespace-handling.adoc +324 -0
  52. data/docs/_tutorials/xpath-queries.adoc +358 -0
  53. data/docs/index.adoc +122 -0
  54. data/examples/README.md +124 -0
  55. data/examples/api_client/README.md +424 -0
  56. data/examples/api_client/api_client.rb +394 -0
  57. data/examples/api_client/example_response.xml +48 -0
  58. data/examples/headed_ox_example/README.md +90 -0
  59. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  60. data/examples/rss_parser/README.md +194 -0
  61. data/examples/rss_parser/example_feed.xml +93 -0
  62. data/examples/rss_parser/rss_parser.rb +189 -0
  63. data/examples/sax_parsing/README.md +50 -0
  64. data/examples/sax_parsing/data_extractor.rb +75 -0
  65. data/examples/sax_parsing/example.xml +21 -0
  66. data/examples/sax_parsing/large_file.rb +78 -0
  67. data/examples/sax_parsing/simple_parser.rb +55 -0
  68. data/examples/web_scraper/README.md +352 -0
  69. data/examples/web_scraper/example_page.html +201 -0
  70. data/examples/web_scraper/web_scraper.rb +312 -0
  71. data/lib/moxml/adapter/base.rb +107 -28
  72. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  73. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  74. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  75. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  76. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  77. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  78. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  79. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  80. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  81. data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
  82. data/lib/moxml/adapter/headed_ox.rb +161 -0
  83. data/lib/moxml/adapter/libxml.rb +1564 -0
  84. data/lib/moxml/adapter/nokogiri.rb +156 -9
  85. data/lib/moxml/adapter/oga.rb +190 -15
  86. data/lib/moxml/adapter/ox.rb +322 -28
  87. data/lib/moxml/adapter/rexml.rb +157 -28
  88. data/lib/moxml/adapter.rb +21 -4
  89. data/lib/moxml/attribute.rb +6 -0
  90. data/lib/moxml/builder.rb +40 -4
  91. data/lib/moxml/config.rb +8 -3
  92. data/lib/moxml/context.rb +57 -2
  93. data/lib/moxml/declaration.rb +9 -0
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +53 -6
  96. data/lib/moxml/document_builder.rb +34 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +155 -4
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1770 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_preservation_spec.rb +217 -0
  164. data/spec/moxml/declaration_spec.rb +36 -0
  165. data/spec/moxml/doctype_spec.rb +33 -0
  166. data/spec/moxml/document_builder_spec.rb +30 -0
  167. data/spec/moxml/document_spec.rb +105 -0
  168. data/spec/moxml/element_spec.rb +143 -0
  169. data/spec/moxml/error_spec.rb +266 -22
  170. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  171. data/spec/moxml/namespace_spec.rb +32 -0
  172. data/spec/moxml/node_set_spec.rb +39 -0
  173. data/spec/moxml/node_spec.rb +37 -0
  174. data/spec/moxml/processing_instruction_spec.rb +34 -0
  175. data/spec/moxml/sax_spec.rb +1067 -0
  176. data/spec/moxml/text_spec.rb +31 -0
  177. data/spec/moxml/version_spec.rb +14 -0
  178. data/spec/moxml/xml_utils/.gitkeep +0 -0
  179. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  180. data/spec/moxml/xml_utils_spec.rb +49 -0
  181. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  182. data/spec/moxml/xpath/axes_spec.rb +296 -0
  183. data/spec/moxml/xpath/cache_spec.rb +358 -0
  184. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  185. data/spec/moxml/xpath/context_spec.rb +210 -0
  186. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  187. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  188. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  189. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  190. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  191. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  192. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  193. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  194. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  195. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  196. data/spec/moxml/xpath/parser_spec.rb +364 -0
  197. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  198. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  199. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  200. data/spec/moxml/xpath_spec.rb +77 -0
  201. data/spec/performance/README.md +83 -0
  202. data/spec/performance/benchmark_spec.rb +64 -0
  203. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
  204. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  205. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  206. data/spec/spec_helper.rb +58 -1
  207. data/spec/support/xml_matchers.rb +1 -1
  208. metadata +178 -34
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,292 @@
1
+ ---
2
+ title: Modifying XML
3
+ nav_order: 3
4
+ ---
5
+
6
+ == Modifying XML
7
+
8
+ === Purpose
9
+
10
+ Learn how to modify existing XML documents by adding, updating, and removing
11
+ elements, attributes, and content.
12
+
13
+ === Modifying element content
14
+
15
+ Update text content of elements:
16
+
17
+ [source,ruby]
18
+ ----
19
+ xml = '<library><book><title>Old Title</title></book></library>'
20
+ doc = Moxml.new.parse(xml)
21
+
22
+ # Find and update
23
+ title = doc.at_xpath('//title')
24
+ title.text = 'New Title'
25
+
26
+ puts doc.to_xml
27
+ # => <library><book><title>New Title</title></book></library>
28
+ ----
29
+
30
+ === Adding elements
31
+
32
+ Add new elements to existing documents:
33
+
34
+ [source,ruby]
35
+ ----
36
+ xml = '<library><book id="1"><title>Ruby Basics</title></book></library>'
37
+ doc = Moxml.new.parse(xml)
38
+
39
+ book = doc.at_xpath('//book[@id="1"]')
40
+
41
+ # Add author element
42
+ author = doc.create_element('author')
43
+ author.text = 'Jane Smith'
44
+ book.add_child(author)
45
+
46
+ # Add price element
47
+ price = doc.create_element('price')
48
+ price.text = '29.99'
49
+ price['currency'] = 'USD'
50
+ book.add_child(price)
51
+
52
+ # Add ISBN element
53
+ isbn = doc.create_element('isbn')
54
+ isbn.text = '978-0-123456-78-9'
55
+ book.add_child(isbn)
56
+
57
+ puts doc.to_xml(indent: 2)
58
+ ----
59
+
60
+ === Removing elements
61
+
62
+ Remove elements from documents:
63
+
64
+ [source,ruby]
65
+ ----
66
+ xml = <<~XML
67
+ <library>
68
+ <book id="1">
69
+ <title>Ruby Basics</title>
70
+ <author>Jane Smith</author>
71
+ <draft>true</draft>
72
+ </book>
73
+ </library>
74
+ XML
75
+
76
+ doc = Moxml.new.parse(xml)
77
+
78
+ # Find and remove element
79
+ draft = doc.at_xpath('//draft')
80
+ draft.remove
81
+
82
+ # Remove by parent
83
+ book = doc.at_xpath('//book')
84
+ author = book.at_xpath('.//author')
85
+ book.remove_child(author)
86
+
87
+ puts doc.to_xml
88
+ ----
89
+
90
+ === Modifying attributes
91
+
92
+ Update, add, and remove attributes:
93
+
94
+ [source,ruby]
95
+ ----
96
+ xml = '<book id="1" status="draft">Ruby Basics</book>'
97
+ doc = Moxml.new.parse(xml)
98
+
99
+ book = doc.root
100
+
101
+ # Update existing attribute
102
+ book['id'] = '100'
103
+
104
+ # Add new attribute
105
+ book['edition'] = '2nd'
106
+ book['category'] = 'programming'
107
+
108
+ # Remove attribute
109
+ book.remove_attribute('status')
110
+
111
+ # Get all attributes
112
+ book.attributes.each do |attr|
113
+ puts "#{attr.name}=#{attr.value}"
114
+ end
115
+ # => id=100
116
+ # => edition=2nd
117
+ # => category=programming
118
+ ----
119
+
120
+ === Replacing nodes
121
+
122
+ Replace elements with new content:
123
+
124
+ [source,ruby]
125
+ ----
126
+ xml = '<book><title>Old Title</title><author>Old Author</author></book>'
127
+ doc = Moxml.new.parse(xml)
128
+
129
+ # Replace title element
130
+ old_title = doc.at_xpath('//title')
131
+ new_title = doc.create_element('title')
132
+ new_title.text = 'New Title'
133
+ new_title['lang'] = 'en'
134
+
135
+ old_title.replace(new_title)
136
+
137
+ # Replace text node
138
+ author = doc.at_xpath('//author')
139
+ author.children.first.replace(doc.create_text('New Author'))
140
+
141
+ puts doc.to_xml
142
+ ----
143
+
144
+ === Adding siblings
145
+
146
+ Insert elements relative to existing nodes:
147
+
148
+ [source,ruby]
149
+ ----
150
+ xml = <<~XML
151
+ <book>
152
+ <title>Ruby Programming</title>
153
+ <price>29.99</price>
154
+ </book>
155
+ XML
156
+
157
+ doc = Moxml.new.parse(xml)
158
+
159
+ # Add before price
160
+ price = doc.at_xpath('//price')
161
+ author = doc.create_element('author')
162
+ author.text = 'Jane Smith'
163
+ price.add_previous_sibling(author)
164
+
165
+ # Add after title
166
+ title = doc.at_xpath('//title')
167
+ subtitle = doc.create_element('subtitle')
168
+ subtitle.text = 'A Comprehensive Guide'
169
+ title.add_next_sibling(subtitle)
170
+
171
+ puts doc.to_xml(indent: 2)
172
+ ----
173
+
174
+ Output:
175
+
176
+ [source,xml]
177
+ ----
178
+ <book>
179
+ <title>Ruby Programming</title>
180
+ <subtitle>A Comprehensive Guide</subtitle>
181
+ <author>Jane Smith</author>
182
+ <price>29.99</price>
183
+ </book>
184
+ ----
185
+
186
+ === Batch modifications
187
+
188
+ Update multiple elements at once:
189
+
190
+ [source,ruby]
191
+ ----
192
+ xml = <<~XML
193
+ <library>
194
+ <book><price currency="USD">29.99</price></book>
195
+ <book><price currency="USD">39.99</price></book>
196
+ <book><price currency="USD">19.99</price></book>
197
+ </library>
198
+ XML
199
+
200
+ doc = Moxml.new.parse(xml)
201
+
202
+ # Apply 10% discount to all books
203
+ doc.xpath('//price').each do |price|
204
+ current = price.text.to_f
205
+ discounted = (current * 0.9).round(2)
206
+ price.text = discounted.to_s
207
+ price['original'] = current.to_s
208
+ end
209
+
210
+ puts doc.to_xml(indent: 2)
211
+ ----
212
+
213
+ === Preserving structure
214
+
215
+ Maintain document structure during modifications:
216
+
217
+ [source,ruby]
218
+ ----
219
+ xml = <<~XML
220
+ <?xml version="1.0" encoding="UTF-8"?>
221
+ <library>
222
+ <book id="1">Ruby Basics</book>
223
+ </library>
224
+ XML
225
+
226
+ doc = Moxml.new.parse(xml)
227
+
228
+ # Modifications preserve declaration
229
+ book = doc.at_xpath('//book')
230
+ book['edition'] = '2nd'
231
+
232
+ # Original structure maintained
233
+ puts doc.to_xml(indent: 2)
234
+ # => Still has <?xml ... ?> declaration
235
+ ----
236
+
237
+ === Common modification patterns
238
+
239
+ ==== Update or create pattern
240
+
241
+ [source,ruby]
242
+ ----
243
+ def ensure_element(parent, name, text)
244
+ elem = parent.at_xpath(".//#{name}")
245
+
246
+ if elem
247
+ # Update existing
248
+ elem.text = text
249
+ else
250
+ # Create new
251
+ elem = parent.document.create_element(name)
252
+ elem.text = text
253
+ parent.add_child(elem)
254
+ end
255
+
256
+ elem
257
+ end
258
+
259
+ book = doc.at_xpath('//book')
260
+ ensure_element(book, 'author', 'Jane Smith')
261
+ ensure_element(book, 'price', '29.99')
262
+ ----
263
+
264
+ ==== Conditional modification
265
+
266
+ [source,ruby]
267
+ ----
268
+ doc.xpath('//book').each do |book|
269
+ price = book.at_xpath('.//price')
270
+ next unless price
271
+
272
+ # Add discount for expensive books
273
+ if price.text.to_f > 30
274
+ book['discount'] = '10%'
275
+ price.text = (price.text.to_f * 0.9).round(2).to_s
276
+ end
277
+ end
278
+ ----
279
+
280
+ === Best practices
281
+
282
+ . **Find before modifying** - always locate elements first
283
+ . **Check element exists** before calling methods on it
284
+ . **Use transactions** for complex modifications if needed
285
+ . **Validate structure** after major changes
286
+ . **Preserve document metadata** (declarations, encoding)
287
+
288
+ === See also
289
+
290
+ * link:../tutorials/builder-pattern[Builder pattern] - Build documents from scratch
291
+ * link:../tutorials/basic-usage[Basic usage] - Fundamentals
292
+ * link:../pages/node-api-reference[Node API reference] - Complete method reference
@@ -0,0 +1,230 @@
1
+ ---
2
+ title: Parsing XML
3
+ nav_order: 2
4
+ ---
5
+
6
+ == Parsing XML
7
+
8
+ === Purpose
9
+
10
+ Learn how to parse XML from various sources including strings, files, and IO
11
+ streams using different Moxml adapters.
12
+
13
+ === Basic string parsing
14
+
15
+ Parse XML from a string:
16
+
17
+ [source,ruby]
18
+ ----
19
+ require 'moxml'
20
+
21
+ xml_string = '<root><child>content</child></root>'
22
+
23
+ # Parse with default adapter
24
+ doc = Moxml.new.parse(xml_string)
25
+
26
+ # Access parsed content
27
+ puts doc.root.name # => "root"
28
+ puts doc.root.children.first.text # => "content"
29
+ ----
30
+
31
+ === Parsing from files
32
+
33
+ Read and parse XML files:
34
+
35
+ [source,ruby]
36
+ ----
37
+ # Read file first, then parse
38
+ xml_content = File.read('document.xml')
39
+ doc = Moxml.new.parse(xml_content)
40
+
41
+ # Or use File.open with read
42
+ File.open('document.xml') do |file|
43
+ doc = Moxml.new.parse(file.read)
44
+ # Process document
45
+ end
46
+ ----
47
+
48
+ === Parsing with options
49
+
50
+ Control parsing behavior:
51
+
52
+ [source,ruby]
53
+ ----
54
+ # Strict parsing (raises errors on malformed XML)
55
+ doc = Moxml.new.parse(xml, strict: true)
56
+
57
+ # With specific encoding
58
+ doc = Moxml.new.parse(xml, encoding: 'ISO-8859-1')
59
+
60
+ # Relaxed parsing (attempts to handle malformed XML)
61
+ doc = Moxml.new.parse(possibly_invalid_xml, strict: false)
62
+ ----
63
+
64
+ === Handling parse errors
65
+
66
+ Catch and handle parsing errors:
67
+
68
+ [source,ruby]
69
+ ----
70
+ xml = '<root><unclosed>'
71
+
72
+ begin
73
+ doc = Moxml.new.parse(xml, strict: true)
74
+ rescue Moxml::ParseError => e
75
+ puts "Parse failed at line #{e.line}, column #{e.column}"
76
+ puts "Error: #{e.message}"
77
+ puts e.to_s # Includes helpful hints
78
+ end
79
+ ----
80
+
81
+ === Parsing large documents
82
+
83
+ Handle large XML files efficiently:
84
+
85
+ [source,ruby]
86
+ ----
87
+ # For large files, consider memory usage
88
+ large_xml = File.read('large_document.xml')
89
+
90
+ context = Moxml.new
91
+ # Choose appropriate adapter for size
92
+ context.config.adapter = :ox # Fast for large files
93
+
94
+ doc = context.parse(large_xml)
95
+
96
+ # Process in chunks if possible
97
+ doc.xpath('//record').each_slice(1000) do |records|
98
+ process_batch(records)
99
+ end
100
+ ----
101
+
102
+ === Parsing with different adapters
103
+
104
+ Each adapter may handle edge cases differently:
105
+
106
+ [source,ruby]
107
+ ----
108
+ xml_with_namespaces = <<~XML
109
+ <library xmlns="http://example.org">
110
+ <book>Title</book>
111
+ </library>
112
+ XML
113
+
114
+ # Parse with Nokogiri (full namespace support)
115
+ context_nokogiri = Moxml.new
116
+ context_nokogiri.config.adapter = :nokogiri
117
+ doc = context_nokogiri.parse(xml_with_namespaces)
118
+
119
+ # Parse with REXML (limited namespace XPath)
120
+ context_rexml = Moxml.new
121
+ context_rexml.config.adapter = :rexml
122
+ doc = context_rexml.parse(xml_with_namespaces)
123
+ # Namespaces are preserved, but namespace-aware XPath queries are limited
124
+ ----
125
+
126
+ === Common parse patterns
127
+
128
+ ==== Parse and extract data
129
+
130
+ [source,ruby]
131
+ ----
132
+ xml = <<~XML
133
+ <products>
134
+ <product id="1">
135
+ <name>Widget A</name>
136
+ <price>9.99</price>
137
+ </product>
138
+ <product id="2">
139
+ <name>Widget B</name>
140
+ <price>14.99</price>
141
+ </product>
142
+ </products>
143
+ XML
144
+
145
+ doc = Moxml.new.parse(xml)
146
+
147
+ # Extract data into Ruby structures
148
+ products = doc.xpath('//product').map do |prod|
149
+ {
150
+ id: prod['id'],
151
+ name: prod.at_xpath('.//name').text,
152
+ price: prod.at_xpath('.//price').text.to_f
153
+ }
154
+ end
155
+
156
+ products.each { |p| puts "#{p[:name]}: $#{p[:price]}" }
157
+ ----
158
+
159
+ ==== Parse and validate
160
+
161
+ [source,ruby]
162
+ ----
163
+ doc = Moxml.new.parse(xml)
164
+
165
+ # Validate required elements exist
166
+ required_elements = ['title', 'author', 'price']
167
+
168
+ required_elements.each do |elem|
169
+ unless doc.at_xpath("//#{elem}")
170
+ raise "Missing required element: #{elem}"
171
+ end
172
+ end
173
+ ----
174
+
175
+ === Troubleshooting
176
+
177
+ **Encoding issues:**
178
+
179
+ [source,ruby]
180
+ ----
181
+ # Specify encoding explicitly
182
+ doc = Moxml.new.parse(xml, encoding: 'UTF-8')
183
+
184
+ # Or let adapter auto-detect
185
+ doc = Moxml.new.parse(xml) # Usually works
186
+ ----
187
+
188
+ **Malformed XML:**
189
+
190
+ [source,ruby]
191
+ ----
192
+ # Use relaxed parsing
193
+ doc = Moxml.new.parse(possibly_broken_xml, strict: false)
194
+
195
+ # Check what was parsed
196
+ puts doc.root.name
197
+ puts doc.to_xml # See what was actually parsed
198
+ ----
199
+
200
+ **Empty or whitespace:**
201
+
202
+ [source,ruby]
203
+ ----
204
+ xml = " \n <root/> \n "
205
+
206
+ # Whitespace is handled automatically
207
+ doc = Moxml.new.parse(xml)
208
+ puts doc.root.name # => "root"
209
+ ----
210
+
211
+ === Best practices
212
+
213
+ . **Always use strict mode in production** for data integrity
214
+ . **Specify encoding** when working with non-UTF-8 documents
215
+ . **Handle parse errors** gracefully with appropriate error messages
216
+ . **Choose the right adapter** based on document size and complexity
217
+ . **Validate critical elements** after parsing
218
+
219
+ === Next steps
220
+
221
+ * link:../tutorials/xpath-queries[XPath queries tutorial] - Learn advanced querying
222
+ * link:modifying-xml[Modifying XML] - Element manipulation
223
+ * link:../pages/error-handling[Error handling guide] - Comprehensive error
224
+ management
225
+
226
+ === See also
227
+
228
+ * link:../pages/adapters/[Adapters] - Choose the right adapter for parsing
229
+ * link:../references/document-api[Document API] - Complete parsing reference
230
+ * link:../pages/compatibility[Compatibility] - Adapter differences