moxml 0.1.7 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (212) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +10 -10
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
  80. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  81. data/lib/moxml/adapter/headed_ox.rb +161 -0
  82. data/lib/moxml/adapter/libxml.rb +1548 -0
  83. data/lib/moxml/adapter/nokogiri.rb +121 -9
  84. data/lib/moxml/adapter/oga.rb +123 -12
  85. data/lib/moxml/adapter/ox.rb +282 -26
  86. data/lib/moxml/adapter/rexml.rb +127 -20
  87. data/lib/moxml/adapter.rb +21 -4
  88. data/lib/moxml/attribute.rb +6 -0
  89. data/lib/moxml/builder.rb +40 -4
  90. data/lib/moxml/config.rb +8 -3
  91. data/lib/moxml/context.rb +39 -1
  92. data/lib/moxml/doctype.rb +13 -1
  93. data/lib/moxml/document.rb +39 -6
  94. data/lib/moxml/document_builder.rb +27 -5
  95. data/lib/moxml/element.rb +71 -2
  96. data/lib/moxml/error.rb +175 -6
  97. data/lib/moxml/node.rb +94 -3
  98. data/lib/moxml/node_set.rb +34 -0
  99. data/lib/moxml/sax/block_handler.rb +194 -0
  100. data/lib/moxml/sax/element_handler.rb +124 -0
  101. data/lib/moxml/sax/handler.rb +113 -0
  102. data/lib/moxml/sax.rb +31 -0
  103. data/lib/moxml/version.rb +1 -1
  104. data/lib/moxml/xml_utils/encoder.rb +4 -4
  105. data/lib/moxml/xml_utils.rb +7 -4
  106. data/lib/moxml/xpath/ast/node.rb +159 -0
  107. data/lib/moxml/xpath/cache.rb +91 -0
  108. data/lib/moxml/xpath/compiler.rb +1768 -0
  109. data/lib/moxml/xpath/context.rb +26 -0
  110. data/lib/moxml/xpath/conversion.rb +124 -0
  111. data/lib/moxml/xpath/engine.rb +52 -0
  112. data/lib/moxml/xpath/errors.rb +101 -0
  113. data/lib/moxml/xpath/lexer.rb +304 -0
  114. data/lib/moxml/xpath/parser.rb +485 -0
  115. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  116. data/lib/moxml/xpath/ruby/node.rb +193 -0
  117. data/lib/moxml/xpath.rb +37 -0
  118. data/lib/moxml.rb +5 -2
  119. data/moxml.gemspec +3 -1
  120. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  121. data/spec/consistency/README.md +77 -0
  122. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  123. data/spec/examples/README.md +75 -0
  124. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  125. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  126. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  127. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  128. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  129. data/spec/integration/README.md +71 -0
  130. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  131. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  132. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  133. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  134. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  135. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  136. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  137. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  138. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  139. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  140. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  141. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  142. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  143. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  144. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  145. data/spec/moxml/README.md +41 -0
  146. data/spec/moxml/adapter/.gitkeep +0 -0
  147. data/spec/moxml/adapter/README.md +61 -0
  148. data/spec/moxml/adapter/base_spec.rb +27 -0
  149. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  150. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  151. data/spec/moxml/adapter/ox_spec.rb +9 -8
  152. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  153. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  154. data/spec/moxml/adapter_spec.rb +16 -0
  155. data/spec/moxml/attribute_spec.rb +30 -0
  156. data/spec/moxml/builder_spec.rb +33 -0
  157. data/spec/moxml/cdata_spec.rb +31 -0
  158. data/spec/moxml/comment_spec.rb +31 -0
  159. data/spec/moxml/config_spec.rb +3 -3
  160. data/spec/moxml/context_spec.rb +28 -0
  161. data/spec/moxml/declaration_spec.rb +36 -0
  162. data/spec/moxml/doctype_spec.rb +33 -0
  163. data/spec/moxml/document_builder_spec.rb +30 -0
  164. data/spec/moxml/document_spec.rb +105 -0
  165. data/spec/moxml/element_spec.rb +143 -0
  166. data/spec/moxml/error_spec.rb +266 -22
  167. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  168. data/spec/moxml/namespace_spec.rb +32 -0
  169. data/spec/moxml/node_set_spec.rb +39 -0
  170. data/spec/moxml/node_spec.rb +37 -0
  171. data/spec/moxml/processing_instruction_spec.rb +34 -0
  172. data/spec/moxml/sax_spec.rb +1067 -0
  173. data/spec/moxml/text_spec.rb +31 -0
  174. data/spec/moxml/version_spec.rb +14 -0
  175. data/spec/moxml/xml_utils/.gitkeep +0 -0
  176. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  177. data/spec/moxml/xml_utils_spec.rb +49 -0
  178. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  179. data/spec/moxml/xpath/axes_spec.rb +296 -0
  180. data/spec/moxml/xpath/cache_spec.rb +358 -0
  181. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  182. data/spec/moxml/xpath/context_spec.rb +210 -0
  183. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  184. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  185. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  186. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  187. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  188. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  189. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  190. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  191. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  192. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  193. data/spec/moxml/xpath/parser_spec.rb +364 -0
  194. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  195. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  196. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  197. data/spec/moxml/xpath_spec.rb +77 -0
  198. data/spec/performance/README.md +83 -0
  199. data/spec/performance/benchmark_spec.rb +64 -0
  200. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  201. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  202. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  203. data/spec/spec_helper.rb +58 -1
  204. data/spec/support/xml_matchers.rb +1 -1
  205. metadata +176 -34
  206. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  207. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  208. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  209. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  210. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
@@ -0,0 +1,231 @@
1
+ ---
2
+ title: Parsing XML
3
+ parent: Overview
4
+ nav_order: 2
5
+ ---
6
+
7
+ == Parsing XML
8
+
9
+ === Purpose
10
+
11
+ Learn how to parse XML from various sources, including strings, files, and IO
12
+ streams, using different Moxml adapters.
13
+
14
+ === Basic string parsing
15
+
16
+ Parse XML from a string:
17
+
18
+ [source,ruby]
19
+ ----
20
+ require 'moxml'
21
+
22
+ xml_string = '<root><child>content</child></root>'
23
+
24
+ # Parse with default adapter
25
+ doc = Moxml.new.parse(xml_string)
26
+
27
+ # Access parsed content
28
+ puts doc.root.name # => "root"
29
+ puts doc.root.children.first.text # => "content"
30
+ ----
31
+
32
+ === Parsing from files
33
+
34
+ Read and parse XML files:
35
+
36
+ [source,ruby]
37
+ ----
38
+ # Read file first, then parse
39
+ xml_content = File.read('document.xml')
40
+ doc = Moxml.new.parse(xml_content)
41
+
42
+ # Or use File.open with read
43
+ File.open('document.xml') do |file|
44
+ doc = Moxml.new.parse(file.read)
45
+ # Process document
46
+ end
47
+ ----
48
+
49
+ === Parsing with options
50
+
51
+ Control parsing behavior:
52
+
53
+ [source,ruby]
54
+ ----
55
+ # Strict parsing (raises errors on malformed XML)
56
+ doc = Moxml.new.parse(xml, strict: true)
57
+
58
+ # With specific encoding
59
+ doc = Moxml.new.parse(xml, encoding: 'ISO-8859-1')
60
+
61
+ # Relaxed parsing (attempts to handle malformed XML)
62
+ doc = Moxml.new.parse(possibly_invalid_xml, strict: false)
63
+ ----
64
+
65
+ === Handling parse errors
66
+
67
+ Catch and handle parsing errors:
68
+
69
+ [source,ruby]
70
+ ----
71
+ xml = '<root><unclosed>'
72
+
73
+ begin
74
+ doc = Moxml.new.parse(xml, strict: true)
75
+ rescue Moxml::ParseError => e
76
+ puts "Parse failed at line #{e.line}, column #{e.column}"
77
+ puts "Error: #{e.message}"
78
+ puts e.to_s # Includes helpful hints
79
+ end
80
+ ----
81
+
82
+ === Parsing large documents
83
+
84
+ Handle large XML files efficiently:
85
+
86
+ [source,ruby]
87
+ ----
88
+ # For large files, consider memory usage
89
+ large_xml = File.read('large_document.xml')
90
+
91
+ context = Moxml.new
92
+ # Choose appropriate adapter for size
93
+ context.config.adapter = :ox # Fast for large files
94
+
95
+ doc = context.parse(large_xml)
96
+
97
+ # Process matches in batches (the whole document is already in memory here; use SAX parsing to stream truly large files)
98
+ doc.xpath('//record').each_slice(1000) do |records|
99
+ process_batch(records)
100
+ end
101
+ ----
102
+
103
+ === Parsing with different adapters
104
+
105
+ Each adapter may handle edge cases differently:
106
+
107
+ [source,ruby]
108
+ ----
109
+ xml_with_namespaces = <<~XML
110
+ <library xmlns="http://example.org">
111
+ <book>Title</book>
112
+ </library>
113
+ XML
114
+
115
+ # Parse with Nokogiri (full namespace support)
116
+ context_nokogiri = Moxml.new
117
+ context_nokogiri.config.adapter = :nokogiri
118
+ doc = context_nokogiri.parse(xml_with_namespaces)
119
+
120
+ # Parse with REXML (limited namespace XPath)
121
+ context_rexml = Moxml.new
122
+ context_rexml.config.adapter = :rexml
123
+ doc = context_rexml.parse(xml_with_namespaces)
124
+ # Namespace preserved but XPath queries limited
125
+ ----
126
+
127
+ === Common parse patterns
128
+
129
+ ==== Parse and extract data
130
+
131
+ [source,ruby]
132
+ ----
133
+ xml = <<~XML
134
+ <products>
135
+ <product id="1">
136
+ <name>Widget A</name>
137
+ <price>9.99</price>
138
+ </product>
139
+ <product id="2">
140
+ <name>Widget B</name>
141
+ <price>14.99</price>
142
+ </product>
143
+ </products>
144
+ XML
145
+
146
+ doc = Moxml.new.parse(xml)
147
+
148
+ # Extract data into Ruby structures
149
+ products = doc.xpath('//product').map do |prod|
150
+ {
151
+ id: prod['id'],
152
+ name: prod.at_xpath('.//name').text,
153
+ price: prod.at_xpath('.//price').text.to_f
154
+ }
155
+ end
156
+
157
+ products.each { |p| puts "#{p[:name]}: $#{p[:price]}" }
158
+ ----
159
+
160
+ ==== Parse and validate
161
+
162
+ [source,ruby]
163
+ ----
164
+ doc = Moxml.new.parse(xml)
165
+
166
+ # Validate required elements exist
167
+ required_elements = ['title', 'author', 'price']
168
+
169
+ required_elements.each do |elem|
170
+ unless doc.at_xpath("//#{elem}")
171
+ raise "Missing required element: #{elem}"
172
+ end
173
+ end
174
+ ----
175
+
176
+ === Troubleshooting
177
+
178
+ **Encoding issues:**
179
+
180
+ [source,ruby]
181
+ ----
182
+ # Specify encoding explicitly
183
+ doc = Moxml.new.parse(xml, encoding: 'UTF-8')
184
+
185
+ # Or let adapter auto-detect
186
+ doc = Moxml.new.parse(xml) # Usually works
187
+ ----
188
+
189
+ **Malformed XML:**
190
+
191
+ [source,ruby]
192
+ ----
193
+ # Use relaxed parsing
194
+ doc = Moxml.new.parse(possibly_broken_xml, strict: false)
195
+
196
+ # Check what was parsed
197
+ puts doc.root.name
198
+ puts doc.to_xml # See what was actually parsed
199
+ ----
200
+
201
+ **Leading or trailing whitespace:**
202
+
203
+ [source,ruby]
204
+ ----
205
+ xml = " \n <root/> \n "
206
+
207
+ # Whitespace is handled automatically
208
+ doc = Moxml.new.parse(xml)
209
+ puts doc.root.name # => "root"
210
+ ----
211
+
212
+ === Best practices
213
+
214
+ . **Always use strict mode in production** for data integrity
215
+ . **Specify encoding** when working with non-UTF-8 documents
216
+ . **Handle parse errors** gracefully with appropriate error messages
217
+ . **Choose the right adapter** based on document size and complexity
218
+ . **Validate critical elements** after parsing
219
+
220
+ === Next steps
221
+
222
+ * link:../tutorials/xpath-queries[XPath queries tutorial] - Learn advanced querying
223
+ * link:modifying-xml[Modifying XML] - Element manipulation
224
+ * link:../pages/error-handling[Error handling guide] - Comprehensive error
225
+ management
226
+
227
+ === See also
228
+
229
+ * link:../pages/adapters/[Adapters] - Choose the right adapter for parsing
230
+ * link:../references/document-api[Document API] - Complete parsing reference
231
+ * link:../pages/compatibility[Compatibility] - Adapter differences