moxml 0.1.7 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +224 -43
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +51 -0
- data/docs/_guides/modifying-xml.adoc +292 -0
- data/docs/_guides/parsing-xml.adoc +230 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_guides/xml-declaration.adoc +450 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +97 -0
- data/docs/_pages/adapters/libxml.adoc +285 -0
- data/docs/_pages/adapters/nokogiri.adoc +251 -0
- data/docs/_pages/adapters/oga.adoc +291 -0
- data/docs/_pages/adapters/ox.adoc +56 -0
- data/docs/_pages/adapters/rexml.adoc +292 -0
- data/docs/_pages/best-practices.adoc +429 -0
- data/docs/_pages/compatibility.adoc +467 -0
- data/docs/_pages/configuration.adoc +250 -0
- data/docs/_pages/error-handling.adoc +349 -0
- data/docs/_pages/headed-ox-limitations.adoc +574 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +140 -0
- data/docs/_pages/node-api-reference.adoc +49 -0
- data/docs/_pages/performance.adoc +35 -0
- data/docs/_pages/quick-start.adoc +243 -0
- data/docs/_pages/thread-safety.adoc +28 -0
- data/docs/_references/document-api.adoc +407 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +267 -0
- data/docs/_tutorials/builder-pattern.adoc +342 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +324 -0
- data/docs/_tutorials/xpath-queries.adoc +358 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +13 -8
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1564 -0
- data/lib/moxml/adapter/nokogiri.rb +156 -9
- data/lib/moxml/adapter/oga.rb +190 -15
- data/lib/moxml/adapter/ox.rb +322 -28
- data/lib/moxml/adapter/rexml.rb +157 -28
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +57 -2
- data/lib/moxml/declaration.rb +9 -0
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +53 -6
- data/lib/moxml/document_builder.rb +34 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +155 -4
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1770 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -5
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_preservation_spec.rb +217 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +4 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +178 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,429 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Best practices
|
|
3
|
+
nav_order: 9
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
== Best practices
|
|
7
|
+
|
|
8
|
+
=== Purpose
|
|
9
|
+
|
|
10
|
+
Recommended patterns and practices for using Moxml effectively in production
|
|
11
|
+
applications.
|
|
12
|
+
|
|
13
|
+
=== Document creation
|
|
14
|
+
|
|
15
|
+
==== Use builder pattern for new documents
|
|
16
|
+
|
|
17
|
+
[source,ruby]
|
|
18
|
+
----
|
|
19
|
+
# Preferred - clean and maintainable
|
|
20
|
+
doc = Moxml::Builder.build(Moxml.new) do |xml|
|
|
21
|
+
xml.declaration version: "1.0", encoding: "UTF-8"
|
|
22
|
+
xml.library do
|
|
23
|
+
xml.book "Content"
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
# Avoid - verbose and error-prone
|
|
28
|
+
doc = Moxml.new.create_document
|
|
29
|
+
doc.add_child(doc.create_declaration)
|
|
30
|
+
root = doc.create_element('library')
|
|
31
|
+
# ... many more lines
|
|
32
|
+
----
|
|
33
|
+
|
|
34
|
+
==== Use direct manipulation for modifications
|
|
35
|
+
|
|
36
|
+
[source,ruby]
|
|
37
|
+
----
|
|
38
|
+
# Preferred for modifying existing documents
|
|
39
|
+
doc = Moxml.new.parse(xml)
|
|
40
|
+
book = doc.at_xpath('//book[@id="1"]')
|
|
41
|
+
book['edition'] = '2nd'
|
|
42
|
+
book.at_xpath('.//price').text = '24.99'
|
|
43
|
+
----
|
|
44
|
+
|
|
45
|
+
=== XPath queries
|
|
46
|
+
|
|
47
|
+
==== Use specific paths
|
|
48
|
+
|
|
49
|
+
[source,ruby]
|
|
50
|
+
----
|
|
51
|
+
# More efficient - specific path
|
|
52
|
+
doc.xpath('/library/section/book')
|
|
53
|
+
|
|
54
|
+
# Less efficient - requires full document scan
|
|
55
|
+
doc.xpath('//book')
|
|
56
|
+
|
|
57
|
+
# Most efficient - from known parent
|
|
58
|
+
section.xpath('./book')
|
|
59
|
+
----
|
|
60
|
+
|
|
61
|
+
==== Cache query results
|
|
62
|
+
|
|
63
|
+
[source,ruby]
|
|
64
|
+
----
|
|
65
|
+
# Inefficient - queries multiple times
|
|
66
|
+
doc.xpath('//book').each do |book|
|
|
67
|
+
if doc.xpath('//book').length > 10
|
|
68
|
+
# ...
|
|
69
|
+
end
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
# Better - cache the result
|
|
73
|
+
books = doc.xpath('//book')
|
|
74
|
+
books.each do |book|
|
|
75
|
+
if books.length > 10
|
|
76
|
+
# ...
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
----
|
|
80
|
+
|
|
81
|
+
==== Use at_xpath for single results
|
|
82
|
+
|
|
83
|
+
[source,ruby]
|
|
84
|
+
----
|
|
85
|
+
# Preferred - when expecting single result
|
|
86
|
+
book = doc.at_xpath('//book[@id="1"]')
|
|
87
|
+
|
|
88
|
+
# Avoid - builds the full node set just to take its first element
|
|
89
|
+
book = doc.xpath('//book[@id="1"]').first
|
|
90
|
+
----
|
|
91
|
+
|
|
92
|
+
=== Adapter selection
|
|
93
|
+
|
|
94
|
+
==== Choose adapter explicitly in production
|
|
95
|
+
|
|
96
|
+
[source,ruby]
|
|
97
|
+
----
|
|
98
|
+
# Good - explicit and predictable
|
|
99
|
+
class XmlProcessor
|
|
100
|
+
def initialize
|
|
101
|
+
@context = Moxml.new
|
|
102
|
+
@context.config.adapter = :nokogiri
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
# Avoid - relies on gem load order
|
|
107
|
+
class XmlProcessor
|
|
108
|
+
def initialize
|
|
109
|
+
@context = Moxml.new # Uses whatever is available
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
----
|
|
113
|
+
|
|
114
|
+
==== Match adapter to use case
|
|
115
|
+
|
|
116
|
+
[source,ruby]
|
|
117
|
+
----
|
|
118
|
+
# High-throughput simple docs
|
|
119
|
+
fast_context = Moxml.new
|
|
120
|
+
fast_context.config.adapter = :ox
|
|
121
|
+
|
|
122
|
+
# Complex XPath queries
|
|
123
|
+
full_context = Moxml.new
|
|
124
|
+
full_context.config.adapter = :nokogiri
|
|
125
|
+
|
|
126
|
+
# Pure Ruby requirement
|
|
127
|
+
pure_context = Moxml.new
|
|
128
|
+
pure_context.config.adapter = :oga
|
|
129
|
+
----
|
|
130
|
+
|
|
131
|
+
=== Error handling
|
|
132
|
+
|
|
133
|
+
==== Always use strict parsing in production
|
|
134
|
+
|
|
135
|
+
[source,ruby]
|
|
136
|
+
----
|
|
137
|
+
# Production
|
|
138
|
+
def parse_production(xml)
|
|
139
|
+
Moxml.new.parse(xml, strict: true)
|
|
140
|
+
rescue Moxml::ParseError => e
|
|
141
|
+
logger.error("Invalid XML received: #{e.message}")
|
|
142
|
+
raise
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Development/testing only
|
|
146
|
+
def parse_development(xml)
|
|
147
|
+
Moxml.new.parse(xml, strict: false)
|
|
148
|
+
end
|
|
149
|
+
----
|
|
150
|
+
|
|
151
|
+
==== Catch specific errors
|
|
152
|
+
|
|
153
|
+
[source,ruby]
|
|
154
|
+
----
|
|
155
|
+
# Good - targeted error handling
|
|
156
|
+
begin
|
|
157
|
+
doc = Moxml.new.parse(xml)
|
|
158
|
+
process(doc)
|
|
159
|
+
rescue Moxml::ParseError => e
|
|
160
|
+
handle_parse_error(e)
|
|
161
|
+
rescue Moxml::XPathError => e
|
|
162
|
+
handle_xpath_error(e)
|
|
163
|
+
end
|
|
164
|
+
|
|
165
|
+
# Avoid - too broad
|
|
166
|
+
begin
|
|
167
|
+
doc = Moxml.new.parse(xml)
|
|
168
|
+
process(doc)
|
|
169
|
+
rescue StandardError => e
|
|
170
|
+
# Catches too much
|
|
171
|
+
end
|
|
172
|
+
----
|
|
173
|
+
|
|
174
|
+
=== Namespace handling
|
|
175
|
+
|
|
176
|
+
==== Define namespace mappings clearly
|
|
177
|
+
|
|
178
|
+
[source,ruby]
|
|
179
|
+
----
|
|
180
|
+
# Good - clear and reusable
|
|
181
|
+
NAMESPACES = {
|
|
182
|
+
'dc' => 'http://purl.org/dc/elements/1.1/',
|
|
183
|
+
'xhtml' => 'http://www.w3.org/1999/xhtml'
|
|
184
|
+
}.freeze
|
|
185
|
+
|
|
186
|
+
titles = doc.xpath('//dc:title', NAMESPACES)
|
|
187
|
+
|
|
188
|
+
# Avoid - inline everywhere
|
|
189
|
+
doc.xpath('//dc:title', { 'dc' => 'http://purl.org/dc/elements/1.1/' })
|
|
190
|
+
doc.xpath('//dc:creator', { 'dc' => 'http://purl.org/dc/elements/1.1/' })
|
|
191
|
+
----
|
|
192
|
+
|
|
193
|
+
==== Check adapter namespace support
|
|
194
|
+
|
|
195
|
+
[source,ruby]
|
|
196
|
+
----
|
|
197
|
+
def query_with_namespace(doc, expression, namespaces)
|
|
198
|
+
adapter = doc.context.config.adapter.name
|
|
199
|
+
|
|
200
|
+
if adapter.include?('Rexml') || adapter.include?('Ox')
|
|
201
|
+
# Fallback for limited namespace support
|
|
202
|
+
query_without_namespace(doc, expression)
|
|
203
|
+
else
|
|
204
|
+
doc.xpath(expression, namespaces)
|
|
205
|
+
end
|
|
206
|
+
end
|
|
207
|
+
----
|
|
208
|
+
|
|
209
|
+
=== Memory management
|
|
210
|
+
|
|
211
|
+
==== Release document references
|
|
212
|
+
|
|
213
|
+
[source,ruby]
|
|
214
|
+
----
|
|
215
|
+
# Process large documents
|
|
216
|
+
def process_large_xml(xml)
|
|
217
|
+
doc = Moxml.new.parse(xml)
|
|
218
|
+
result = extract_data(doc)
|
|
219
|
+
doc = nil # Allow GC
|
|
220
|
+
result
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
# Batch processing
|
|
224
|
+
xml_files.each do |file|
|
|
225
|
+
doc = Moxml.new.parse(File.read(file))
|
|
226
|
+
process(doc)
|
|
227
|
+
doc = nil # Release before next iteration
|
|
228
|
+
GC.start if large_file?(file)
|
|
229
|
+
end
|
|
230
|
+
----
|
|
231
|
+
|
|
232
|
+
==== Use streaming for very large files
|
|
233
|
+
|
|
234
|
+
[source,ruby]
|
|
235
|
+
----
|
|
236
|
+
# For extremely large documents
|
|
237
|
+
def process_huge_xml(filename)
|
|
238
|
+
# Process in chunks if possible
|
|
239
|
+
File.open(filename) do |file|
|
|
240
|
+
# Read and process incrementally (see the SAX parsing guide for event-based parsing)
|
|
241
|
+
end
|
|
242
|
+
end
|
|
243
|
+
----
|
|
244
|
+
|
|
245
|
+
=== Thread safety
|
|
246
|
+
|
|
247
|
+
==== Use separate contexts per thread
|
|
248
|
+
|
|
249
|
+
[source,ruby]
|
|
250
|
+
----
|
|
251
|
+
# Good - thread-safe
|
|
252
|
+
class XmlProcessor
|
|
253
|
+
def process(xml)
|
|
254
|
+
context = Moxml.new # New context per call
|
|
255
|
+
doc = context.parse(xml)
|
|
256
|
+
# Process...
|
|
257
|
+
end
|
|
258
|
+
end
|
|
259
|
+
|
|
260
|
+
# Avoid - shared state
|
|
261
|
+
class XmlProcessor
|
|
262
|
+
def initialize
|
|
263
|
+
@context = Moxml.new
|
|
264
|
+
end
|
|
265
|
+
|
|
266
|
+
def process(xml)
|
|
267
|
+
# Multiple threads share @context
|
|
268
|
+
@context.parse(xml) # Not thread-safe
|
|
269
|
+
end
|
|
270
|
+
end
|
|
271
|
+
----
|
|
272
|
+
|
|
273
|
+
==== Protect shared resources
|
|
274
|
+
|
|
275
|
+
[source,ruby]
|
|
276
|
+
----
|
|
277
|
+
class ThreadSafeProcessor
|
|
278
|
+
def initialize
|
|
279
|
+
@mutex = Mutex.new
|
|
280
|
+
@context = Moxml.new
|
|
281
|
+
end
|
|
282
|
+
|
|
283
|
+
def process(xml)
|
|
284
|
+
@mutex.synchronize do
|
|
285
|
+
doc = @context.parse(xml)
|
|
286
|
+
# Modify document safely
|
|
287
|
+
doc.to_xml
|
|
288
|
+
end
|
|
289
|
+
end
|
|
290
|
+
end
|
|
291
|
+
----
|
|
292
|
+
|
|
293
|
+
=== Performance optimization
|
|
294
|
+
|
|
295
|
+
==== Reuse context instances
|
|
296
|
+
|
|
297
|
+
[source,ruby]
|
|
298
|
+
----
|
|
299
|
+
# Good - reuse context within a single thread (see "Thread safety" for concurrent use)
|
|
300
|
+
class DocumentProcessor
|
|
301
|
+
def initialize
|
|
302
|
+
@context = Moxml.new
|
|
303
|
+
@context.config.adapter = :nokogiri
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
def process_many(xml_documents)
|
|
307
|
+
xml_documents.map do |xml|
|
|
308
|
+
@context.parse(xml) # Reuses same context
|
|
309
|
+
end
|
|
310
|
+
end
|
|
311
|
+
end
|
|
312
|
+
----
|
|
313
|
+
|
|
314
|
+
==== Choose appropriate adapter
|
|
315
|
+
|
|
316
|
+
[source,ruby]
|
|
317
|
+
----
|
|
318
|
+
# Match adapter to workload
|
|
319
|
+
def select_adapter(xml)
|
|
320
|
+
if complex_xpath_needed?(xml)
|
|
321
|
+
:nokogiri # Full XPath support
|
|
322
|
+
elsif simple_parsing?(xml)
|
|
323
|
+
:ox # Maximum speed
|
|
324
|
+
else
|
|
325
|
+
:nokogiri # Safe default
|
|
326
|
+
end
|
|
327
|
+
end
|
|
328
|
+
|
|
329
|
+
context = Moxml.new
|
|
330
|
+
context.config.adapter = select_adapter(xml)
|
|
331
|
+
----
|
|
332
|
+
|
|
333
|
+
=== Code organization
|
|
334
|
+
|
|
335
|
+
==== Extract XML operations into methods
|
|
336
|
+
|
|
337
|
+
[source,ruby]
|
|
338
|
+
----
|
|
339
|
+
class BookProcessor
|
|
340
|
+
def initialize
|
|
341
|
+
@context = Moxml.new
|
|
342
|
+
@context.config.adapter = :nokogiri
|
|
343
|
+
end
|
|
344
|
+
|
|
345
|
+
def parse_catalog(xml)
|
|
346
|
+
@context.parse(xml)
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
def find_book(doc, id)
|
|
350
|
+
doc.at_xpath("//book[@id='#{id}']")
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
def update_price(book, new_price)
|
|
354
|
+
price_elem = book.at_xpath('.//price')
|
|
355
|
+
price_elem.text = new_price.to_s
|
|
356
|
+
end
|
|
357
|
+
end
|
|
358
|
+
----
|
|
359
|
+
|
|
360
|
+
==== Use value objects for XML data
|
|
361
|
+
|
|
362
|
+
[source,ruby]
|
|
363
|
+
----
|
|
364
|
+
class Book
|
|
365
|
+
attr_accessor :id, :title, :author, :price
|
|
366
|
+
|
|
367
|
+
def self.from_xml(element)
|
|
368
|
+
new.tap do |book|
|
|
369
|
+
book.id = element['id']
|
|
370
|
+
book.title = element.at_xpath('.//title')&.text
|
|
371
|
+
book.author = element.at_xpath('.//author')&.text
|
|
372
|
+
book.price = element.at_xpath('.//price')&.text&.to_f
|
|
373
|
+
end
|
|
374
|
+
end
|
|
375
|
+
|
|
376
|
+
def to_xml(doc)
|
|
377
|
+
doc.create_element('book').tap do |elem|
|
|
378
|
+
elem['id'] = id
|
|
379
|
+
elem.add_child(doc.create_element('title').tap { |e| e.text = title })
|
|
380
|
+
elem.add_child(doc.create_element('author').tap { |e| e.text = author })
|
|
381
|
+
elem.add_child(doc.create_element('price').tap { |e| e.text = price.to_s })
|
|
382
|
+
end
|
|
383
|
+
end
|
|
384
|
+
end
|
|
385
|
+
----
|
|
386
|
+
|
|
387
|
+
=== Testing
|
|
388
|
+
|
|
389
|
+
==== Test with multiple adapters
|
|
390
|
+
|
|
391
|
+
[source,ruby]
|
|
392
|
+
----
|
|
393
|
+
RSpec.describe "XML Processing" do
|
|
394
|
+
[:nokogiri, :libxml, :oga].each do |adapter_name|
|
|
395
|
+
context "with #{adapter_name}" do
|
|
396
|
+
let(:context) do
|
|
397
|
+
Moxml.new.tap { |c| c.config.adapter = adapter_name }
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
it "processes correctly" do
|
|
401
|
+
doc = context.parse(xml)
|
|
402
|
+
# Test operations
|
|
403
|
+
end
|
|
404
|
+
end
|
|
405
|
+
end
|
|
406
|
+
end
|
|
407
|
+
----
|
|
408
|
+
|
|
409
|
+
==== Use fixtures for test XML
|
|
410
|
+
|
|
411
|
+
[source,ruby]
|
|
412
|
+
----
|
|
413
|
+
# spec/fixtures/sample.xml
|
|
414
|
+
XML_FIXTURE = File.read('spec/fixtures/sample.xml')
|
|
415
|
+
|
|
416
|
+
RSpec.describe "Processing" do
|
|
417
|
+
let(:doc) { Moxml.new.parse(XML_FIXTURE) }
|
|
418
|
+
|
|
419
|
+
it "extracts data" do
|
|
420
|
+
# Test with consistent fixture
|
|
421
|
+
end
|
|
422
|
+
end
|
|
423
|
+
----
|
|
424
|
+
|
|
425
|
+
=== See also
|
|
426
|
+
|
|
427
|
+
* link:configuration[Configuration] - Setup and options
|
|
428
|
+
* link:error-handling[Error handling] - Error management
|
|
429
|
+
* link:performance[Performance] - Performance considerations
|