moxml 0.1.6 → 0.1.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/dependent-repos.json +5 -0
  3. data/.github/workflows/dependent-tests.yml +20 -0
  4. data/.github/workflows/docs.yml +59 -0
  5. data/.github/workflows/rake.yml +12 -4
  6. data/.github/workflows/release.yml +5 -3
  7. data/.gitignore +37 -0
  8. data/.rubocop.yml +15 -7
  9. data/.rubocop_todo.yml +238 -40
  10. data/Gemfile +14 -9
  11. data/LICENSE.md +6 -2
  12. data/README.adoc +535 -373
  13. data/Rakefile +53 -0
  14. data/benchmarks/.gitignore +6 -0
  15. data/benchmarks/generate_report.rb +550 -0
  16. data/docs/Gemfile +13 -0
  17. data/docs/_config.yml +138 -0
  18. data/docs/_guides/advanced-features.adoc +87 -0
  19. data/docs/_guides/development-testing.adoc +165 -0
  20. data/docs/_guides/index.adoc +45 -0
  21. data/docs/_guides/modifying-xml.adoc +293 -0
  22. data/docs/_guides/parsing-xml.adoc +231 -0
  23. data/docs/_guides/sax-parsing.adoc +603 -0
  24. data/docs/_guides/working-with-documents.adoc +118 -0
  25. data/docs/_pages/adapter-compatibility.adoc +369 -0
  26. data/docs/_pages/adapters/headed-ox.adoc +237 -0
  27. data/docs/_pages/adapters/index.adoc +98 -0
  28. data/docs/_pages/adapters/libxml.adoc +286 -0
  29. data/docs/_pages/adapters/nokogiri.adoc +252 -0
  30. data/docs/_pages/adapters/oga.adoc +292 -0
  31. data/docs/_pages/adapters/ox.adoc +55 -0
  32. data/docs/_pages/adapters/rexml.adoc +293 -0
  33. data/docs/_pages/best-practices.adoc +430 -0
  34. data/docs/_pages/compatibility.adoc +468 -0
  35. data/docs/_pages/configuration.adoc +251 -0
  36. data/docs/_pages/error-handling.adoc +350 -0
  37. data/docs/_pages/headed-ox-limitations.adoc +558 -0
  38. data/docs/_pages/headed-ox.adoc +1025 -0
  39. data/docs/_pages/index.adoc +35 -0
  40. data/docs/_pages/installation.adoc +141 -0
  41. data/docs/_pages/node-api-reference.adoc +50 -0
  42. data/docs/_pages/performance.adoc +36 -0
  43. data/docs/_pages/quick-start.adoc +244 -0
  44. data/docs/_pages/thread-safety.adoc +29 -0
  45. data/docs/_references/document-api.adoc +408 -0
  46. data/docs/_references/index.adoc +48 -0
  47. data/docs/_tutorials/basic-usage.adoc +268 -0
  48. data/docs/_tutorials/builder-pattern.adoc +343 -0
  49. data/docs/_tutorials/index.adoc +33 -0
  50. data/docs/_tutorials/namespace-handling.adoc +325 -0
  51. data/docs/_tutorials/xpath-queries.adoc +359 -0
  52. data/docs/index.adoc +122 -0
  53. data/examples/README.md +124 -0
  54. data/examples/api_client/README.md +424 -0
  55. data/examples/api_client/api_client.rb +394 -0
  56. data/examples/api_client/example_response.xml +48 -0
  57. data/examples/headed_ox_example/README.md +90 -0
  58. data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
  59. data/examples/rss_parser/README.md +194 -0
  60. data/examples/rss_parser/example_feed.xml +93 -0
  61. data/examples/rss_parser/rss_parser.rb +189 -0
  62. data/examples/sax_parsing/README.md +50 -0
  63. data/examples/sax_parsing/data_extractor.rb +75 -0
  64. data/examples/sax_parsing/example.xml +21 -0
  65. data/examples/sax_parsing/large_file.rb +78 -0
  66. data/examples/sax_parsing/simple_parser.rb +55 -0
  67. data/examples/web_scraper/README.md +352 -0
  68. data/examples/web_scraper/example_page.html +201 -0
  69. data/examples/web_scraper/web_scraper.rb +312 -0
  70. data/lib/moxml/adapter/base.rb +107 -28
  71. data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
  72. data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
  73. data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
  74. data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
  75. data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
  76. data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
  77. data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
  78. data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
  79. data/lib/moxml/adapter/customized_ox/attribute.rb +28 -3
  80. data/lib/moxml/adapter/customized_ox/namespace.rb +0 -2
  81. data/lib/moxml/adapter/customized_ox/text.rb +0 -2
  82. data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
  83. data/lib/moxml/adapter/headed_ox.rb +161 -0
  84. data/lib/moxml/adapter/libxml.rb +1548 -0
  85. data/lib/moxml/adapter/nokogiri.rb +121 -9
  86. data/lib/moxml/adapter/oga.rb +123 -12
  87. data/lib/moxml/adapter/ox.rb +283 -27
  88. data/lib/moxml/adapter/rexml.rb +127 -20
  89. data/lib/moxml/adapter.rb +21 -4
  90. data/lib/moxml/attribute.rb +6 -0
  91. data/lib/moxml/builder.rb +40 -4
  92. data/lib/moxml/config.rb +8 -3
  93. data/lib/moxml/context.rb +39 -1
  94. data/lib/moxml/doctype.rb +13 -1
  95. data/lib/moxml/document.rb +39 -6
  96. data/lib/moxml/document_builder.rb +27 -5
  97. data/lib/moxml/element.rb +71 -2
  98. data/lib/moxml/error.rb +175 -6
  99. data/lib/moxml/node.rb +94 -3
  100. data/lib/moxml/node_set.rb +34 -0
  101. data/lib/moxml/sax/block_handler.rb +194 -0
  102. data/lib/moxml/sax/element_handler.rb +124 -0
  103. data/lib/moxml/sax/handler.rb +113 -0
  104. data/lib/moxml/sax.rb +31 -0
  105. data/lib/moxml/version.rb +1 -1
  106. data/lib/moxml/xml_utils/encoder.rb +4 -4
  107. data/lib/moxml/xml_utils.rb +7 -4
  108. data/lib/moxml/xpath/ast/node.rb +159 -0
  109. data/lib/moxml/xpath/cache.rb +91 -0
  110. data/lib/moxml/xpath/compiler.rb +1768 -0
  111. data/lib/moxml/xpath/context.rb +26 -0
  112. data/lib/moxml/xpath/conversion.rb +124 -0
  113. data/lib/moxml/xpath/engine.rb +52 -0
  114. data/lib/moxml/xpath/errors.rb +101 -0
  115. data/lib/moxml/xpath/lexer.rb +304 -0
  116. data/lib/moxml/xpath/parser.rb +485 -0
  117. data/lib/moxml/xpath/ruby/generator.rb +269 -0
  118. data/lib/moxml/xpath/ruby/node.rb +193 -0
  119. data/lib/moxml/xpath.rb +37 -0
  120. data/lib/moxml.rb +5 -2
  121. data/moxml.gemspec +3 -1
  122. data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
  123. data/spec/consistency/README.md +77 -0
  124. data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
  125. data/spec/examples/README.md +75 -0
  126. data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
  127. data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
  128. data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
  129. data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
  130. data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
  131. data/spec/integration/README.md +71 -0
  132. data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
  133. data/spec/integration/headed_ox_integration_spec.rb +326 -0
  134. data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
  135. data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
  136. data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
  137. data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
  138. data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
  139. data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
  140. data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
  141. data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
  142. data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
  143. data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
  144. data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
  145. data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
  146. data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
  147. data/spec/moxml/README.md +41 -0
  148. data/spec/moxml/adapter/.gitkeep +0 -0
  149. data/spec/moxml/adapter/README.md +61 -0
  150. data/spec/moxml/adapter/base_spec.rb +27 -0
  151. data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
  152. data/spec/moxml/adapter/libxml_spec.rb +14 -0
  153. data/spec/moxml/adapter/ox_spec.rb +9 -8
  154. data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
  155. data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
  156. data/spec/moxml/adapter_spec.rb +16 -0
  157. data/spec/moxml/attribute_spec.rb +30 -0
  158. data/spec/moxml/builder_spec.rb +33 -0
  159. data/spec/moxml/cdata_spec.rb +31 -0
  160. data/spec/moxml/comment_spec.rb +31 -0
  161. data/spec/moxml/config_spec.rb +3 -3
  162. data/spec/moxml/context_spec.rb +28 -0
  163. data/spec/moxml/declaration_spec.rb +36 -0
  164. data/spec/moxml/doctype_spec.rb +33 -0
  165. data/spec/moxml/document_builder_spec.rb +30 -0
  166. data/spec/moxml/document_spec.rb +105 -0
  167. data/spec/moxml/element_spec.rb +143 -0
  168. data/spec/moxml/error_spec.rb +266 -22
  169. data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
  170. data/spec/moxml/namespace_spec.rb +32 -0
  171. data/spec/moxml/node_set_spec.rb +39 -0
  172. data/spec/moxml/node_spec.rb +37 -0
  173. data/spec/moxml/processing_instruction_spec.rb +34 -0
  174. data/spec/moxml/sax_spec.rb +1067 -0
  175. data/spec/moxml/text_spec.rb +31 -0
  176. data/spec/moxml/version_spec.rb +14 -0
  177. data/spec/moxml/xml_utils/.gitkeep +0 -0
  178. data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
  179. data/spec/moxml/xml_utils_spec.rb +49 -0
  180. data/spec/moxml/xpath/ast/node_spec.rb +83 -0
  181. data/spec/moxml/xpath/axes_spec.rb +296 -0
  182. data/spec/moxml/xpath/cache_spec.rb +358 -0
  183. data/spec/moxml/xpath/compiler_spec.rb +406 -0
  184. data/spec/moxml/xpath/context_spec.rb +210 -0
  185. data/spec/moxml/xpath/conversion_spec.rb +365 -0
  186. data/spec/moxml/xpath/fixtures/sample.xml +25 -0
  187. data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
  188. data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
  189. data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
  190. data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
  191. data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
  192. data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
  193. data/spec/moxml/xpath/lexer_spec.rb +488 -0
  194. data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
  195. data/spec/moxml/xpath/parser_spec.rb +364 -0
  196. data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
  197. data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
  198. data/spec/moxml/xpath_capabilities_spec.rb +199 -0
  199. data/spec/moxml/xpath_spec.rb +77 -0
  200. data/spec/performance/README.md +83 -0
  201. data/spec/performance/benchmark_spec.rb +64 -0
  202. data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
  203. data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
  204. data/spec/performance/xpath_benchmark_spec.rb +259 -0
  205. data/spec/spec_helper.rb +58 -1
  206. data/spec/support/xml_matchers.rb +1 -1
  207. metadata +176 -35
  208. data/lib/ox/node.rb +0 -9
  209. data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
  210. /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
  211. /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
  212. /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
  213. /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
  214. /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
  215. /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
data/Rakefile CHANGED
@@ -9,4 +9,57 @@ require "rubocop/rake_task"
9
9
 
10
10
  RuboCop::RakeTask.new
11
11
 
12
# Focused RSpec entry points: one Rake task per spec directory plus two
# aggregate tasks.
namespace :spec do
  # Task name => [description, file glob, extra rspec options (optional)].
  # NOTE(review): the file list of this release shows specs under spec/moxml,
  # spec/integration, spec/consistency, spec/examples and spec/performance but
  # nothing under spec/unit — confirm the :unit glob matches any files.
  suites = {
    unit: ["Run unit tests only", "spec/unit/**/*_spec.rb"],
    adapter: ["Run adapter tests only", "spec/moxml/adapter/**/*_spec.rb"],
    integration: ["Run integration tests only", "spec/integration/**/*_spec.rb"],
    consistency: ["Run consistency tests only", "spec/consistency/**/*_spec.rb"],
    examples: ["Run example tests", "spec/examples/**/*_spec.rb"],
    performance: ["Run performance benchmarks", "spec/performance/**/*_spec.rb", "--tag performance"]
  }

  suites.each do |suite, (summary, glob, options)|
    desc summary
    RSpec::Core::RakeTask.new(suite) do |t|
      t.pattern = glob
      t.rspec_opts = options if options
    end
  end

  desc "Run unit + adapter + integration (fast feedback)"
  task fast: %i[unit adapter integration]

  desc "Run everything including examples and performance"
  task all: suites.keys
end
51
+
52
# Benchmark entry points that live outside the regular spec tasks.
namespace :benchmark do
  desc "Run XPath performance benchmarks"
  task :xpath do
    # Assigning nil removes the variable from the environment.
    # Presumably the benchmark spec skips itself while SKIP_BENCHMARKS is set — verify in the spec.
    ENV["SKIP_BENCHMARKS"] = nil
    sh "bundle exec rspec spec/performance/xpath_benchmark_spec.rb"
  end

  desc "Generate adapter benchmark report"
  task(:report) { ruby "benchmarks/generate_report.rb" }
end
64
+
12
65
  task default: %i[spec rubocop]
@@ -0,0 +1,6 @@
1
+ # Ignore benchmark result files (machine-specific)
2
+ *.yml
3
+ *.yaml
4
+
5
+ # Keep directory structure
6
+ !.gitignore
@@ -0,0 +1,550 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "benchmark"
4
+ require "benchmark/ips"
5
+ require_relative "../lib/moxml"
6
+ require "rbconfig"
7
+
8
+ # Benchmark Report Generator for Moxml Adapters
9
+ class MoxmlBenchmarkReport
10
+ ADAPTERS = %i[nokogiri oga rexml libxml ox headed_ox].freeze
11
+
12
+ # Test XML documents of varying complexity
13
+ SIMPLE_XML = <<~XML
14
+ <?xml version="1.0" encoding="UTF-8"?>
15
+ <root>
16
+ <item id="1">Simple</item>
17
+ </root>
18
+ XML
19
+
20
+ MEDIUM_XML = lambda {
21
+ xml = <<~XML
22
+ <?xml version="1.0" encoding="UTF-8"?>
23
+ <library xmlns="http://example.org/library" xmlns:dc="http://purl.org/dc/elements/1.1/">
24
+ XML
25
+ 50.times do |i|
26
+ xml += <<~ITEM
27
+ <book id="book-#{i}">
28
+ <dc:title>Book Title #{i}</dc:title>
29
+ <dc:author>Author #{i}</dc:author>
30
+ <published year="#{2020 + (i % 5)}"/>
31
+ <description>A detailed description of book #{i} with some content.</description>
32
+ </book>
33
+ ITEM
34
+ end
35
+ "#{xml}</library>"
36
+ }.call
37
+
38
+ LARGE_XML = lambda {
39
+ xml = <<~XML
40
+ <?xml version="1.0" encoding="UTF-8"?>
41
+ <catalog xmlns="http://example.org/catalog">
42
+ XML
43
+ 500.times do |i|
44
+ xml += <<~ITEM
45
+ <product id="prod-#{i}" category="cat-#{i % 10}">
46
+ <name>Product #{i}</name>
47
+ <price currency="USD">#{(i % 100) + 9.99}</price>
48
+ <stock>#{i % 1000}</stock>
49
+ <description>Description for product #{i}</description>
50
+ <metadata>
51
+ <created>2024-01-#{(i % 28) + 1}</created>
52
+ <updated>2024-10-#{(i % 28) + 1}</updated>
53
+ </metadata>
54
+ </product>
55
+ ITEM
56
+ end
57
+ "#{xml}</catalog>"
58
+ }.call
59
+
60
+ COMPLEX_NESTED_XML = <<~XML
61
+ <?xml version="1.0" encoding="UTF-8"?>
62
+ <root xmlns:a="http://a.org" xmlns:b="http://b.org">
63
+ <a:level1>
64
+ <b:level2>
65
+ <a:level3>
66
+ <b:level4>
67
+ <a:level5>Deep content</a:level5>
68
+ </b:level4>
69
+ </a:level3>
70
+ </b:level2>
71
+ </a:level1>
72
+ </root>
73
+ XML
74
+
75
# Starts with empty per-adapter result and error stores and records the
# moment the report object was created.
def initialize
  @timestamp = Time.now
  @errors = {}
  @results = {}
end
80
+
81
# Prints a banner, benchmarks every adapter listed in ADAPTERS in order, and
# finally writes the report via generate_report.
def run
  banner = "=" * 80
  puts banner
  puts "Moxml Adapter Benchmark Report Generator"
  puts banner
  puts ""

  ADAPTERS.each do |name|
    puts "\nBenchmarking #{name.to_s.upcase} adapter..."
    benchmark_adapter(name)
  end

  generate_report
end
94
+
95
+ private
96
+
97
# Runs every benchmark for one adapter and stores the measurements.
#
# Measurements are written to @results[adapter_name], keyed by benchmark
# name. Any StandardError raised here (for example while building the Moxml
# context) is appended to @errors[adapter_name] and reported on stdout
# instead of being raised.
#
# @param adapter_name [Symbol] adapter identifier assigned to config.adapter
# @return [nil]
def benchmark_adapter(adapter_name)
  results = @results[adapter_name] = {}
  @errors[adapter_name] = []

  context = Moxml.new { |config| config.adapter = adapter_name }

  # Parsing benchmarks
  results[:parse_simple] = benchmark_parse(context, SIMPLE_XML)
  results[:parse_medium] = benchmark_parse(context, MEDIUM_XML)
  results[:parse_large] = benchmark_parse(context, LARGE_XML)
  results[:parse_complex] = benchmark_parse(context, COMPLEX_NESTED_XML)

  # Serialization benchmarks. Each helper parses its own document, so no
  # extra parse is done here: the previous stray parse discarded its result
  # and, when it raised, skipped every remaining benchmark for the adapter.
  results[:serialize_simple] = benchmark_serialize(context, SIMPLE_XML)
  results[:serialize_medium] = benchmark_serialize(context, MEDIUM_XML)
  results[:serialize_large] = benchmark_serialize(context, LARGE_XML)

  # XPath benchmarks
  results[:xpath_simple] = benchmark_xpath_simple(context, MEDIUM_XML)
  results[:xpath_complex] = benchmark_xpath_complex(context, MEDIUM_XML)
  results[:xpath_namespace] = benchmark_xpath_namespace(context, MEDIUM_XML)

  # Memory benchmarks
  results[:memory_medium] = benchmark_memory(context, MEDIUM_XML)
  results[:memory_large] = benchmark_memory(context, LARGE_XML)

  puts " ✓ #{adapter_name} benchmarks completed"
rescue StandardError => e
  @errors[adapter_name] << "Failed to benchmark: #{e.message}"
  puts " ✗ #{adapter_name} benchmarks failed: #{e.message}"
end
144
+
145
# Measures how many times per second +context+ can parse +xml+, using
# benchmark-ips with a 1 second warm-up and a 2 second measurement window.
#
# @return [Float] iterations per second rounded to two decimals, or 0.0 when
#   anything raises (the message is then appended to @errors for the adapter)
def benchmark_parse(context, xml)
  report = Benchmark.ips do |job|
    job.config(time: 2, warmup: 1)
    job.report("parse") { context.parse(xml) }
  end
  report.entries.first.ips.round(2)
rescue StandardError => error
  @errors[context.config.adapter_name] << "Parse error: #{error.message}"
  0.0
end
155
+
156
# Parses +xml+ once, then measures how many times per second the resulting
# document can be serialized with to_xml (benchmark-ips, 1 s warm-up, 2 s run).
#
# @return [Float] iterations per second rounded to two decimals, or 0.0 when
#   parsing or serializing raises (the message is appended to @errors)
def benchmark_serialize(context, xml)
  document = context.parse(xml)
  report = Benchmark.ips do |job|
    job.config(time: 2, warmup: 1)
    job.report("serialize") { document.to_xml }
  end
  report.entries.first.ips.round(2)
rescue StandardError => error
  @errors[context.config.adapter_name] << "Serialize error: #{error.message}"
  0.0
end
166
+
167
# Parses +xml+ once, then measures how many times per second the query
# "//book" can be evaluated on the document.
#
# @return [Float] iterations per second rounded to two decimals, or 0.0 when
#   anything raises (the message is appended to @errors for the adapter)
def benchmark_xpath_simple(context, xml)
  document = context.parse(xml)
  report = Benchmark.ips do |job|
    job.config(time: 2, warmup: 1)
    job.report("xpath_simple") { document.xpath("//book") }
  end
  report.entries.first.ips.round(2)
rescue StandardError => error
  @errors[context.config.adapter_name] << "XPath simple error: #{error.message}"
  0.0
end
177
+
178
# Parses +xml+ once, then measures how many times per second the predicate
# query "//book[@id]" can be evaluated on the document.
#
# @return [Float] iterations per second rounded to two decimals, or 0.0 when
#   anything raises (the message is appended to @errors for the adapter)
def benchmark_xpath_complex(context, xml)
  document = context.parse(xml)
  report = Benchmark.ips do |job|
    job.config(time: 2, warmup: 1)
    job.report("xpath_complex") { document.xpath("//book[@id]") }
  end
  report.entries.first.ips.round(2)
rescue StandardError => error
  @errors[context.config.adapter_name] << "XPath complex error: #{error.message}"
  0.0
end
188
+
189
# Parses +xml+ once, then measures how many times per second the prefixed
# query "//dc:title" can be evaluated with the "dc" prefix bound to the
# Dublin Core elements namespace URI.
#
# @return [Float] iterations per second rounded to two decimals, or 0.0 when
#   anything raises (the message is appended to @errors for the adapter)
def benchmark_xpath_namespace(context, xml)
  document = context.parse(xml)
  prefixes = { "dc" => "http://purl.org/dc/elements/1.1/" }
  report = Benchmark.ips do |job|
    job.config(time: 2, warmup: 1)
    job.report("xpath_ns") { document.xpath("//dc:title", prefixes) }
  end
  report.entries.first.ips.round(2)
rescue StandardError => error
  @errors[context.config.adapter_name] << "XPath namespace error: #{error.message}"
  0.0
end
200
+
201
# Estimates memory growth per parse: samples get_memory_usage, parses +xml+
# ten times, samples again and averages the difference over the ten parses.
#
# @return [Float] average change in megabytes rounded to two decimals (the
#   difference is not clamped, so it can be negative), or 0.0 when anything
#   raises (the message is appended to @errors for the adapter)
def benchmark_memory(context, xml)
  baseline = get_memory_usage
  10.times { context.parse(xml) }
  growth = get_memory_usage - baseline
  (growth / 10.0).round(2)
rescue StandardError => error
  @errors[context.config.adapter_name] << "Memory error: #{error.message}"
  0.0
end
210
+
211
# Reads the `rss` column that `ps` reports for the current process and
# divides it by 1024.0 to express it in MB.
#
# @return [Float] 0.0 when RUBY_PLATFORM mentions neither "darwin" nor
#   "linux" (measurement is not supported there)
def get_memory_usage
  supported = %w[darwin linux].any? { |os| RUBY_PLATFORM.include?(os) }
  return 0.0 unless supported

  rss = `ps -o rss= -p #{Process.pid}`.to_i
  rss / 1024.0
end
221
+
222
# Turns an adapter's medium-document measurements into a letter grade.
#
# The score is a weighted sum on a 0..100 scale: parsing 30%, serialization
# 25%, simple XPath 20%, memory 15% (lower is better) and 10% reliability.
# normalize_score already returns a 0..100 value, so each component is scaled
# by its fractional weight; multiplying by 30/25/20/15 pushed the total far
# above 100 and made the grade bands meaningless.
#
# @param adapter_name [Symbol] key into @results and @errors
# @return [String] "A+", "A", "B+", "B", "C" or "D"; "N/A" when the adapter
#   has no results or recorded at least one error
def calculate_grade(adapter_name)
  results = @results[adapter_name]
  return "N/A" if results.empty? || @errors[adapter_name].any?

  # Reliability points are always earned here: adapters with errors already
  # returned "N/A" above.
  score = 10.0
  score += normalize_score(results[:parse_medium], 100, 2000) * 0.30
  score += normalize_score(results[:serialize_medium], 100, 1500) * 0.25
  score += normalize_score(results[:xpath_simple], 100, 2000) * 0.20
  score += normalize_score(results[:memory_medium], 5, 50, inverse: true) * 0.15

  # Lower bounds instead of integer ranges so fractional scores such as 89.5
  # cannot fall between two bands.
  bands = [[90, "A+"], [80, "A"], [70, "B+"], [60, "B"], [50, "C"]]
  band = bands.find { |floor, _grade| score >= floor }
  band ? band.last : "D"
end
243
+
244
# Maps +value+ linearly from the interval min..max onto 0..100, clamping
# anything outside the interval.
#
# @param inverse [Boolean] when true the result is flipped (100 - score), for
#   metrics where a lower value is better
# @return [Numeric] 0 for a nil or zero +value+ (even with inverse: true)
def normalize_score(value, min, max, inverse: false)
  return 0 unless value && !value.zero?

  ratio = (value - min).to_f / (max - min)
  scaled = (ratio * 100).clamp(0, 100)
  return scaled unless inverse

  100 - scaled
end
250
+
251
# Rates memory growth per parsed document with one to five stars.
#
# Bands (upper bounds are inclusive): up to 10 MB five stars, up to 20 four,
# up to 40 three, up to 80 two, anything larger one. Negative values, which
# occur when the measured process size shrank between samples, count as the
# best band instead of falling through to the worst.
#
# @param mb [Numeric] megabytes per document
# @return [String] a run of star characters
def memory_stars(mb)
  tiers = [[10, 5], [20, 4], [40, 3], [80, 2]]
  _ceiling, stars = tiers.find { |ceiling, _count| mb <= ceiling }
  "⭐" * (stars || 1)
end
260
+
261
# Writes the Markdown report to benchmarks/PERFORMANCE_REPORT.md (relative to
# the current working directory) and announces the path on stdout. The error
# section is only written when at least one adapter recorded an error.
def generate_report
  report_path = "benchmarks/PERFORMANCE_REPORT.md"
  rule = "=" * 80

  File.open(report_path, "w") do |f|
    write_header(f)
    write_summary_table(f)
    write_detailed_results(f)
    write_recommendations(f)
    write_environment_details(f)
    write_errors(f) if @errors.each_value.any?(&:any?)
  end

  puts "\n#{rule}"
  puts "Report generated: #{report_path}"
  puts rule
end
275
+
276
# Writes the report title, the generation time taken from @timestamp and a
# two-line introduction.
#
# @param f [IO] anything that responds to puts
def write_header(f)
  f.puts [
    "# Moxml Adapter Performance Benchmarks",
    "",
    "Generated: #{@timestamp.strftime('%Y-%m-%d %H:%M:%S %Z')}",
    "",
    "This report compares the performance of all Moxml adapters across various",
    "benchmarks including parsing, serialization, XPath queries, and memory usage.",
    ""
  ]
end
285
+
286
# Writes the summary table: one row per adapter that has results, showing the
# medium-document parse, serialize and simple-XPath rates, the memory figure
# with its star rating, and the overall grade. Missing figures print as "N/A".
#
# @param f [IO] anything that responds to puts
def write_summary_table(f)
  f.puts "## Summary"
  f.puts ""
  f.puts "| Adapter | Parse (ips) | Serialize (ips) | XPath (ips) | Memory (MB) | Grade |"
  f.puts "|---------|-------------|-----------------|-------------|-------------|-------|"

  ADAPTERS.each do |adapter|
    measured = @results[adapter]
    next if measured.empty?

    rates = %i[parse_medium serialize_medium xpath_simple].map do |key|
      measured[key]&.round(0) || "N/A"
    end
    memory = measured[:memory_medium]&.round(1) || "N/A"
    stars = memory.is_a?(Numeric) ? memory_stars(memory) : "N/A"
    grade = calculate_grade(adapter)

    cells = [adapter.to_s.capitalize, *rates, "#{memory} #{stars}", grade]
    f.puts "| #{cells.join(' | ')} |"
  end
  f.puts ""
end
307
+
308
+ def write_detailed_results(f)
309
+ f.puts "## Detailed Results"
310
+ f.puts ""
311
+
312
+ # Parsing benchmarks
313
+ f.puts "### Parsing Performance"
314
+ f.puts ""
315
+ f.puts "| Adapter | Simple XML | Medium XML | Large XML | Complex Nested |"
316
+ f.puts "|---------|------------|------------|-----------|----------------|"
317
+ ADAPTERS.each do |adapter|
318
+ next if @results[adapter].empty?
319
+
320
+ r = @results[adapter]
321
+ f.puts "| #{adapter.to_s.capitalize} | #{r[:parse_simple]&.round(0) || 'N/A'} ips | #{r[:parse_medium]&.round(0) || 'N/A'} ips | #{r[:parse_large]&.round(0) || 'N/A'} ips | #{r[:parse_complex]&.round(0) || 'N/A'} ips |"
322
+ end
323
+ f.puts ""
324
+ f.puts "**Document Sizes:**"
325
+ f.puts "- Simple: #{SIMPLE_XML.bytesize} bytes"
326
+ f.puts "- Medium: #{MEDIUM_XML.bytesize} bytes (50 book elements with namespaces)"
327
+ f.puts "- Large: #{LARGE_XML.bytesize} bytes (500 product elements)"
328
+ f.puts "- Complex: #{COMPLEX_NESTED_XML.bytesize} bytes (deeply nested with namespaces)"
329
+ f.puts ""
330
+
331
+ # Serialization benchmarks
332
+ f.puts "### Serialization Performance"
333
+ f.puts ""
334
+ f.puts "| Adapter | Simple XML | Medium XML | Large XML |"
335
+ f.puts "|---------|------------|------------|-----------|"
336
+ ADAPTERS.each do |adapter|
337
+ next if @results[adapter].empty?
338
+
339
+ r = @results[adapter]
340
+ f.puts "| #{adapter.to_s.capitalize} | #{r[:serialize_simple]&.round(0) || 'N/A'} ips | #{r[:serialize_medium]&.round(0) || 'N/A'} ips | #{r[:serialize_large]&.round(0) || 'N/A'} ips |"
341
+ end
342
+ f.puts ""
343
+
344
+ # XPath benchmarks
345
+ f.puts "### XPath Query Performance"
346
+ f.puts ""
347
+ f.puts "| Adapter | Simple Query | Complex Query | Namespace Query |"
348
+ f.puts "|---------|--------------|---------------|-----------------|"
349
+ ADAPTERS.each do |adapter|
350
+ next if @results[adapter].empty?
351
+
352
+ r = @results[adapter]
353
+ f.puts "| #{adapter.to_s.capitalize} | #{r[:xpath_simple]&.round(0) || 'N/A'} ips | #{r[:xpath_complex]&.round(0) || 'N/A'} ips | #{r[:xpath_namespace]&.round(0) || 'N/A'} ips |"
354
+ end
355
+ f.puts ""
356
+ f.puts "**Query Types:**"
357
+ f.puts "- Simple: `//book` (find all book elements)"
358
+ f.puts "- Complex: `//book[@id]` (find books with id attribute)"
359
+ f.puts "- Namespace: `//dc:title` (find elements in dc namespace)"
360
+ f.puts ""
361
+
362
+ # Memory benchmarks
363
+ f.puts "### Memory Usage"
364
+ f.puts ""
365
+ f.puts "| Adapter | Medium Document | Large Document |"
366
+ f.puts "|---------|-----------------|----------------|"
367
+ ADAPTERS.each do |adapter|
368
+ next if @results[adapter].empty?
369
+
370
+ r = @results[adapter]
371
+ medium_mem = r[:memory_medium]&.round(1) || "N/A"
372
+ large_mem = r[:memory_large]&.round(1) || "N/A"
373
+ f.puts "| #{adapter.to_s.capitalize} | #{medium_mem} MB | #{large_mem} MB |"
374
+ end
375
+ f.puts ""
376
+ f.puts "**Note:** Memory measurements show average increase per document parse."
377
+ f.puts ""
378
+
379
+ # Performance chart (ASCII art)
380
+ write_performance_chart(f)
381
+ end
382
+
383
# Writes a fenced ASCII bar chart comparing adapters on the medium-document
# parse, serialize and simple-XPath rates. Bars are scaled so the best
# adapter for a metric gets 50 blocks.
#
# @param f [IO] anything that responds to puts
def write_performance_chart(f)
  f.puts "### Performance Visualization"
  f.puts ""
  f.puts "```"
  f.puts "Relative Performance (Higher is Better)"
  f.puts ""

  charts = [
    ["Parsing (Medium XML):", :parse_medium],
    ["Serialization (Medium XML):", :serialize_medium],
    ["XPath Queries (Simple):", :xpath_simple]
  ]
  charts.each_with_index do |(title, metric), index|
    f.puts "" unless index.zero? # blank line between charts, none after the last
    f.puts title
    write_chart_bars(f, metric)
  end

  f.puts "```"
  f.puts ""
end

# Writes one bar line per adapter that has results for +metric+.
# When no adapter produced a positive figure the bars are left empty: dividing
# by a zero maximum yields NaN, and NaN#to_i raises FloatDomainError.
def write_chart_bars(f, metric)
  peak = @results.values.map { |r| r[metric] || 0 }.max.to_f

  ADAPTERS.each do |adapter|
    next if @results[adapter].empty?

    value = @results[adapter][metric] || 0
    bar_length = peak.positive? ? (value / peak * 50).to_i : 0
    f.puts " #{adapter.to_s.capitalize.ljust(10)} #{'█' * bar_length} #{value.round(0)} ips"
  end
end
425
+
426
# Writes the "Recommendations" chapter: the best adapter per medium-document
# metric (highest parse, serialize and simple-XPath rate, lowest memory
# figure) followed by a fixed adapter selection guide.
#
# The closing pointer names docs/_pages/headed-ox-limitations.adoc, the
# limitations page listed among this release's files; the previous target,
# docs/HEADED_OX_LIMITATIONS.md, does not appear in that list.
#
# @param f [IO] anything that responds to puts
def write_recommendations(f)
  f.puts "## Recommendations"
  f.puts ""

  # Find best performers; a missing figure ranks last for every metric.
  best_parse = ADAPTERS.max_by { |a| @results[a][:parse_medium] || 0 }
  best_serialize = ADAPTERS.max_by { |a| @results[a][:serialize_medium] || 0 }
  best_xpath = ADAPTERS.max_by { |a| @results[a][:xpath_simple] || 0 }
  best_memory = ADAPTERS.min_by { |a| @results[a][:memory_medium] || Float::INFINITY }

  f.puts "### Best Performers"
  f.puts ""
  f.puts "- **Fastest Parser:** #{best_parse.to_s.capitalize} (#{@results[best_parse][:parse_medium]&.round(0)} ips)"
  f.puts "- **Fastest Serializer:** #{best_serialize.to_s.capitalize} (#{@results[best_serialize][:serialize_medium]&.round(0)} ips)"
  f.puts "- **Fastest XPath:** #{best_xpath.to_s.capitalize} (#{@results[best_xpath][:xpath_simple]&.round(0)} ips)"
  f.puts "- **Lowest Memory:** #{best_memory.to_s.capitalize} (#{@results[best_memory][:memory_medium]&.round(1)} MB per document)"
  f.puts ""

  f.puts "### Adapter Selection Guide"
  f.puts ""
  f.puts <<~'GUIDE'
    **Choose Nokogiri when:**
    - You need industry-standard, battle-tested XML processing
    - Balanced performance across all operations is important
    - Full XPath and namespace support is required
    - You need the largest community and ecosystem

    **Choose Oga when:**
    - Pure Ruby implementation is required (JRuby, TruffleRuby)
    - You want good performance without C extensions
    - Cross-platform compatibility is critical

    **Choose REXML when:**
    - No external dependencies are allowed (stdlib only)
    - Maximum portability is needed
    - Performance is not the primary concern
    - You're working with small to medium documents

    **Choose LibXML when:**
    - You need an alternative to Nokogiri with similar features
    - Full namespace and XPath support is required
    - Good balance of speed and features is important

    **Choose Ox when:**
    - Maximum parsing and serialization speed is critical
    - Memory efficiency is paramount
    - XPath usage is minimal or you can work with basic queries
    - Document structures are relatively simple

    **CAUTION:** Ox's custom XPath engine supports common patterns but may not handle
    complex XPath expressions. Test thoroughly if advanced XPath is needed.

    **Choose HeadedOx when:**
    - Need Ox's fast parsing with comprehensive XPath support
    - Require all 27 XPath 1.0 functions (count, sum, contains, etc.)
    - Complex XPath predicates are essential
    - Want pure Ruby XPath engine for debugging
    - Basic namespace support is sufficient (6 of 13 axes)
    - Can accept 99.20% pass rate (16 documented Ox limitations)

    **Note:** HeadedOx = Ox parsing speed + full XPath features.
    See docs/_pages/headed-ox-limitations.adoc for complete details.

  GUIDE
end
494
+
495
# Writes the "Test Environment" chapter: Ruby, platform, Moxml version and
# benchmark time, followed by the version of each adapter's backing gem.
# A gem that cannot be required is listed as "Not installed"; adapters without
# an entry in the case below (e.g. :headed_ox) produce no line.
#
# @param f [IO] anything that responds to puts
def write_environment_details(f)
  f.puts "## Test Environment"
  f.puts ""
  {
    "Ruby Version" => RUBY_VERSION,
    "Ruby Platform" => RUBY_PLATFORM,
    "OS" => RbConfig::CONFIG["host_os"],
    "Architecture" => RbConfig::CONFIG["host_cpu"],
    "Moxml Version" => Moxml::VERSION,
    "Benchmark Time" => @timestamp.strftime("%Y-%m-%d %H:%M:%S %Z")
  }.each { |label, value| f.puts "- **#{label}:** #{value}" }
  f.puts ""
  f.puts "### Gem Versions"
  f.puts ""

  ADAPTERS.each do |adapter|
    # Libraries are required lazily so a missing gem only affects its own line.
    line =
      begin
        case adapter
        when :nokogiri
          require "nokogiri"
          "- Nokogiri: #{Nokogiri::VERSION}"
        when :oga
          require "oga"
          "- Oga: #{Oga::VERSION}"
        when :ox
          require "ox"
          "- Ox: #{Ox::VERSION}"
        when :libxml
          require "libxml"
          "- LibXML-Ruby: #{LibXML::XML::VERSION}"
        when :rexml
          "- REXML: (stdlib)"
        end
      rescue LoadError
        "- #{adapter.to_s.capitalize}: Not installed"
      end
    f.puts line if line
  end
  f.puts ""
end
529
+
530
# Writes the "Errors and Warnings" chapter: one sub-section per adapter that
# recorded at least one error, each error as a bullet.
#
# @param f [IO] anything that responds to puts
def write_errors(f)
  f.puts "## Errors and Warnings"
  f.puts ""
  @errors.each_pair do |adapter, messages|
    next if messages.empty?

    bullets = messages.map { |message| "- #{message}" }
    f.puts ["### #{adapter.to_s.capitalize}", "", *bullets, ""]
  end
end
544
+ end
545
+
546
+ # Run the benchmark report
547
# Only build the report when this file is executed directly, not when it is
# loaded by another script.
MoxmlBenchmarkReport.new.run if __FILE__ == $PROGRAM_NAME
data/docs/Gemfile ADDED
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
source "https://rubygems.org"

# Dependencies for the Jekyll site under docs/.
gem "jekyll"
gem "jekyll-asciidoc"
gem "jekyll-seo-tag"
gem "jekyll-sitemap"
gem "just-the-docs"

# Same grouping as a `group :jekyll_plugins do ... end` block.
gem "asciidoctor", group: :jekyll_plugins