moxml 0.1.7 → 0.1.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/dependent-repos.json +5 -0
- data/.github/workflows/dependent-tests.yml +20 -0
- data/.github/workflows/docs.yml +59 -0
- data/.github/workflows/rake.yml +10 -10
- data/.github/workflows/release.yml +5 -3
- data/.gitignore +37 -0
- data/.rubocop.yml +15 -7
- data/.rubocop_todo.yml +238 -40
- data/Gemfile +14 -9
- data/LICENSE.md +6 -2
- data/README.adoc +535 -373
- data/Rakefile +53 -0
- data/benchmarks/.gitignore +6 -0
- data/benchmarks/generate_report.rb +550 -0
- data/docs/Gemfile +13 -0
- data/docs/_config.yml +138 -0
- data/docs/_guides/advanced-features.adoc +87 -0
- data/docs/_guides/development-testing.adoc +165 -0
- data/docs/_guides/index.adoc +45 -0
- data/docs/_guides/modifying-xml.adoc +293 -0
- data/docs/_guides/parsing-xml.adoc +231 -0
- data/docs/_guides/sax-parsing.adoc +603 -0
- data/docs/_guides/working-with-documents.adoc +118 -0
- data/docs/_pages/adapter-compatibility.adoc +369 -0
- data/docs/_pages/adapters/headed-ox.adoc +237 -0
- data/docs/_pages/adapters/index.adoc +98 -0
- data/docs/_pages/adapters/libxml.adoc +286 -0
- data/docs/_pages/adapters/nokogiri.adoc +252 -0
- data/docs/_pages/adapters/oga.adoc +292 -0
- data/docs/_pages/adapters/ox.adoc +55 -0
- data/docs/_pages/adapters/rexml.adoc +293 -0
- data/docs/_pages/best-practices.adoc +430 -0
- data/docs/_pages/compatibility.adoc +468 -0
- data/docs/_pages/configuration.adoc +251 -0
- data/docs/_pages/error-handling.adoc +350 -0
- data/docs/_pages/headed-ox-limitations.adoc +558 -0
- data/docs/_pages/headed-ox.adoc +1025 -0
- data/docs/_pages/index.adoc +35 -0
- data/docs/_pages/installation.adoc +141 -0
- data/docs/_pages/node-api-reference.adoc +50 -0
- data/docs/_pages/performance.adoc +36 -0
- data/docs/_pages/quick-start.adoc +244 -0
- data/docs/_pages/thread-safety.adoc +29 -0
- data/docs/_references/document-api.adoc +408 -0
- data/docs/_references/index.adoc +48 -0
- data/docs/_tutorials/basic-usage.adoc +268 -0
- data/docs/_tutorials/builder-pattern.adoc +343 -0
- data/docs/_tutorials/index.adoc +33 -0
- data/docs/_tutorials/namespace-handling.adoc +325 -0
- data/docs/_tutorials/xpath-queries.adoc +359 -0
- data/docs/index.adoc +122 -0
- data/examples/README.md +124 -0
- data/examples/api_client/README.md +424 -0
- data/examples/api_client/api_client.rb +394 -0
- data/examples/api_client/example_response.xml +48 -0
- data/examples/headed_ox_example/README.md +90 -0
- data/examples/headed_ox_example/headed_ox_demo.rb +71 -0
- data/examples/rss_parser/README.md +194 -0
- data/examples/rss_parser/example_feed.xml +93 -0
- data/examples/rss_parser/rss_parser.rb +189 -0
- data/examples/sax_parsing/README.md +50 -0
- data/examples/sax_parsing/data_extractor.rb +75 -0
- data/examples/sax_parsing/example.xml +21 -0
- data/examples/sax_parsing/large_file.rb +78 -0
- data/examples/sax_parsing/simple_parser.rb +55 -0
- data/examples/web_scraper/README.md +352 -0
- data/examples/web_scraper/example_page.html +201 -0
- data/examples/web_scraper/web_scraper.rb +312 -0
- data/lib/moxml/adapter/base.rb +107 -28
- data/lib/moxml/adapter/customized_libxml/cdata.rb +28 -0
- data/lib/moxml/adapter/customized_libxml/comment.rb +24 -0
- data/lib/moxml/adapter/customized_libxml/declaration.rb +85 -0
- data/lib/moxml/adapter/customized_libxml/element.rb +39 -0
- data/lib/moxml/adapter/customized_libxml/node.rb +44 -0
- data/lib/moxml/adapter/customized_libxml/processing_instruction.rb +31 -0
- data/lib/moxml/adapter/customized_libxml/text.rb +27 -0
- data/lib/moxml/adapter/customized_oga/xml_generator.rb +1 -1
- data/lib/moxml/adapter/customized_ox/attribute.rb +28 -1
- data/lib/moxml/adapter/customized_rexml/formatter.rb +11 -6
- data/lib/moxml/adapter/headed_ox.rb +161 -0
- data/lib/moxml/adapter/libxml.rb +1548 -0
- data/lib/moxml/adapter/nokogiri.rb +121 -9
- data/lib/moxml/adapter/oga.rb +123 -12
- data/lib/moxml/adapter/ox.rb +282 -26
- data/lib/moxml/adapter/rexml.rb +127 -20
- data/lib/moxml/adapter.rb +21 -4
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/builder.rb +40 -4
- data/lib/moxml/config.rb +8 -3
- data/lib/moxml/context.rb +39 -1
- data/lib/moxml/doctype.rb +13 -1
- data/lib/moxml/document.rb +39 -6
- data/lib/moxml/document_builder.rb +27 -5
- data/lib/moxml/element.rb +71 -2
- data/lib/moxml/error.rb +175 -6
- data/lib/moxml/node.rb +94 -3
- data/lib/moxml/node_set.rb +34 -0
- data/lib/moxml/sax/block_handler.rb +194 -0
- data/lib/moxml/sax/element_handler.rb +124 -0
- data/lib/moxml/sax/handler.rb +113 -0
- data/lib/moxml/sax.rb +31 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xml_utils/encoder.rb +4 -4
- data/lib/moxml/xml_utils.rb +7 -4
- data/lib/moxml/xpath/ast/node.rb +159 -0
- data/lib/moxml/xpath/cache.rb +91 -0
- data/lib/moxml/xpath/compiler.rb +1768 -0
- data/lib/moxml/xpath/context.rb +26 -0
- data/lib/moxml/xpath/conversion.rb +124 -0
- data/lib/moxml/xpath/engine.rb +52 -0
- data/lib/moxml/xpath/errors.rb +101 -0
- data/lib/moxml/xpath/lexer.rb +304 -0
- data/lib/moxml/xpath/parser.rb +485 -0
- data/lib/moxml/xpath/ruby/generator.rb +269 -0
- data/lib/moxml/xpath/ruby/node.rb +193 -0
- data/lib/moxml/xpath.rb +37 -0
- data/lib/moxml.rb +5 -2
- data/moxml.gemspec +3 -1
- data/old-specs/moxml/adapter/customized_libxml/.gitkeep +6 -0
- data/spec/consistency/README.md +77 -0
- data/spec/{moxml/examples/adapter_spec.rb → consistency/adapter_parity_spec.rb} +4 -4
- data/spec/examples/README.md +75 -0
- data/spec/{support/shared_examples/examples/attribute.rb → examples/attribute_examples_spec.rb} +1 -1
- data/spec/{support/shared_examples/examples/basic_usage.rb → examples/basic_usage_spec.rb} +2 -2
- data/spec/{support/shared_examples/examples/namespace.rb → examples/namespace_examples_spec.rb} +3 -3
- data/spec/{support/shared_examples/examples/readme_examples.rb → examples/readme_examples_spec.rb} +6 -4
- data/spec/{support/shared_examples/examples/xpath.rb → examples/xpath_examples_spec.rb} +10 -6
- data/spec/integration/README.md +71 -0
- data/spec/{moxml/all_with_adapters_spec.rb → integration/all_adapters_spec.rb} +3 -2
- data/spec/integration/headed_ox_integration_spec.rb +326 -0
- data/spec/{support → integration}/shared_examples/edge_cases.rb +37 -10
- data/spec/integration/shared_examples/high_level/.gitkeep +0 -0
- data/spec/{support/shared_examples/context.rb → integration/shared_examples/high_level/context_behavior.rb} +2 -1
- data/spec/{support/shared_examples/integration.rb → integration/shared_examples/integration_workflows.rb} +23 -6
- data/spec/integration/shared_examples/node_wrappers/.gitkeep +0 -0
- data/spec/{support/shared_examples/cdata.rb → integration/shared_examples/node_wrappers/cdata_behavior.rb} +6 -1
- data/spec/{support/shared_examples/comment.rb → integration/shared_examples/node_wrappers/comment_behavior.rb} +2 -1
- data/spec/{support/shared_examples/declaration.rb → integration/shared_examples/node_wrappers/declaration_behavior.rb} +5 -2
- data/spec/{support/shared_examples/doctype.rb → integration/shared_examples/node_wrappers/doctype_behavior.rb} +2 -2
- data/spec/{support/shared_examples/document.rb → integration/shared_examples/node_wrappers/document_behavior.rb} +1 -1
- data/spec/{support/shared_examples/node.rb → integration/shared_examples/node_wrappers/node_behavior.rb} +9 -2
- data/spec/{support/shared_examples/node_set.rb → integration/shared_examples/node_wrappers/node_set_behavior.rb} +1 -18
- data/spec/{support/shared_examples/processing_instruction.rb → integration/shared_examples/node_wrappers/processing_instruction_behavior.rb} +6 -2
- data/spec/moxml/README.md +41 -0
- data/spec/moxml/adapter/.gitkeep +0 -0
- data/spec/moxml/adapter/README.md +61 -0
- data/spec/moxml/adapter/base_spec.rb +27 -0
- data/spec/moxml/adapter/headed_ox_spec.rb +311 -0
- data/spec/moxml/adapter/libxml_spec.rb +14 -0
- data/spec/moxml/adapter/ox_spec.rb +9 -8
- data/spec/moxml/adapter/shared_examples/.gitkeep +0 -0
- data/spec/{support/shared_examples/xml_adapter.rb → moxml/adapter/shared_examples/adapter_contract.rb} +39 -12
- data/spec/moxml/adapter_spec.rb +16 -0
- data/spec/moxml/attribute_spec.rb +30 -0
- data/spec/moxml/builder_spec.rb +33 -0
- data/spec/moxml/cdata_spec.rb +31 -0
- data/spec/moxml/comment_spec.rb +31 -0
- data/spec/moxml/config_spec.rb +3 -3
- data/spec/moxml/context_spec.rb +28 -0
- data/spec/moxml/declaration_spec.rb +36 -0
- data/spec/moxml/doctype_spec.rb +33 -0
- data/spec/moxml/document_builder_spec.rb +30 -0
- data/spec/moxml/document_spec.rb +105 -0
- data/spec/moxml/element_spec.rb +143 -0
- data/spec/moxml/error_spec.rb +266 -22
- data/spec/{moxml_spec.rb → moxml/moxml_spec.rb} +9 -9
- data/spec/moxml/namespace_spec.rb +32 -0
- data/spec/moxml/node_set_spec.rb +39 -0
- data/spec/moxml/node_spec.rb +37 -0
- data/spec/moxml/processing_instruction_spec.rb +34 -0
- data/spec/moxml/sax_spec.rb +1067 -0
- data/spec/moxml/text_spec.rb +31 -0
- data/spec/moxml/version_spec.rb +14 -0
- data/spec/moxml/xml_utils/.gitkeep +0 -0
- data/spec/moxml/xml_utils/encoder_spec.rb +27 -0
- data/spec/moxml/xml_utils_spec.rb +49 -0
- data/spec/moxml/xpath/ast/node_spec.rb +83 -0
- data/spec/moxml/xpath/axes_spec.rb +296 -0
- data/spec/moxml/xpath/cache_spec.rb +358 -0
- data/spec/moxml/xpath/compiler_spec.rb +406 -0
- data/spec/moxml/xpath/context_spec.rb +210 -0
- data/spec/moxml/xpath/conversion_spec.rb +365 -0
- data/spec/moxml/xpath/fixtures/sample.xml +25 -0
- data/spec/moxml/xpath/functions/boolean_functions_spec.rb +114 -0
- data/spec/moxml/xpath/functions/node_functions_spec.rb +145 -0
- data/spec/moxml/xpath/functions/numeric_functions_spec.rb +164 -0
- data/spec/moxml/xpath/functions/position_functions_spec.rb +93 -0
- data/spec/moxml/xpath/functions/special_functions_spec.rb +89 -0
- data/spec/moxml/xpath/functions/string_functions_spec.rb +381 -0
- data/spec/moxml/xpath/lexer_spec.rb +488 -0
- data/spec/moxml/xpath/parser_integration_spec.rb +210 -0
- data/spec/moxml/xpath/parser_spec.rb +364 -0
- data/spec/moxml/xpath/ruby/generator_spec.rb +421 -0
- data/spec/moxml/xpath/ruby/node_spec.rb +291 -0
- data/spec/moxml/xpath_capabilities_spec.rb +199 -0
- data/spec/moxml/xpath_spec.rb +77 -0
- data/spec/performance/README.md +83 -0
- data/spec/performance/benchmark_spec.rb +64 -0
- data/spec/{support/shared_examples/examples/memory.rb → performance/memory_usage_spec.rb} +3 -1
- data/spec/{support/shared_examples/examples/thread_safety.rb → performance/thread_safety_spec.rb} +3 -1
- data/spec/performance/xpath_benchmark_spec.rb +259 -0
- data/spec/spec_helper.rb +58 -1
- data/spec/support/xml_matchers.rb +1 -1
- metadata +176 -34
- data/spec/support/shared_examples/examples/benchmark_spec.rb +0 -51
- /data/spec/{support/shared_examples/builder.rb → integration/shared_examples/high_level/builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/document_builder.rb → integration/shared_examples/high_level/document_builder_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/attribute.rb → integration/shared_examples/node_wrappers/attribute_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/element.rb → integration/shared_examples/node_wrappers/element_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/namespace.rb → integration/shared_examples/node_wrappers/namespace_behavior.rb} +0 -0
- /data/spec/{support/shared_examples/text.rb → integration/shared_examples/node_wrappers/text_behavior.rb} +0 -0
|
@@ -0,0 +1,1067 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
RSpec.describe Moxml::SAX do
|
|
6
|
+
# Two-book library document used as parser input by the adapter examples.
let(:xml) { <<~XML }
  <?xml version="1.0" encoding="UTF-8"?>
  <library>
    <book id="1" category="programming">
      <title>Ruby Programming</title>
      <author>Jane Smith</author>
      <price>29.99</price>
    </book>
    <book id="2" category="fiction">
      <title>The Great Novel</title>
      <author>John Doe</author>
      <price>19.99</price>
    </book>
  </library>
XML
|
|
23
|
+
|
|
24
|
+
describe "Handler" do
  # The base handler must answer every SAX callback listed here.
  it "provides base event methods" do
    callbacks = %i[
      on_start_document
      on_end_document
      on_start_element
      on_end_element
      on_characters
      on_cdata
      on_comment
      on_processing_instruction
      on_error
      on_warning
    ]

    expect(Moxml::SAX::Handler.new).to respond_to(*callbacks)
  end
end
|
|
39
|
+
|
|
40
|
+
describe "ElementHandler" do
  subject(:handler) { Moxml::SAX::ElementHandler.new }

  # Sends one start-element event per tag, with empty attributes and namespaces.
  def open_elements(*tags)
    tags.each { |tag| handler.on_start_element(tag, {}, {}) }
  end

  it "tracks element stack" do
    open_elements("library")
    expect(handler.element_stack).to eq(%w[library])
    expect(handler.current_element).to eq("library")

    handler.on_start_element("book", { "id" => "1" }, {})
    expect(handler.element_stack).to eq(%w[library book])
    expect(handler.current_element).to eq("book")
    expect(handler.parent_element).to eq("library")

    # Closing elements unwinds the stack in reverse order.
    handler.on_end_element("book")
    expect(handler.element_stack).to eq(%w[library])

    handler.on_end_element("library")
    expect(handler.element_stack).to be_empty
  end

  it "provides path matching" do
    open_elements("library", "book", "title")

    # Both exact string paths and regular expressions are accepted.
    expect(handler.path_matches?("/library/book/title")).to be(true)
    expect(handler.path_matches?(%r{book/title$})).to be(true)
    expect(handler.path_matches?("/library/author")).to be(false)

    expect(handler.path_string).to eq("/library/book/title")
    expect(handler.depth).to eq(3)
  end

  it "checks element presence" do
    open_elements("library")
    expect(handler.in_element?("library")).to be(true)
    expect(handler.in_element?("book")).to be(false)

    open_elements("book")
    expect(handler.in_element?("library")).to be(true)
    expect(handler.in_element?("book")).to be(true)
  end
end
|
|
87
|
+
|
|
88
|
+
describe "BlockHandler" do
  it "supports DSL-style handler definition" do
    seen = []

    handler = Moxml::SAX::BlockHandler.new do
      start_element { |name, _attrs| seen << name }
    end

    %w[book title].each { |tag| handler.on_start_element(tag, {}, {}) }

    expect(seen).to eq(%w[book title])
  end

  it "supports multiple event handlers" do
    log = []

    handler = Moxml::SAX::BlockHandler.new do
      start_document { log << :start_doc }
      start_element { |name| log << [:start, name] }
      end_element { |name| log << [:end, name] }
      characters do |text|
        # Whitespace-only character data is not recorded.
        stripped = text.strip
        log << [:text, stripped] unless stripped.empty?
      end
      end_document { log << :end_doc }
    end

    handler.on_start_document
    handler.on_start_element("book", {}, {})
    handler.on_characters("Title")
    handler.on_end_element("book")
    handler.on_end_document

    expected = [
      :start_doc,
      [:start, "book"],
      [:text, "Title"],
      [:end, "book"],
      :end_doc,
    ]
    expect(log).to eq(expected)
  end
end
|
|
130
|
+
|
|
131
|
+
describe "SAX parsing with Nokogiri adapter", adapter: :nokogiri do
|
|
132
|
+
let(:context) { Moxml.new(:nokogiri) }
|
|
133
|
+
|
|
134
|
+
it "parses with class-based handler" do
  # Anonymous subclass held in a local: a `class` statement inside an example
  # would define a global constant that outlives this example.
  book_extractor = Class.new(Moxml::SAX::ElementHandler) do
    attr_reader :books

    def initialize
      super
      @books = []
      @current_book = nil
      # Unary plus yields an unfrozen buffer even though this file enables
      # frozen string literals, so no `.frozen?` guard is needed later.
      @current_text = +""
    end

    # Starts a new book record, or resets the text buffer for a field element.
    def on_start_element(name, attributes = {}, namespaces = {})
      super
      case name
      when "book"
        @current_book = { id: attributes["id"],
                          category: attributes["category"] }
      when "title", "author", "price"
        @current_text = +""
      end
    end

    # Accumulates character data while a book is open.
    def on_characters(text)
      @current_text << text if @current_book
    end

    # Stores the buffered text on the open book, or finishes the book.
    def on_end_element(name)
      case name
      when "title", "author"
        @current_book[name.to_sym] = @current_text.strip if @current_book
      when "price"
        @current_book[:price] = @current_text.strip.to_f if @current_book
      when "book"
        @books << @current_book if @current_book
        @current_book = nil
      end
      super
    end
  end

  handler = book_extractor.new
  context.sax_parse(xml, handler)

  expect(handler.books.length).to eq(2)

  expect(handler.books[0]).to include(
    id: "1",
    category: "programming",
    title: "Ruby Programming",
    author: "Jane Smith",
    price: 29.99,
  )

  expect(handler.books[1]).to include(
    id: "2",
    category: "fiction",
    title: "The Great Novel",
    author: "John Doe",
    price: 19.99,
  )
end
|
|
198
|
+
|
|
199
|
+
it "parses with block-based handler" do
  # Parse state is kept in locals that the handler blocks close over.
  books = []
  current_book = nil
  current_field = nil
  # Unary plus gives an unfrozen buffer; this file enables frozen string
  # literals, so a plain "" could not be appended to.
  current_text = +""

  context.sax_parse(xml) do
    start_element do |name, attrs|
      case name
      when "book"
        current_book = { id: attrs["id"] }
      when "title", "author", "price"
        current_field = name
        current_text = +""
      end
    end

    characters do |text|
      current_text << text if current_field
    end

    end_element do |name|
      case name
      when "title", "author", "price"
        # Guard both values so a stray end tag cannot call to_sym on nil.
        if current_book && current_field
          value = current_text.strip
          current_book[current_field.to_sym] =
            name == "price" ? value.to_f : value
        end
        current_field = nil
      when "book"
        books << current_book if current_book
        current_book = nil
      end
    end
  end

  expect(books.length).to eq(2)
  expect(books[0][:title]).to eq("Ruby Programming")
  expect(books[1][:title]).to eq("The Great Novel")
end
|
|
245
|
+
|
|
246
|
+
it "handles errors gracefully" do
  # Extending the handler with this module overrides on_error so the
  # reported error is recorded on the handler instance.
  error_recorder = Module.new do
    attr_reader :error_message

    def on_error(error)
      @error_caught = true
      @error_message = error.message
    end

    def error_caught?
      @error_caught
    end
  end

  handler = Moxml::SAX::Handler.new.extend(error_recorder)
  malformed = "<book><title>Unclosed"

  context.sax_parse(malformed, handler)

  expect(handler.error_caught?).to be(true)
  expect(handler.error_message).to match(/Premature end of data|unclosed token/i)
end
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
describe "SAX parsing with Ox adapter", adapter: :ox do
|
|
273
|
+
let(:context) { Moxml.new(:ox) }
|
|
274
|
+
|
|
275
|
+
it "parses with class-based handler" do
  # Same extraction logic as the other adapter examples, defined locally as
  # an anonymous subclass so no global constant leaks out of this example.
  book_extractor = Class.new(Moxml::SAX::ElementHandler) do
    attr_reader :books

    def initialize
      super
      @books = []
      @current_book = nil
      # Unary plus yields an unfrozen buffer even though this file enables
      # frozen string literals, so no `.frozen?` guard is needed later.
      @current_text = +""
    end

    # Starts a new book record, or resets the text buffer for a field element.
    def on_start_element(name, attributes = {}, namespaces = {})
      super
      case name
      when "book"
        @current_book = { id: attributes["id"],
                          category: attributes["category"] }
      when "title", "author", "price"
        @current_text = +""
      end
    end

    # Accumulates character data while a book is open.
    def on_characters(text)
      @current_text << text if @current_book
    end

    # Stores the buffered text on the open book, or finishes the book.
    def on_end_element(name)
      case name
      when "title", "author"
        @current_book[name.to_sym] = @current_text.strip if @current_book
      when "price"
        @current_book[:price] = @current_text.strip.to_f if @current_book
      when "book"
        @books << @current_book if @current_book
        @current_book = nil
      end
      super
    end
  end

  handler = book_extractor.new
  context.sax_parse(xml, handler)

  expect(handler.books.length).to eq(2)

  expect(handler.books[0]).to include(
    id: "1",
    category: "programming",
    title: "Ruby Programming",
    author: "Jane Smith",
    price: 29.99,
  )

  expect(handler.books[1]).to include(
    id: "2",
    category: "fiction",
    title: "The Great Novel",
    author: "John Doe",
    price: 19.99,
  )
end
|
|
339
|
+
|
|
340
|
+
it "parses with block-based handler" do
  # All parse state lives in one hash that the handler blocks close over.
  state = { books: [], current_book: nil, current_field: nil,
            current_text: +"" }

  context.sax_parse(xml) do
    start_element do |name, attrs|
      if name == "book"
        state[:current_book] = { id: attrs["id"] }
      elsif %w[title author price].include?(name)
        state[:current_field] = name
        state[:current_text] = +""
      end
    end

    characters do |text|
      buffer = state[:current_text]
      buffer << text if state[:current_field] && !buffer.frozen?
    end

    end_element do |name|
      case name
      when "title", "author", "price"
        book = state[:current_book]
        field = state[:current_field]
        if book && field
          value = state[:current_text].strip
          # Only the price is stored as a number; other fields stay strings.
          book[field.to_sym] = name == "price" ? value.to_f : value
        end
        state[:current_field] = nil
      when "book"
        finished = state[:current_book]
        state[:books] << finished if finished
        state[:current_book] = nil
      end
    end
  end

  parsed = state[:books]
  expect(parsed.length).to eq(2)
  expect(parsed[0][:title]).to eq("Ruby Programming")
  expect(parsed[1][:title]).to eq("The Great Novel")
end
|
|
387
|
+
|
|
388
|
+
it "handles errors gracefully" do
  # Extending the handler with this module overrides on_error so the
  # reported error is recorded on the handler instance.
  error_recorder = Module.new do
    attr_reader :error_message

    def on_error(error)
      @error_caught = true
      @error_message = error.message
    end

    def error_caught?
      @error_caught
    end
  end

  handler = Moxml::SAX::Handler.new.extend(error_recorder)
  malformed = "<book><title>Unclosed"

  context.sax_parse(malformed, handler)

  expect(handler.error_caught?).to be(true)
  expect(handler.error_message).to match(/invalid|unclosed|premature|mismatch|not closed/i)
end
|
|
412
|
+
|
|
413
|
+
it "documents CDATA limitation" do
  # Ox SAX delivers CDATA content through text events rather than a
  # dedicated CDATA event, so the example is skipped before running.
  skip "Ox SAX does not support separate CDATA events"

  saw_cdata = false
  source = "<root><![CDATA[special content]]></root>"

  context.sax_parse(source) do
    cdata { |_text| saw_cdata = true }
  end

  # Kept as a record of the behaviour that would be expected if the
  # skip above were removed.
  expect(saw_cdata).to be(true)
end
|
|
428
|
+
|
|
429
|
+
it "documents comment limitation" do
  # Ox SAX has no dedicated comment event, so the example is skipped
  # before running.
  skip "Ox SAX does not support separate comment events"

  saw_comment = false
  source = "<root><!-- comment --><data>content</data></root>"

  context.sax_parse(source) do
    comment { |_text| saw_comment = true }
  end

  # Kept as a record of the behaviour that would be expected if the
  # skip above were removed.
  expect(saw_comment).to be(true)
end
|
|
443
|
+
|
|
444
|
+
it "documents processing instruction limitation" do
  # Ox SAX has no dedicated processing-instruction event, so the example
  # is skipped before running.
  skip "Ox SAX does not support separate processing instruction events"

  saw_pi = false
  source = '<?xml-stylesheet type="text/xsl" href="style.xsl"?><root/>'

  context.sax_parse(source) do
    processing_instruction { |_target, _data| saw_pi = true }
  end

  # Kept as a record of the behaviour that would be expected if the
  # skip above were removed.
  expect(saw_pi).to be(true)
end
|
|
458
|
+
end
|
|
459
|
+
|
|
460
|
+
describe "SAX parsing with REXML adapter", adapter: :rexml do
|
|
461
|
+
let(:context) { Moxml.new(:rexml) }
|
|
462
|
+
|
|
463
|
+
it "parses with class-based handler" do
  # Same extraction logic as the other adapter examples, defined locally as
  # an anonymous subclass so no global constant leaks out of this example.
  book_extractor = Class.new(Moxml::SAX::ElementHandler) do
    attr_reader :books

    def initialize
      super
      @books = []
      @current_book = nil
      # Unary plus yields an unfrozen buffer even though this file enables
      # frozen string literals, so no `.frozen?` guard is needed later.
      @current_text = +""
    end

    # Starts a new book record, or resets the text buffer for a field element.
    def on_start_element(name, attributes = {}, namespaces = {})
      super
      case name
      when "book"
        @current_book = { id: attributes["id"],
                          category: attributes["category"] }
      when "title", "author", "price"
        @current_text = +""
      end
    end

    # Accumulates character data while a book is open.
    def on_characters(text)
      @current_text << text if @current_book
    end

    # Stores the buffered text on the open book, or finishes the book.
    def on_end_element(name)
      case name
      when "title", "author"
        @current_book[name.to_sym] = @current_text.strip if @current_book
      when "price"
        @current_book[:price] = @current_text.strip.to_f if @current_book
      when "book"
        @books << @current_book if @current_book
        @current_book = nil
      end
      super
    end
  end

  handler = book_extractor.new
  context.sax_parse(xml, handler)

  expect(handler.books.length).to eq(2)

  expect(handler.books[0]).to include(
    id: "1",
    category: "programming",
    title: "Ruby Programming",
    author: "Jane Smith",
    price: 29.99,
  )

  expect(handler.books[1]).to include(
    id: "2",
    category: "fiction",
    title: "The Great Novel",
    author: "John Doe",
    price: 19.99,
  )
end
|
|
527
|
+
|
|
528
|
+
it "parses with block-based handler" do
  # All parse state lives in one hash that the handler blocks close over.
  state = { books: [], current_book: nil, current_field: nil,
            current_text: +"" }

  context.sax_parse(xml) do
    start_element do |name, attrs|
      if name == "book"
        state[:current_book] = { id: attrs["id"] }
      elsif %w[title author price].include?(name)
        state[:current_field] = name
        state[:current_text] = +""
      end
    end

    characters do |text|
      buffer = state[:current_text]
      buffer << text if state[:current_field] && !buffer.frozen?
    end

    end_element do |name|
      case name
      when "title", "author", "price"
        book = state[:current_book]
        field = state[:current_field]
        if book && field
          value = state[:current_text].strip
          # Only the price is stored as a number; other fields stay strings.
          book[field.to_sym] = name == "price" ? value.to_f : value
        end
        state[:current_field] = nil
      when "book"
        finished = state[:current_book]
        state[:books] << finished if finished
        state[:current_book] = nil
      end
    end
  end

  parsed = state[:books]
  expect(parsed.length).to eq(2)
  expect(parsed[0][:title]).to eq("Ruby Programming")
  expect(parsed[1][:title]).to eq("The Great Novel")
end
|
|
574
|
+
|
|
575
|
+
it "handles errors gracefully" do
  # Extending the handler with this module overrides on_error so the
  # reported error is recorded on the handler instance.
  error_recorder = Module.new do
    attr_reader :error_message

    def on_error(error)
      @error_caught = true
      @error_message = error.message
    end

    def error_caught?
      @error_caught
    end
  end

  handler = Moxml::SAX::Handler.new.extend(error_recorder)
  malformed = "<book><title>Unclosed"

  context.sax_parse(malformed, handler)

  expect(handler.error_caught?).to be(true)
  expect(handler.error_message).to match(/missing|end tag|unclosed/i)
end
|
|
599
|
+
end
|
|
600
|
+
|
|
601
|
+
describe "SAX parsing with Oga adapter", adapter: :oga do
|
|
602
|
+
let(:context) { Moxml.new(:oga) }
|
|
603
|
+
|
|
604
|
+
it "parses with class-based handler" do
  # Same extraction pattern as the other adapters' BookExtractor handlers.
  # An anonymous subclass is used on purpose: a named `class` statement inside
  # an example defines a top-level constant that outlives the example and is
  # reopened every time the example runs.
  extractor_class = Class.new(Moxml::SAX::ElementHandler) do
    attr_reader :books

    def initialize
      super
      @books = []
      @current_book = nil
      # Unary plus gives a mutable buffer even when string literals are frozen,
      # so on_characters needs no frozen? guard.
      @current_text = +""
    end

    # Opens a new book record on <book>; resets the text buffer at the start
    # of each field element. `super` forwards the event to ElementHandler.
    def on_start_element(name, attributes = {}, namespaces = {})
      super
      case name
      when "book"
        @current_book = { id: attributes["id"],
                          category: attributes["category"] }
      when "title", "author", "price"
        @current_text = +""
      end
    end

    # Accumulates character data only while inside a <book>.
    def on_characters(text)
      @current_text << text if @current_book
    end

    # Stores the buffered text on the current book when a field closes and
    # files the finished record when </book> is reached.
    def on_end_element(name)
      case name
      when "title", "author"
        @current_book[name.to_sym] = @current_text.strip if @current_book
      when "price"
        @current_book[:price] = @current_text.strip.to_f if @current_book
      when "book"
        @books << @current_book if @current_book
        @current_book = nil
      end
      super
    end
  end

  handler = extractor_class.new
  context.sax_parse(xml, handler)

  expect(handler.books.length).to eq(2)

  expect(handler.books[0]).to include(
    id: "1",
    category: "programming",
    title: "Ruby Programming",
    author: "Jane Smith",
    price: 29.99,
  )

  expect(handler.books[1]).to include(
    id: "2",
    category: "fiction",
    title: "The Great Novel",
    author: "John Doe",
    price: 19.99,
  )
end
|
|
668
|
+
|
|
669
|
+
it "parses with block-based handler" do
  # Parser state lives in plain locals; the DSL blocks below close over them.
  books = []
  book = nil     # record currently being filled, nil outside <book>
  field = nil    # name of the field element currently open, if any
  buffer = +""   # text collected for the open field

  context.sax_parse(xml) do
    start_element do |name, attrs|
      if name == "book"
        book = { id: attrs["id"] }
      elsif %w[title author price].include?(name)
        field = name
        buffer = +""
      end
    end

    characters do |text|
      buffer << text if field && !buffer.frozen?
    end

    end_element do |name|
      case name
      when "title", "author", "price"
        if book && field
          stripped = buffer.strip
          # Only the price is converted to a number; other fields stay text.
          book[field.to_sym] = name == "price" ? stripped.to_f : stripped
        end
        field = nil
      when "book"
        books << book if book
        book = nil
      end
    end
  end

  expect(books.length).to eq(2)
  expect(books[0][:title]).to eq("Ruby Programming")
  expect(books[1][:title]).to eq("The Great Novel")
end
|
|
715
|
+
|
|
716
|
+
it "handles errors gracefully" do
  # Skipped up front; the remainder documents the intended check.
  skip "Oga SAX parser may be more lenient with malformed XML"

  # Neither <book> nor <title> is ever closed, so the input is malformed.
  malformed = "<book><title>Unclosed"

  # Record the message of every error delivered to the on_error hook.
  reported = []
  handler = Moxml::SAX::Handler.new
  handler.define_singleton_method(:on_error) { |error| reported << error.message }

  context.sax_parse(malformed, handler)

  expect(reported.empty?).to be false
  expect(reported.last).to match(/unexpected|invalid|unclosed/i)
end
|
|
742
|
+
end
|
|
743
|
+
|
|
744
|
+
describe "SAX parsing with LibXML adapter", adapter: :libxml do
|
|
745
|
+
let(:context) { Moxml.new(:libxml) }
|
|
746
|
+
|
|
747
|
+
it "parses with class-based handler" do
  # Same extraction pattern as the other adapters' BookExtractor handlers.
  # An anonymous subclass is used on purpose: a named `class` statement inside
  # an example defines a top-level constant that outlives the example and is
  # reopened every time the example runs.
  extractor_class = Class.new(Moxml::SAX::ElementHandler) do
    attr_reader :books

    def initialize
      super
      @books = []
      @current_book = nil
      # Unary plus gives a mutable buffer even when string literals are frozen,
      # so on_characters needs no frozen? guard.
      @current_text = +""
    end

    # Opens a new book record on <book>; resets the text buffer at the start
    # of each field element. `super` forwards the event to ElementHandler.
    def on_start_element(name, attributes = {}, namespaces = {})
      super
      case name
      when "book"
        @current_book = { id: attributes["id"],
                          category: attributes["category"] }
      when "title", "author", "price"
        @current_text = +""
      end
    end

    # Accumulates character data only while inside a <book>.
    def on_characters(text)
      @current_text << text if @current_book
    end

    # Stores the buffered text on the current book when a field closes and
    # files the finished record when </book> is reached.
    def on_end_element(name)
      case name
      when "title", "author"
        @current_book[name.to_sym] = @current_text.strip if @current_book
      when "price"
        @current_book[:price] = @current_text.strip.to_f if @current_book
      when "book"
        @books << @current_book if @current_book
        @current_book = nil
      end
      super
    end
  end

  handler = extractor_class.new
  context.sax_parse(xml, handler)

  expect(handler.books.length).to eq(2)

  expect(handler.books[0]).to include(
    id: "1",
    category: "programming",
    title: "Ruby Programming",
    author: "Jane Smith",
    price: 29.99,
  )

  expect(handler.books[1]).to include(
    id: "2",
    category: "fiction",
    title: "The Great Novel",
    author: "John Doe",
    price: 19.99,
  )
end
|
|
811
|
+
|
|
812
|
+
it "parses with block-based handler" do
  # Parser state lives in plain locals; the DSL blocks below close over them.
  books = []
  book = nil     # record currently being filled, nil outside <book>
  field = nil    # name of the field element currently open, if any
  buffer = +""   # text collected for the open field

  context.sax_parse(xml) do
    start_element do |name, attrs|
      if name == "book"
        book = { id: attrs["id"] }
      elsif %w[title author price].include?(name)
        field = name
        buffer = +""
      end
    end

    characters do |text|
      buffer << text if field && !buffer.frozen?
    end

    end_element do |name|
      case name
      when "title", "author", "price"
        if book && field
          stripped = buffer.strip
          # Only the price is converted to a number; other fields stay text.
          book[field.to_sym] = name == "price" ? stripped.to_f : stripped
        end
        field = nil
      when "book"
        books << book if book
        book = nil
      end
    end
  end

  expect(books.length).to eq(2)
  expect(books[0][:title]).to eq("Ruby Programming")
  expect(books[1][:title]).to eq("The Great Novel")
end
|
|
858
|
+
|
|
859
|
+
it "handles errors gracefully" do
  # Neither <book> nor <title> is ever closed, so the input is malformed.
  malformed = "<book><title>Unclosed"

  # Record the message of every error delivered to the on_error hook.
  reported = []
  handler = Moxml::SAX::Handler.new
  handler.define_singleton_method(:on_error) { |error| reported << error.message }

  context.sax_parse(malformed, handler)

  # The hook must have fired, and the most recent message must describe
  # the parse failure.
  expect(reported.empty?).to be false
  expect(reported.last).to match(/error|missing|unclosed|premature/i)
end
|
|
883
|
+
end
|
|
884
|
+
|
|
885
|
+
describe "SAX parsing with HeadedOx adapter", adapter: :headed_ox do
|
|
886
|
+
let(:context) { Moxml.new(:headed_ox) }
|
|
887
|
+
|
|
888
|
+
it "parses with class-based handler" do
  # Same extraction pattern as the other adapters' BookExtractor handlers.
  # An anonymous subclass is used on purpose: a named `class` statement inside
  # an example defines a top-level constant that outlives the example and is
  # reopened every time the example runs.
  extractor_class = Class.new(Moxml::SAX::ElementHandler) do
    attr_reader :books

    def initialize
      super
      @books = []
      @current_book = nil
      # Unary plus gives a mutable buffer even when string literals are frozen,
      # so on_characters needs no frozen? guard.
      @current_text = +""
    end

    # Opens a new book record on <book>; resets the text buffer at the start
    # of each field element. `super` forwards the event to ElementHandler.
    def on_start_element(name, attributes = {}, namespaces = {})
      super
      case name
      when "book"
        @current_book = { id: attributes["id"],
                          category: attributes["category"] }
      when "title", "author", "price"
        @current_text = +""
      end
    end

    # Accumulates character data only while inside a <book>.
    def on_characters(text)
      @current_text << text if @current_book
    end

    # Stores the buffered text on the current book when a field closes and
    # files the finished record when </book> is reached.
    def on_end_element(name)
      case name
      when "title", "author"
        @current_book[name.to_sym] = @current_text.strip if @current_book
      when "price"
        @current_book[:price] = @current_text.strip.to_f if @current_book
      when "book"
        @books << @current_book if @current_book
        @current_book = nil
      end
      super
    end
  end

  handler = extractor_class.new
  context.sax_parse(xml, handler)

  expect(handler.books.length).to eq(2)

  expect(handler.books[0]).to include(
    id: "1",
    category: "programming",
    title: "Ruby Programming",
    author: "Jane Smith",
    price: 29.99,
  )

  expect(handler.books[1]).to include(
    id: "2",
    category: "fiction",
    title: "The Great Novel",
    author: "John Doe",
    price: 19.99,
  )
end
|
|
952
|
+
|
|
953
|
+
it "parses with block-based handler" do
  # Parser state lives in plain locals; the DSL blocks below close over them.
  books = []
  book = nil     # record currently being filled, nil outside <book>
  field = nil    # name of the field element currently open, if any
  buffer = +""   # text collected for the open field

  context.sax_parse(xml) do
    start_element do |name, attrs|
      if name == "book"
        book = { id: attrs["id"] }
      elsif %w[title author price].include?(name)
        field = name
        buffer = +""
      end
    end

    characters do |text|
      buffer << text if field && !buffer.frozen?
    end

    end_element do |name|
      case name
      when "title", "author", "price"
        if book && field
          stripped = buffer.strip
          # Only the price is converted to a number; other fields stay text.
          book[field.to_sym] = name == "price" ? stripped.to_f : stripped
        end
        field = nil
      when "book"
        books << book if book
        book = nil
      end
    end
  end

  expect(books.length).to eq(2)
  expect(books[0][:title]).to eq("Ruby Programming")
  expect(books[1][:title]).to eq("The Great Novel")
end
|
|
999
|
+
|
|
1000
|
+
it "handles errors gracefully" do
  # Neither <book> nor <title> is ever closed, so the input is malformed.
  malformed = "<book><title>Unclosed"

  # Record the message of every error delivered to the on_error hook.
  reported = []
  handler = Moxml::SAX::Handler.new
  handler.define_singleton_method(:on_error) { |error| reported << error.message }

  context.sax_parse(malformed, handler)

  # The hook must have fired, and the most recent message must describe
  # the parse failure.
  expect(reported.empty?).to be false
  expect(reported.last).to match(/invalid|unclosed|premature|mismatch|not closed/i)
end
|
|
1024
|
+
|
|
1025
|
+
it "documents CDATA limitation (inherited from Ox)" do
  # HeadedOx reuses Ox's SAX implementation, which emits no dedicated CDATA
  # event, so the example is skipped; the body records the expected behaviour.
  skip "HeadedOx SAX (inherited from Ox) does not support separate CDATA events"

  cdata_events = 0
  context.sax_parse("<root><![CDATA[special content]]></root>") do
    cdata { |_text| cdata_events += 1 }
  end

  expect(cdata_events.positive?).to be true
end
|
|
1038
|
+
|
|
1039
|
+
it "documents comment limitation (inherited from Ox)" do
  # HeadedOx reuses Ox's SAX implementation, which emits no dedicated comment
  # event, so the example is skipped; the body records the expected behaviour.
  skip "HeadedOx SAX (inherited from Ox) does not support separate comment events"

  comment_events = 0
  context.sax_parse("<root><!-- comment --><data>content</data></root>") do
    comment { |_text| comment_events += 1 }
  end

  expect(comment_events.positive?).to be true
end
|
|
1052
|
+
|
|
1053
|
+
it "documents processing instruction limitation (inherited from Ox)" do
  # HeadedOx reuses Ox's SAX implementation, which emits no dedicated
  # processing-instruction event, so the example is skipped; the body records
  # the expected behaviour.
  skip "HeadedOx SAX (inherited from Ox) does not support separate processing instruction events"

  pi_events = 0
  context.sax_parse('<?xml-stylesheet type="text/xsl" href="style.xsl"?><root/>') do
    processing_instruction { |_target, _data| pi_events += 1 }
  end

  expect(pi_events.positive?).to be true
end
|
|
1066
|
+
end
|
|
1067
|
+
end
|