multisax 0.0.6 → 0.0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Xerces.md +27 -0
- data/lib/multisax.rb +63 -21
- data/spec/multisax_spec.rb +34 -12
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 89ef3e32c76021c3d85738ec9d08e6897dd8ef8a
|
4
|
+
data.tar.gz: 5e76e6c82462ead3333bfec4ccb991c77e00ded0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7fc3c659c2bbd667160ed47a7e479469f065519ee702dcda1c493d72302d5ba8f0bd9426a6d9050e5c8d07289be9c686a1d672f9a6bfc7597000353d5252f125
|
7
|
+
data.tar.gz: 36fb192b9f84bceb2b095d5eb2fd2ccf147e14b5204aef3fabfb9a31016da799b1c9dbf4a8b349b76d3ad47f2a40a04aa6fadb7f59f82b7d49142e4f440f1f1b
|
data/Xerces.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
## Xerces-Ruby support
|
2
|
+
|
3
|
+
As of 0.0.6.1, multisax unofficially supports Xerces-Ruby. Official support will not be provided unless Xerces-Ruby is relicensed under a permissive (BSD/MIT) or weak-copyleft (MPL) license.
|
4
|
+
|
5
|
+
Because of this license issue, Xerces-Ruby is not included in multisax's default parser list; you need to request it explicitly with `MultiSAX::Sax.open(:xerces)` or `MultiSAX::Sax.open(*MultiSAX::XML_PARSERS_INSTALLABLE+[:xerces]+MultiSAX::XML_PARSERS_DEFAULT)`.
|
6
|
+
|
7
|
+
Here are the instructions for setting up Xerces-Ruby.
|
8
|
+
|
9
|
+
----
|
10
|
+
- Install Xerces-C 2
|
11
|
+
- OSX (MacPorts): `sudo port install xercesc`
|
12
|
+
- Debian: `sudo apt-get install libxerces-c2-dev`
|
13
|
+
- Download and extract [xerces-ruby](http://www.geocities.co.jp/SiliconValley-SanJose/9156/xerces-ruby.html)
|
14
|
+
- Download [xerces-ruby.patch](https://gist.github.com/cielavenir/8401975)
|
15
|
+
- Convert SAXParse.cpp's character encoding to UTF-8
|
16
|
+
- `nkf -i -w /Users/tyamada/Downloads/xerces-ruby-2/SAXParse.cpp`
|
17
|
+
- Apply patch
|
18
|
+
- `patch < xerces-ruby.patch`
|
19
|
+
- Run extconf.rb
|
20
|
+
- `ruby extconf.rb --with-opt-dir=/opt/local`
|
21
|
+
- `make`
|
22
|
+
- Install libraries
|
23
|
+
- `make install`
|
24
|
+
- `sudo cp Xerces.rb /Library/Ruby/Site/2.0.0/`
|
25
|
+
- Now you can `require 'Xerces'` in your Ruby script.
|
26
|
+
|
27
|
+
- Please note that Xerces-Ruby currently cannot handle CDATA sections. I (ciel) am NOT going to fix it.
|
data/lib/multisax.rb
CHANGED
@@ -8,7 +8,16 @@
|
|
8
8
|
|
9
9
|
module MultiSAX
|
10
10
|
# VERSION string
|
11
|
-
VERSION='0.0.6'
|
11
|
+
VERSION='0.0.6.1'
|
12
|
+
|
13
|
+
# Default XML parsers
|
14
|
+
XML_PARSERS_DEFAULT=[:rexmlstream,:rexmlsax2]
|
15
|
+
|
16
|
+
# Installable XML parsers
|
17
|
+
XML_PARSERS_INSTALLABLE=[:ox,:libxml,:xmlparser,:nokogiri,:oga]
|
18
|
+
|
19
|
+
# Installable HTML parsers
|
20
|
+
HTML_PARSERS_INSTALLABLE=[:oxhtml,:nokogirihtml,:ogahtml]
|
12
21
|
|
13
22
|
# The class to handle XML libraries.
|
14
23
|
class SAX
|
@@ -27,8 +36,8 @@ module MultiSAX
|
|
27
36
|
# If multiple libraries are selected, MultiSAX will try them one by one and use the first usable one.
|
28
37
|
def open(*list)
|
29
38
|
return @parser if @parser
|
30
|
-
list=
|
31
|
-
list=
|
39
|
+
list=XML_PARSERS_INSTALLABLE+XML_PARSERS_DEFAULT if list.empty?||list==[:XML]
|
40
|
+
list=HTML_PARSERS_INSTALLABLE if list==[:HTML]
|
32
41
|
list.each{|e_module|
|
33
42
|
case e_module
|
34
43
|
when :ox,:oxhtml
|
@@ -115,6 +124,36 @@ module MultiSAX
|
|
115
124
|
def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
116
125
|
}
|
117
126
|
break
|
127
|
+
when :xmlparser
|
128
|
+
begin
|
129
|
+
require 'xml/saxdriver'
|
130
|
+
rescue LoadError;next end
|
131
|
+
@parser=e_module
|
132
|
+
@saxhelper=Class.new(::XML::Parser){
|
133
|
+
def __init__(obj)
|
134
|
+
@obj=obj
|
135
|
+
@cdata=false
|
136
|
+
self
|
137
|
+
end
|
138
|
+
def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
|
139
|
+
def endElement(tag) @obj.sax_tag_end(tag) end
|
140
|
+
def comment(txt) @obj.sax_comment(txt) end
|
141
|
+
def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
142
|
+
def character(txt)
|
143
|
+
if @cdata
|
144
|
+
@obj.sax_cdata(txt)
|
145
|
+
else
|
146
|
+
@obj.sax_text(txt)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
def startCdata
|
150
|
+
@cdata=true
|
151
|
+
end
|
152
|
+
def endCdata
|
153
|
+
@cdata=false
|
154
|
+
end
|
155
|
+
}
|
156
|
+
break
|
118
157
|
when :oga,:ogahtml
|
119
158
|
next if RUBY_VERSION<'1.9'
|
120
159
|
begin
|
@@ -144,34 +183,35 @@ module MultiSAX
|
|
144
183
|
end
|
145
184
|
}
|
146
185
|
break
|
147
|
-
when :
|
186
|
+
when :xerces
|
148
187
|
begin
|
149
|
-
require '
|
188
|
+
require 'Xerces'
|
150
189
|
rescue LoadError;next end
|
151
190
|
@parser=e_module
|
152
|
-
@saxhelper=Class.new(::
|
191
|
+
@saxhelper=Class.new(::XercesR::DocumentHandler){
|
153
192
|
def __init__(obj)
|
154
193
|
@obj=obj
|
155
|
-
|
194
|
+
#@cdata=false
|
156
195
|
self
|
157
196
|
end
|
158
|
-
def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
|
197
|
+
def startElement(tag,attrs) @obj.sax_tag_start(tag,Hash[*attrs.getLength.times.map{|i|[attrs.getName(i),attrs.getValue(i)]}.flatten(1)]) end
|
159
198
|
def endElement(tag) @obj.sax_tag_end(tag) end
|
160
199
|
def comment(txt) @obj.sax_comment(txt) end
|
161
|
-
def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
162
|
-
def
|
163
|
-
|
200
|
+
#def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
201
|
+
#def notationDecl(name, publicId, systemId) end
|
202
|
+
def characters(txt,len)
|
203
|
+
#if @cdata
|
164
204
|
@obj.sax_cdata(txt)
|
165
|
-
else
|
205
|
+
#else
|
166
206
|
@obj.sax_text(txt)
|
167
|
-
end
|
168
|
-
end
|
169
|
-
def startCdata
|
170
|
-
@cdata=true
|
171
|
-
end
|
172
|
-
def endCdata
|
173
|
-
@cdata=false
|
207
|
+
#end
|
174
208
|
end
|
209
|
+
#def startCdata
|
210
|
+
# @cdata=true
|
211
|
+
#end
|
212
|
+
#def endCdata
|
213
|
+
# @cdata=false
|
214
|
+
#end
|
175
215
|
}
|
176
216
|
break
|
177
217
|
when :rexmlstream
|
@@ -242,9 +282,10 @@ module MultiSAX
|
|
242
282
|
when :libxml then parser=LibXML::XML::SaxParser.string(source);parser.callbacks=saxhelper;parser.parse
|
243
283
|
when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
|
244
284
|
when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source)
|
285
|
+
when :xmlparser then saxhelper.parse(source)
|
245
286
|
when :oga then parser=Oga::XML::SaxParser.new(saxhelper,source);parser.parse
|
246
287
|
when :ogahtml then parser=Oga::HTML::SaxParser.new(saxhelper,source);parser.parse
|
247
|
-
when :
|
288
|
+
when :xerces then parser=XercesR::SAXParser.new;parser.setDocumentHandler(saxhelper);parser.parsebuf(source)
|
248
289
|
when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
|
249
290
|
when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
|
250
291
|
end
|
@@ -255,9 +296,10 @@ module MultiSAX
|
|
255
296
|
when :libxml then parser=LibXML::XML::SaxParser.io(source);parser.callbacks=saxhelper;parser.parse
|
256
297
|
when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
|
257
298
|
when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source.read) # fixme: nokogirihtml IO doesn't allow errors.
|
299
|
+
when :xmlparser then saxhelper.parse(source)
|
258
300
|
when :oga then parser=Oga::XML::SaxParser.new(saxhelper,source);parser.parse
|
259
301
|
when :ogahtml then parser=Oga::HTML::SaxParser.new(saxhelper,source);parser.parse
|
260
|
-
when :
|
302
|
+
when :xerces then parser=XercesR::SAXParser.new;parser.setDocumentHandler(saxhelper);parser.parsebuf(source.read)
|
261
303
|
when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
|
262
304
|
when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
|
263
305
|
end
|
data/spec/multisax_spec.rb
CHANGED
@@ -87,6 +87,17 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
|
|
87
87
|
listener.attrib.should eq 'foo'
|
88
88
|
listener.xmlencoding.should eq 'UTF-8'
|
89
89
|
end
|
90
|
+
it "uses :xmlparser" do
|
91
|
+
pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
|
92
|
+
pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
|
93
|
+
MultiSAX::Sax.reset
|
94
|
+
MultiSAX::Sax.open(:xmlparser)
|
95
|
+
MultiSAX::Sax.parser.should eq :xmlparser
|
96
|
+
listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
|
97
|
+
listener.result.should eq xml_answer
|
98
|
+
listener.attrib.should eq 'foo'
|
99
|
+
listener.xmlencoding.should eq 'UTF-8'
|
100
|
+
end
|
90
101
|
it "uses :oga" do
|
91
102
|
pending 'oga requires Ruby >=1.9.3' if RUBY_VERSION<'1.9.3'
|
92
103
|
MultiSAX::Sax.reset
|
@@ -97,16 +108,16 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
|
|
97
108
|
listener.attrib.should eq 'foo'
|
98
109
|
listener.xmlencoding.should eq 'UTF-8'
|
99
110
|
end
|
100
|
-
it "uses :
|
101
|
-
pending '
|
102
|
-
pending '
|
111
|
+
it "uses :xerces" do
|
112
|
+
pending 'xerces is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
|
113
|
+
pending 'xerces will not be supported officially'
|
103
114
|
MultiSAX::Sax.reset
|
104
|
-
MultiSAX::Sax.open(:
|
105
|
-
MultiSAX::Sax.parser.should eq :
|
115
|
+
MultiSAX::Sax.open(:xerces)
|
116
|
+
MultiSAX::Sax.parser.should eq :xerces
|
106
117
|
listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
|
107
118
|
listener.result.should eq xml_answer
|
108
119
|
listener.attrib.should eq 'foo'
|
109
|
-
listener.xmlencoding.should eq 'UTF-8'
|
120
|
+
#listener.xmlencoding.should eq 'UTF-8'
|
110
121
|
end
|
111
122
|
end
|
112
123
|
|
@@ -158,6 +169,17 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
|
|
158
169
|
listener.attrib.should eq 'foo'
|
159
170
|
listener.xmlencoding.should eq 'UTF-8'
|
160
171
|
end
|
172
|
+
it "uses :xmlparser" do
|
173
|
+
pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
|
174
|
+
pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
|
175
|
+
MultiSAX::Sax.reset
|
176
|
+
MultiSAX::Sax.open(:xmlparser)
|
177
|
+
MultiSAX::Sax.parser.should eq :xmlparser
|
178
|
+
listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
|
179
|
+
listener.result.should eq xml_answer
|
180
|
+
listener.attrib.should eq 'foo'
|
181
|
+
listener.xmlencoding.should eq 'UTF-8'
|
182
|
+
end
|
161
183
|
it "uses :oga" do
|
162
184
|
pending 'oga requires Ruby >=1.9.3' if RUBY_VERSION<'1.9.3'
|
163
185
|
MultiSAX::Sax.reset
|
@@ -168,16 +190,16 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
|
|
168
190
|
listener.attrib.should eq 'foo'
|
169
191
|
listener.xmlencoding.should eq 'UTF-8'
|
170
192
|
end
|
171
|
-
it "uses :
|
172
|
-
pending '
|
173
|
-
pending '
|
193
|
+
it "uses :xerces" do
|
194
|
+
pending 'xerces is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
|
195
|
+
pending 'xerces will not be supported officially'
|
174
196
|
MultiSAX::Sax.reset
|
175
|
-
MultiSAX::Sax.open(:
|
176
|
-
MultiSAX::Sax.parser.should eq :
|
197
|
+
MultiSAX::Sax.open(:xerces)
|
198
|
+
MultiSAX::Sax.parser.should eq :xerces
|
177
199
|
listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
|
178
200
|
listener.result.should eq xml_answer
|
179
201
|
listener.attrib.should eq 'foo'
|
180
|
-
listener.xmlencoding.should eq 'UTF-8'
|
202
|
+
#listener.xmlencoding.should eq 'UTF-8'
|
181
203
|
end
|
182
204
|
end
|
183
205
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multisax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- cielavenir
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -67,6 +67,7 @@ files:
|
|
67
67
|
- LICENSE.txt
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
|
+
- Xerces.md
|
70
71
|
- lib/multisax.rb
|
71
72
|
- multisax.gemspec
|
72
73
|
- spec/multisax_spec.rb
|