multisax 0.0.6 → 0.0.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Xerces.md +27 -0
- data/lib/multisax.rb +63 -21
- data/spec/multisax_spec.rb +34 -12
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 89ef3e32c76021c3d85738ec9d08e6897dd8ef8a
|
4
|
+
data.tar.gz: 5e76e6c82462ead3333bfec4ccb991c77e00ded0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7fc3c659c2bbd667160ed47a7e479469f065519ee702dcda1c493d72302d5ba8f0bd9426a6d9050e5c8d07289be9c686a1d672f9a6bfc7597000353d5252f125
|
7
|
+
data.tar.gz: 36fb192b9f84bceb2b095d5eb2fd2ccf147e14b5204aef3fabfb9a31016da799b1c9dbf4a8b349b76d3ad47f2a40a04aa6fadb7f59f82b7d49142e4f440f1f1b
|
data/Xerces.md
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
## Xerces-Ruby support
|
2
|
+
|
3
|
+
As of 0.0.6.1, multisax supports Xerces-Ruby unofficially. Official support will not be provided unless Xerces-Ruby is relicensed under a permissive (BSD/MIT) or weak-copyleft (MPL) license.
|
4
|
+
|
5
|
+
Because of this license issue, multisax does not include Xerces-Ruby in its default parser list; you need to request it explicitly with `MultiSAX::Sax.open(:xerces)` or `MultiSAX::Sax.open(*MultiSAX::XML_PARSERS_INSTALLABLE+[:xerces]+MultiSAX::XML_PARSERS_DEFAULT)`.
|
6
|
+
|
7
|
+
Here are the instructions to set up Xerces-Ruby.
|
8
|
+
|
9
|
+
----
|
10
|
+
- Install Xerces-C 2
|
11
|
+
- OSX (MacPorts): `sudo port install xercesc`
|
12
|
+
- Debian: `sudo apt-get install libxerces-c2-dev`
|
13
|
+
- Download and extract [xerces-ruby](http://www.geocities.co.jp/SiliconValley-SanJose/9156/xerces-ruby.html)
|
14
|
+
- Download [xerces-ruby.patch](https://gist.github.com/cielavenir/8401975)
|
15
|
+
- Convert the character encoding of SAXParse.cpp
|
16
|
+
- `nkf -i -w /Users/tyamada/Downloads/xerces-ruby-2/SAXParse.cpp`
|
17
|
+
- Apply patch
|
18
|
+
- `patch < xerces-ruby.patch`
|
19
|
+
- Run extconf.rb
|
20
|
+
- `ruby extconf.rb --with-opt-dir=/opt/local`
|
21
|
+
- `make`
|
22
|
+
- Install libraries
|
23
|
+
- `make install`
|
24
|
+
- `sudo cp Xerces.rb /Library/Ruby/Site/2.0.0/`
|
25
|
+
- Now you can `require 'Xerces'` in your Ruby script.
|
26
|
+
|
27
|
+
- Please note that Xerces-Ruby currently cannot handle CDATA sections. I (ciel) am NOT going to fix this.
|
data/lib/multisax.rb
CHANGED
@@ -8,7 +8,16 @@
|
|
8
8
|
|
9
9
|
module MultiSAX
|
10
10
|
# VERSION string
|
11
|
-
VERSION='0.0.6'
|
11
|
+
VERSION='0.0.6.1'
|
12
|
+
|
13
|
+
# Default XML parsers
|
14
|
+
XML_PARSERS_DEFAULT=[:rexmlstream,:rexmlsax2]
|
15
|
+
|
16
|
+
# Installable XML parsers
|
17
|
+
XML_PARSERS_INSTALLABLE=[:ox,:libxml,:xmlparser,:nokogiri,:oga]
|
18
|
+
|
19
|
+
# Installable HTML parsers
|
20
|
+
HTML_PARSERS_INSTALLABLE=[:oxhtml,:nokogirihtml,:ogahtml]
|
12
21
|
|
13
22
|
# The class to handle XML libraries.
|
14
23
|
class SAX
|
@@ -27,8 +36,8 @@ module MultiSAX
|
|
27
36
|
# If multiple selected, MultiSAX will try the libraries one by one and use the first usable one.
|
28
37
|
def open(*list)
|
29
38
|
return @parser if @parser
|
30
|
-
list=
|
31
|
-
list=
|
39
|
+
list=XML_PARSERS_INSTALLABLE+XML_PARSERS_DEFAULT if list.empty?||list==[:XML]
|
40
|
+
list=HTML_PARSERS_INSTALLABLE if list==[:HTML]
|
32
41
|
list.each{|e_module|
|
33
42
|
case e_module
|
34
43
|
when :ox,:oxhtml
|
@@ -115,6 +124,36 @@ module MultiSAX
|
|
115
124
|
def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
116
125
|
}
|
117
126
|
break
|
127
|
+
when :xmlparser
|
128
|
+
begin
|
129
|
+
require 'xml/saxdriver'
|
130
|
+
rescue LoadError;next end
|
131
|
+
@parser=e_module
|
132
|
+
@saxhelper=Class.new(::XML::Parser){
|
133
|
+
def __init__(obj)
|
134
|
+
@obj=obj
|
135
|
+
@cdata=false
|
136
|
+
self
|
137
|
+
end
|
138
|
+
def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
|
139
|
+
def endElement(tag) @obj.sax_tag_end(tag) end
|
140
|
+
def comment(txt) @obj.sax_comment(txt) end
|
141
|
+
def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
142
|
+
def character(txt)
|
143
|
+
if @cdata
|
144
|
+
@obj.sax_cdata(txt)
|
145
|
+
else
|
146
|
+
@obj.sax_text(txt)
|
147
|
+
end
|
148
|
+
end
|
149
|
+
def startCdata
|
150
|
+
@cdata=true
|
151
|
+
end
|
152
|
+
def endCdata
|
153
|
+
@cdata=false
|
154
|
+
end
|
155
|
+
}
|
156
|
+
break
|
118
157
|
when :oga,:ogahtml
|
119
158
|
next if RUBY_VERSION<'1.9'
|
120
159
|
begin
|
@@ -144,34 +183,35 @@ module MultiSAX
|
|
144
183
|
end
|
145
184
|
}
|
146
185
|
break
|
147
|
-
when :
|
186
|
+
when :xerces
|
148
187
|
begin
|
149
|
-
require '
|
188
|
+
require 'Xerces'
|
150
189
|
rescue LoadError;next end
|
151
190
|
@parser=e_module
|
152
|
-
@saxhelper=Class.new(::
|
191
|
+
@saxhelper=Class.new(::XercesR::DocumentHandler){
|
153
192
|
def __init__(obj)
|
154
193
|
@obj=obj
|
155
|
-
|
194
|
+
#@cdata=false
|
156
195
|
self
|
157
196
|
end
|
158
|
-
def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
|
197
|
+
def startElement(tag,attrs) @obj.sax_tag_start(tag,Hash[*attrs.getLength.times.map{|i|[attrs.getName(i),attrs.getValue(i)]}.flatten(1)]) end
|
159
198
|
def endElement(tag) @obj.sax_tag_end(tag) end
|
160
199
|
def comment(txt) @obj.sax_comment(txt) end
|
161
|
-
def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
162
|
-
def
|
163
|
-
|
200
|
+
#def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
201
|
+
#def notationDecl(name, publicId, systemId) end
|
202
|
+
def characters(txt,len)
|
203
|
+
#if @cdata
|
164
204
|
@obj.sax_cdata(txt)
|
165
|
-
else
|
205
|
+
#else
|
166
206
|
@obj.sax_text(txt)
|
167
|
-
end
|
168
|
-
end
|
169
|
-
def startCdata
|
170
|
-
@cdata=true
|
171
|
-
end
|
172
|
-
def endCdata
|
173
|
-
@cdata=false
|
207
|
+
#end
|
174
208
|
end
|
209
|
+
#def startCdata
|
210
|
+
# @cdata=true
|
211
|
+
#end
|
212
|
+
#def endCdata
|
213
|
+
# @cdata=false
|
214
|
+
#end
|
175
215
|
}
|
176
216
|
break
|
177
217
|
when :rexmlstream
|
@@ -242,9 +282,10 @@ module MultiSAX
|
|
242
282
|
when :libxml then parser=LibXML::XML::SaxParser.string(source);parser.callbacks=saxhelper;parser.parse
|
243
283
|
when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
|
244
284
|
when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source)
|
285
|
+
when :xmlparser then saxhelper.parse(source)
|
245
286
|
when :oga then parser=Oga::XML::SaxParser.new(saxhelper,source);parser.parse
|
246
287
|
when :ogahtml then parser=Oga::HTML::SaxParser.new(saxhelper,source);parser.parse
|
247
|
-
when :
|
288
|
+
when :xerces then parser=XercesR::SAXParser.new;parser.setDocumentHandler(saxhelper);parser.parsebuf(source)
|
248
289
|
when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
|
249
290
|
when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
|
250
291
|
end
|
@@ -255,9 +296,10 @@ module MultiSAX
|
|
255
296
|
when :libxml then parser=LibXML::XML::SaxParser.io(source);parser.callbacks=saxhelper;parser.parse
|
256
297
|
when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
|
257
298
|
when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source.read) # fixme: nokogirihtml IO doesn't allow errors.
|
299
|
+
when :xmlparser then saxhelper.parse(source)
|
258
300
|
when :oga then parser=Oga::XML::SaxParser.new(saxhelper,source);parser.parse
|
259
301
|
when :ogahtml then parser=Oga::HTML::SaxParser.new(saxhelper,source);parser.parse
|
260
|
-
when :
|
302
|
+
when :xerces then parser=XercesR::SAXParser.new;parser.setDocumentHandler(saxhelper);parser.parsebuf(source.read)
|
261
303
|
when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
|
262
304
|
when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
|
263
305
|
end
|
data/spec/multisax_spec.rb
CHANGED
@@ -87,6 +87,17 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
|
|
87
87
|
listener.attrib.should eq 'foo'
|
88
88
|
listener.xmlencoding.should eq 'UTF-8'
|
89
89
|
end
|
90
|
+
it "uses :xmlparser" do
|
91
|
+
pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
|
92
|
+
pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
|
93
|
+
MultiSAX::Sax.reset
|
94
|
+
MultiSAX::Sax.open(:xmlparser)
|
95
|
+
MultiSAX::Sax.parser.should eq :xmlparser
|
96
|
+
listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
|
97
|
+
listener.result.should eq xml_answer
|
98
|
+
listener.attrib.should eq 'foo'
|
99
|
+
listener.xmlencoding.should eq 'UTF-8'
|
100
|
+
end
|
90
101
|
it "uses :oga" do
|
91
102
|
pending 'oga requires Ruby >=1.9.3' if RUBY_VERSION<'1.9.3'
|
92
103
|
MultiSAX::Sax.reset
|
@@ -97,16 +108,16 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
|
|
97
108
|
listener.attrib.should eq 'foo'
|
98
109
|
listener.xmlencoding.should eq 'UTF-8'
|
99
110
|
end
|
100
|
-
it "uses :
|
101
|
-
pending '
|
102
|
-
pending '
|
111
|
+
it "uses :xerces" do
|
112
|
+
pending 'xerces is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
|
113
|
+
pending 'xerces will not be supported officially'
|
103
114
|
MultiSAX::Sax.reset
|
104
|
-
MultiSAX::Sax.open(:
|
105
|
-
MultiSAX::Sax.parser.should eq :
|
115
|
+
MultiSAX::Sax.open(:xerces)
|
116
|
+
MultiSAX::Sax.parser.should eq :xerces
|
106
117
|
listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
|
107
118
|
listener.result.should eq xml_answer
|
108
119
|
listener.attrib.should eq 'foo'
|
109
|
-
listener.xmlencoding.should eq 'UTF-8'
|
120
|
+
#listener.xmlencoding.should eq 'UTF-8'
|
110
121
|
end
|
111
122
|
end
|
112
123
|
|
@@ -158,6 +169,17 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
|
|
158
169
|
listener.attrib.should eq 'foo'
|
159
170
|
listener.xmlencoding.should eq 'UTF-8'
|
160
171
|
end
|
172
|
+
it "uses :xmlparser" do
|
173
|
+
pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
|
174
|
+
pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
|
175
|
+
MultiSAX::Sax.reset
|
176
|
+
MultiSAX::Sax.open(:xmlparser)
|
177
|
+
MultiSAX::Sax.parser.should eq :xmlparser
|
178
|
+
listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
|
179
|
+
listener.result.should eq xml_answer
|
180
|
+
listener.attrib.should eq 'foo'
|
181
|
+
listener.xmlencoding.should eq 'UTF-8'
|
182
|
+
end
|
161
183
|
it "uses :oga" do
|
162
184
|
pending 'oga requires Ruby >=1.9.3' if RUBY_VERSION<'1.9.3'
|
163
185
|
MultiSAX::Sax.reset
|
@@ -168,16 +190,16 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
|
|
168
190
|
listener.attrib.should eq 'foo'
|
169
191
|
listener.xmlencoding.should eq 'UTF-8'
|
170
192
|
end
|
171
|
-
it "uses :
|
172
|
-
pending '
|
173
|
-
pending '
|
193
|
+
it "uses :xerces" do
|
194
|
+
pending 'xerces is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
|
195
|
+
pending 'xerces will not be supported officially'
|
174
196
|
MultiSAX::Sax.reset
|
175
|
-
MultiSAX::Sax.open(:
|
176
|
-
MultiSAX::Sax.parser.should eq :
|
197
|
+
MultiSAX::Sax.open(:xerces)
|
198
|
+
MultiSAX::Sax.parser.should eq :xerces
|
177
199
|
listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
|
178
200
|
listener.result.should eq xml_answer
|
179
201
|
listener.attrib.should eq 'foo'
|
180
|
-
listener.xmlencoding.should eq 'UTF-8'
|
202
|
+
#listener.xmlencoding.should eq 'UTF-8'
|
181
203
|
end
|
182
204
|
end
|
183
205
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multisax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- cielavenir
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-11-
|
11
|
+
date: 2014-11-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -67,6 +67,7 @@ files:
|
|
67
67
|
- LICENSE.txt
|
68
68
|
- README.md
|
69
69
|
- Rakefile
|
70
|
+
- Xerces.md
|
70
71
|
- lib/multisax.rb
|
71
72
|
- multisax.gemspec
|
72
73
|
- spec/multisax_spec.rb
|