multisax 0.0.6 → 0.0.6.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (5) hide show
  1. checksums.yaml +4 -4
  2. data/Xerces.md +27 -0
  3. data/lib/multisax.rb +63 -21
  4. data/spec/multisax_spec.rb +34 -12
  5. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8b40c84bc5076ee64f16e5dba05765e01fd1b17a
4
- data.tar.gz: 97f52137ee9171e3c3efd74303fbf784be40ca33
3
+ metadata.gz: 89ef3e32c76021c3d85738ec9d08e6897dd8ef8a
4
+ data.tar.gz: 5e76e6c82462ead3333bfec4ccb991c77e00ded0
5
5
  SHA512:
6
- metadata.gz: 09554bc0d7d6c198b05a2b2a212d506fda00ed7cb81d78d79d3309c4bbe782b00c910be430badc0d64f04c50eba5a72fb800bf8eefcc5d403149c90eee0f8913
7
- data.tar.gz: 588370966d1cca05f96ec119d44410e0b3d880fc2e122c0418329ef05c246ab9b387d1e41318c88833b93405e88faf7e6c10fb6688bf3724607c84633d6e2c94
6
+ metadata.gz: 7fc3c659c2bbd667160ed47a7e479469f065519ee702dcda1c493d72302d5ba8f0bd9426a6d9050e5c8d07289be9c686a1d672f9a6bfc7597000353d5252f125
7
+ data.tar.gz: 36fb192b9f84bceb2b095d5eb2fd2ccf147e14b5204aef3fabfb9a31016da799b1c9dbf4a8b349b76d3ad47f2a40a04aa6fadb7f59f82b7d49142e4f440f1f1b
data/Xerces.md ADDED
@@ -0,0 +1,27 @@
1
+ ## Xerces-Ruby support
2
+
3
+ As of 0.0.6.1, multisax unofficially supports Xerces-Ruby. Official support will not be provided unless Xerces-Ruby is relicensed under a permissive (BSD/MIT) or weak-copyleft (MPL) license.
4
+
5
+ Because of this license issue, multisax does not include Xerces-Ruby in its default parser list; to use it, you need to call `MultiSAX::Sax.open(:xerces)` or `MultiSAX::Sax.open(*MultiSAX::XML_PARSERS_INSTALLABLE+[:xerces]+MultiSAX::XML_PARSERS_DEFAULT)` explicitly.
6
+
7
+ Here are the instructions for setting up Xerces-Ruby.
8
+
9
+ ----
10
+ - Install Xerces-C 2
11
+ - OSX (MacPorts): `sudo port install xercesc`
12
+ - Debian: `sudo apt-get install libxerces-c2-dev`
13
+ - Download and extract [xerces-ruby](http://www.geocities.co.jp/SiliconValley-SanJose/9156/xerces-ruby.html)
14
+ - Download [xerces-ruby.patch](https://gist.github.com/cielavenir/8401975)
15
+ - Convert the character encoding of SAXParse.cpp to UTF-8
16
+ - `nkf -i -w /Users/tyamada/Downloads/xerces-ruby-2/SAXParse.cpp`
17
+ - Apply patch
18
+ - `patch < xerces-ruby.patch`
19
+ - Run extconf.rb
20
+ - `ruby extconf.rb --with-opt-dir=/opt/local`
21
+ - `make`
22
+ - Install libraries
23
+ - `make install`
24
+ - `sudo cp Xerces.rb /Library/Ruby/Site/2.0.0/`
25
+ - Now you can `require 'Xerces'` in your Ruby script.
26
+
27
+ - Please note that Xerces-Ruby currently cannot handle CDATA sections. I (ciel) am NOT going to fix this.
data/lib/multisax.rb CHANGED
@@ -8,7 +8,16 @@
8
8
 
9
9
  module MultiSAX
10
10
  # VERSION string
11
- VERSION='0.0.6'
11
+ VERSION='0.0.6.1'
12
+
13
+ # Default XML parsers
14
+ XML_PARSERS_DEFAULT=[:rexmlstream,:rexmlsax2]
15
+
16
+ # Installable XML parsers
17
+ XML_PARSERS_INSTALLABLE=[:ox,:libxml,:xmlparser,:nokogiri,:oga]
18
+
19
+ # Installable HTML parsers
20
+ HTML_PARSERS_INSTALLABLE=[:oxhtml,:nokogirihtml,:ogahtml]
12
21
 
13
22
  # The class to handle XML libraries.
14
23
  class SAX
@@ -27,8 +36,8 @@ module MultiSAX
27
36
  # If multiple selected, MultiSAX will try the libraries one by one and use the first usable one.
28
37
  def open(*list)
29
38
  return @parser if @parser
30
- list=[:ox,:libxml,:xmlparser,:nokogiri,:oga,:rexmlstream,:rexmlsax2] if list.empty?||list==[:XML]
31
- list=[:oxhtml,:nokogirihtml,:ogahtml] if list==[:HTML]
39
+ list=XML_PARSERS_INSTALLABLE+XML_PARSERS_DEFAULT if list.empty?||list==[:XML]
40
+ list=HTML_PARSERS_INSTALLABLE if list==[:HTML]
32
41
  list.each{|e_module|
33
42
  case e_module
34
43
  when :ox,:oxhtml
@@ -115,6 +124,36 @@ module MultiSAX
115
124
  def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
116
125
  }
117
126
  break
127
+ when :xmlparser
128
+ begin
129
+ require 'xml/saxdriver'
130
+ rescue LoadError;next end
131
+ @parser=e_module
132
+ @saxhelper=Class.new(::XML::Parser){
133
+ def __init__(obj)
134
+ @obj=obj
135
+ @cdata=false
136
+ self
137
+ end
138
+ def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
139
+ def endElement(tag) @obj.sax_tag_end(tag) end
140
+ def comment(txt) @obj.sax_comment(txt) end
141
+ def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
142
+ def character(txt)
143
+ if @cdata
144
+ @obj.sax_cdata(txt)
145
+ else
146
+ @obj.sax_text(txt)
147
+ end
148
+ end
149
+ def startCdata
150
+ @cdata=true
151
+ end
152
+ def endCdata
153
+ @cdata=false
154
+ end
155
+ }
156
+ break
118
157
  when :oga,:ogahtml
119
158
  next if RUBY_VERSION<'1.9'
120
159
  begin
@@ -144,34 +183,35 @@ module MultiSAX
144
183
  end
145
184
  }
146
185
  break
147
- when :xmlparser
186
+ when :xerces
148
187
  begin
149
- require 'xml/saxdriver'
188
+ require 'Xerces'
150
189
  rescue LoadError;next end
151
190
  @parser=e_module
152
- @saxhelper=Class.new(::XML::Parser){
191
+ @saxhelper=Class.new(::XercesR::DocumentHandler){
153
192
  def __init__(obj)
154
193
  @obj=obj
155
- @cdata=false
194
+ #@cdata=false
156
195
  self
157
196
  end
158
- def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
197
+ def startElement(tag,attrs) @obj.sax_tag_start(tag,Hash[*attrs.getLength.times.map{|i|[attrs.getName(i),attrs.getValue(i)]}.flatten(1)]) end
159
198
  def endElement(tag) @obj.sax_tag_end(tag) end
160
199
  def comment(txt) @obj.sax_comment(txt) end
161
- def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
162
- def character(txt)
163
- if @cdata
200
+ #def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
201
+ #def notationDecl(name, publicId, systemId) end
202
+ def characters(txt,len)
203
+ #if @cdata
164
204
  @obj.sax_cdata(txt)
165
- else
205
+ #else
166
206
  @obj.sax_text(txt)
167
- end
168
- end
169
- def startCdata
170
- @cdata=true
171
- end
172
- def endCdata
173
- @cdata=false
207
+ #end
174
208
  end
209
+ #def startCdata
210
+ # @cdata=true
211
+ #end
212
+ #def endCdata
213
+ # @cdata=false
214
+ #end
175
215
  }
176
216
  break
177
217
  when :rexmlstream
@@ -242,9 +282,10 @@ module MultiSAX
242
282
  when :libxml then parser=LibXML::XML::SaxParser.string(source);parser.callbacks=saxhelper;parser.parse
243
283
  when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
244
284
  when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source)
285
+ when :xmlparser then saxhelper.parse(source)
245
286
  when :oga then parser=Oga::XML::SaxParser.new(saxhelper,source);parser.parse
246
287
  when :ogahtml then parser=Oga::HTML::SaxParser.new(saxhelper,source);parser.parse
247
- when :xmlparser then saxhelper.parse(source)
288
+ when :xerces then parser=XercesR::SAXParser.new;parser.setDocumentHandler(saxhelper);parser.parsebuf(source)
248
289
  when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
249
290
  when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
250
291
  end
@@ -255,9 +296,10 @@ module MultiSAX
255
296
  when :libxml then parser=LibXML::XML::SaxParser.io(source);parser.callbacks=saxhelper;parser.parse
256
297
  when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
257
298
  when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source.read) # fixme: nokogirihtml IO doesn't allow errors.
299
+ when :xmlparser then saxhelper.parse(source)
258
300
  when :oga then parser=Oga::XML::SaxParser.new(saxhelper,source);parser.parse
259
301
  when :ogahtml then parser=Oga::HTML::SaxParser.new(saxhelper,source);parser.parse
260
- when :xmlparser then saxhelper.parse(source)
302
+ when :xerces then parser=XercesR::SAXParser.new;parser.setDocumentHandler(saxhelper);parser.parsebuf(source.read)
261
303
  when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
262
304
  when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
263
305
  end
@@ -87,6 +87,17 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
87
87
  listener.attrib.should eq 'foo'
88
88
  listener.xmlencoding.should eq 'UTF-8'
89
89
  end
90
+ it "uses :xmlparser" do
91
+ pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
92
+ pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
93
+ MultiSAX::Sax.reset
94
+ MultiSAX::Sax.open(:xmlparser)
95
+ MultiSAX::Sax.parser.should eq :xmlparser
96
+ listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
97
+ listener.result.should eq xml_answer
98
+ listener.attrib.should eq 'foo'
99
+ listener.xmlencoding.should eq 'UTF-8'
100
+ end
90
101
  it "uses :oga" do
91
102
  pending 'oga requires Ruby >=1.9.3' if RUBY_VERSION<'1.9.3'
92
103
  MultiSAX::Sax.reset
@@ -97,16 +108,16 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
97
108
  listener.attrib.should eq 'foo'
98
109
  listener.xmlencoding.should eq 'UTF-8'
99
110
  end
100
- it "uses :xmlparser" do
101
- pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
102
- pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
111
+ it "uses :xerces" do
112
+ pending 'xerces is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
113
+ pending 'xerces will not be supported officially'
103
114
  MultiSAX::Sax.reset
104
- MultiSAX::Sax.open(:xmlparser)
105
- MultiSAX::Sax.parser.should eq :xmlparser
115
+ MultiSAX::Sax.open(:xerces)
116
+ MultiSAX::Sax.parser.should eq :xerces
106
117
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
107
118
  listener.result.should eq xml_answer
108
119
  listener.attrib.should eq 'foo'
109
- listener.xmlencoding.should eq 'UTF-8'
120
+ #listener.xmlencoding.should eq 'UTF-8'
110
121
  end
111
122
  end
112
123
 
@@ -158,6 +169,17 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
158
169
  listener.attrib.should eq 'foo'
159
170
  listener.xmlencoding.should eq 'UTF-8'
160
171
  end
172
+ it "uses :xmlparser" do
173
+ pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
174
+ pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
175
+ MultiSAX::Sax.reset
176
+ MultiSAX::Sax.open(:xmlparser)
177
+ MultiSAX::Sax.parser.should eq :xmlparser
178
+ listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
179
+ listener.result.should eq xml_answer
180
+ listener.attrib.should eq 'foo'
181
+ listener.xmlencoding.should eq 'UTF-8'
182
+ end
161
183
  it "uses :oga" do
162
184
  pending 'oga requires Ruby >=1.9.3' if RUBY_VERSION<'1.9.3'
163
185
  MultiSAX::Sax.reset
@@ -168,16 +190,16 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
168
190
  listener.attrib.should eq 'foo'
169
191
  listener.xmlencoding.should eq 'UTF-8'
170
192
  end
171
- it "uses :xmlparser" do
172
- pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
173
- pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
193
+ it "uses :xerces" do
194
+ pending 'xerces is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
195
+ pending 'xerces will not be supported officially'
174
196
  MultiSAX::Sax.reset
175
- MultiSAX::Sax.open(:xmlparser)
176
- MultiSAX::Sax.parser.should eq :xmlparser
197
+ MultiSAX::Sax.open(:xerces)
198
+ MultiSAX::Sax.parser.should eq :xerces
177
199
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
178
200
  listener.result.should eq xml_answer
179
201
  listener.attrib.should eq 'foo'
180
- listener.xmlencoding.should eq 'UTF-8'
202
+ #listener.xmlencoding.should eq 'UTF-8'
181
203
  end
182
204
  end
183
205
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multisax
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - cielavenir
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-19 00:00:00.000000000 Z
11
+ date: 2014-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -67,6 +67,7 @@ files:
67
67
  - LICENSE.txt
68
68
  - README.md
69
69
  - Rakefile
70
+ - Xerces.md
70
71
  - lib/multisax.rb
71
72
  - multisax.gemspec
72
73
  - spec/multisax_spec.rb