multisax 0.0.6 → 0.0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (5) hide show
  1. checksums.yaml +4 -4
  2. data/Xerces.md +27 -0
  3. data/lib/multisax.rb +63 -21
  4. data/spec/multisax_spec.rb +34 -12
  5. metadata +3 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8b40c84bc5076ee64f16e5dba05765e01fd1b17a
4
- data.tar.gz: 97f52137ee9171e3c3efd74303fbf784be40ca33
3
+ metadata.gz: 89ef3e32c76021c3d85738ec9d08e6897dd8ef8a
4
+ data.tar.gz: 5e76e6c82462ead3333bfec4ccb991c77e00ded0
5
5
  SHA512:
6
- metadata.gz: 09554bc0d7d6c198b05a2b2a212d506fda00ed7cb81d78d79d3309c4bbe782b00c910be430badc0d64f04c50eba5a72fb800bf8eefcc5d403149c90eee0f8913
7
- data.tar.gz: 588370966d1cca05f96ec119d44410e0b3d880fc2e122c0418329ef05c246ab9b387d1e41318c88833b93405e88faf7e6c10fb6688bf3724607c84633d6e2c94
6
+ metadata.gz: 7fc3c659c2bbd667160ed47a7e479469f065519ee702dcda1c493d72302d5ba8f0bd9426a6d9050e5c8d07289be9c686a1d672f9a6bfc7597000353d5252f125
7
+ data.tar.gz: 36fb192b9f84bceb2b095d5eb2fd2ccf147e14b5204aef3fabfb9a31016da799b1c9dbf4a8b349b76d3ad47f2a40a04aa6fadb7f59f82b7d49142e4f440f1f1b
data/Xerces.md ADDED
@@ -0,0 +1,27 @@
1
+ ## Xerces-Ruby support
2
+
3
+ As of 0.0.6.1, multisax supports Xerces-Ruby unofficially. Official support will not be provided unless Xerces-Ruby is relicensed under a permissive (BSD/MIT) or weak-copyleft (MPL) license.
4
+
5
+ Because of this license issue, multisax never selects Xerces-Ruby by default; you need to call `MultiSAX::Sax.open(:xerces)` or `MultiSAX::Sax.open(*MultiSAX::XML_PARSERS_INSTALLABLE+[:xerces]+MultiSAX::XML_PARSERS_DEFAULT)` explicitly.
6
+
7
+ Here are the instructions to activate Xerces-Ruby.
8
+
9
+ ----
10
+ - Install Xerces-C 2
11
+ - OSX (MacPorts): `sudo port install xercesc`
12
+ - Debian: `sudo apt-get install libxerces-c2-dev`
13
+ - Download and extract [xerces-ruby](http://www.geocities.co.jp/SiliconValley-SanJose/9156/xerces-ruby.html)
14
+ - Download [xerces-ruby.patch](https://gist.github.com/cielavenir/8401975)
15
+ - Convert SAXParse.cpp's charcode
16
+ - `nkf -i -w /Users/tyamada/Downloads/xerces-ruby-2/SAXParse.cpp`
17
+ - Apply patch
18
+ - `patch < xerces-ruby.patch`
19
+ - Run extconf.rb
20
+ - `ruby extconf.rb --with-opt-dir=/opt/local`
21
+ - `make`
22
+ - Install libraries
23
+ - `make install`
24
+ - `sudo cp Xerces.rb /Library/Ruby/Site/2.0.0/`
25
+ - Now you can `require 'Xerces'` in your Ruby script.
26
+
27
+ - Please note that Xerces-Ruby currently cannot handle CDATA. I (ciel) am NOT going to fix it.
data/lib/multisax.rb CHANGED
@@ -8,7 +8,16 @@
8
8
 
9
9
  module MultiSAX
10
10
  # VERSION string
11
- VERSION='0.0.6'
11
+ VERSION='0.0.6.1'
12
+
13
+ # Default XML parsers
14
+ XML_PARSERS_DEFAULT=[:rexmlstream,:rexmlsax2]
15
+
16
+ # Installable XML parsers
17
+ XML_PARSERS_INSTALLABLE=[:ox,:libxml,:xmlparser,:nokogiri,:oga]
18
+
19
+ # Installable HTML parsers
20
+ HTML_PARSERS_INSTALLABLE=[:oxhtml,:nokogirihtml,:ogahtml]
12
21
 
13
22
  # The class to handle XML libraries.
14
23
  class SAX
@@ -27,8 +36,8 @@ module MultiSAX
27
36
  # If multiple selected, MultiSAX will try the libraries one by one and use the first usable one.
28
37
  def open(*list)
29
38
  return @parser if @parser
30
- list=[:ox,:libxml,:xmlparser,:nokogiri,:oga,:rexmlstream,:rexmlsax2] if list.empty?||list==[:XML]
31
- list=[:oxhtml,:nokogirihtml,:ogahtml] if list==[:HTML]
39
+ list=XML_PARSERS_INSTALLABLE+XML_PARSERS_DEFAULT if list.empty?||list==[:XML]
40
+ list=HTML_PARSERS_INSTALLABLE if list==[:HTML]
32
41
  list.each{|e_module|
33
42
  case e_module
34
43
  when :ox,:oxhtml
@@ -115,6 +124,36 @@ module MultiSAX
115
124
  def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
116
125
  }
117
126
  break
127
+ when :xmlparser
128
+ begin
129
+ require 'xml/saxdriver'
130
+ rescue LoadError;next end
131
+ @parser=e_module
132
+ @saxhelper=Class.new(::XML::Parser){
133
+ def __init__(obj)
134
+ @obj=obj
135
+ @cdata=false
136
+ self
137
+ end
138
+ def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
139
+ def endElement(tag) @obj.sax_tag_end(tag) end
140
+ def comment(txt) @obj.sax_comment(txt) end
141
+ def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
142
+ def character(txt)
143
+ if @cdata
144
+ @obj.sax_cdata(txt)
145
+ else
146
+ @obj.sax_text(txt)
147
+ end
148
+ end
149
+ def startCdata
150
+ @cdata=true
151
+ end
152
+ def endCdata
153
+ @cdata=false
154
+ end
155
+ }
156
+ break
118
157
  when :oga,:ogahtml
119
158
  next if RUBY_VERSION<'1.9'
120
159
  begin
@@ -144,34 +183,35 @@ module MultiSAX
144
183
  end
145
184
  }
146
185
  break
147
- when :xmlparser
186
+ when :xerces
148
187
  begin
149
- require 'xml/saxdriver'
188
+ require 'Xerces'
150
189
  rescue LoadError;next end
151
190
  @parser=e_module
152
- @saxhelper=Class.new(::XML::Parser){
191
+ @saxhelper=Class.new(::XercesR::DocumentHandler){
153
192
  def __init__(obj)
154
193
  @obj=obj
155
- @cdata=false
194
+ #@cdata=false
156
195
  self
157
196
  end
158
- def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
197
+ def startElement(tag,attrs) @obj.sax_tag_start(tag,Hash[*attrs.getLength.times.map{|i|[attrs.getName(i),attrs.getValue(i)]}.flatten(1)]) end
159
198
  def endElement(tag) @obj.sax_tag_end(tag) end
160
199
  def comment(txt) @obj.sax_comment(txt) end
161
- def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
162
- def character(txt)
163
- if @cdata
200
+ #def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
201
+ #def notationDecl(name, publicId, systemId) end
202
+ def characters(txt,len)
203
+ #if @cdata
164
204
  @obj.sax_cdata(txt)
165
- else
205
+ #else
166
206
  @obj.sax_text(txt)
167
- end
168
- end
169
- def startCdata
170
- @cdata=true
171
- end
172
- def endCdata
173
- @cdata=false
207
+ #end
174
208
  end
209
+ #def startCdata
210
+ # @cdata=true
211
+ #end
212
+ #def endCdata
213
+ # @cdata=false
214
+ #end
175
215
  }
176
216
  break
177
217
  when :rexmlstream
@@ -242,9 +282,10 @@ module MultiSAX
242
282
  when :libxml then parser=LibXML::XML::SaxParser.string(source);parser.callbacks=saxhelper;parser.parse
243
283
  when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
244
284
  when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source)
285
+ when :xmlparser then saxhelper.parse(source)
245
286
  when :oga then parser=Oga::XML::SaxParser.new(saxhelper,source);parser.parse
246
287
  when :ogahtml then parser=Oga::HTML::SaxParser.new(saxhelper,source);parser.parse
247
- when :xmlparser then saxhelper.parse(source)
288
+ when :xerces then parser=XercesR::SAXParser.new;parser.setDocumentHandler(saxhelper);parser.parsebuf(source)
248
289
  when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
249
290
  when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
250
291
  end
@@ -255,9 +296,10 @@ module MultiSAX
255
296
  when :libxml then parser=LibXML::XML::SaxParser.io(source);parser.callbacks=saxhelper;parser.parse
256
297
  when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
257
298
  when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source.read) # fixme: nokogirihtml IO doesn't allow errors.
299
+ when :xmlparser then saxhelper.parse(source)
258
300
  when :oga then parser=Oga::XML::SaxParser.new(saxhelper,source);parser.parse
259
301
  when :ogahtml then parser=Oga::HTML::SaxParser.new(saxhelper,source);parser.parse
260
- when :xmlparser then saxhelper.parse(source)
302
+ when :xerces then parser=XercesR::SAXParser.new;parser.setDocumentHandler(saxhelper);parser.parsebuf(source.read)
261
303
  when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
262
304
  when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
263
305
  end
@@ -87,6 +87,17 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
87
87
  listener.attrib.should eq 'foo'
88
88
  listener.xmlencoding.should eq 'UTF-8'
89
89
  end
90
+ it "uses :xmlparser" do
91
+ pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
92
+ pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
93
+ MultiSAX::Sax.reset
94
+ MultiSAX::Sax.open(:xmlparser)
95
+ MultiSAX::Sax.parser.should eq :xmlparser
96
+ listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
97
+ listener.result.should eq xml_answer
98
+ listener.attrib.should eq 'foo'
99
+ listener.xmlencoding.should eq 'UTF-8'
100
+ end
90
101
  it "uses :oga" do
91
102
  pending 'oga requires Ruby >=1.9.3' if RUBY_VERSION<'1.9.3'
92
103
  MultiSAX::Sax.reset
@@ -97,16 +108,16 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
97
108
  listener.attrib.should eq 'foo'
98
109
  listener.xmlencoding.should eq 'UTF-8'
99
110
  end
100
- it "uses :xmlparser" do
101
- pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
102
- pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
111
+ it "uses :xerces" do
112
+ pending 'xerces is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
113
+ pending 'xerces will not be supported officially'
103
114
  MultiSAX::Sax.reset
104
- MultiSAX::Sax.open(:xmlparser)
105
- MultiSAX::Sax.parser.should eq :xmlparser
115
+ MultiSAX::Sax.open(:xerces)
116
+ MultiSAX::Sax.parser.should eq :xerces
106
117
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
107
118
  listener.result.should eq xml_answer
108
119
  listener.attrib.should eq 'foo'
109
- listener.xmlencoding.should eq 'UTF-8'
120
+ #listener.xmlencoding.should eq 'UTF-8'
110
121
  end
111
122
  end
112
123
 
@@ -158,6 +169,17 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
158
169
  listener.attrib.should eq 'foo'
159
170
  listener.xmlencoding.should eq 'UTF-8'
160
171
  end
172
+ it "uses :xmlparser" do
173
+ pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
174
+ pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
175
+ MultiSAX::Sax.reset
176
+ MultiSAX::Sax.open(:xmlparser)
177
+ MultiSAX::Sax.parser.should eq :xmlparser
178
+ listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
179
+ listener.result.should eq xml_answer
180
+ listener.attrib.should eq 'foo'
181
+ listener.xmlencoding.should eq 'UTF-8'
182
+ end
161
183
  it "uses :oga" do
162
184
  pending 'oga requires Ruby >=1.9.3' if RUBY_VERSION<'1.9.3'
163
185
  MultiSAX::Sax.reset
@@ -168,16 +190,16 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
168
190
  listener.attrib.should eq 'foo'
169
191
  listener.xmlencoding.should eq 'UTF-8'
170
192
  end
171
- it "uses :xmlparser" do
172
- pending 'xmlparser is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
173
- pending 'xmlparser is not supported by rubinius' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='rbx'
193
+ it "uses :xerces" do
194
+ pending 'xerces is not supported by jruby' if defined?(RUBY_ENGINE)&&RUBY_ENGINE=='jruby'
195
+ pending 'xerces will not be supported officially'
174
196
  MultiSAX::Sax.reset
175
- MultiSAX::Sax.open(:xmlparser)
176
- MultiSAX::Sax.parser.should eq :xmlparser
197
+ MultiSAX::Sax.open(:xerces)
198
+ MultiSAX::Sax.parser.should eq :xerces
177
199
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
178
200
  listener.result.should eq xml_answer
179
201
  listener.attrib.should eq 'foo'
180
- listener.xmlencoding.should eq 'UTF-8'
202
+ #listener.xmlencoding.should eq 'UTF-8'
181
203
  end
182
204
  end
183
205
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multisax
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - cielavenir
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-19 00:00:00.000000000 Z
11
+ date: 2014-11-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -67,6 +67,7 @@ files:
67
67
  - LICENSE.txt
68
68
  - README.md
69
69
  - Rakefile
70
+ - Xerces.md
70
71
  - lib/multisax.rb
71
72
  - multisax.gemspec
72
73
  - spec/multisax_spec.rb