multisax 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: faccd217b395949d18f4b29ac75f185a7f1976bb
4
- data.tar.gz: 24ecac5d9f3042ac03326efcb378886410d37edb
3
+ metadata.gz: 93782bad9e30774f1d5d05b206804d6ec7a981be
4
+ data.tar.gz: 1734d46e0312e9849fd6897aab0826c7225c8ff5
5
5
  SHA512:
6
- metadata.gz: 1e091e45e62d7c0f41015949dd97a196f37b604cc700e5a1d039a07d8e73b9e710f39c42342d3171f7f89edd150069312b6e3da021964212ffe65b0c48ff79aa
7
- data.tar.gz: 4cbbdb6fd438fbcfb5ddf62216d5b1269798835e1f8cd0ec75e5d7902290285934ec668177c35f0fa564922bb2de19c21241e8b8ec4e45b74fd89c3225156e69
6
+ metadata.gz: 3c4696780f1da115c91ca765f07c9a47d73221ef86e157f4aad2b3635b7a561d0fa3a1614da0a6118ab9fca99eb70444b835db970b0ea079d25c3cbe722fce21
7
+ data.tar.gz: 40df1e1c34be650f6526cc561e1535c35bc3fc2f5116b96fb8d046f68162ad2c143dd31bcc370a0b6d27f4fd550d8e35c88494dbbb80e5bda958149f5fbee6e0
data/CHANGELOG.rdoc CHANGED
@@ -1,5 +1,12 @@
1
1
  = ChangeLog
2
2
 
3
+ == 0.0.3 (2013 Nov 14)
4
+ - Fixed namespace handling.
5
+ - Now you can also select :oxhtml to parse HTML.
6
+ - Added shortcuts :XML and :HTML.
7
+ - sax_tag_start()'s attrs argument is guaranteed to be a Hash.
8
+ - Refined spec.
9
+
3
10
  == 0.0.2 (2013 Nov 13)
4
11
  - Now you can create an instance of MultiSAX::SAX.
5
12
  - Please note that the class passed to MultiSAX is still modified directly.
@@ -7,6 +14,7 @@
7
14
  - MultiSAX::Sax is now an instance of MultiSAX::SAX (this is backward-compatible).
8
15
  - You can specify :nokogirihtml explicitly to parse HTML.
9
16
  - Fixed attrs with Ox (a String is now passed, not a Symbol).
17
+ - Moved to Bundler rather than Jeweler.
10
18
 
11
19
  == 0.0.1 (2013 Jul 8)
12
20
  - Added ChangeLog.
data/Gemfile CHANGED
@@ -11,6 +11,6 @@ group :optional do
11
11
  end
12
12
 
13
13
  group :development do
14
- gem "rspec"
15
- gem "bundler"
14
+ gem 'rspec'
15
+ gem 'bundler', '>= 1.0'
16
16
  end
data/lib/multisax.rb CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  module MultiSAX
10
10
  # VERSION string
11
- VERSION='0.0.2'
11
+ VERSION='0.0.3'
12
12
 
13
13
  # The class to handle XML libraries.
14
14
  class SAX
@@ -16,16 +16,19 @@ module MultiSAX
16
16
  @saxmodule=nil
17
17
  # Library loader.
18
18
  # Arguments are a list (or an Array) of libraries.
19
- # Currently the following are supported (order by speed):
19
+ # If the list is empty or :XML, the following are searched (ordered by speed):
20
20
  # :ox, :libxml, :nokogiri, :rexmlstream, :rexmlsax2
21
+ # If the list is :HTML, the following are searched (ordered by speed):
22
+ # :oxhtml, :nokogirihtml
23
+ # You can also specify libraries individually.
21
24
  # If multiple libraries are selected, MultiSAX will try them one by one and use the first usable one.
22
- # For HTML, you can specify :nokogirihtml explicitly.
23
25
  def open(*list)
24
26
  return @parser if @parser
25
- list=[:ox,:libxml,:nokogiri,:rexmlstream,:rexmlsax2] if list.size==0
27
+ list=[:ox,:libxml,:nokogiri,:rexmlstream,:rexmlsax2] if list.size==0||list==[:XML]
28
+ list=[:oxhtml,:nokogirihtml] if list==[:HTML]
26
29
  list.each{|e_module|
27
30
  case e_module
28
- when :ox
31
+ when :ox,:oxhtml
29
32
  #next if RUBY_VERSION<'1.9'
30
33
  begin
31
34
  require 'ox'
@@ -92,17 +95,22 @@ module MultiSAX
92
95
  def method_mapping(listener)
93
96
  #raise "MultiSAX::Sax open first" if !@parser
94
97
  case @parser
95
- when :ox
98
+ when :ox,:oxhtml
96
99
  saxmodule=@saxmodule
97
100
  listener.instance_eval{
98
101
  extend saxmodule
99
102
  @saxwrapper_tag=nil
100
103
  @saxwrapper_attr={}
101
104
  def start_element(tag)
102
- # I hope provided Listener's sax_tag_start will NOT be used elsewhere.
103
- #alias :attrs_done :attrs_done_normal
104
- @saxwrapper_tag=tag
105
- @saxwrapper_attr={}
105
+ if @after_error
106
+ sax_tag_start(tag.to_s,{})
107
+ @after_error=false
108
+ else
109
+ # I hope provided Listener's sax_tag_start will NOT be used elsewhere.
110
+ #alias :attrs_done :attrs_done_normal
111
+ @saxwrapper_tag=tag
112
+ @saxwrapper_attr={}
113
+ end
106
114
  end
107
115
  # These "instance methods" are actually injected to listener class using instance_eval.
108
116
  # i.e. not APIs. You cannot call these methods from outside.
@@ -122,6 +130,7 @@ module MultiSAX
122
130
  def attrs_done
123
131
  @saxwrapper_tag ? attrs_done_normal : attrs_done_xmldecl
124
132
  end
133
+ def error(s,i,j) @after_error=true if s.end_with?('closed but not opened') end
125
134
  def end_element(tag) sax_tag_end(tag.to_s) end
126
135
  alias :cdata :sax_cdata
127
136
  alias :text :sax_text
@@ -133,8 +142,10 @@ module MultiSAX
133
142
  when :libxml
134
143
  listener.instance_eval{
135
144
  extend LibXML::XML::SaxParser::Callbacks
136
- alias :on_start_element :sax_tag_start
137
- alias :on_end_element :sax_tag_end
145
+ alias :on_start_element_ns :sax_start_element_namespace_libxml
146
+ #alias :on_start_element :sax_tag_start
147
+ alias :on_end_element_ns :sax_end_element_namespace
148
+ #alias :on_end_element :sax_tag_end
138
149
  alias :on_cdata_block :sax_cdata
139
150
  alias :on_characters :sax_text
140
151
  alias :on_comment :sax_comment
@@ -144,8 +155,8 @@ module MultiSAX
144
155
  saxmodule=@saxmodule
145
156
  listener.instance_eval{
146
157
  extend saxmodule
147
- alias :start_element_namespace :sax_start_element_namespace
148
- alias :start_element :sax_tag_start
158
+ alias :start_element_namespace :sax_start_element_namespace_nokogiri
159
+ def start_element(tag,attrs) sax_tag_start(tag,attrs.is_a?(Array) ? Hash[*attrs.flatten(1)] : attrs) end
149
160
  alias :end_element_namespace :sax_end_element_namespace
150
161
  alias :end_element :sax_tag_end
151
162
  alias :cdata_block :sax_cdata
@@ -166,8 +177,8 @@ module MultiSAX
166
177
  when :rexmlsax2
167
178
  listener.instance_eval{
168
179
  extend REXML::SAX2Listener
169
- def start_element(uri,tag,qname,attrs) sax_tag_start(tag,attrs) end
170
- def end_element(uri,tag,qname) sax_tag_end(tag) end
180
+ def start_element(uri,tag,qname,attrs) sax_tag_start(qname,attrs) end
181
+ def end_element(uri,tag,qname) sax_tag_end(qname) end
171
182
  alias :cdata :sax_cdata
172
183
  alias :characters :sax_text
173
184
  alias :comment :sax_comment
@@ -192,6 +203,7 @@ module MultiSAX
192
203
  if source.is_a?(String)
193
204
  case @parser
194
205
  when :ox then Ox.sax_parse(@listener,StringIO.new(source),:convert_special=>true)
206
+ when :oxhtml then Ox.sax_parse(@listener,StringIO.new(source),:convert_special=>true,:smart=>true)
195
207
  when :libxml then parser=LibXML::XML::SaxParser.string(source);parser.callbacks=@listener;parser.parse
196
208
  when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(@listener);parser.parse(source)
197
209
  when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(@listener);parser.parse(source)
@@ -201,9 +213,10 @@ module MultiSAX
201
213
  else
202
214
  case @parser
203
215
  when :ox then Ox.sax_parse(@listener,source,:convert_special=>true)
216
+ when :oxhtml then Ox.sax_parse(@listener,source,:convert_special=>true,:smart=>true)
204
217
  when :libxml then parser=LibXML::XML::SaxParser.io(source);parser.callbacks=@listener;parser.parse
205
218
  when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(@listener);parser.parse(source)
206
- when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(@listener);parser.parse(source)
219
+ when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(@listener);parser.parse(source.read) # fixme
207
220
  when :rexmlstream then REXML::Parsers::StreamParser.new(source,@listener).parse
208
221
  when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(@listener);parser.parse
209
222
  end
@@ -245,7 +258,7 @@ module MultiSAX
245
258
  module Callbacks
246
259
  # Cited from Nokogiri to convert Nokogiri::XML::SAX::Document into module.
247
260
  # https://github.com/sparklemotion/nokogiri/blob/master/lib/nokogiri/xml/sax/document.rb
248
- def sax_start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
261
+ def sax_start_element_namespace_nokogiri name, attrs = [], prefix = nil, uri = nil, ns = []
249
262
  # Deal with SAX v1 interface
250
263
  name = [prefix, name].compact.join(':')
251
264
  # modified in 0.0.2
@@ -257,6 +270,20 @@ module MultiSAX
257
270
  attributes[[attr.prefix, attr.localname].compact.join(':')]=attr.value
258
271
  }
259
272
  sax_tag_start name, attributes
273
+ end
274
+ # libxml namespace handler
275
+ def sax_start_element_namespace_libxml name, attrs, prefix = nil, uri = nil, ns = []
276
+ # Deal with SAX v1 interface
277
+ name = [prefix, name].compact.join(':')
278
+ # modified in 0.0.2
279
+ attributes = {}
280
+ ns.each{|ns_prefix,ns_uri|
281
+ attributes[['xmlns', ns_prefix].compact.join(':')]=ns_uri
282
+ }
283
+ attrs.each{|k,v|
284
+ attributes[k]=v
285
+ }
286
+ sax_tag_start name, attributes
260
287
  end
261
288
  # Cited from Nokogiri
262
289
  def sax_end_element_namespace name, prefix = nil, uri = nil
data/multisax.gemspec CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  ]
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.add_development_dependency "bundler"
24
+ spec.add_development_dependency "bundler", ">= 1.0"
25
25
  spec.add_development_dependency "rake"
26
26
  spec.add_development_dependency "rspec"
27
27
  end
@@ -8,7 +8,7 @@ class MultiSAXTester
8
8
  end
9
9
  def sax_tag_start(tag,attrs)
10
10
  @result<<tag
11
- @attrib=attrs['foo'] if tag=='sax'
11
+ @attrib=attrs['class'] if tag=='span'
12
12
  end
13
13
  def sax_tag_end(tag)
14
14
  @result<<tag
@@ -22,13 +22,16 @@ class MultiSAXTester
22
22
  end
23
23
  attr_reader :result,:attrib,:xmlencoding
24
24
  end
25
+
25
26
  input_xml=<<"EOM"
26
27
  <?xml version="1.0" encoding="UTF-8"?>
27
- <hello><sax foo="bar">world</sax></hello>
28
+ <ns xmlns:zzz="http://example.com/">
29
+ <zzz:hello><span class="foo">world</span></zzz:hello>
30
+ </ns>
28
31
  EOM
29
- answer=['hello','sax','world','sax','hello']
32
+ xml_answer=['ns','zzz:hello','span','world','span','zzz:hello','ns']
30
33
 
31
- describe "MultiSAX::Sax.parse (String)" do
34
+ describe "[XML] MultiSAX::Sax.parse(String)" do
32
35
  it "fails on :unknown" do
33
36
  MultiSAX::Sax.reset
34
37
  MultiSAX::Sax.open(:unknown).should be_false
@@ -38,8 +41,8 @@ describe "MultiSAX::Sax.parse (String)" do
38
41
  MultiSAX::Sax.open(:rexmlstream)
39
42
  MultiSAX::Sax.parser.should eq :rexmlstream
40
43
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
41
- listener.result.should eq answer
42
- listener.attrib.should eq 'bar'
44
+ listener.result.should eq xml_answer
45
+ listener.attrib.should eq 'foo'
43
46
  listener.xmlencoding.should eq 'UTF-8'
44
47
  end
45
48
  it "uses :rexmlsax2" do
@@ -47,8 +50,8 @@ describe "MultiSAX::Sax.parse (String)" do
47
50
  MultiSAX::Sax.open(:rexmlsax2)
48
51
  MultiSAX::Sax.parser.should eq :rexmlsax2
49
52
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
50
- listener.result.should eq answer
51
- listener.attrib.should eq 'bar'
53
+ listener.result.should eq xml_answer
54
+ listener.attrib.should eq 'foo'
52
55
  listener.xmlencoding.should eq 'UTF-8'
53
56
  end
54
57
  it "uses :ox" do
@@ -56,8 +59,8 @@ describe "MultiSAX::Sax.parse (String)" do
56
59
  MultiSAX::Sax.open(:ox)
57
60
  MultiSAX::Sax.parser.should eq :ox
58
61
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
59
- listener.result.should eq answer
60
- listener.attrib.should eq 'bar'
62
+ listener.result.should eq xml_answer
63
+ listener.attrib.should eq 'foo'
61
64
  listener.xmlencoding.should eq 'UTF-8'
62
65
  end
63
66
  it "uses :libxml" do
@@ -65,8 +68,8 @@ describe "MultiSAX::Sax.parse (String)" do
65
68
  MultiSAX::Sax.open(:libxml)
66
69
  MultiSAX::Sax.parser.should eq :libxml
67
70
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
68
- listener.result.should eq answer
69
- listener.attrib.should eq 'bar'
71
+ listener.result.should eq xml_answer
72
+ listener.attrib.should eq 'foo'
70
73
  #listener.xmlencoding.should eq 'UTF-8'
71
74
  end
72
75
  it "uses :nokogiri" do
@@ -74,20 +77,20 @@ describe "MultiSAX::Sax.parse (String)" do
74
77
  MultiSAX::Sax.open(:nokogiri)
75
78
  MultiSAX::Sax.parser.should eq :nokogiri
76
79
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
77
- listener.result.should eq answer
78
- listener.attrib.should eq 'bar'
80
+ listener.result.should eq xml_answer
81
+ listener.attrib.should eq 'foo'
79
82
  listener.xmlencoding.should eq 'UTF-8'
80
83
  end
81
84
  end
82
85
 
83
- describe "MultiSAX::Sax.parse (IO)" do
86
+ describe "[XML] MultiSAX::Sax.parse(IO)" do
84
87
  it "uses :rexmlstream" do
85
88
  MultiSAX::Sax.reset
86
89
  MultiSAX::Sax.open(:rexmlstream)
87
90
  MultiSAX::Sax.parser.should eq :rexmlstream
88
91
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
89
- listener.result.should eq answer
90
- listener.attrib.should eq 'bar'
92
+ listener.result.should eq xml_answer
93
+ listener.attrib.should eq 'foo'
91
94
  listener.xmlencoding.should eq 'UTF-8'
92
95
  end
93
96
  it "uses :rexmlsax2" do
@@ -95,8 +98,8 @@ describe "MultiSAX::Sax.parse (IO)" do
95
98
  MultiSAX::Sax.open(:rexmlsax2)
96
99
  MultiSAX::Sax.parser.should eq :rexmlsax2
97
100
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
98
- listener.result.should eq answer
99
- listener.attrib.should eq 'bar'
101
+ listener.result.should eq xml_answer
102
+ listener.attrib.should eq 'foo'
100
103
  listener.xmlencoding.should eq 'UTF-8'
101
104
  end
102
105
  it "uses :ox" do
@@ -104,8 +107,8 @@ describe "MultiSAX::Sax.parse (IO)" do
104
107
  MultiSAX::Sax.open(:ox)
105
108
  MultiSAX::Sax.parser.should eq :ox
106
109
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
107
- listener.result.should eq answer
108
- listener.attrib.should eq 'bar'
110
+ listener.result.should eq xml_answer
111
+ listener.attrib.should eq 'foo'
109
112
  listener.xmlencoding.should eq 'UTF-8'
110
113
  end
111
114
  it "uses :libxml" do
@@ -113,8 +116,8 @@ describe "MultiSAX::Sax.parse (IO)" do
113
116
  MultiSAX::Sax.open(:libxml)
114
117
  MultiSAX::Sax.parser.should eq :libxml
115
118
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
116
- listener.result.should eq answer
117
- listener.attrib.should eq 'bar'
119
+ listener.result.should eq xml_answer
120
+ listener.attrib.should eq 'foo'
118
121
  #listener.xmlencoding.should eq 'UTF-8'
119
122
  end
120
123
  it "uses :nokogiri" do
@@ -122,8 +125,56 @@ describe "MultiSAX::Sax.parse (IO)" do
122
125
  MultiSAX::Sax.open(:nokogiri)
123
126
  MultiSAX::Sax.parser.should eq :nokogiri
124
127
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
125
- listener.result.should eq answer
126
- listener.attrib.should eq 'bar'
128
+ listener.result.should eq xml_answer
129
+ listener.attrib.should eq 'foo'
127
130
  listener.xmlencoding.should eq 'UTF-8'
128
131
  end
129
132
  end
133
+
134
+ # broken intentionally
135
+ input_html=<<"EOM"
136
+ <html>
137
+ <body>
138
+ <span class="foo">hello
139
+ </body>
140
+ </html>
141
+ EOM
142
+ html_answer=['html','body','span','hello','span','body','html']
143
+
144
+ describe "[HTML] MultiSAX::Sax.parse(String)" do
145
+ it "uses :oxhtml" do
146
+ MultiSAX::Sax.reset
147
+ MultiSAX::Sax.open(:oxhtml)
148
+ MultiSAX::Sax.parser.should eq :oxhtml
149
+ listener=MultiSAX::Sax.parse(input_html,MultiSAXTester.new)
150
+ listener.result.should eq html_answer
151
+ listener.attrib.should eq 'foo'
152
+ end
153
+ it "uses :nokogirihtml" do
154
+ MultiSAX::Sax.reset
155
+ MultiSAX::Sax.open(:nokogirihtml)
156
+ MultiSAX::Sax.parser.should eq :nokogirihtml
157
+ listener=MultiSAX::Sax.parse(input_html,MultiSAXTester.new)
158
+ listener.result.should eq html_answer
159
+ listener.attrib.should eq 'foo'
160
+ end
161
+ end
162
+
163
+ describe "[HTML] MultiSAX::Sax.parse(IO)" do
164
+ it "uses :oxhtml" do
165
+ MultiSAX::Sax.reset
166
+ MultiSAX::Sax.open(:oxhtml)
167
+ MultiSAX::Sax.parser.should eq :oxhtml
168
+ listener=MultiSAX::Sax.parse(StringIO.new(input_html),MultiSAXTester.new)
169
+ listener.result.should eq html_answer
170
+ listener.attrib.should eq 'foo'
171
+ end
172
+ it "uses :nokogirihtml" do
173
+ MultiSAX::Sax.reset
174
+ MultiSAX::Sax.open(:nokogirihtml)
175
+ MultiSAX::Sax.parser.should eq :nokogirihtml
176
+ listener=MultiSAX::Sax.parse(StringIO.new(input_html),MultiSAXTester.new)
177
+ listener.result.should eq html_answer
178
+ listener.attrib.should eq 'foo'
179
+ end
180
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multisax
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - cielavenir
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '>='
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '1.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '>='
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '1.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement