multisax 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: faccd217b395949d18f4b29ac75f185a7f1976bb
4
- data.tar.gz: 24ecac5d9f3042ac03326efcb378886410d37edb
3
+ metadata.gz: 93782bad9e30774f1d5d05b206804d6ec7a981be
4
+ data.tar.gz: 1734d46e0312e9849fd6897aab0826c7225c8ff5
5
5
  SHA512:
6
- metadata.gz: 1e091e45e62d7c0f41015949dd97a196f37b604cc700e5a1d039a07d8e73b9e710f39c42342d3171f7f89edd150069312b6e3da021964212ffe65b0c48ff79aa
7
- data.tar.gz: 4cbbdb6fd438fbcfb5ddf62216d5b1269798835e1f8cd0ec75e5d7902290285934ec668177c35f0fa564922bb2de19c21241e8b8ec4e45b74fd89c3225156e69
6
+ metadata.gz: 3c4696780f1da115c91ca765f07c9a47d73221ef86e157f4aad2b3635b7a561d0fa3a1614da0a6118ab9fca99eb70444b835db970b0ea079d25c3cbe722fce21
7
+ data.tar.gz: 40df1e1c34be650f6526cc561e1535c35bc3fc2f5116b96fb8d046f68162ad2c143dd31bcc370a0b6d27f4fd550d8e35c88494dbbb80e5bda958149f5fbee6e0
data/CHANGELOG.rdoc CHANGED
@@ -1,5 +1,12 @@
1
1
  = ChangeLog
2
2
 
3
+ == 0.0.3 (2013 Nov 14)
4
+ - Fixed namespace handling.
5
+ - Now you can also select :oxhtml to parse HTML.
6
+ - Added shortcut :XML and :HTML.
7
+ - sax_tag_start()'s attrs is assured to be a Hash.
8
+ - Refined spec.
9
+
3
10
  == 0.0.2 (2013 Nov 13)
4
11
  - Now you can create an instance of MultiSAX::SAX.
5
12
  - Please note that passed class to MultiSAX is still modified directly.
@@ -7,6 +14,7 @@
7
14
  - MultiSAX::Sax is now an instance of MultiSAX::SAX (this is backward-compatible).
8
15
  - You can specify :nokogirihtml explicitly to parse HTML.
9
16
  - Fixed attrs with Ox (now String is passed, not Symbol)
17
+ - Moved to Bundler rather than Jeweler.
10
18
 
11
19
  == 0.0.1 (2013 Jul 8)
12
20
  - Added ChangeLog.
data/Gemfile CHANGED
@@ -11,6 +11,6 @@ group :optional do
11
11
  end
12
12
 
13
13
  group :development do
14
- gem "rspec"
15
- gem "bundler"
14
+ gem 'rspec'
15
+ gem 'bundler', '>= 1.0'
16
16
  end
data/lib/multisax.rb CHANGED
@@ -8,7 +8,7 @@
8
8
 
9
9
  module MultiSAX
10
10
  # VERSION string
11
- VERSION='0.0.2'
11
+ VERSION='0.0.3'
12
12
 
13
13
  # The class to handle XML libraries.
14
14
  class SAX
@@ -16,16 +16,19 @@ module MultiSAX
16
16
  @saxmodule=nil
17
17
  # Library loader.
18
18
  # Arguments are list (or Array) of libraries.
19
- # Currently the following are supported (order by speed):
19
+ # if list is empty or :XML, the following are searched (order by speed):
20
20
  # :ox, :libxml, :nokogiri, :rexmlstream, :rexmlsax2
21
+ # if list is :HTML, the following are searched (order by speed):
22
+ # :oxhtml, :nokogirihtml
23
+ # You can also specify libraries individually.
21
24
  # If multiple selected, MultiSAX will try the libraries one by one and use the first usable one.
22
- # For HTML, you can specify :nokogirihtml explicitly.
23
25
  def open(*list)
24
26
  return @parser if @parser
25
- list=[:ox,:libxml,:nokogiri,:rexmlstream,:rexmlsax2] if list.size==0
27
+ list=[:ox,:libxml,:nokogiri,:rexmlstream,:rexmlsax2] if list.size==0||list==[:XML]
28
+ list=[:oxhtml,:nokogirihtml] if list==[:HTML]
26
29
  list.each{|e_module|
27
30
  case e_module
28
- when :ox
31
+ when :ox,:oxhtml
29
32
  #next if RUBY_VERSION<'1.9'
30
33
  begin
31
34
  require 'ox'
@@ -92,17 +95,22 @@ module MultiSAX
92
95
  def method_mapping(listener)
93
96
  #raise "MultiSAX::Sax open first" if !@parser
94
97
  case @parser
95
- when :ox
98
+ when :ox,:oxhtml
96
99
  saxmodule=@saxmodule
97
100
  listener.instance_eval{
98
101
  extend saxmodule
99
102
  @saxwrapper_tag=nil
100
103
  @saxwrapper_attr={}
101
104
  def start_element(tag)
102
- # I hope provided Listener's sax_tag_start will NOT be used elsewhere.
103
- #alias :attrs_done :attrs_done_normal
104
- @saxwrapper_tag=tag
105
- @saxwrapper_attr={}
105
+ if @after_error
106
+ sax_tag_start(tag.to_s,{})
107
+ @after_error=false
108
+ else
109
+ # I hope provided Listener's sax_tag_start will NOT be used elsewhere.
110
+ #alias :attrs_done :attrs_done_normal
111
+ @saxwrapper_tag=tag
112
+ @saxwrapper_attr={}
113
+ end
106
114
  end
107
115
  # These "instance methods" are actually injected to listener class using instance_eval.
108
116
  # i.e. not APIs. You cannot call these methods from outside.
@@ -122,6 +130,7 @@ module MultiSAX
122
130
  def attrs_done
123
131
  @saxwrapper_tag ? attrs_done_normal : attrs_done_xmldecl
124
132
  end
133
+ def error(s,i,j) @after_error=true if s.end_with?('closed but not opened') end
125
134
  def end_element(tag) sax_tag_end(tag.to_s) end
126
135
  alias :cdata :sax_cdata
127
136
  alias :text :sax_text
@@ -133,8 +142,10 @@ module MultiSAX
133
142
  when :libxml
134
143
  listener.instance_eval{
135
144
  extend LibXML::XML::SaxParser::Callbacks
136
- alias :on_start_element :sax_tag_start
137
- alias :on_end_element :sax_tag_end
145
+ alias :on_start_element_ns :sax_start_element_namespace_libxml
146
+ #alias :on_start_element :sax_tag_start
147
+ alias :on_end_element_ns :sax_end_element_namespace
148
+ #alias :on_end_element :sax_tag_end
138
149
  alias :on_cdata_block :sax_cdata
139
150
  alias :on_characters :sax_text
140
151
  alias :on_comment :sax_comment
@@ -144,8 +155,8 @@ module MultiSAX
144
155
  saxmodule=@saxmodule
145
156
  listener.instance_eval{
146
157
  extend saxmodule
147
- alias :start_element_namespace :sax_start_element_namespace
148
- alias :start_element :sax_tag_start
158
+ alias :start_element_namespace :sax_start_element_namespace_nokogiri
159
+ def start_element(tag,attrs) sax_tag_start(tag,attrs.is_a?(Array) ? Hash[*attrs.flatten(1)] : attrs) end
149
160
  alias :end_element_namespace :sax_end_element_namespace
150
161
  alias :end_element :sax_tag_end
151
162
  alias :cdata_block :sax_cdata
@@ -166,8 +177,8 @@ module MultiSAX
166
177
  when :rexmlsax2
167
178
  listener.instance_eval{
168
179
  extend REXML::SAX2Listener
169
- def start_element(uri,tag,qname,attrs) sax_tag_start(tag,attrs) end
170
- def end_element(uri,tag,qname) sax_tag_end(tag) end
180
+ def start_element(uri,tag,qname,attrs) sax_tag_start(qname,attrs) end
181
+ def end_element(uri,tag,qname) sax_tag_end(qname) end
171
182
  alias :cdata :sax_cdata
172
183
  alias :characters :sax_text
173
184
  alias :comment :sax_comment
@@ -192,6 +203,7 @@ module MultiSAX
192
203
  if source.is_a?(String)
193
204
  case @parser
194
205
  when :ox then Ox.sax_parse(@listener,StringIO.new(source),:convert_special=>true)
206
+ when :oxhtml then Ox.sax_parse(@listener,StringIO.new(source),:convert_special=>true,:smart=>true)
195
207
  when :libxml then parser=LibXML::XML::SaxParser.string(source);parser.callbacks=@listener;parser.parse
196
208
  when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(@listener);parser.parse(source)
197
209
  when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(@listener);parser.parse(source)
@@ -201,9 +213,10 @@ module MultiSAX
201
213
  else
202
214
  case @parser
203
215
  when :ox then Ox.sax_parse(@listener,source,:convert_special=>true)
216
+ when :oxhtml then Ox.sax_parse(@listener,source,:convert_special=>true,:smart=>true)
204
217
  when :libxml then parser=LibXML::XML::SaxParser.io(source);parser.callbacks=@listener;parser.parse
205
218
  when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(@listener);parser.parse(source)
206
- when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(@listener);parser.parse(source)
219
+ when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(@listener);parser.parse(source.read) # fixme
207
220
  when :rexmlstream then REXML::Parsers::StreamParser.new(source,@listener).parse
208
221
  when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(@listener);parser.parse
209
222
  end
@@ -245,7 +258,7 @@ module MultiSAX
245
258
  module Callbacks
246
259
  # Cited from Nokogiri to convert Nokogiri::XML::SAX::Document into module.
247
260
  # https://github.com/sparklemotion/nokogiri/blob/master/lib/nokogiri/xml/sax/document.rb
248
- def sax_start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
261
+ def sax_start_element_namespace_nokogiri name, attrs = [], prefix = nil, uri = nil, ns = []
249
262
  # Deal with SAX v1 interface
250
263
  name = [prefix, name].compact.join(':')
251
264
  # modified in 0.0.2
@@ -257,6 +270,20 @@ module MultiSAX
257
270
  attributes[[attr.prefix, attr.localname].compact.join(':')]=attr.value
258
271
  }
259
272
  sax_tag_start name, attributes
273
+ end
274
+ # libxml namespace handler
275
+ def sax_start_element_namespace_libxml name, attrs, prefix = nil, uri = nil, ns = []
276
+ # Deal with SAX v1 interface
277
+ name = [prefix, name].compact.join(':')
278
+ # modified in 0.0.2
279
+ attributes = {}
280
+ ns.each{|ns_prefix,ns_uri|
281
+ attributes[['xmlns', ns_prefix].compact.join(':')]=ns_uri
282
+ }
283
+ attrs.each{|k,v|
284
+ attributes[k]=v
285
+ }
286
+ sax_tag_start name, attributes
260
287
  end
261
288
  # Cited from Nokogiri
262
289
  def sax_end_element_namespace name, prefix = nil, uri = nil
data/multisax.gemspec CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |spec|
21
21
  ]
22
22
  spec.require_paths = ["lib"]
23
23
 
24
- spec.add_development_dependency "bundler"
24
+ spec.add_development_dependency "bundler", ">= 1.0"
25
25
  spec.add_development_dependency "rake"
26
26
  spec.add_development_dependency "rspec"
27
27
  end
@@ -8,7 +8,7 @@ class MultiSAXTester
8
8
  end
9
9
  def sax_tag_start(tag,attrs)
10
10
  @result<<tag
11
- @attrib=attrs['foo'] if tag=='sax'
11
+ @attrib=attrs['class'] if tag=='span'
12
12
  end
13
13
  def sax_tag_end(tag)
14
14
  @result<<tag
@@ -22,13 +22,16 @@ class MultiSAXTester
22
22
  end
23
23
  attr_reader :result,:attrib,:xmlencoding
24
24
  end
25
+
25
26
  input_xml=<<"EOM"
26
27
  <?xml version="1.0" encoding="UTF-8"?>
27
- <hello><sax foo="bar">world</sax></hello>
28
+ <ns xmlns:zzz="http://example.com/">
29
+ <zzz:hello><span class="foo">world</span></zzz:hello>
30
+ </ns>
28
31
  EOM
29
- answer=['hello','sax','world','sax','hello']
32
+ xml_answer=['ns','zzz:hello','span','world','span','zzz:hello','ns']
30
33
 
31
- describe "MultiSAX::Sax.parse (String)" do
34
+ describe "[XML] MultiSAX::Sax.parse(String)" do
32
35
  it "fails on :unknown" do
33
36
  MultiSAX::Sax.reset
34
37
  MultiSAX::Sax.open(:unknown).should be_false
@@ -38,8 +41,8 @@ describe "MultiSAX::Sax.parse (String)" do
38
41
  MultiSAX::Sax.open(:rexmlstream)
39
42
  MultiSAX::Sax.parser.should eq :rexmlstream
40
43
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
41
- listener.result.should eq answer
42
- listener.attrib.should eq 'bar'
44
+ listener.result.should eq xml_answer
45
+ listener.attrib.should eq 'foo'
43
46
  listener.xmlencoding.should eq 'UTF-8'
44
47
  end
45
48
  it "uses :rexmlsax2" do
@@ -47,8 +50,8 @@ describe "MultiSAX::Sax.parse (String)" do
47
50
  MultiSAX::Sax.open(:rexmlsax2)
48
51
  MultiSAX::Sax.parser.should eq :rexmlsax2
49
52
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
50
- listener.result.should eq answer
51
- listener.attrib.should eq 'bar'
53
+ listener.result.should eq xml_answer
54
+ listener.attrib.should eq 'foo'
52
55
  listener.xmlencoding.should eq 'UTF-8'
53
56
  end
54
57
  it "uses :ox" do
@@ -56,8 +59,8 @@ describe "MultiSAX::Sax.parse (String)" do
56
59
  MultiSAX::Sax.open(:ox)
57
60
  MultiSAX::Sax.parser.should eq :ox
58
61
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
59
- listener.result.should eq answer
60
- listener.attrib.should eq 'bar'
62
+ listener.result.should eq xml_answer
63
+ listener.attrib.should eq 'foo'
61
64
  listener.xmlencoding.should eq 'UTF-8'
62
65
  end
63
66
  it "uses :libxml" do
@@ -65,8 +68,8 @@ describe "MultiSAX::Sax.parse (String)" do
65
68
  MultiSAX::Sax.open(:libxml)
66
69
  MultiSAX::Sax.parser.should eq :libxml
67
70
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
68
- listener.result.should eq answer
69
- listener.attrib.should eq 'bar'
71
+ listener.result.should eq xml_answer
72
+ listener.attrib.should eq 'foo'
70
73
  #listener.xmlencoding.should eq 'UTF-8'
71
74
  end
72
75
  it "uses :nokogiri" do
@@ -74,20 +77,20 @@ describe "MultiSAX::Sax.parse (String)" do
74
77
  MultiSAX::Sax.open(:nokogiri)
75
78
  MultiSAX::Sax.parser.should eq :nokogiri
76
79
  listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
77
- listener.result.should eq answer
78
- listener.attrib.should eq 'bar'
80
+ listener.result.should eq xml_answer
81
+ listener.attrib.should eq 'foo'
79
82
  listener.xmlencoding.should eq 'UTF-8'
80
83
  end
81
84
  end
82
85
 
83
- describe "MultiSAX::Sax.parse (IO)" do
86
+ describe "[XML] MultiSAX::Sax.parse(IO)" do
84
87
  it "uses :rexmlstream" do
85
88
  MultiSAX::Sax.reset
86
89
  MultiSAX::Sax.open(:rexmlstream)
87
90
  MultiSAX::Sax.parser.should eq :rexmlstream
88
91
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
89
- listener.result.should eq answer
90
- listener.attrib.should eq 'bar'
92
+ listener.result.should eq xml_answer
93
+ listener.attrib.should eq 'foo'
91
94
  listener.xmlencoding.should eq 'UTF-8'
92
95
  end
93
96
  it "uses :rexmlsax2" do
@@ -95,8 +98,8 @@ describe "MultiSAX::Sax.parse (IO)" do
95
98
  MultiSAX::Sax.open(:rexmlsax2)
96
99
  MultiSAX::Sax.parser.should eq :rexmlsax2
97
100
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
98
- listener.result.should eq answer
99
- listener.attrib.should eq 'bar'
101
+ listener.result.should eq xml_answer
102
+ listener.attrib.should eq 'foo'
100
103
  listener.xmlencoding.should eq 'UTF-8'
101
104
  end
102
105
  it "uses :ox" do
@@ -104,8 +107,8 @@ describe "MultiSAX::Sax.parse (IO)" do
104
107
  MultiSAX::Sax.open(:ox)
105
108
  MultiSAX::Sax.parser.should eq :ox
106
109
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
107
- listener.result.should eq answer
108
- listener.attrib.should eq 'bar'
110
+ listener.result.should eq xml_answer
111
+ listener.attrib.should eq 'foo'
109
112
  listener.xmlencoding.should eq 'UTF-8'
110
113
  end
111
114
  it "uses :libxml" do
@@ -113,8 +116,8 @@ describe "MultiSAX::Sax.parse (IO)" do
113
116
  MultiSAX::Sax.open(:libxml)
114
117
  MultiSAX::Sax.parser.should eq :libxml
115
118
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
116
- listener.result.should eq answer
117
- listener.attrib.should eq 'bar'
119
+ listener.result.should eq xml_answer
120
+ listener.attrib.should eq 'foo'
118
121
  #listener.xmlencoding.should eq 'UTF-8'
119
122
  end
120
123
  it "uses :nokogiri" do
@@ -122,8 +125,56 @@ describe "MultiSAX::Sax.parse (IO)" do
122
125
  MultiSAX::Sax.open(:nokogiri)
123
126
  MultiSAX::Sax.parser.should eq :nokogiri
124
127
  listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
125
- listener.result.should eq answer
126
- listener.attrib.should eq 'bar'
128
+ listener.result.should eq xml_answer
129
+ listener.attrib.should eq 'foo'
127
130
  listener.xmlencoding.should eq 'UTF-8'
128
131
  end
129
132
  end
133
+
134
+ # broken intentionally
135
+ input_html=<<"EOM"
136
+ <html>
137
+ <body>
138
+ <span class="foo">hello
139
+ </body>
140
+ </html>
141
+ EOM
142
+ html_answer=['html','body','span','hello','span','body','html']
143
+
144
+ describe "[HTML] MultiSAX::Sax.parse(String)" do
145
+ it "uses :oxhtml" do
146
+ MultiSAX::Sax.reset
147
+ MultiSAX::Sax.open(:oxhtml)
148
+ MultiSAX::Sax.parser.should eq :oxhtml
149
+ listener=MultiSAX::Sax.parse(input_html,MultiSAXTester.new)
150
+ listener.result.should eq html_answer
151
+ listener.attrib.should eq 'foo'
152
+ end
153
+ it "uses :nokogirihtml" do
154
+ MultiSAX::Sax.reset
155
+ MultiSAX::Sax.open(:nokogirihtml)
156
+ MultiSAX::Sax.parser.should eq :nokogirihtml
157
+ listener=MultiSAX::Sax.parse(input_html,MultiSAXTester.new)
158
+ listener.result.should eq html_answer
159
+ listener.attrib.should eq 'foo'
160
+ end
161
+ end
162
+
163
+ describe "[HTML] MultiSAX::Sax.parse(IO)" do
164
+ it "uses :oxhtml" do
165
+ MultiSAX::Sax.reset
166
+ MultiSAX::Sax.open(:oxhtml)
167
+ MultiSAX::Sax.parser.should eq :oxhtml
168
+ listener=MultiSAX::Sax.parse(StringIO.new(input_html),MultiSAXTester.new)
169
+ listener.result.should eq html_answer
170
+ listener.attrib.should eq 'foo'
171
+ end
172
+ it "uses :nokogirihtml" do
173
+ MultiSAX::Sax.reset
174
+ MultiSAX::Sax.open(:nokogirihtml)
175
+ MultiSAX::Sax.parser.should eq :nokogirihtml
176
+ listener=MultiSAX::Sax.parse(StringIO.new(input_html),MultiSAXTester.new)
177
+ listener.result.should eq html_answer
178
+ listener.attrib.should eq 'foo'
179
+ end
180
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: multisax
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - cielavenir
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - '>='
18
18
  - !ruby/object:Gem::Version
19
- version: '0'
19
+ version: '1.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - '>='
25
25
  - !ruby/object:Gem::Version
26
- version: '0'
26
+ version: '1.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement