multisax 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/.travis.yml +29 -0
- data/{CHANGELOG.rdoc → CHANGELOG.md} +13 -9
- data/Gemfile +21 -10
- data/Gemfile.lock +221 -5
- data/README.md +81 -0
- data/Rakefile +17 -12
- data/lib/multisax.rb +154 -180
- data/multisax.gemspec +8 -7
- data/spec/multisax_spec.rb +28 -4
- data/spec/spec_helper.rb +12 -11
- metadata +9 -10
- data/README.rdoc +0 -51
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 3f164d0aee6e3ddfd210daa0986a1544bf643e69
|
4
|
+
data.tar.gz: 2d70d700e08f2ee4a1908a2dd02c2c34efc2c25d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 57646607e1686e980ad616d5e39672d66a7e092903f54b5166ec14ed80f5f206786ee8749bf87464a5463c1bee4588510f24ad322ff7fb7cfcaf99bc54c4dbef
|
7
|
+
data.tar.gz: f587175f36daa320b3c24f06ce98edb72bbb7be01d53f4400d2ef11d9d7a2e89fc75ced8755a10723d635d887b7a0637c1fd1709095d45f7b15dd0892d99c0fc
|
data/.gitignore
CHANGED
data/.travis.yml
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
language: ruby
|
2
|
+
before_install:
|
3
|
+
- gem update --system 2.1.11
|
4
|
+
- gem --version
|
5
|
+
rvm:
|
6
|
+
- 1.8.7
|
7
|
+
- ree
|
8
|
+
- 1.9.2
|
9
|
+
- 1.9.3
|
10
|
+
- 2.0.0
|
11
|
+
- 2.1.0
|
12
|
+
- ruby-head
|
13
|
+
- jruby-18mode
|
14
|
+
- jruby-19mode
|
15
|
+
- jruby-20mode
|
16
|
+
- jruby-21mode
|
17
|
+
- jruby-head
|
18
|
+
- rbx-2.1.1
|
19
|
+
- rbx-2.2.3
|
20
|
+
matrix:
|
21
|
+
allow_failures:
|
22
|
+
- rvm: ruby-head
|
23
|
+
- rvm: jruby-18mode
|
24
|
+
- rvm: jruby-19mode
|
25
|
+
- rvm: jruby-20mode
|
26
|
+
- rvm: jruby-21mode
|
27
|
+
- rvm: jruby-head
|
28
|
+
- rvm: rbx-2.1.1
|
29
|
+
- rvm: rbx-2.2.3
|
@@ -1,13 +1,17 @@
|
|
1
|
-
|
1
|
+
# ChangeLog
|
2
2
|
|
3
|
-
|
3
|
+
## 0.0.4 (2014 Jan 13)
|
4
|
+
- Integrated Travis CI. Ruby/Rubinius/JRuby support is now assured.
|
5
|
+
- Added xmlparser (expat bindings) gem support.
|
6
|
+
|
7
|
+
## 0.0.3 (2013 Nov 14)
|
4
8
|
- Fixed namespace handling.
|
5
9
|
- Now you can also select :oxhtml to parse HTML.
|
6
10
|
- Added shortcut :XML and :HTML.
|
7
11
|
- sax_tag_start()'s attrs is assured to be a Hash.
|
8
12
|
- Refined spec.
|
9
13
|
|
10
|
-
|
14
|
+
## 0.0.2 (2013 Nov 13)
|
11
15
|
- Now you can create an instance of MultiSAX::SAX.
|
12
16
|
- Please note that the class passed to MultiSAX is still modified directly.
|
13
17
|
- So only MultiSAX::SAX instances are thread-safe.
|
@@ -16,7 +20,7 @@
|
|
16
20
|
- Fixed attrs with Ox (now String is passed, not Symbol)
|
17
21
|
- Moved to Bundler rather than Jeweler.
|
18
22
|
|
19
|
-
|
23
|
+
## 0.0.1 (2013 Jul 8)
|
20
24
|
- Added ChangeLog.
|
21
25
|
- Added Ruby 1.8.7 support.
|
22
26
|
- Might work on lower versions, but this is not guaranteed.
|
@@ -27,17 +31,17 @@
|
|
27
31
|
- Unless you specify it directly, rexmlstream is usually selected.
|
28
32
|
- Added MultiSAX::Sax.parsefile().
|
29
33
|
|
30
|
-
|
34
|
+
## 0.0.0.5 (2013 Jun 21)
|
31
35
|
- Added a test case.
|
32
36
|
|
33
|
-
|
37
|
+
## 0.0.0.4 (2013 Jun 15)
|
34
38
|
- Provided rdoc.
|
35
39
|
|
36
|
-
|
40
|
+
## 0.0.0.3 (2013 Jun 14)
|
37
41
|
- Fixed @@parser getter.
|
38
42
|
|
39
|
-
|
43
|
+
## 0.0.0.2 (2013 Jun 14)
|
40
44
|
- Fixed Rakefile.
|
41
45
|
|
42
|
-
|
46
|
+
## 0.0.0.1 (2013 Jun 14)
|
43
47
|
- First release.
|
data/Gemfile
CHANGED
@@ -1,16 +1,27 @@
|
|
1
1
|
source "http://rubygems.org"
|
2
2
|
|
3
|
-
|
4
|
-
gem '
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
platforms :rbx do
|
4
|
+
gem 'rubysl'
|
5
|
+
gem 'racc' # nokogiri
|
6
|
+
end
|
7
|
+
|
8
|
+
#This clause is broken. So, please do not use bundle install if you are not developing multisax.
|
9
|
+
group :test do
|
10
|
+
#if RUBY_VERSION>='1.9'
|
11
|
+
# gem 'nokogiri'
|
12
|
+
#else
|
13
|
+
gem 'nokogiri', '~> 1.5.0'
|
14
|
+
#end
|
15
|
+
platforms :ruby do
|
16
|
+
gem 'libxml-ruby'
|
17
|
+
gem 'ox'
|
18
|
+
gem 'xmlparser'
|
9
19
|
end
|
10
|
-
gem 'ox', :require=>nil
|
11
20
|
end
|
12
21
|
|
13
|
-
group :development do
|
14
|
-
|
15
|
-
|
22
|
+
group :development, :test do
|
23
|
+
gem 'bundler', '>= 1.0'
|
24
|
+
gem 'rake'
|
25
|
+
gem 'rspec'
|
26
|
+
gem 'simplecov'
|
16
27
|
end
|
data/Gemfile.lock
CHANGED
@@ -2,11 +2,14 @@ GEM
|
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
4
|
diff-lcs (1.2.5)
|
5
|
+
docile (1.1.1)
|
6
|
+
ffi2-generators (0.1.1)
|
5
7
|
libxml-ruby (2.7.0)
|
6
|
-
|
7
|
-
nokogiri (1.
|
8
|
-
|
9
|
-
|
8
|
+
multi_json (1.8.2)
|
9
|
+
nokogiri (1.5.11)
|
10
|
+
ox (2.0.12)
|
11
|
+
racc (1.4.10)
|
12
|
+
rake (10.1.1)
|
10
13
|
rspec (2.14.1)
|
11
14
|
rspec-core (~> 2.14.0)
|
12
15
|
rspec-expectations (~> 2.14.0)
|
@@ -15,6 +18,214 @@ GEM
|
|
15
18
|
rspec-expectations (2.14.4)
|
16
19
|
diff-lcs (>= 1.1.3, < 2.0)
|
17
20
|
rspec-mocks (2.14.4)
|
21
|
+
rubysl (2.0.15)
|
22
|
+
rubysl-abbrev (~> 2.0)
|
23
|
+
rubysl-base64 (~> 2.0)
|
24
|
+
rubysl-benchmark (~> 2.0)
|
25
|
+
rubysl-bigdecimal (~> 2.0)
|
26
|
+
rubysl-cgi (~> 2.0)
|
27
|
+
rubysl-cgi-session (~> 2.0)
|
28
|
+
rubysl-cmath (~> 2.0)
|
29
|
+
rubysl-complex (~> 2.0)
|
30
|
+
rubysl-continuation (~> 2.0)
|
31
|
+
rubysl-coverage (~> 2.0)
|
32
|
+
rubysl-csv (~> 2.0)
|
33
|
+
rubysl-curses (~> 2.0)
|
34
|
+
rubysl-date (~> 2.0)
|
35
|
+
rubysl-delegate (~> 2.0)
|
36
|
+
rubysl-digest (~> 2.0)
|
37
|
+
rubysl-drb (~> 2.0)
|
38
|
+
rubysl-e2mmap (~> 2.0)
|
39
|
+
rubysl-english (~> 2.0)
|
40
|
+
rubysl-enumerator (~> 2.0)
|
41
|
+
rubysl-erb (~> 2.0)
|
42
|
+
rubysl-etc (~> 2.0)
|
43
|
+
rubysl-expect (~> 2.0)
|
44
|
+
rubysl-fcntl (~> 2.0)
|
45
|
+
rubysl-fiber (~> 2.0)
|
46
|
+
rubysl-fileutils (~> 2.0)
|
47
|
+
rubysl-find (~> 2.0)
|
48
|
+
rubysl-forwardable (~> 2.0)
|
49
|
+
rubysl-getoptlong (~> 2.0)
|
50
|
+
rubysl-gserver (~> 2.0)
|
51
|
+
rubysl-io-console (~> 2.0)
|
52
|
+
rubysl-io-nonblock (~> 2.0)
|
53
|
+
rubysl-io-wait (~> 2.0)
|
54
|
+
rubysl-ipaddr (~> 2.0)
|
55
|
+
rubysl-irb (~> 2.0)
|
56
|
+
rubysl-logger (~> 2.0)
|
57
|
+
rubysl-mathn (~> 2.0)
|
58
|
+
rubysl-matrix (~> 2.0)
|
59
|
+
rubysl-mkmf (~> 2.0)
|
60
|
+
rubysl-monitor (~> 2.0)
|
61
|
+
rubysl-mutex_m (~> 2.0)
|
62
|
+
rubysl-net-ftp (~> 2.0)
|
63
|
+
rubysl-net-http (~> 2.0)
|
64
|
+
rubysl-net-imap (~> 2.0)
|
65
|
+
rubysl-net-pop (~> 2.0)
|
66
|
+
rubysl-net-protocol (~> 2.0)
|
67
|
+
rubysl-net-smtp (~> 2.0)
|
68
|
+
rubysl-net-telnet (~> 2.0)
|
69
|
+
rubysl-nkf (~> 2.0)
|
70
|
+
rubysl-observer (~> 2.0)
|
71
|
+
rubysl-open-uri (~> 2.0)
|
72
|
+
rubysl-open3 (~> 2.0)
|
73
|
+
rubysl-openssl (~> 2.0)
|
74
|
+
rubysl-optparse (~> 2.0)
|
75
|
+
rubysl-ostruct (~> 2.0)
|
76
|
+
rubysl-pathname (~> 2.0)
|
77
|
+
rubysl-prettyprint (~> 2.0)
|
78
|
+
rubysl-prime (~> 2.0)
|
79
|
+
rubysl-profile (~> 2.0)
|
80
|
+
rubysl-profiler (~> 2.0)
|
81
|
+
rubysl-pstore (~> 2.0)
|
82
|
+
rubysl-pty (~> 2.0)
|
83
|
+
rubysl-rational (~> 2.0)
|
84
|
+
rubysl-readline (~> 2.0)
|
85
|
+
rubysl-resolv (~> 2.0)
|
86
|
+
rubysl-rexml (~> 2.0)
|
87
|
+
rubysl-rinda (~> 2.0)
|
88
|
+
rubysl-rss (~> 2.0)
|
89
|
+
rubysl-scanf (~> 2.0)
|
90
|
+
rubysl-securerandom (~> 2.0)
|
91
|
+
rubysl-set (~> 2.0)
|
92
|
+
rubysl-shellwords (~> 2.0)
|
93
|
+
rubysl-singleton (~> 2.0)
|
94
|
+
rubysl-socket (~> 2.0)
|
95
|
+
rubysl-stringio (~> 2.0)
|
96
|
+
rubysl-strscan (~> 2.0)
|
97
|
+
rubysl-sync (~> 2.0)
|
98
|
+
rubysl-syslog (~> 2.0)
|
99
|
+
rubysl-tempfile (~> 2.0)
|
100
|
+
rubysl-thread (~> 2.0)
|
101
|
+
rubysl-thwait (~> 2.0)
|
102
|
+
rubysl-time (~> 2.0)
|
103
|
+
rubysl-timeout (~> 2.0)
|
104
|
+
rubysl-tmpdir (~> 2.0)
|
105
|
+
rubysl-tsort (~> 2.0)
|
106
|
+
rubysl-un (~> 2.0)
|
107
|
+
rubysl-uri (~> 2.0)
|
108
|
+
rubysl-weakref (~> 2.0)
|
109
|
+
rubysl-webrick (~> 2.0)
|
110
|
+
rubysl-xmlrpc (~> 2.0)
|
111
|
+
rubysl-yaml (~> 2.0)
|
112
|
+
rubysl-zlib (~> 2.0)
|
113
|
+
rubysl-abbrev (2.0.4)
|
114
|
+
rubysl-base64 (2.0.0)
|
115
|
+
rubysl-benchmark (2.0.1)
|
116
|
+
rubysl-bigdecimal (2.0.2)
|
117
|
+
rubysl-cgi (2.0.1)
|
118
|
+
rubysl-cgi-session (2.0.1)
|
119
|
+
rubysl-cmath (2.0.0)
|
120
|
+
rubysl-complex (2.0.0)
|
121
|
+
rubysl-continuation (2.0.0)
|
122
|
+
rubysl-coverage (2.0.3)
|
123
|
+
rubysl-csv (2.0.2)
|
124
|
+
rubysl-english (~> 2.0)
|
125
|
+
rubysl-curses (2.0.1)
|
126
|
+
rubysl-date (2.0.6)
|
127
|
+
rubysl-delegate (2.0.1)
|
128
|
+
rubysl-digest (2.0.3)
|
129
|
+
rubysl-drb (2.0.1)
|
130
|
+
rubysl-e2mmap (2.0.0)
|
131
|
+
rubysl-english (2.0.0)
|
132
|
+
rubysl-enumerator (2.0.0)
|
133
|
+
rubysl-erb (2.0.1)
|
134
|
+
rubysl-etc (2.0.3)
|
135
|
+
ffi2-generators (~> 0.1)
|
136
|
+
rubysl-expect (2.0.0)
|
137
|
+
rubysl-fcntl (2.0.4)
|
138
|
+
ffi2-generators (~> 0.1)
|
139
|
+
rubysl-fiber (2.0.0)
|
140
|
+
rubysl-fileutils (2.0.3)
|
141
|
+
rubysl-find (2.0.1)
|
142
|
+
rubysl-forwardable (2.0.1)
|
143
|
+
rubysl-getoptlong (2.0.0)
|
144
|
+
rubysl-gserver (2.0.0)
|
145
|
+
rubysl-socket (~> 2.0)
|
146
|
+
rubysl-thread (~> 2.0)
|
147
|
+
rubysl-io-console (2.0.0)
|
148
|
+
rubysl-io-nonblock (2.0.0)
|
149
|
+
rubysl-io-wait (2.0.0)
|
150
|
+
rubysl-ipaddr (2.0.0)
|
151
|
+
rubysl-irb (2.0.4)
|
152
|
+
rubysl-e2mmap (~> 2.0)
|
153
|
+
rubysl-mathn (~> 2.0)
|
154
|
+
rubysl-readline (~> 2.0)
|
155
|
+
rubysl-thread (~> 2.0)
|
156
|
+
rubysl-logger (2.0.0)
|
157
|
+
rubysl-mathn (2.0.0)
|
158
|
+
rubysl-matrix (2.1.0)
|
159
|
+
rubysl-e2mmap (~> 2.0)
|
160
|
+
rubysl-mkmf (2.0.1)
|
161
|
+
rubysl-fileutils (~> 2.0)
|
162
|
+
rubysl-shellwords (~> 2.0)
|
163
|
+
rubysl-monitor (2.0.0)
|
164
|
+
rubysl-mutex_m (2.0.0)
|
165
|
+
rubysl-net-ftp (2.0.1)
|
166
|
+
rubysl-net-http (2.0.4)
|
167
|
+
rubysl-cgi (~> 2.0)
|
168
|
+
rubysl-erb (~> 2.0)
|
169
|
+
rubysl-singleton (~> 2.0)
|
170
|
+
rubysl-net-imap (2.0.1)
|
171
|
+
rubysl-net-pop (2.0.1)
|
172
|
+
rubysl-net-protocol (2.0.1)
|
173
|
+
rubysl-net-smtp (2.0.1)
|
174
|
+
rubysl-net-telnet (2.0.0)
|
175
|
+
rubysl-nkf (2.0.1)
|
176
|
+
rubysl-observer (2.0.0)
|
177
|
+
rubysl-open-uri (2.0.0)
|
178
|
+
rubysl-open3 (2.0.0)
|
179
|
+
rubysl-openssl (2.0.5)
|
180
|
+
rubysl-optparse (2.0.1)
|
181
|
+
rubysl-shellwords (~> 2.0)
|
182
|
+
rubysl-ostruct (2.0.4)
|
183
|
+
rubysl-pathname (2.0.0)
|
184
|
+
rubysl-prettyprint (2.0.2)
|
185
|
+
rubysl-prime (2.0.1)
|
186
|
+
rubysl-profile (2.0.0)
|
187
|
+
rubysl-profiler (2.0.1)
|
188
|
+
rubysl-pstore (2.0.0)
|
189
|
+
rubysl-pty (2.0.2)
|
190
|
+
rubysl-rational (2.0.1)
|
191
|
+
rubysl-readline (2.0.2)
|
192
|
+
rubysl-resolv (2.0.0)
|
193
|
+
rubysl-rexml (2.0.2)
|
194
|
+
rubysl-rinda (2.0.0)
|
195
|
+
rubysl-rss (2.0.0)
|
196
|
+
rubysl-scanf (2.0.0)
|
197
|
+
rubysl-securerandom (2.0.0)
|
198
|
+
rubysl-set (2.0.1)
|
199
|
+
rubysl-shellwords (2.0.0)
|
200
|
+
rubysl-singleton (2.0.0)
|
201
|
+
rubysl-socket (2.0.1)
|
202
|
+
rubysl-stringio (2.0.0)
|
203
|
+
rubysl-strscan (2.0.0)
|
204
|
+
rubysl-sync (2.0.0)
|
205
|
+
rubysl-syslog (2.0.1)
|
206
|
+
ffi2-generators (~> 0.1)
|
207
|
+
rubysl-tempfile (2.0.1)
|
208
|
+
rubysl-thread (2.0.2)
|
209
|
+
rubysl-thwait (2.0.0)
|
210
|
+
rubysl-time (2.0.3)
|
211
|
+
rubysl-timeout (2.0.0)
|
212
|
+
rubysl-tmpdir (2.0.0)
|
213
|
+
rubysl-tsort (2.0.1)
|
214
|
+
rubysl-un (2.0.0)
|
215
|
+
rubysl-fileutils (~> 2.0)
|
216
|
+
rubysl-optparse (~> 2.0)
|
217
|
+
rubysl-uri (2.0.0)
|
218
|
+
rubysl-weakref (2.0.0)
|
219
|
+
rubysl-webrick (2.0.0)
|
220
|
+
rubysl-xmlrpc (2.0.0)
|
221
|
+
rubysl-yaml (2.0.4)
|
222
|
+
rubysl-zlib (2.0.1)
|
223
|
+
simplecov (0.8.2)
|
224
|
+
docile (~> 1.1.0)
|
225
|
+
multi_json
|
226
|
+
simplecov-html (~> 0.8.0)
|
227
|
+
simplecov-html (0.8.0)
|
228
|
+
xmlparser (0.7.2.1)
|
18
229
|
|
19
230
|
PLATFORMS
|
20
231
|
ruby
|
@@ -22,6 +233,11 @@ PLATFORMS
|
|
22
233
|
DEPENDENCIES
|
23
234
|
bundler (>= 1.0)
|
24
235
|
libxml-ruby
|
25
|
-
nokogiri
|
236
|
+
nokogiri (~> 1.5.0)
|
26
237
|
ox
|
238
|
+
racc
|
239
|
+
rake
|
27
240
|
rspec
|
241
|
+
rubysl
|
242
|
+
simplecov
|
243
|
+
xmlparser
|
data/README.md
ADDED
@@ -0,0 +1,81 @@
|
|
1
|
+
# multisax
|
2
|
+
A module which allows you to use multiple SAX libraries transparently.
|
3
|
+
|
4
|
+
[![Build Status](https://travis-ci.org/cielavenir/multisax.png)](https://travis-ci.org/cielavenir/multisax) [![Code Climate](https://codeclimate.com/github/cielavenir/multisax.png)](https://codeclimate.com/github/cielavenir/multisax)
|
5
|
+
|
6
|
+
## Supported Ruby versions
|
7
|
+
* Ruby 1.8.7 or later
|
8
|
+
* jruby
|
9
|
+
* libxml/ox/xmlparser are not available (they use C ext).
|
10
|
+
* rubinius 2.1.1 or 2.2.3 (or later, maybe)
|
11
|
+
* 2.2.1 and 2.2.2 are not working.
|
12
|
+
* xmlparser is not available.
|
13
|
+
* (Possibly) ironruby / macruby / topaz etc
|
14
|
+
|
15
|
+
## Binary distribution
|
16
|
+
* https://rubygems.org/gems/multisax
|
17
|
+
|
18
|
+
## (Embeddable) Minimalistic Edition
|
19
|
+
* https://gist.github.com/cielavenir/7691221
|
20
|
+
* multisax_mini.rb: :libxml/:rexmlstream/:rexmlsax2
|
21
|
+
* multisax_mini2.rb: :libxml/:rexmlstream
|
22
|
+
|
23
|
+
## Benchmark
|
24
|
+
* https://gist.github.com/cielavenir/8398647
|
25
|
+
|
26
|
+
## Install
|
27
|
+
* gem install multisax
|
28
|
+
* Optional XML libraries:
|
29
|
+
* gem install ox
|
30
|
+
* gem install libxml-ruby
|
31
|
+
* gem install nokogiri
|
32
|
+
* gem install xmlparser
|
33
|
+
* To handle CDATA, I recommend expat 2.x.
|
34
|
+
* For example, OSX MacPorts users should:
|
35
|
+
* gem install xmlparser --with-expat-include=/opt/local/include --with-expat-lib=/opt/local/lib
|
36
|
+
|
37
|
+
## Usage
|
38
|
+
* Please check spec/multisax_spec.rb as an example.
|
39
|
+
* Complex usage:
|
40
|
+
|
41
|
+
```rb
|
42
|
+
require 'multisax'
|
43
|
+
listener=MultiSAX::Sax.parse(xml,Class.new{
|
44
|
+
include MultiSAX::Callbacks
|
45
|
+
def initialize
|
46
|
+
@content=Hash.new{|h,k|h[k]=[]}
|
47
|
+
@current_tag=[]
|
48
|
+
end
|
49
|
+
attr_reader :content
|
50
|
+
|
51
|
+
def sax_tag_start(tag,attrs)
|
52
|
+
@current_tag.push(tag)
|
53
|
+
end
|
54
|
+
def sax_tag_end(tag)
|
55
|
+
if (t=@current_tag.pop)!=tag then raise "xml is malformed /#{t}" end
|
56
|
+
end
|
57
|
+
def sax_cdata(text)
|
58
|
+
@content[@current_tag.last] << text
|
59
|
+
end
|
60
|
+
def sax_text(text)
|
61
|
+
text.strip!
|
62
|
+
@content[@current_tag.last] << text if text.size>0
|
63
|
+
end
|
64
|
+
def sax_comment(text)
|
65
|
+
end
|
66
|
+
}.new)
|
67
|
+
listener.content.each{...}
|
68
|
+
```
|
69
|
+
|
70
|
+
## Contributing to multisax
|
71
|
+
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
72
|
+
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
73
|
+
* Fork the project.
|
74
|
+
* Start a feature/bugfix branch.
|
75
|
+
* Commit and push until you are happy with your contribution.
|
76
|
+
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
77
|
+
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate the change to its own commit so I can cherry-pick around it.
|
78
|
+
|
79
|
+
## Copyright
|
80
|
+
Copyright (c) 2013 T. Yamada under Ruby License (2-clause BSDL or Artistic).
|
81
|
+
See LICENSE.txt for further details.
|
data/Rakefile
CHANGED
@@ -7,27 +7,32 @@ require 'rake'
|
|
7
7
|
# Clean up after gem building
|
8
8
|
require 'rake/clean'
|
9
9
|
CLEAN.include('pkg/*.gem')
|
10
|
+
CLOBBER.include('coverage')
|
10
11
|
|
11
12
|
require 'rspec/core'
|
12
13
|
require 'rspec/core/rake_task'
|
13
14
|
RSpec::Core::RakeTask.new(:spec) do |spec|
|
14
|
-
|
15
|
+
spec.pattern = FileList['spec/**/*_spec.rb']
|
15
16
|
end
|
16
17
|
|
17
18
|
RSpec::Core::RakeTask.new(:rcov) do |spec|
|
18
|
-
|
19
|
-
|
19
|
+
spec.pattern = 'spec/**/*_spec.rb'
|
20
|
+
spec.rcov = true
|
20
21
|
end
|
21
22
|
|
22
23
|
task :default => :spec
|
23
24
|
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
25
|
+
begin
|
26
|
+
require 'rdoc/task'
|
27
|
+
Rake::RDocTask.new do |rdoc|
|
28
|
+
rdoc.rdoc_dir = 'rdoc'
|
29
|
+
rdoc.title = 'multisax '+MultiSAX::VERSION
|
30
|
+
rdoc.main = 'README.md'
|
31
|
+
rdoc.rdoc_files.include('README.*')
|
32
|
+
rdoc.rdoc_files.include('LICENSE.*')
|
33
|
+
rdoc.rdoc_files.include('CHANGELOG.*')
|
34
|
+
rdoc.rdoc_files.include('lib/**/*.rb')
|
35
|
+
end
|
36
|
+
rescue Exception #LoadError # Thus rdoc generation is limited to Ruby 1.9.3+...
|
37
|
+
#Ruby 1.9.2 gives another Exception. I cannot limit to LoadError...
|
33
38
|
end
|
data/lib/multisax.rb
CHANGED
@@ -8,48 +8,93 @@
|
|
8
8
|
|
9
9
|
module MultiSAX
|
10
10
|
# VERSION string
|
11
|
-
VERSION='0.0.
|
11
|
+
VERSION='0.0.4'
|
12
12
|
|
13
13
|
# The class to handle XML libraries.
|
14
14
|
class SAX
|
15
|
-
|
16
|
-
|
15
|
+
# constructor
|
16
|
+
def initialize(*list)
|
17
|
+
@parser=nil
|
18
|
+
open(*list)
|
19
|
+
end
|
17
20
|
# Library loader.
|
18
21
|
# Arguments are list (or Array) of libraries.
|
19
22
|
# if list is empty or :XML, the following are searched (order by speed):
|
20
|
-
# :ox, :libxml, :nokogiri, :rexmlstream, :rexmlsax2
|
23
|
+
# :ox, :libxml, :xmlparser, :nokogiri, :rexmlstream, :rexmlsax2
|
21
24
|
# if list is :HTML, the following are searched (order by speed):
|
22
25
|
# :oxhtml, :nokogirihtml
|
23
26
|
# You can also specify libraries individually.
|
24
27
|
# If multiple selected, MultiSAX will try the libraries one by one and use the first usable one.
|
25
28
|
def open(*list)
|
26
29
|
return @parser if @parser
|
27
|
-
list=[:ox,:libxml,:nokogiri,:rexmlstream,:rexmlsax2] if list.size==0||list==[:XML]
|
30
|
+
list=[:ox,:libxml,:xmlparser,:nokogiri,:rexmlstream,:rexmlsax2] if list.size==0||list==[:XML]
|
28
31
|
list=[:oxhtml,:nokogirihtml] if list==[:HTML]
|
29
32
|
list.each{|e_module|
|
30
33
|
case e_module
|
31
34
|
when :ox,:oxhtml
|
32
|
-
#next if RUBY_VERSION<'1.9'
|
33
35
|
begin
|
34
36
|
require 'ox'
|
35
37
|
require 'stringio' #this should be standard module.
|
36
38
|
rescue LoadError;next end
|
37
39
|
@parser=e_module
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
40
|
+
@saxhelper=Class.new(::Ox::Sax){
|
41
|
+
def __init__(obj)
|
42
|
+
@obj=obj
|
43
|
+
@saxwrapper_tag=nil
|
44
|
+
@saxwrapper_attr={}
|
45
|
+
self
|
46
|
+
end
|
47
|
+
def end_element(tag) @obj.sax_tag_end(tag.to_s) end
|
48
|
+
def cdata(txt) @obj.sax_cdata(txt) end
|
49
|
+
def text(txt) @obj.sax_text(txt) end
|
50
|
+
def comment(txt) @obj.sax_comment(txt) end
|
51
|
+
|
52
|
+
def start_element(tag)
|
53
|
+
if @after_error
|
54
|
+
@obj.sax_tag_start(tag.to_s,{})
|
55
|
+
@after_error=false
|
56
|
+
else
|
57
|
+
# I hope provided Listener's sax_tag_start will NOT be used elsewhere.
|
58
|
+
#alias :attrs_done :attrs_done_normal
|
59
|
+
@saxwrapper_tag=tag
|
60
|
+
@saxwrapper_attr={}
|
61
|
+
end
|
62
|
+
end
|
63
|
+
def attr(name,str)
|
64
|
+
@saxwrapper_attr[name.to_s]=str
|
65
|
+
end
|
66
|
+
def attrs_done_xmldecl
|
67
|
+
@obj.sax_xmldecl(@saxwrapper_attr['version'],@saxwrapper_attr['encoding'],@saxwrapper_attr['standalone'])
|
68
|
+
end
|
69
|
+
def attrs_done_normal
|
70
|
+
@obj.sax_tag_start(@saxwrapper_tag.to_s,@saxwrapper_attr)
|
71
|
+
end
|
72
|
+
def attrs_done
|
73
|
+
@saxwrapper_tag ? attrs_done_normal : attrs_done_xmldecl
|
74
|
+
end
|
75
|
+
def error(s,i,j) @after_error=true if s.end_with?('closed but not opened') end
|
46
76
|
}
|
47
77
|
break
|
48
78
|
when :libxml
|
49
79
|
begin
|
50
80
|
require 'libxml'
|
51
81
|
rescue LoadError;next end
|
52
|
-
@parser=e_module
|
82
|
+
@parser=e_module
|
83
|
+
@saxhelper=Class.new{
|
84
|
+
include ::LibXML::XML::SaxParser::Callbacks
|
85
|
+
def __init__(obj)
|
86
|
+
@obj=obj
|
87
|
+
self
|
88
|
+
end
|
89
|
+
def on_start_element(tag,attrs) @obj.sax_tag_start(tag,attrs) end
|
90
|
+
def on_end_element(tag) @obj.sax_tag_end(tag) end
|
91
|
+
def on_characters(txt) @obj.sax_text(txt) end
|
92
|
+
def on_cdata_block(txt) @obj.sax_cdata(txt) end
|
93
|
+
def on_comment(txt) @obj.sax_comment(txt) end
|
94
|
+
#actually unused
|
95
|
+
def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
96
|
+
}
|
97
|
+
break
|
53
98
|
when :nokogiri,:nokogirihtml
|
54
99
|
#nokogiri 1.5.x are supported on Ruby 1.8.7.
|
55
100
|
#next if RUBY_VERSION<'1.9'
|
@@ -57,138 +102,99 @@ module MultiSAX
|
|
57
102
|
require 'nokogiri'
|
58
103
|
rescue LoadError;next end
|
59
104
|
@parser=e_module
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
105
|
+
@saxhelper=Class.new(::Nokogiri::XML::SAX::Document){
|
106
|
+
def __init__(obj)
|
107
|
+
@obj=obj
|
108
|
+
self
|
109
|
+
end
|
110
|
+
def start_element(tag,attrs) @obj.sax_tag_start(tag,attrs.is_a?(Array) ? Hash[*attrs.flatten(1)] : attrs) end
|
111
|
+
def end_element(tag) @obj.sax_tag_end(tag) end
|
112
|
+
def characters(txt) @obj.sax_text(txt) end
|
113
|
+
def cdata_block(txt) @obj.sax_cdata(txt) end
|
114
|
+
def comment(txt) @obj.sax_comment(txt) end
|
115
|
+
def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
116
|
+
}
|
117
|
+
break
|
118
|
+
when :xmlparser
|
119
|
+
begin
|
120
|
+
require 'xml/saxdriver'
|
121
|
+
rescue LoadError;next end
|
122
|
+
@parser=e_module
|
123
|
+
@saxhelper=Class.new(::XML::Parser){
|
124
|
+
def __init__(obj)
|
125
|
+
@obj=obj
|
126
|
+
@cdata=false
|
127
|
+
self
|
128
|
+
end
|
129
|
+
def startElement(tag,attrs) @obj.sax_tag_start(tag,attrs) end
|
130
|
+
def endElement(tag) @obj.sax_tag_end(tag) end
|
131
|
+
def comment(txt) @obj.sax_comment(txt) end
|
132
|
+
def xmlDecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
133
|
+
def character(txt)
|
134
|
+
if @cdata
|
135
|
+
@obj.sax_cdata(txt)
|
136
|
+
else
|
137
|
+
@obj.sax_text(txt)
|
138
|
+
end
|
139
|
+
end
|
140
|
+
def startCdata
|
141
|
+
@cdata=true
|
142
|
+
end
|
143
|
+
def endCdata
|
144
|
+
@cdata=false
|
145
|
+
end
|
68
146
|
}
|
69
147
|
break
|
70
148
|
when :rexmlstream
|
71
149
|
begin
|
72
|
-
require 'rexml/
|
150
|
+
require 'rexml/parsers/baseparser'
|
73
151
|
require 'rexml/parsers/streamparser'
|
74
152
|
require 'rexml/streamlistener'
|
75
153
|
rescue LoadError;next end
|
76
|
-
@parser=e_module
|
154
|
+
@parser=e_module
|
155
|
+
@saxhelper=Class.new{
|
156
|
+
include ::REXML::StreamListener
|
157
|
+
def __init__(obj)
|
158
|
+
@obj=obj
|
159
|
+
self
|
160
|
+
end
|
161
|
+
def tag_start(tag,attrs) @obj.sax_tag_start(tag,attrs) end
|
162
|
+
def tag_end(tag) @obj.sax_tag_end(tag) end
|
163
|
+
def text(txt) @obj.sax_text(txt) end
|
164
|
+
def cdata(txt) @obj.sax_cdata(txt) end
|
165
|
+
def comment(txt) @obj.sax_comment(txt) end
|
166
|
+
def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
167
|
+
}
|
168
|
+
break
|
77
169
|
when :rexmlsax2
|
78
170
|
begin
|
79
|
-
require 'rexml/document'
|
80
171
|
require 'rexml/parsers/sax2parser'
|
81
172
|
require 'rexml/sax2listener'
|
82
173
|
rescue LoadError;next end
|
83
|
-
@parser=e_module
|
174
|
+
@parser=e_module
|
175
|
+
@saxhelper=Class.new{
|
176
|
+
include ::REXML::SAX2Listener
|
177
|
+
def __init__(obj)
|
178
|
+
@obj=obj
|
179
|
+
self
|
180
|
+
end
|
181
|
+
def start_element(uri,tag,qname,attrs) @obj.sax_tag_start(qname,attrs) end
|
182
|
+
def end_element(uri,tag,qname) @obj.sax_tag_end(qname) end
|
183
|
+
def characters(txt) @obj.sax_text(txt) end
|
184
|
+
def cdata(txt) @obj.sax_cdata(txt) end
|
185
|
+
def comment(txt) @obj.sax_comment(txt) end
|
186
|
+
def xmldecl(version,encoding,standalone) @obj.sax_xmldecl(version,encoding,standalone) end
|
187
|
+
}
|
188
|
+
break
|
84
189
|
end
|
85
190
|
}
|
86
191
|
return @parser
|
87
192
|
end
|
88
193
|
# Reset MultiSAX state so that you can re-open() another library.
|
89
|
-
def reset() @parser=nil
|
194
|
+
def reset() @parser=nil end
|
90
195
|
# Returns which module is actually chosen.
|
91
196
|
def parser() @parser end
|
92
197
|
|
93
|
-
private
|
94
|
-
#(private) Patches listener to accept library-specific APIs.
|
95
|
-
def method_mapping(listener)
|
96
|
-
#raise "MultiSAX::Sax open first" if !@parser
|
97
|
-
case @parser
|
98
|
-
when :ox,:oxhtml
|
99
|
-
saxmodule=@saxmodule
|
100
|
-
listener.instance_eval{
|
101
|
-
extend saxmodule
|
102
|
-
@saxwrapper_tag=nil
|
103
|
-
@saxwrapper_attr={}
|
104
|
-
def start_element(tag)
|
105
|
-
if @after_error
|
106
|
-
sax_tag_start(tag.to_s,{})
|
107
|
-
@after_error=false
|
108
|
-
else
|
109
|
-
# I hope provided Listener's sax_tag_start will NOT be used elsewhere.
|
110
|
-
#alias :attrs_done :attrs_done_normal
|
111
|
-
@saxwrapper_tag=tag
|
112
|
-
@saxwrapper_attr={}
|
113
|
-
end
|
114
|
-
end
|
115
|
-
# These "instance methods" are actually injected to listener class using instance_eval.
|
116
|
-
# i.e. not APIs. You cannot call these methods from outside.
|
117
|
-
def attr(name,str)
|
118
|
-
@saxwrapper_attr[name.to_s]=str
|
119
|
-
end
|
120
|
-
#--
|
121
|
-
#alias :attr_value :attr
|
122
|
-
#++
|
123
|
-
def attrs_done_xmldecl
|
124
|
-
sax_xmldecl(@saxwrapper_attr['version'],@saxwrapper_attr['encoding'],@saxwrapper_attr['standalone'])
|
125
|
-
end
|
126
|
-
def attrs_done_normal
|
127
|
-
sax_tag_start(@saxwrapper_tag.to_s,@saxwrapper_attr)
|
128
|
-
end
|
129
|
-
#alias :attrs_done :attrs_done_xmldecl
|
130
|
-
def attrs_done
|
131
|
-
@saxwrapper_tag ? attrs_done_normal : attrs_done_xmldecl
|
132
|
-
end
|
133
|
-
def error(s,i,j) @after_error=true if s.end_with?('closed but not opened') end
|
134
|
-
def end_element(tag) sax_tag_end(tag.to_s) end
|
135
|
-
alias :cdata :sax_cdata
|
136
|
-
alias :text :sax_text
|
137
|
-
#--
|
138
|
-
#alias :value :sax_text
|
139
|
-
#++
|
140
|
-
alias :comment :sax_comment
|
141
|
-
}
|
142
|
-
when :libxml
|
143
|
-
listener.instance_eval{
|
144
|
-
extend LibXML::XML::SaxParser::Callbacks
|
145
|
-
alias :on_start_element_ns :sax_start_element_namespace_libxml
|
146
|
-
#alias :on_start_element :sax_tag_start
|
147
|
-
alias :on_end_element_ns :sax_end_element_namespace
|
148
|
-
#alias :on_end_element :sax_tag_end
|
149
|
-
alias :on_cdata_block :sax_cdata
|
150
|
-
alias :on_characters :sax_text
|
151
|
-
alias :on_comment :sax_comment
|
152
|
-
#alias :xmldecl :sax_xmldecl
|
153
|
-
}
|
154
|
-
when :nokogiri,:nokogirihtml
|
155
|
-
saxmodule=@saxmodule
|
156
|
-
listener.instance_eval{
|
157
|
-
extend saxmodule
|
158
|
-
alias :start_element_namespace :sax_start_element_namespace_nokogiri
|
159
|
-
def start_element(tag,attrs) sax_tag_start(tag,attrs.is_a?(Array) ? Hash[*attrs.flatten(1)] : attrs) end
|
160
|
-
alias :end_element_namespace :sax_end_element_namespace
|
161
|
-
alias :end_element :sax_tag_end
|
162
|
-
alias :cdata_block :sax_cdata
|
163
|
-
alias :characters :sax_text
|
164
|
-
alias :comment :sax_comment
|
165
|
-
alias :xmldecl :sax_xmldecl
|
166
|
-
}
|
167
|
-
when :rexmlstream
|
168
|
-
listener.instance_eval{
|
169
|
-
extend REXML::StreamListener
|
170
|
-
alias :tag_start :sax_tag_start
|
171
|
-
alias :tag_end :sax_tag_end
|
172
|
-
alias :cdata :sax_cdata
|
173
|
-
alias :text :sax_text
|
174
|
-
alias :comment :sax_comment
|
175
|
-
alias :xmldecl :sax_xmldecl
|
176
|
-
}
|
177
|
-
when :rexmlsax2
|
178
|
-
listener.instance_eval{
|
179
|
-
extend REXML::SAX2Listener
|
180
|
-
def start_element(uri,tag,qname,attrs) sax_tag_start(qname,attrs) end
|
181
|
-
def end_element(uri,tag,qname) sax_tag_end(qname) end
|
182
|
-
alias :cdata :sax_cdata
|
183
|
-
alias :characters :sax_text
|
184
|
-
alias :comment :sax_comment
|
185
|
-
alias :xmldecl :sax_xmldecl
|
186
|
-
}
|
187
|
-
end
|
188
|
-
listener
|
189
|
-
end
|
190
|
-
|
191
|
-
public
|
192
198
|
# The main parsing method.
|
193
199
|
# Listener can be Class.new{include MultiSAX::Callbacks}.new. Returns the listener after SAX is applied.
|
194
200
|
# If you have not called open(), this will call it using default value (all libraries).
|
@@ -196,32 +202,34 @@ module MultiSAX
|
|
196
202
|
# SAX's listeners are usually modified destructively.
|
197
203
|
# So instances shouldn't be provided.
|
198
204
|
def parse(source,listener)
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
205
|
+
if !@parser && !open
|
206
|
+
raise "Failed to open SAX library. REXML, which is a standard Ruby module, might be also corrupted."
|
207
|
+
end
|
208
|
+
saxhelper=@saxhelper.new.__init__(listener)
|
203
209
|
if source.is_a?(String)
|
204
210
|
case @parser
|
205
|
-
when :ox then Ox.sax_parse(
|
206
|
-
when :oxhtml then Ox.sax_parse(
|
207
|
-
when :libxml then parser=LibXML::XML::SaxParser.string(source);parser.callbacks
|
208
|
-
when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(
|
209
|
-
when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(
|
210
|
-
when :
|
211
|
-
when :
|
211
|
+
when :ox then Ox.sax_parse(saxhelper,StringIO.new(source),:convert_special=>true)
|
212
|
+
when :oxhtml then Ox.sax_parse(saxhelper,StringIO.new(source),:convert_special=>true,:smart=>true)
|
213
|
+
when :libxml then parser=LibXML::XML::SaxParser.string(source);parser.callbacks=saxhelper;parser.parse
|
214
|
+
when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
|
215
|
+
when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source)
|
216
|
+
when :xmlparser then saxhelper.parse(source)
|
217
|
+
when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
|
218
|
+
when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
|
212
219
|
end
|
213
220
|
else
|
214
221
|
case @parser
|
215
|
-
when :ox then Ox.sax_parse(
|
216
|
-
when :oxhtml then Ox.sax_parse(
|
217
|
-
when :libxml then parser=LibXML::XML::SaxParser.io(source);parser.callbacks
|
218
|
-
when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(
|
219
|
-
when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(
|
220
|
-
when :
|
221
|
-
when :
|
222
|
+
when :ox then Ox.sax_parse(saxhelper,source,:convert_special=>true)
|
223
|
+
when :oxhtml then Ox.sax_parse(saxhelper,source,:convert_special=>true,:smart=>true)
|
224
|
+
when :libxml then parser=LibXML::XML::SaxParser.io(source);parser.callbacks=saxhelper;parser.parse
|
225
|
+
when :nokogiri then parser=Nokogiri::XML::SAX::Parser.new(saxhelper);parser.parse(source)
|
226
|
+
when :nokogirihtml then parser=Nokogiri::HTML::SAX::Parser.new(saxhelper);parser.parse(source.read) # fixme: nokogirihtml IO doesn't allow errors.
|
227
|
+
when :xmlparser then saxhelper.parse(source)
|
228
|
+
when :rexmlstream then REXML::Parsers::StreamParser.new(source,saxhelper).parse
|
229
|
+
when :rexmlsax2 then parser=REXML::Parsers::SAX2Parser.new(source);parser.listen(saxhelper);parser.parse
|
222
230
|
end
|
223
231
|
end
|
224
|
-
|
232
|
+
listener
|
225
233
|
end
|
226
234
|
|
227
235
|
# Parses file as XML. Error handling might be changed in the future.
|
@@ -256,40 +264,6 @@ module MultiSAX
|
|
256
264
|
# MultiSAX callbacks.
|
257
265
|
# MultiSAX::SAX listener should include this module.
|
258
266
|
module Callbacks
|
259
|
-
# Cited from Nokogiri to convert Nokogiri::XML::SAX::Document into module.
|
260
|
-
# https://github.com/sparklemotion/nokogiri/blob/master/lib/nokogiri/xml/sax/document.rb
|
261
|
-
def sax_start_element_namespace_nokogiri name, attrs = [], prefix = nil, uri = nil, ns = []
|
262
|
-
# Deal with SAX v1 interface
|
263
|
-
name = [prefix, name].compact.join(':')
|
264
|
-
# modified in 0.0.2
|
265
|
-
attributes = {}
|
266
|
-
ns.each{|ns_prefix,ns_uri|
|
267
|
-
attributes[['xmlns', ns_prefix].compact.join(':')]=ns_uri
|
268
|
-
}
|
269
|
-
attrs.each{|attr|
|
270
|
-
attributes[[attr.prefix, attr.localname].compact.join(':')]=attr.value
|
271
|
-
}
|
272
|
-
sax_tag_start name, attributes
|
273
|
-
end
|
274
|
-
# libxml namespace handler
|
275
|
-
def sax_start_element_namespace_libxml name, attrs, prefix = nil, uri = nil, ns = []
|
276
|
-
# Deal with SAX v1 interface
|
277
|
-
name = [prefix, name].compact.join(':')
|
278
|
-
# modified in 0.0.2
|
279
|
-
attributes = {}
|
280
|
-
ns.each{|ns_prefix,ns_uri|
|
281
|
-
attributes[['xmlns', ns_prefix].compact.join(':')]=ns_uri
|
282
|
-
}
|
283
|
-
attrs.each{|k,v|
|
284
|
-
attributes[k]=v
|
285
|
-
}
|
286
|
-
sax_tag_start name, attributes
|
287
|
-
end
|
288
|
-
# Cited from Nokogiri
|
289
|
-
def sax_end_element_namespace name, prefix = nil, uri = nil
|
290
|
-
# Deal with SAX v1 interface
|
291
|
-
sax_tag_end [prefix, name].compact.join(':')
|
292
|
-
end
|
293
267
|
# Start of tag
|
294
268
|
def sax_tag_start(tag,attrs) end
|
295
269
|
# End of tag
|
data/multisax.gemspec
CHANGED
@@ -6,22 +6,23 @@ Gem::Specification.new do |spec|
|
|
6
6
|
spec.version = MultiSAX::VERSION
|
7
7
|
spec.authors = ["cielavenir"]
|
8
8
|
spec.email = ["cielartisan@gmail.com"]
|
9
|
-
spec.description = "Ruby Gem to handle multiple SAX libraries: ox/libxml/nokogiri/rexml"
|
9
|
+
spec.description = "Ruby Gem to handle multiple SAX libraries: ox/libxml/nokogiri/xmlparser(expat)/rexml"
|
10
10
|
spec.summary = "Ruby Gem to handle multiple SAX libraries"
|
11
11
|
spec.homepage = "http://github.com/cielavenir/multisax"
|
12
12
|
spec.license = "Ruby License (2-clause BSDL or Artistic)"
|
13
13
|
|
14
|
-
spec.files = `git ls-files`.split($/)
|
15
|
-
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
16
|
-
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
17
|
-
spec.extra_rdoc_files = [
|
14
|
+
spec.files = `git ls-files`.split($/) + [
|
18
15
|
"LICENSE.txt",
|
19
|
-
"README.rdoc",
|
20
|
-
|
16
|
+
"README.md",
|
17
|
+
"CHANGELOG.md",
|
21
18
|
]
|
19
|
+
spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
|
20
|
+
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
21
|
+
|
22
22
|
spec.require_paths = ["lib"]
|
23
23
|
|
24
24
|
spec.add_development_dependency "bundler", ">= 1.0"
|
25
25
|
spec.add_development_dependency "rake"
|
26
26
|
spec.add_development_dependency "rspec"
|
27
|
+
spec.requirements << "Optional dependencies: libxml-ruby, nokogiri, ox, xmlparser"
|
27
28
|
end
|
data/spec/multisax_spec.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
require File.expand_path(File.dirname(__FILE__)
|
1
|
+
require File.expand_path(File.dirname(__FILE__)+'/spec_helper')
|
2
2
|
|
3
3
|
class MultiSAXTester
|
4
4
|
include MultiSAX::Callbacks
|
@@ -17,6 +17,10 @@ class MultiSAXTester
|
|
17
17
|
text.strip!
|
18
18
|
@result<<text if text.size>0
|
19
19
|
end
|
20
|
+
def sax_cdata(text)
|
21
|
+
text.strip!
|
22
|
+
@result<<text if text.size>0
|
23
|
+
end
|
20
24
|
def sax_xmldecl(version,encoding,standalone)
|
21
25
|
@xmlencoding=encoding
|
22
26
|
end
|
@@ -26,10 +30,10 @@ end
|
|
26
30
|
input_xml=<<"EOM"
|
27
31
|
<?xml version="1.0" encoding="UTF-8"?>
|
28
32
|
<ns xmlns:zzz="http://example.com/">
|
29
|
-
<zzz:hello><span class="foo">world</span></zzz:hello>
|
33
|
+
<zzz:hello><![CDATA[sax$]]><span class="foo">world</span></zzz:hello>
|
30
34
|
</ns>
|
31
35
|
EOM
|
32
|
-
xml_answer=['ns','zzz:hello','span','world','span','zzz:hello','ns']
|
36
|
+
xml_answer=['ns','zzz:hello','sax$','span','world','span','zzz:hello','ns']
|
33
37
|
|
34
38
|
describe "[XML] MultiSAX::Sax.parse(String)" do
|
35
39
|
it "fails on :unknown" do
|
@@ -81,6 +85,15 @@ describe "[XML] MultiSAX::Sax.parse(String)" do
|
|
81
85
|
listener.attrib.should eq 'foo'
|
82
86
|
listener.xmlencoding.should eq 'UTF-8'
|
83
87
|
end
|
88
|
+
it "uses :xmlparser" do
|
89
|
+
MultiSAX::Sax.reset
|
90
|
+
MultiSAX::Sax.open(:xmlparser)
|
91
|
+
MultiSAX::Sax.parser.should eq :xmlparser
|
92
|
+
listener=MultiSAX::Sax.parse(input_xml,MultiSAXTester.new)
|
93
|
+
listener.result.should eq xml_answer
|
94
|
+
listener.attrib.should eq 'foo'
|
95
|
+
listener.xmlencoding.should eq 'UTF-8'
|
96
|
+
end
|
84
97
|
end
|
85
98
|
|
86
99
|
describe "[XML] MultiSAX::Sax.parse(IO)" do
|
@@ -129,17 +142,28 @@ describe "[XML] MultiSAX::Sax.parse(IO)" do
|
|
129
142
|
listener.attrib.should eq 'foo'
|
130
143
|
listener.xmlencoding.should eq 'UTF-8'
|
131
144
|
end
|
145
|
+
it "uses :xmlparser" do
|
146
|
+
MultiSAX::Sax.reset
|
147
|
+
MultiSAX::Sax.open(:xmlparser)
|
148
|
+
MultiSAX::Sax.parser.should eq :xmlparser
|
149
|
+
listener=MultiSAX::Sax.parse(StringIO.new(input_xml),MultiSAXTester.new)
|
150
|
+
listener.result.should eq xml_answer
|
151
|
+
listener.attrib.should eq 'foo'
|
152
|
+
listener.xmlencoding.should eq 'UTF-8'
|
153
|
+
end
|
132
154
|
end
|
133
155
|
|
134
156
|
# broken intentionally
|
157
|
+
# nokogiri-java wants head tag...
|
135
158
|
input_html=<<"EOM"
|
136
159
|
<html>
|
160
|
+
<head></head>
|
137
161
|
<body>
|
138
162
|
<span class="foo">hello
|
139
163
|
</body>
|
140
164
|
</html>
|
141
165
|
EOM
|
142
|
-
html_answer=['html','body','span','hello','span','body','html']
|
166
|
+
html_answer=['html','head','head','body','span','hello','span','body','html']
|
143
167
|
|
144
168
|
describe "[HTML] MultiSAX::Sax.parse(String)" do
|
145
169
|
it "uses :oxhtml" do
|
data/spec/spec_helper.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
-
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
3
1
|
require 'rspec'
|
4
|
-
|
5
|
-
|
2
|
+
RSpec.configure{|config|
|
3
|
+
config.color=true
|
4
|
+
}
|
6
5
|
|
7
|
-
|
6
|
+
require 'stringio'
|
7
|
+
if RUBY_VERSION<'1.9' #gee, :rexmlsax2 on Ruby 1.8 dislikes StringIO.
|
8
8
|
class StringIO
|
9
9
|
def stat
|
10
10
|
Class.new{
|
@@ -14,10 +14,11 @@ if RUBY_VERSION<'1.9' #gee, StringIO needs to be hacked.
|
|
14
14
|
end
|
15
15
|
end
|
16
16
|
|
17
|
-
|
18
|
-
|
19
|
-
|
17
|
+
if !defined?(RUBY_ENGINE)||RUBY_ENGINE=='ruby'
|
18
|
+
require 'simplecov'
|
19
|
+
SimpleCov.start do
|
20
|
+
add_filter 'spec'
|
21
|
+
end
|
22
|
+
end
|
20
23
|
|
21
|
-
|
22
|
-
config.color=true
|
23
|
-
}
|
24
|
+
require File.expand_path(File.dirname(__FILE__)+'/../lib/multisax')
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: multisax
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- cielavenir
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2014-01-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,22 +52,20 @@ dependencies:
|
|
52
52
|
- - '>='
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
-
description: 'Ruby Gem to handle multiple SAX libraries: ox/libxml/nokogiri/rexml'
|
55
|
+
description: 'Ruby Gem to handle multiple SAX libraries: ox/libxml/nokogiri/xmlparser(expat)/rexml'
|
56
56
|
email:
|
57
57
|
- cielartisan@gmail.com
|
58
58
|
executables: []
|
59
59
|
extensions: []
|
60
|
-
extra_rdoc_files:
|
61
|
-
- LICENSE.txt
|
62
|
-
- README.rdoc
|
63
|
-
- CHANGELOG.rdoc
|
60
|
+
extra_rdoc_files: []
|
64
61
|
files:
|
65
62
|
- .gitignore
|
66
|
-
- CHANGELOG.rdoc
|
63
|
+
- .travis.yml
|
64
|
+
- CHANGELOG.md
|
67
65
|
- Gemfile
|
68
66
|
- Gemfile.lock
|
69
67
|
- LICENSE.txt
|
70
|
-
- README.rdoc
|
68
|
+
- README.md
|
71
69
|
- Rakefile
|
72
70
|
- lib/multisax.rb
|
73
71
|
- multisax.gemspec
|
@@ -91,7 +89,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
91
89
|
- - '>='
|
92
90
|
- !ruby/object:Gem::Version
|
93
91
|
version: '0'
|
94
|
-
requirements:
|
92
|
+
requirements:
|
93
|
+
- 'Optional dependencies: libxml-ruby, nokogiri, ox, xmlparser'
|
95
94
|
rubyforge_project:
|
96
95
|
rubygems_version: 2.0.3
|
97
96
|
signing_key:
|
data/README.rdoc
DELETED
@@ -1,51 +0,0 @@
|
|
1
|
-
= multisax
|
2
|
-
|
3
|
-
== Install
|
4
|
-
* gem install multisax
|
5
|
-
* Optional XML libraries:
|
6
|
-
* gem install ox
|
7
|
-
* gem install libxml-ruby
|
8
|
-
* gem install nokogiri
|
9
|
-
|
10
|
-
== Usage
|
11
|
-
* Please check spec/multisax.spec as an example.
|
12
|
-
* Complex usage:
|
13
|
-
require 'multisax'
|
14
|
-
listener=MultiSAX::Sax.parse(xml,Class.new{
|
15
|
-
include MultiSAX::Callbacks
|
16
|
-
def initialize
|
17
|
-
@content=Hash.new{|h,k|h[k]=[]}
|
18
|
-
@current_tag=[]
|
19
|
-
end
|
20
|
-
attr_reader :content
|
21
|
-
|
22
|
-
def sax_tag_start(tag,attrs)
|
23
|
-
@current_tag.push(tag)
|
24
|
-
end
|
25
|
-
def sax_tag_end(tag)
|
26
|
-
if (t=@current_tag.pop)!=tag then raise "xml is malformed /#{t}" end
|
27
|
-
end
|
28
|
-
def sax_cdata(text)
|
29
|
-
@content[@current_tag.last] << text
|
30
|
-
end
|
31
|
-
def sax_text(text)
|
32
|
-
text.strip!
|
33
|
-
@content[@current_tag.last] << text if text.size>0
|
34
|
-
end
|
35
|
-
def sax_comment(text)
|
36
|
-
end
|
37
|
-
}.new)
|
38
|
-
listener.content.each{...}
|
39
|
-
|
40
|
-
== Contributing to multisax
|
41
|
-
* Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
|
42
|
-
* Check out the issue tracker to make sure someone already hasn't requested it and/or contributed it.
|
43
|
-
* Fork the project.
|
44
|
-
* Start a feature/bugfix branch.
|
45
|
-
* Commit and push until you are happy with your contribution.
|
46
|
-
* Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
|
47
|
-
* Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or is otherwise necessary, that is fine, but please isolate to its own commit so I can cherry-pick around it.
|
48
|
-
|
49
|
-
== Copyright
|
50
|
-
Copyright (c) 2013 T. Yamada under Ruby License (2-clause BSDL or Artistic).
|
51
|
-
See LICENSE.txt for further details.
|