yahoo_jp_transit 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +9 -2
- data/lib/yahoo_jp_transit.rb +141 -25
- metadata +3 -3
data/README.txt
CHANGED
@@ -4,14 +4,21 @@ Copyright (c) 2007 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
|
4
4
|
|
5
5
|
== Example
|
6
6
|
|
7
|
+
$KCODE = 's'
|
7
8
|
require 'yahoo_jp_transit'
|
8
9
|
|
9
10
|
searcher = YahooJpTransit::Searcher.new
|
10
|
-
|
11
|
+
|
12
|
+
selected = searcher.select('大宮', '東京')
|
13
|
+
puts selected
|
14
|
+
|
15
|
+
from = selected.from[0]
|
16
|
+
to = selected.to[0]
|
17
|
+
|
18
|
+
result = searcher.query(from, to)
|
11
19
|
|
12
20
|
exit if result.empty?
|
13
21
|
|
14
22
|
begin
|
15
23
|
puts result
|
16
24
|
end while result = result.next_page
|
17
|
-
|
data/lib/yahoo_jp_transit.rb
CHANGED
@@ -5,10 +5,18 @@
|
|
5
5
|
#
|
6
6
|
# == Example
|
7
7
|
#
|
8
|
+
# $KCODE = 's'
|
8
9
|
# require 'yahoo_jp_transit'
|
9
10
|
#
|
10
11
|
# searcher = YahooJpTransit::Searcher.new
|
11
|
-
#
|
12
|
+
#
|
13
|
+
# selected = searcher.select('大宮', '東京')
|
14
|
+
# puts selected
|
15
|
+
#
|
16
|
+
# from = selected.from[0]
|
17
|
+
# to = selected.to[0]
|
18
|
+
#
|
19
|
+
# result = searcher.query(from, to)
|
12
20
|
#
|
13
21
|
# exit if result.empty?
|
14
22
|
#
|
@@ -47,9 +55,26 @@ module YahooJpTransit
|
|
47
55
|
end
|
48
56
|
|
49
57
|
def query(from, to, options = {})
|
58
|
+
options = prepare_options(:result, from, to, options)
|
59
|
+
|
60
|
+
open("#{@@url}?#{query_string(options)}", { :proxy => @proxy }) do |f|
|
61
|
+
ResultParser.new(f, self, from, to, options).parse
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def select(from, to, options = {})
|
66
|
+
options = prepare_options(:select, from, to, options)
|
67
|
+
|
68
|
+
open("#{@@url}?#{query_string(options)}", { :proxy => @proxy }) do |f|
|
69
|
+
SelectParser.new(f, self).parse
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
def prepare_options(htmb, from, to, options)
|
50
75
|
options = options.dup
|
51
76
|
defaults = {
|
52
|
-
:val_htmb =>
|
77
|
+
:val_htmb => htmb,
|
53
78
|
:from => from,
|
54
79
|
:p => to,
|
55
80
|
:sort => 0,
|
@@ -76,12 +101,9 @@ module YahooJpTransit
|
|
76
101
|
defaults.delete(:val_dsmask_air)
|
77
102
|
end
|
78
103
|
|
79
|
-
|
80
|
-
|
81
|
-
end
|
82
|
-
end # def query
|
104
|
+
return defaults.merge(options)
|
105
|
+
end
|
83
106
|
|
84
|
-
private
|
85
107
|
def query_string(params)
|
86
108
|
params.map {|k, v|
|
87
109
|
k = CGI.escape("#{k}")
|
@@ -103,7 +125,7 @@ module YahooJpTransit
|
|
103
125
|
end
|
104
126
|
end # class Searcher
|
105
127
|
|
106
|
-
class
|
128
|
+
class ResultParser
|
107
129
|
def initialize(input_stream, searcher, from, to, search_options)
|
108
130
|
@input_stream = input_stream
|
109
131
|
@searcher = searcher
|
@@ -174,19 +196,19 @@ module YahooJpTransit
|
|
174
196
|
private
|
175
197
|
def parse_result_header(header_class)
|
176
198
|
kwargs = {}
|
177
|
-
kwargs[:start] = search('"start') {|l| l.match(/>([^<]+)</e)[1] }
|
178
|
-
kwargs[:reach] = search('"reach') {|l| l.match(/>([^<]+)</e)[1] }
|
179
|
-
kwargs[:lapse] = search('"result_lapse') do |l|
|
199
|
+
kwargs[:start] = search('"start', '"result_head_right_cont"') {|l| l.match(/>([^<]+)</e)[1] }
|
200
|
+
kwargs[:reach] = search('"reach', '"result_head_right_cont"') {|l| l.match(/>([^<]+)</e)[1] }
|
201
|
+
kwargs[:lapse] = search('"result_lapse', '<table') do |l|
|
180
202
|
l.match(%r|>(.+)</div>|e)[1].gsub(%r|</?span[^>]*>|e, '')
|
181
203
|
end
|
182
|
-
kwargs[:distance] = search('"result_distance') {|l| l.match(/>([^<]+)</e)[1] }
|
183
|
-
kwargs[:passage] = search('"result_passage') do |l|
|
204
|
+
kwargs[:distance] = search('"result_distance', '<table') {|l| l.match(/>([^<]+)</e)[1] }
|
205
|
+
kwargs[:passage] = search('"result_passage', '<table') do |l|
|
184
206
|
l.match(%r|>(.+)</div>|e)[1].gsub(%r|</?span[^>]*>|e, '')
|
185
207
|
end
|
186
|
-
kwargs[:connection] = search('"result_connection') do |l|
|
208
|
+
kwargs[:connection] = search('"result_connection', '<table') do |l|
|
187
209
|
l.match(%r|>(.+)</div>|e)[1].gsub(%r|</?span[^>]*>|e, '')
|
188
210
|
end
|
189
|
-
kwargs[:pass] = search('"result_pass') do |l|
|
211
|
+
kwargs[:pass] = search('"result_pass', '<table', nil) do |l|
|
190
212
|
l.match(%r|>(.+)</div>|e)[1].gsub(%r|</?em>|e, '')
|
191
213
|
end
|
192
214
|
|
@@ -201,15 +223,15 @@ module YahooJpTransit
|
|
201
223
|
|
202
224
|
def parse_result_path(path_class, out)
|
203
225
|
kwargs = {}
|
204
|
-
kwargs[:station] = search(['[', "\241\316"]) {|l| l.match(%r|<em>(.+)</em>|e)[1] }
|
226
|
+
kwargs[:station] = search(['[', "\241\316"], "<!--\265\376-->") {|l| l.match(%r|<em>(.+)</em>|e)[1] }
|
205
227
|
|
206
228
|
line = ''
|
207
|
-
line = @input_stream.gets until line["
|
229
|
+
line = @input_stream.gets until line["<!--\265\376-->"]
|
208
230
|
line = @input_stream.gets until line =~ /\A<tr>/e || line =~ %r|\A</div>|e
|
209
231
|
|
210
232
|
if has_next = (line =~ /\A<tr>/e)
|
211
|
-
kwargs[:time] = search('<small>') {|l| l.match(%r|<small>([^<]+)</small>|e)[1] }
|
212
|
-
kwargs[:course], kwargs[:fare] = search('<small>') do |l|
|
233
|
+
kwargs[:time] = search('<small>', "<!--\265\376-->") {|l| l.match(%r|<small>([^<]+)</small>|e)[1] }
|
234
|
+
kwargs[:course], kwargs[:fare] = search('<small>', "<!--\265\376-->") do |l|
|
213
235
|
desc = l.match(%r|>([^<]+)</font>|e)[1]
|
214
236
|
fare = (m = l.match(%r|>([^<]+)</td>|e)) ? m[1] : nil
|
215
237
|
[desc, fare]
|
@@ -246,22 +268,86 @@ module YahooJpTransit
|
|
246
268
|
return errmsg
|
247
269
|
end
|
248
270
|
|
249
|
-
def search(strs, limit =
|
271
|
+
def search(strs, limit, default = false)
|
250
272
|
if strs.kind_of?(String)
|
251
273
|
strs = [strs]
|
252
274
|
end
|
253
275
|
|
254
|
-
limit
|
255
|
-
line = @input_stream.gets
|
256
|
-
|
276
|
+
until (line = @input_stream.gets)[limit]
|
257
277
|
if strs.any? {|str| line[str] }
|
258
278
|
return yield(line)
|
259
279
|
end
|
260
280
|
end
|
261
281
|
|
262
|
-
|
282
|
+
if default == false
|
283
|
+
raise "cannot find strings: #{strs.map{|i| i.inspect }.join(', ')}"
|
284
|
+
else
|
285
|
+
return default
|
286
|
+
end
|
287
|
+
end
|
288
|
+
end # class ResultParser
|
289
|
+
|
290
|
+
class SelectParser
|
291
|
+
def initialize(input_stream, searcher)
|
292
|
+
@input_stream = input_stream
|
293
|
+
@searcher = searcher
|
294
|
+
end
|
295
|
+
|
296
|
+
def parse
|
297
|
+
from = []
|
298
|
+
to = []
|
299
|
+
|
300
|
+
@input_stream.each do |line|
|
301
|
+
raise(SearchError, parse_error) if line['"error_container"']
|
302
|
+
break if line["<h3>2. \222T\215\365\223\372\216\236</h3>"]
|
303
|
+
|
304
|
+
if line["<em>\275\320\310\257\261\330</em>"]
|
305
|
+
parse_options(from)
|
306
|
+
end
|
307
|
+
|
308
|
+
if line["<em>\305\376\303\345\261\330</em>"]
|
309
|
+
parse_options(to)
|
310
|
+
end
|
311
|
+
end
|
312
|
+
|
313
|
+
selected = Selected.new(from, to)
|
314
|
+
|
315
|
+
if @searcher.kcode
|
316
|
+
selected.kconv(@searcher.kcode)
|
317
|
+
end
|
318
|
+
|
319
|
+
return selected
|
320
|
+
end
|
321
|
+
|
322
|
+
private
|
323
|
+
def parse_options(list)
|
324
|
+
until (line = @input_stream.gets)['</select>']
|
325
|
+
if (m = %r|^<option[^>]*>([^<]+)</option>|.match(line))
|
326
|
+
list << m[1]
|
327
|
+
end
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
def parse_error
|
332
|
+
errs = []
|
333
|
+
|
334
|
+
@input_stream.each do |line|
|
335
|
+
break if line['</div>']
|
336
|
+
|
337
|
+
if line["\241\246"]
|
338
|
+
errs << line.gsub("\241\246", '').gsub(%r|</?[^>]+>|e, '').strip
|
339
|
+
end
|
340
|
+
end
|
341
|
+
|
342
|
+
if (errmsg = errs.join(', ')).empty?
|
343
|
+
errmsg = 'search error'
|
344
|
+
elsif @searcher.kcode
|
345
|
+
errmsg = Kconv.kconv(errmsg, @searcher.kcode, Kconv::EUC)
|
346
|
+
end
|
347
|
+
|
348
|
+
return errmsg
|
263
349
|
end
|
264
|
-
end # class
|
350
|
+
end # class SelectParser
|
265
351
|
|
266
352
|
class Result
|
267
353
|
class Header
|
@@ -347,4 +433,34 @@ module YahooJpTransit
|
|
347
433
|
to_yaml
|
348
434
|
end
|
349
435
|
end # class Result
|
436
|
+
|
437
|
+
class Selected
|
438
|
+
attr_reader :from, :to
|
439
|
+
|
440
|
+
def initialize(from, to)
|
441
|
+
@from = from
|
442
|
+
@to = to
|
443
|
+
end
|
444
|
+
|
445
|
+
def kconv(kcode)
|
446
|
+
@from = @from.map {|i| Kconv.kconv(i, kcode, Kconv::EUC) }
|
447
|
+
@to = @to.map {|i| Kconv.kconv(i, kcode, Kconv::EUC) }
|
448
|
+
end
|
449
|
+
|
450
|
+
def to_yaml
|
451
|
+
yaml = ['---']
|
452
|
+
|
453
|
+
yaml << 'from:'
|
454
|
+
@from.each {|i| yaml << " - #{i}" }
|
455
|
+
|
456
|
+
yaml << 'to:'
|
457
|
+
@to.each {|i| yaml << " - #{i}" }
|
458
|
+
|
459
|
+
yaml.join($/)
|
460
|
+
end
|
461
|
+
|
462
|
+
def to_s
|
463
|
+
to_yaml
|
464
|
+
end
|
465
|
+
end # class Selected
|
350
466
|
end # module YahooJpTransit
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.4
|
3
3
|
specification_version: 1
|
4
4
|
name: yahoo_jp_transit
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
7
|
-
date: 2007-10-
|
6
|
+
version: 0.1.1
|
7
|
+
date: 2007-10-24 00:00:00 +09:00
|
8
8
|
summary: Scraping library for Yahoo! route information.(http://transit.yahoo.co.jp/)
|
9
9
|
require_paths:
|
10
10
|
- lib
|