yahoo_jp_transit 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +9 -2
- data/lib/yahoo_jp_transit.rb +141 -25
- metadata +3 -3
data/README.txt
CHANGED
@@ -4,14 +4,21 @@ Copyright (c) 2007 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
|
4
4
|
|
5
5
|
== Example
|
6
6
|
|
7
|
+
$KCODE = 's'
|
7
8
|
require 'yahoo_jp_transit'
|
8
9
|
|
9
10
|
searcher = YahooJpTransit::Searcher.new
|
10
|
-
|
11
|
+
|
12
|
+
selected = searcher.select('大宮', '東京')
|
13
|
+
puts selected
|
14
|
+
|
15
|
+
from = selected.from[0]
|
16
|
+
to = selected.to[0]
|
17
|
+
|
18
|
+
result = searcher.query(from, to)
|
11
19
|
|
12
20
|
exit if result.empty?
|
13
21
|
|
14
22
|
begin
|
15
23
|
puts result
|
16
24
|
end while result = result.next_pagerequire
|
17
|
-
|
data/lib/yahoo_jp_transit.rb
CHANGED
@@ -5,10 +5,18 @@
|
|
5
5
|
#
|
6
6
|
# == Example
|
7
7
|
#
|
8
|
+
# $KCODE = 's'
|
8
9
|
# require 'yahoo_jp_transit'
|
9
10
|
#
|
10
11
|
# searcher = YahooJpTransit::Searcher.new
|
11
|
-
#
|
12
|
+
#
|
13
|
+
# selected = searcher.select('大宮', '東京')
|
14
|
+
# puts selected
|
15
|
+
#
|
16
|
+
# from = selected.from[0]
|
17
|
+
# to = selected.to[0]
|
18
|
+
#
|
19
|
+
# result = searcher.query(from, to)
|
12
20
|
#
|
13
21
|
# exit if result.empty?
|
14
22
|
#
|
@@ -47,9 +55,26 @@ module YahooJpTransit
|
|
47
55
|
end
|
48
56
|
|
49
57
|
def query(from, to, options = {})
|
58
|
+
options = prepare_options(:result, from, to, options)
|
59
|
+
|
60
|
+
open("#{@@url}?#{query_string(options)}", { :proxy => @proxy }) do |f|
|
61
|
+
ResultParser.new(f, self, from, to, options).parse
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def select(from, to, options = {})
|
66
|
+
options = prepare_options(:select, from, to, options)
|
67
|
+
|
68
|
+
open("#{@@url}?#{query_string(options)}", { :proxy => @proxy }) do |f|
|
69
|
+
SelectParser.new(f, self).parse
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
private
|
74
|
+
def prepare_options(htmb, from, to, options)
|
50
75
|
options = options.dup
|
51
76
|
defaults = {
|
52
|
-
:val_htmb =>
|
77
|
+
:val_htmb => htmb,
|
53
78
|
:from => from,
|
54
79
|
:p => to,
|
55
80
|
:sort => 0,
|
@@ -76,12 +101,9 @@ module YahooJpTransit
|
|
76
101
|
defaults.delete(:val_dsmask_air)
|
77
102
|
end
|
78
103
|
|
79
|
-
|
80
|
-
|
81
|
-
end
|
82
|
-
end # def query
|
104
|
+
return defaults.merge(options)
|
105
|
+
end
|
83
106
|
|
84
|
-
private
|
85
107
|
def query_string(params)
|
86
108
|
params.map {|k, v|
|
87
109
|
k = CGI.escape("#{k}")
|
@@ -103,7 +125,7 @@ module YahooJpTransit
|
|
103
125
|
end
|
104
126
|
end # class Searcher
|
105
127
|
|
106
|
-
class
|
128
|
+
class ResultParser
|
107
129
|
def initialize(input_stream, searcher, from, to, search_options)
|
108
130
|
@input_stream = input_stream
|
109
131
|
@searcher = searcher
|
@@ -174,19 +196,19 @@ module YahooJpTransit
|
|
174
196
|
private
|
175
197
|
def parse_result_header(header_class)
|
176
198
|
kwargs = {}
|
177
|
-
kwargs[:start] = search('"start') {|l| l.match(/>([^<]+)</e)[1] }
|
178
|
-
kwargs[:reach] = search('"reach') {|l| l.match(/>([^<]+)</e)[1] }
|
179
|
-
kwargs[:lapse] = search('"result_lapse') do |l|
|
199
|
+
kwargs[:start] = search('"start', '"result_head_right_cont"') {|l| l.match(/>([^<]+)</e)[1] }
|
200
|
+
kwargs[:reach] = search('"reach', '"result_head_right_cont"') {|l| l.match(/>([^<]+)</e)[1] }
|
201
|
+
kwargs[:lapse] = search('"result_lapse', '<table') do |l|
|
180
202
|
l.match(%r|>(.+)</div>|e)[1].gsub(%r|</?span[^>]*>|e, '')
|
181
203
|
end
|
182
|
-
kwargs[:distance] = search('"result_distance') {|l| l.match(/>([^<]+)</e)[1] }
|
183
|
-
kwargs[:passage] = search('"result_passage') do |l|
|
204
|
+
kwargs[:distance] = search('"result_distance', '<table') {|l| l.match(/>([^<]+)</e)[1] }
|
205
|
+
kwargs[:passage] = search('"result_passage', '<table') do |l|
|
184
206
|
l.match(%r|>(.+)</div>|e)[1].gsub(%r|</?span[^>]*>|e, '')
|
185
207
|
end
|
186
|
-
kwargs[:connection] = search('"result_connection') do |l|
|
208
|
+
kwargs[:connection] = search('"result_connection', '<table') do |l|
|
187
209
|
l.match(%r|>(.+)</div>|e)[1].gsub(%r|</?span[^>]*>|e, '')
|
188
210
|
end
|
189
|
-
kwargs[:pass] = search('"result_pass') do |l|
|
211
|
+
kwargs[:pass] = search('"result_pass', '<table', nil) do |l|
|
190
212
|
l.match(%r|>(.+)</div>|e)[1].gsub(%r|</?em>|e, '')
|
191
213
|
end
|
192
214
|
|
@@ -201,15 +223,15 @@ module YahooJpTransit
|
|
201
223
|
|
202
224
|
def parse_result_path(path_class, out)
|
203
225
|
kwargs = {}
|
204
|
-
kwargs[:station] = search(['[', "\241\316"]) {|l| l.match(%r|<em>(.+)</em>|e)[1] }
|
226
|
+
kwargs[:station] = search(['[', "\241\316"], "<!--\265\376-->") {|l| l.match(%r|<em>(.+)</em>|e)[1] }
|
205
227
|
|
206
228
|
line = ''
|
207
|
-
line = @input_stream.gets until line["
|
229
|
+
line = @input_stream.gets until line["<!--\265\376-->"]
|
208
230
|
line = @input_stream.gets until line =~ /\A<tr>/e || line =~ %r|\A</div>|e
|
209
231
|
|
210
232
|
if has_next = (line =~ /\A<tr>/e)
|
211
|
-
kwargs[:time] = search('<small>') {|l| l.match(%r|<small>([^<]+)</small>|e)[1] }
|
212
|
-
kwargs[:course], kwargs[:fare] = search('<small>') do |l|
|
233
|
+
kwargs[:time] = search('<small>', "<!--\265\376-->") {|l| l.match(%r|<small>([^<]+)</small>|e)[1] }
|
234
|
+
kwargs[:course], kwargs[:fare] = search('<small>', "<!--\265\376-->") do |l|
|
213
235
|
desc = l.match(%r|>([^<]+)</font>|e)[1]
|
214
236
|
fare = (m = l.match(%r|>([^<]+)</td>|e)) ? m[1] : nil
|
215
237
|
[desc, fare]
|
@@ -246,22 +268,86 @@ module YahooJpTransit
|
|
246
268
|
return errmsg
|
247
269
|
end
|
248
270
|
|
249
|
-
def search(strs, limit =
|
271
|
+
def search(strs, limit, default = false)
|
250
272
|
if strs.kind_of?(String)
|
251
273
|
strs = [strs]
|
252
274
|
end
|
253
275
|
|
254
|
-
limit
|
255
|
-
line = @input_stream.gets
|
256
|
-
|
276
|
+
until (line = @input_stream.gets)[limit]
|
257
277
|
if strs.any? {|str| line[str] }
|
258
278
|
return yield(line)
|
259
279
|
end
|
260
280
|
end
|
261
281
|
|
262
|
-
|
282
|
+
if default == false
|
283
|
+
raise "cannot find strings: #{strs.map{|i| i.inspect }.join(', ')}"
|
284
|
+
else
|
285
|
+
return default
|
286
|
+
end
|
287
|
+
end
|
288
|
+
end # class ResultParser
|
289
|
+
|
290
|
+
class SelectParser
|
291
|
+
def initialize(input_stream, searcher)
|
292
|
+
@input_stream = input_stream
|
293
|
+
@searcher = searcher
|
294
|
+
end
|
295
|
+
|
296
|
+
def parse
|
297
|
+
from = []
|
298
|
+
to = []
|
299
|
+
|
300
|
+
@input_stream.each do |line|
|
301
|
+
raise(SearchError, parse_error) if line['"error_container"']
|
302
|
+
break if line["<h3>2. \222T\215\365\223\372\216\236</h3>"]
|
303
|
+
|
304
|
+
if line["<em>\275\320\310\257\261\330</em>"]
|
305
|
+
parse_options(from)
|
306
|
+
end
|
307
|
+
|
308
|
+
if line["<em>\305\376\303\345\261\330</em>"]
|
309
|
+
parse_options(to)
|
310
|
+
end
|
311
|
+
end
|
312
|
+
|
313
|
+
selected = Selected.new(from, to)
|
314
|
+
|
315
|
+
if @searcher.kcode
|
316
|
+
selected.kconv(@searcher.kcode)
|
317
|
+
end
|
318
|
+
|
319
|
+
return selected
|
320
|
+
end
|
321
|
+
|
322
|
+
private
|
323
|
+
def parse_options(list)
|
324
|
+
until (line = @input_stream.gets)['</select>']
|
325
|
+
if (m = %r|^<option[^>]*>([^<]+)</option>|.match(line))
|
326
|
+
list << m[1]
|
327
|
+
end
|
328
|
+
end
|
329
|
+
end
|
330
|
+
|
331
|
+
def parse_error
|
332
|
+
errs = []
|
333
|
+
|
334
|
+
@input_stream.each do |line|
|
335
|
+
break if line['</div>']
|
336
|
+
|
337
|
+
if line["\241\246"]
|
338
|
+
errs << line.gsub("\241\246", '').gsub(%r|</?[^>]+>|e, '').strip
|
339
|
+
end
|
340
|
+
end
|
341
|
+
|
342
|
+
if (errmsg = errs.join(', ')).empty?
|
343
|
+
errmsg = 'search error'
|
344
|
+
elsif @searcher.kcode
|
345
|
+
errmsg = Kconv.kconv(errmsg, @searcher.kcode, Kconv::EUC)
|
346
|
+
end
|
347
|
+
|
348
|
+
return errmsg
|
263
349
|
end
|
264
|
-
end # class
|
350
|
+
end # class SelectParser
|
265
351
|
|
266
352
|
class Result
|
267
353
|
class Header
|
@@ -347,4 +433,34 @@ module YahooJpTransit
|
|
347
433
|
to_yaml
|
348
434
|
end
|
349
435
|
end # class Result
|
436
|
+
|
437
|
+
class Selected
|
438
|
+
attr_reader :from, :to
|
439
|
+
|
440
|
+
def initialize(from, to)
|
441
|
+
@from = from
|
442
|
+
@to = to
|
443
|
+
end
|
444
|
+
|
445
|
+
def kconv(kcode)
|
446
|
+
@from = @from.map {|i| Kconv.kconv(i, kcode, Kconv::EUC) }
|
447
|
+
@to = @to.map {|i| Kconv.kconv(i, kcode, Kconv::EUC) }
|
448
|
+
end
|
449
|
+
|
450
|
+
def to_yaml
|
451
|
+
yaml = ['---']
|
452
|
+
|
453
|
+
yaml << 'from:'
|
454
|
+
@from.each {|i| yaml << " - #{i}" }
|
455
|
+
|
456
|
+
yaml << 'to:'
|
457
|
+
@to.each {|i| yaml << " - #{i}" }
|
458
|
+
|
459
|
+
yaml.join($/)
|
460
|
+
end
|
461
|
+
|
462
|
+
def to_s
|
463
|
+
to_yaml
|
464
|
+
end
|
465
|
+
end # class Selected
|
350
466
|
end # module YahooJpTransit
|
metadata
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
|
-
rubygems_version: 0.9.
|
2
|
+
rubygems_version: 0.9.4
|
3
3
|
specification_version: 1
|
4
4
|
name: yahoo_jp_transit
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.1.0
|
7
|
-
date: 2007-10-
|
6
|
+
version: 0.1.1
|
7
|
+
date: 2007-10-24 00:00:00 +09:00
|
8
8
|
summary: Scraping library for Yahoo! route information.(http://transit.yahoo.co.jp/)
|
9
9
|
require_paths:
|
10
10
|
- lib
|