yahoo_jp_transit 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +17 -0
- data/lib/yahoo_jp_transit.rb +350 -0
- metadata +48 -0
data/README.txt
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
= yahoo_jp_transit.rb
|
2
|
+
|
3
|
+
Copyright (c) 2007 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
4
|
+
|
5
|
+
== Example
|
6
|
+
|
7
|
+
require 'yahoo_jp_transit'
|
8
|
+
|
9
|
+
searcher = YahooJpTransit::Searcher.new
|
10
|
+
result = searcher.query('大宮(埼玉県)', '東京')
|
11
|
+
|
12
|
+
exit if result.empty?
|
13
|
+
|
14
|
+
begin
|
15
|
+
puts result
|
16
|
+
end while result = result.next_page
|
17
|
+
|
@@ -0,0 +1,350 @@
|
|
1
|
+
#
|
2
|
+
# = yahoo_jp_transit.rb
|
3
|
+
#
|
4
|
+
# Copyright (c) 2007 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
|
5
|
+
#
|
6
|
+
# == Example
|
7
|
+
#
|
8
|
+
# require 'yahoo_jp_transit'
|
9
|
+
#
|
10
|
+
# searcher = YahooJpTransit::Searcher.new
|
11
|
+
# result = searcher.query('大宮(埼玉県)', '東京')
|
12
|
+
#
|
13
|
+
# exit if result.empty?
|
14
|
+
#
|
15
|
+
# begin
|
16
|
+
# puts result
|
17
|
+
# end while result = result.next_page
|
18
|
+
#
|
19
|
+
|
20
|
+
require 'cgi'
|
21
|
+
require 'kconv'
|
22
|
+
require 'open-uri'
|
23
|
+
|
24
|
+
module YahooJpTransit
|
25
|
+
# Error raised by Parser#parse when the response contains an
# "error_container" section; the message is whatever Parser#parse_error
# extracts from that section.
class SearchError < StandardError
end
|
26
|
+
|
27
|
+
# Builds a search request for the transit search page, fetches it and hands
# the response stream to Parser.
#
#   searcher = Searcher.new('u')
#   result   = searcher.query(from, to, :datetime => Time.now, :charge => false)
class Searcher
  @@url = 'http://transit.yahoo.co.jp/search'

  # Values accepted for the :timekb option (sent as the +valtimekb+ field).
  DEPARTURE = 'DEP'
  ARRIVAL = 'ARR'
  LAST = 'LST'
  NONE = 'NON'

  # Raw bytes sent as the +val_search+ field.
  # NOTE(review): presumably the EUC-JP label of the search button -- the
  # request is converted to EUC and the parser matches EUC byte literals.
  SEARCH_LABEL_EUC = "\303\265\272\367"

  # Kconv code the caller's strings are written in (nil when unknown).
  attr_reader :kcode

  # kcode:: encoding name; only the first letter matters (e/s/u for
  #         EUC/SJIS/UTF8, case-insensitive). Anything else leaves +kcode+ nil.
  # proxy:: passed through as the :proxy option of +open+.
  def initialize(kcode = $KCODE, proxy = nil)
    @kcode = case kcode
             when /\Ae/i then Kconv::EUC
             when /\As/i then Kconv::SJIS
             when /\Au/i then Kconv::UTF8
             end
    @proxy = proxy
  end

  # Runs one search and returns whatever Parser#parse returns.
  #
  # from, to:: station names, written in +kcode+.
  # options::  extra form fields, plus these conveniences:
  #            :datetime (object responding to strftime), :timekb (one of the
  #            constants above), :charge => false and :air => false (drop the
  #            corresponding +val_dsmask_*+ field).
  #
  # The caller's hash is never modified.
  def query(from, to, options = {})
    # The parser gets an untouched copy so that next_page/prev_page replay the
    # request with the same :charge/:air/:datetime settings as this one.
    search_options = options.dup
    params = request_params(from, to, search_options)

    # Kernel#open handles URLs here because open-uri is loaded.
    # NOTE(review): newer Rubies need URI.open instead -- confirm target version.
    open("#{@@url}?#{query_string(params)}", { :proxy => @proxy }) do |f|
      Parser.new(f, self, from, to, search_options).parse
    end
  end # def query

  private

  # Merges the default form fields with the caller's options and returns the
  # complete parameter hash. Works on a copy of +options+.
  def request_params(from, to, options)
    options = options.dup
    defaults = {
      :val_htmb => :result,
      :from => from,
      :p => to,
      :sort => 0,
      :num => 0,
      :valtimekb => DEPARTURE,
      :val_dsmask_charge => :CHARGE,
      :val_dsmask_air => :AIR,
      :val_search => search_label
    }.merge(divide_datetime(Time.now))

    datetime = options.delete(:datetime)
    options.update(divide_datetime(datetime)) if datetime

    timekb = options.delete(:timekb)
    options[:valtimekb] = timekb if timekb

    { :charge => :val_dsmask_charge, :air => :val_dsmask_air }.each do |flag, field|
      # Remove BOTH spellings before testing them; a short-circuiting test
      # would leave a stray `false` behind that is then sent to the server.
      given = [options.delete(flag), options.delete(field)]
      defaults.delete(field) if given.include?(false)
    end

    defaults.merge(options)
  end

  # The search label expressed in the caller's encoding, so that
  # query_string can convert every value uniformly from +kcode+ to EUC.
  def search_label
    return SEARCH_LABEL_EUC unless @kcode
    Kconv.kconv(SEARCH_LABEL_EUC, @kcode, Kconv::EUC)
  end

  # Serialises +params+ as an URL query string. Values are converted to EUC
  # from the declared +kcode+ (auto-detected only when no kcode was given).
  def query_string(params)
    params.map { |name, value|
      euc = Kconv.kconv(value.to_s, Kconv::EUC, @kcode || Kconv::AUTO)
      "#{CGI.escape(name.to_s)}=#{CGI.escape(euc)}"
    }.join('&')
  end

  # Splits +datetime+ into the form fields used by the search page:
  # year+month, day, hour and the two minute digits separately.
  def divide_datetime(datetime)
    minutes = datetime.strftime('%M')

    {
      :val_yymm => datetime.strftime('%Y%m'),
      :val_dd => datetime.strftime('%d'),
      :val_hh => datetime.strftime('%H'),
      :val_m1 => minutes.slice(0, 1),
      :val_m2 => minutes.slice(1, 1)
    }
  end
end # class Searcher
|
105
|
+
|
106
|
+
# Line-oriented scraper for one search response.
#
# The stream is consumed strictly front to back: every helper below reads
# forward from the current position, so the order of the calls is significant.
class Parser
  # input_stream::   IO-like object (+each+ and +gets+) holding the response.
  # searcher::       object used to fetch neighbouring pages; must respond to
  #                  +kcode+ and +query(from, to, options)+.
  # from, to::       the stations that were searched for.
  # search_options:: the options of the request, replayed when paging.
  def initialize(input_stream, searcher, from, to, search_options)
    @input_stream = input_stream
    @searcher = searcher
    @from = from
    @to = to
    @search_options = search_options
  end

  # Returns an Array of Result objects. The array is extended with
  # +next_page+ / +prev_page+ and, when paging information was found, with
  # +count+, +head_num+, +tail_num+, +page+ and +pages+.
  #
  # Raises SearchError when the response contains an "error_container" section.
  def parse
    rs = []
    vars = nil

    @input_stream.each do |line|
      raise(SearchError, parse_error) if line['"error_container"']
      break if line['"page_control2"']

      # parse page control (only the first one; the numbers are on the next line)
      if line['"page_count"'] && !vars
        line = @input_stream.gets
        break if line.nil? # response ended inside the page control
        vars = page_info(line)
      end

      next unless line['"result_order"']

      result = Result.new do |header_class, path_class|
        header = parse_result_header(header_class)
        paths = []
        while parse_result_path(path_class, paths); end
        [header, paths]
      end
      rs << result
    end

    attach_paging(rs, vars)
    return rs
  end

  private

  # Extracts total count and the displayed range from the page-control line.
  # Both are taken from the first two <em> elements; the range bounds are the
  # first and the last run of digits, so multi-digit numbers are read in full.
  def page_info(line)
    count, range = line.scan(%r|<em>([^<]+)</em>|e).map { |m| m.first }
    bounds = range.to_s.scan(/\d+/e)

    {
      :searcher => @searcher,
      :from => @from,
      :to => @to,
      :search_options => @search_options,
      :count => count.to_i,
      :head_num => bounds.first.to_i,
      :tail_num => bounds.last.to_i
    }
  end

  # Adds the paging methods to the result array. Pages hold 3 results.
  # Without results or without paging information there is nothing to page
  # through, so next_page/prev_page simply answer nil.
  def attach_paging(rs, vars)
    if rs.empty? || vars.nil?
      def rs.next_page; nil; end
      def rs.prev_page; nil; end
      return rs
    end

    rs.instance_variable_set(:@vars, vars)
    def rs.count; @vars[:count]; end
    def rs.head_num; @vars[:head_num]; end
    def rs.tail_num; @vars[:tail_num]; end
    def rs.page; (head_num.to_f / 3).ceil; end
    def rs.pages; (count.to_f / 3).ceil; end

    # :num is the zero-based offset of the first result of the wanted page.
    def rs.next_page
      return nil unless page < pages
      options = @vars[:search_options].merge(:num => (3 * page))
      @vars[:searcher].query(@vars[:from], @vars[:to], options)
    end

    def rs.prev_page
      return nil unless page > 1
      options = @vars[:search_options].merge(:num => (3 * (page - 2)))
      @vars[:searcher].query(@vars[:from], @vars[:to], options)
    end

    rs
  end

  # Reads the summary fields of one result, in the order they appear in the
  # response, and returns a +header_class+ instance (converted to the
  # searcher's kcode when one is set).
  def parse_result_header(header_class)
    span_tag = %r|</?span[^>]*>|e

    kwargs = {}
    kwargs[:start] = text_after('"start')
    kwargs[:reach] = text_after('"reach')
    kwargs[:lapse] = div_content('"result_lapse', span_tag)
    kwargs[:distance] = text_after('"result_distance')
    kwargs[:passage] = div_content('"result_passage', span_tag)
    kwargs[:connection] = div_content('"result_connection', span_tag)
    kwargs[:pass] = div_content('"result_pass', %r|</?em>|e)

    header = header_class.new(kwargs)
    header.kconv(@searcher.kcode) if @searcher.kcode
    return header
  end

  # First text node of the next line containing +marker+.
  def text_after(marker)
    search(marker) { |l| l.match(/>([^<]+)</e)[1] }
  end

  # Inner markup of the <div> on the next line containing +marker+, with
  # the tags matching +tag_pattern+ removed.
  def div_content(marker, tag_pattern)
    search(marker) { |l| l.match(%r|>(.+)</div>|e)[1].gsub(tag_pattern, '') }
  end

  # Reads one station of the route and appends it to +out+.
  # Returns a true value when another station follows (a <tr> row with
  # time/course/fare comes next) and nil when the route ended (</div>).
  def parse_result_path(path_class, out)
    kwargs = {}
    kwargs[:station] = search(['[', "\241\316"]) { |l| l.match(%r|<em>(.+)</em>|e)[1] }

    # Skip to the marker line, then on to the row/terminator that follows it.
    # The second scan starts with the marker line itself.
    line = read_until('') { |l| l["\265\376"] }
    line = read_until(line) { |l| l =~ /\A<tr>/e || l =~ %r|\A</div>|e }

    if has_next = (line =~ /\A<tr>/e)
      kwargs[:time] = search('<small>') { |l| l.match(%r|<small>([^<]+)</small>|e)[1] }
      kwargs[:course], kwargs[:fare] = search('<small>') do |l|
        desc = l.match(%r|>([^<]+)</font>|e)[1]
        fare = (m = l.match(%r|>([^<]+)</td>|e)) ? m[1] : nil
        [desc, fare]
      end
    end

    path = path_class.new(kwargs)
    path.kconv(@searcher.kcode) if @searcher.kcode

    out << path
    return has_next
  end

  # Returns +line+ if the block accepts it, otherwise keeps reading lines
  # until one is accepted. Raises RuntimeError when the input ends first.
  def read_until(line)
    until yield(line)
      line = @input_stream.gets
      raise 'unexpected end of response' if line.nil?
    end
    line
  end

  # Collects the messages of an error section (up to the closing </div>) and
  # returns them joined with ", ", or 'search error' when none were found.
  def parse_error
    errs = []

    @input_stream.each do |line|
      break if line['</div>']

      if line["\241\246"]
        errs << line.gsub("\241\246", '').gsub(%r|</?[^>]+>|e, '').strip
      end
    end

    errmsg = errs.join(', ')

    if errmsg.empty?
      errmsg = 'search error'
    elsif @searcher.kcode
      errmsg = Kconv.kconv(errmsg, @searcher.kcode, Kconv::EUC)
    end

    return errmsg
  end

  # Reads at most +limit+ lines and yields the first one that contains any of
  # +strs+ (a String or an Array of Strings); returns the block's value.
  # Raises RuntimeError when no line matches or the input ends first.
  def search(strs, limit = 64)
    strs = [strs] if strs.kind_of?(String)

    limit.times do
      line = @input_stream.gets
      break if line.nil? # end of input: report it as "not found"

      return yield(line) if strs.any? { |str| line[str] }
    end

    raise "cannot find strings: #{strs.join(', ')}"
  end
end # class Parser
|
265
|
+
|
266
|
+
# One route found by a search: a summary (+header+) and the list of stations
# travelled through (+paths+).
class Result
  # Behaviour shared by Header and Path: both are plain attribute bags whose
  # attribute names are listed in the including class's ATTRS constant.
  module Attributes
    # kwargs:: Hash keyed by attribute name; missing keys become nil.
    def initialize(kwargs)
      self.class::ATTRS.each { |kw| instance_variable_set("@#{kw}", kwargs[kw]) }
    end

    # Converts every non-nil attribute in place from EUC to +out_code+.
    def kconv(out_code)
      self.class::ATTRS.map do |kw|
        val = instance_variable_get("@#{kw}") or next
        instance_variable_set("@#{kw}", Kconv.kconv(val, out_code, Kconv::EUC))
      end
    end
  end

  # Summary of a route.
  class Header
    ATTRS = [ :start, :reach, :lapse, :distance, :passage, :connection, :pass ].freeze

    include Attributes
    attr_reader(*ATTRS)

    # "name=value" pairs of all attributes, joined with ", ".
    def to_s
      ATTRS.map { |kw| "#{kw}=#{instance_variable_get("@#{kw}")}" }.join(', ')
    end

    # All attributes, including those that are nil.
    def to_hash
      ATTRS.inject({}) do |hash, kw|
        hash[kw] = instance_variable_get("@#{kw}")
        hash
      end
    end
  end

  # One station of a route, plus the leg leaving it (absent on the last one).
  class Path
    ATTRS = [ :station, :time, :course, :fare ].freeze

    include Attributes
    attr_reader(*ATTRS)

    # True when a time was recorded for this station.
    def has_next?
      not @time.nil?
    end

    # Only the attributes that are set.
    def to_hash
      ATTRS.inject({}) do |hash, kw|
        val = instance_variable_get("@#{kw}")
        hash[kw] = val if val
        hash
      end
    end
  end

  attr_reader :header, :paths

  # Yields the Header and Path classes; the block must return
  # [header, paths].
  def initialize
    @header, @paths = yield(Header, Path)
  end

  # Renders the result as a YAML document with a "header" mapping and a
  # "paths" sequence of mappings. Values are written verbatim (unquoted).
  def to_yaml
    yaml = ['---']

    yaml << 'header:'
    @header.to_hash.each { |k, v| yaml << " #{k}: #{v}" }

    yaml << 'paths:'
    @paths.each do |path|
      # Keys after the first must line up under the first key, i.e. be
      # indented by the width of the " - " prefix, or the YAML is invalid.
      yaml << ' - ' + path.to_hash.map { |k, v| "#{k}: #{v}" }.join("#{$/}   ")
    end

    yaml.join($/)
  end

  def to_s
    to_yaml
  end
end # class Result
|
350
|
+
end # module YahooJpTransit
|
metadata
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
rubygems_version: 0.9.2
|
3
|
+
specification_version: 1
|
4
|
+
name: yahoo_jp_transit
|
5
|
+
version: !ruby/object:Gem::Version
|
6
|
+
version: 0.1.0
|
7
|
+
date: 2007-10-22 00:00:00 +09:00
|
8
|
+
summary: Scraping library for Yahoo! route information.(http://transit.yahoo.co.jp/)
|
9
|
+
require_paths:
|
10
|
+
- lib
|
11
|
+
email: sgwr_dts@yahoo.co.jp
|
12
|
+
homepage: http://yahoo_jp_transit.rubyforge.org
|
13
|
+
rubyforge_project: yahoo_jp_transit
|
14
|
+
description: Scraping library for Yahoo! route information.(http://transit.yahoo.co.jp/)
|
15
|
+
autorequire:
|
16
|
+
default_executable:
|
17
|
+
bindir: bin
|
18
|
+
has_rdoc: true
|
19
|
+
required_ruby_version: !ruby/object:Gem::Version::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">"
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 0.0.0
|
24
|
+
version:
|
25
|
+
platform: ruby
|
26
|
+
signing_key:
|
27
|
+
cert_chain:
|
28
|
+
post_install_message:
|
29
|
+
authors:
|
30
|
+
- winebarrel
|
31
|
+
files:
|
32
|
+
- README.txt
|
33
|
+
- lib/yahoo_jp_transit.rb
|
34
|
+
test_files: []
|
35
|
+
|
36
|
+
rdoc_options:
|
37
|
+
- --main
|
38
|
+
- README.txt
|
39
|
+
extra_rdoc_files:
|
40
|
+
- README.txt
|
41
|
+
executables: []
|
42
|
+
|
43
|
+
extensions: []
|
44
|
+
|
45
|
+
requirements: []
|
46
|
+
|
47
|
+
dependencies: []
|
48
|
+
|