yahoo_jp_transit 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/README.txt +17 -0
  2. data/lib/yahoo_jp_transit.rb +350 -0
  3. metadata +48 -0
data/README.txt ADDED
@@ -0,0 +1,17 @@
1
+ = yahoo_jp_transit.rb
2
+
3
+ Copyright (c) 2007 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
4
+
5
+ == Example
6
+
7
+ require 'yahoo_jp_transit'
8
+
9
+ searcher = YahooJpTransit::Searcher.new
10
+ result = searcher.query('大宮(埼玉県)', '東京')
11
+
12
+ exit if result.empty?
13
+
14
+ begin
15
+ puts result
16
+ end while result = result.next_page
17
+
@@ -0,0 +1,350 @@
1
+ #
2
+ # = yahoo_jp_transit.rb
3
+ #
4
+ # Copyright (c) 2007 SUGAWARA Genki <sgwr_dts@yahoo.co.jp>
5
+ #
6
+ # == Example
7
+ #
8
+ # require 'yahoo_jp_transit'
9
+ #
10
+ # searcher = YahooJpTransit::Searcher.new
11
+ # result = searcher.query('大宮(埼玉県)', '東京')
12
+ #
13
+ # exit if result.empty?
14
+ #
15
+ # begin
16
+ # puts result
17
+ # end while result = result.next_page
18
+ #
19
+
20
+ require 'cgi'
21
+ require 'kconv'
22
+ require 'open-uri'
23
+
24
+ module YahooJpTransit
25
# Error raised by Parser#parse when the response page contains an
# "error_container" marker. The message is the error text extracted from the
# page, or 'search error' when none could be extracted.
class SearchError < StandardError
end
26
+
27
# Client for the transit search page at the URL held in @@url.
#
# A Searcher builds the query string for a route search, fetches the result
# page with open-uri and passes the response stream to Parser.
class Searcher
  @@url = 'http://transit.yahoo.co.jp/search'

  # Values for the :timekb option (sent as the "valtimekb" parameter).
  DEPARTURE = 'DEP'
  ARRIVAL = 'ARR'
  LAST = 'LST'
  NONE = 'NON'

  # Kconv encoding constant selected in #initialize, or nil when the given
  # name matched none of the known prefixes.
  attr_reader :kcode

  # kcode:: encoding name; only its first letter is inspected, ignoring case
  #         (e -> Kconv::EUC, s -> Kconv::SJIS, u -> Kconv::UTF8). Any other
  #         value leaves #kcode nil.
  # proxy:: handed to open-uri as the :proxy option.
  def initialize(kcode = $KCODE, proxy = nil)
    @kcode = case kcode
             when /\Ae/i then Kconv::EUC
             when /\As/i then Kconv::SJIS
             when /\Au/i then Kconv::UTF8
             end
    @proxy = proxy
  end

  # Runs a route search and returns whatever Parser#parse returns.
  #
  # from, to:: origin and destination, sent as the "from" and "p" parameters.
  # options::  extra request parameters. Recognized shortcuts:
  #            :datetime - object responding to +strftime+; replaces the
  #            default search time (Time.now);
  #            :timekb - sent as "valtimekb";
  #            :charge / :val_dsmask_charge - +false+ drops "val_dsmask_charge";
  #            :air / :val_dsmask_air - +false+ drops "val_dsmask_air".
  #            Every remaining entry overrides the default parameter of the
  #            same name. The caller's hash is not modified.
  def query(from, to, options = {})
    options = options.dup
    defaults = {
      :val_htmb => :result,
      :from => from,
      :p => to,
      :sort => 0,
      :num => 0,
      :valtimekb => DEPARTURE,
      :val_dsmask_charge => :CHARGE,
      :val_dsmask_air => :AIR,
      :val_search => search_label
    }.merge(divide_datetime(Time.now))

    if dt = options.delete(:datetime)
      options.update(divide_datetime(dt))
    end

    if timekb = options.delete(:timekb)
      options[:valtimekb] = timekb
    end

    # Remove both spellings before testing them, so that neither key can
    # survive in options and end up in the request as a literal "false".
    charge_flags = [options.delete(:charge), options.delete(:val_dsmask_charge)]
    defaults.delete(:val_dsmask_charge) if charge_flags.include?(false)

    air_flags = [options.delete(:air), options.delete(:val_dsmask_air)]
    defaults.delete(:val_dsmask_air) if air_flags.include?(false)

    # options (after the rewrites above) is what Parser keeps for paging.
    open("#{@@url}?#{query_string(defaults.merge(options))}", { :proxy => @proxy }) do |f|
      Parser.new(f, self, from, to, options).parse
    end
  end # def query

  private

  # Builds "k=v&k=v" from params. Every value is converted to EUC before
  # escaping; the input encoding is the one chosen in #initialize, falling
  # back to Kconv's auto-detection when #kcode is nil.
  def query_string(params)
    params.map {|k, v|
      key = CGI.escape("#{k}")
      value = CGI.escape(Kconv.kconv("#{v}", Kconv::EUC, @kcode || Kconv::AUTO))
      "#{key}=#{value}"
    }.join('&')
  end

  # Value of the "val_search" parameter, expressed in the caller's encoding
  # so that #query_string can convert it like every other value.
  # NOTE(review): the literal is presumably EUC-JP text - confirm.
  def search_label
    label = "\303\265\272\367"
    @kcode ? Kconv.kconv(label, @kcode, Kconv::EUC) : label
  end

  # Splits a time into the request's date/time parameters: "YYYYMM", day,
  # hour, and the two minute digits as separate values.
  def divide_datetime(datetime)
    minute = datetime.strftime('%M')

    {
      :val_yymm => datetime.strftime('%Y%m'),
      :val_dd => datetime.strftime('%d'),
      :val_hh => datetime.strftime('%H'),
      :val_m1 => minute.slice(0, 1),
      :val_m2 => minute.slice(1, 1)
    }
  end
end # class Searcher
105
+
106
# Line-oriented scraper for a search result page.
#
# The parser consumes +input_stream+ strictly in order with +each+/+gets+ and
# recognizes content by marker substrings, so it depends on the order in
# which the page emits those markers.
class Parser
  # input_stream::   IO-like object responding to +each+ and +gets+.
  # searcher::       object whose +kcode+ selects the output encoding and
  #                  whose +query+ is called when another page is requested.
  # from, to::       origin and destination, kept for paging.
  # search_options:: options re-sent (with a new :num) when paging.
  def initialize(input_stream, searcher, from, to, search_options)
    @input_stream = input_stream
    @searcher = searcher
    @from = from
    @to = to
    @search_options = search_options
  end

  # Reads the page and returns an Array of Result objects.
  #
  # The returned array carries singleton methods. A non-empty array has
  # +count+, +head_num+, +tail_num+, +page+, +pages+, +next_page+ and
  # +prev_page+ (note that +count+ is the total reported by the page, not the
  # array length). An empty array only has +next_page+/+prev_page+, both nil.
  #
  # Raises SearchError when the page contains an "error_container" marker and
  # RuntimeError when an expected marker is missing or input ends early.
  def parse
    rs = []
    vars = nil

    @input_stream.each do |line|
      raise(SearchError, parse_error) if line['"error_container"']
      break if line['"page_control2"']

      # parse page control (only the first occurrence is used)
      if line['"page_count"'] and not vars
        vars = { :searcher => @searcher, :from => @from, :to => @to, :search_options => @search_options }
        line = read_line
        vars.update(page_numbers(line))
      end

      next unless line['"result_order"']

      result = Result.new do |header_class, path_class|
        header = parse_result_header(header_class)
        paths = []
        # parse_result_path returns nil once the last station was read
        while parse_result_path(path_class, paths); end
        [header, paths]
      end
      rs << result
    end

    if rs.empty?
      def rs.next_page; nil; end
      def rs.prev_page; nil; end
    else
      rs.instance_variable_set(:@vars, vars)
      def rs.count; @vars[:count]; end
      def rs.head_num; @vars[:head_num]; end
      def rs.tail_num; @vars[:tail_num]; end
      # the paging arithmetic assumes three results per page
      def rs.page; (head_num.to_f / 3).ceil; end
      def rs.pages; (count.to_f / 3).ceil; end
      def rs.next_page
        if page < pages
          options = @vars[:search_options].merge(:num => (3 * page))
          @vars[:searcher].query(@vars[:from], @vars[:to], options)
        else
          nil
        end
      end
      def rs.prev_page
        if page > 1
          options = @vars[:search_options].merge(:num => (3 * (page - 2)))
          @vars[:searcher].query(@vars[:from], @vars[:to], options)
        else
          nil
        end
      end
    end

    return rs
  end

  private

  # Extracts the total count and the displayed range from the line following
  # the "page_count" marker. The first <em> holds the count, the second the
  # range; the range's leading and trailing digit runs become head_num and
  # tail_num, so numbers of any length are read in full.
  def page_numbers(line)
    count, range = line.scan(%r|<em>([^<]+)</em>|e).map {|i| i.first }
    range = range.to_s

    {
      :count => count.to_i,
      :head_num => range[/\A\d+/e].to_i,
      :tail_num => range[/\d+\z/e].to_i
    }
  end

  # Reads the seven header fields, in page order, and builds a header_class
  # instance from them. The header is converted to the searcher's encoding
  # when one is set.
  def parse_result_header(header_class)
    kwargs = {}
    kwargs[:start] = element_text('"start')
    kwargs[:reach] = element_text('"reach')
    kwargs[:lapse] = div_text('"result_lapse', %r|</?span[^>]*>|e)
    kwargs[:distance] = element_text('"result_distance')
    kwargs[:passage] = div_text('"result_passage', %r|</?span[^>]*>|e)
    kwargs[:connection] = div_text('"result_connection', %r|</?span[^>]*>|e)
    kwargs[:pass] = div_text('"result_pass', %r|</?em>|e)

    header = header_class.new(kwargs)

    if @searcher.kcode
      header.kconv(@searcher.kcode)
    end

    return header
  end

  # Text between the first ">" and the following "<" on the marker's line.
  def element_text(marker)
    search(marker) {|l| l.match(/>([^<]+)</e)[1] }
  end

  # Content of the marker's line up to "</div>", with the tags matched by
  # +tags+ removed.
  def div_text(marker, tags)
    search(marker) {|l| l.match(%r|>(.+)</div>|e)[1].gsub(tags, '') }
  end

  # Reads one station and, when another row follows, the leg leaving it
  # (time, course, fare). Appends a path_class instance to +out+ and returns
  # a true value when another station follows, nil after the last one.
  def parse_result_path(path_class, out)
    kwargs = {}
    kwargs[:station] = search(['[', "\241\316"]) {|l| l.match(%r|<em>(.+)</em>|e)[1] }

    # Skip ahead to the line containing the "\265\376" marker, then to the
    # next line opening a row (<tr>) or closing the block (</div>).
    line = ''
    line = read_line until line["\265\376"]
    line = read_line until line =~ /\A<tr>/e || line =~ %r|\A</div>|e

    if has_next = (line =~ /\A<tr>/e)
      kwargs[:time] = search('<small>') {|l| l.match(%r|<small>([^<]+)</small>|e)[1] }
      kwargs[:course], kwargs[:fare] = search('<small>') do |l|
        desc = l.match(%r|>([^<]+)</font>|e)[1]
        fare = (m = l.match(%r|>([^<]+)</td>|e)) ? m[1] : nil
        [desc, fare]
      end
    end

    path = path_class.new(kwargs)

    if @searcher.kcode
      path.kconv(@searcher.kcode)
    end

    out << path
    return has_next
  end

  # Collects the error lines (those containing the "\241\246" marker) up to
  # the next "</div>" and returns them joined with ", ", or 'search error'
  # when there are none. The text is converted from EUC to the searcher's
  # encoding when one is set.
  def parse_error
    errs = []

    @input_stream.each do |line|
      break if line['</div>']

      if line["\241\246"]
        errs << line.gsub("\241\246", '').gsub(%r|</?[^>]+>|e, '').strip
      end
    end

    if (errmsg = errs.join(', ')).empty?
      errmsg = 'search error'
    elsif @searcher.kcode
      errmsg = Kconv.kconv(errmsg, @searcher.kcode, Kconv::EUC)
    end

    return errmsg
  end

  # Reads at most +limit+ lines and yields the first one containing any of
  # +strs+ (a String or an Array of Strings), returning the block's value.
  # Raises RuntimeError when no line matches within the limit or the input
  # ends first.
  def search(strs, limit = 64)
    strs = [strs] if strs.kind_of?(String)

    limit.times do
      line = @input_stream.gets or break # end of input: report as not found
      return yield(line) if strs.any? {|str| line[str] }
    end

    raise "cannot find strings: #{strs.join(', ')}"
  end

  # Next line of input; raises RuntimeError instead of returning nil at EOF.
  def read_line
    @input_stream.gets or raise('unexpected end of search result')
  end
end # class Parser
265
+
266
# One route found by a search: a Header with the summary fields plus the
# ordered list of Path entries (one per station).
class Result
  # Summary fields of a route. All attributes are optional; missing ones
  # are nil.
  class Header
    ATTRS = [ :start, :reach, :lapse, :distance, :passage, :connection, :pass ].freeze
    attr_reader(*ATTRS)

    # kwargs:: Hash keyed by the symbols in ATTRS; other keys are ignored.
    def initialize(kwargs)
      ATTRS.each {|kw| instance_variable_set("@#{kw}", kwargs[kw]) }
    end

    # "name=value" pairs joined with ", ", in ATTRS order.
    def to_s
      ATTRS.map {|kw| "#{kw}=#{instance_variable_get("@#{kw}")}" }.join(', ')
    end

    # Converts every non-nil attribute in place from EUC to +out_code+.
    def kconv(out_code)
      ATTRS.each do |kw|
        val = instance_variable_get("@#{kw}") or next
        instance_variable_set("@#{kw}", Kconv.kconv(val, out_code, Kconv::EUC))
      end
    end

    # Hash of all attributes, including those that are nil.
    def to_hash
      hash = {}
      ATTRS.each {|kw| hash[kw] = instance_variable_get("@#{kw}") }
      hash
    end
  end

  # A station on the route together with the leg that leaves it. The last
  # station of a route has no time/course/fare.
  class Path
    ATTRS = [ :station, :time, :course, :fare ].freeze
    attr_reader(*ATTRS)

    # kwargs:: Hash keyed by the symbols in ATTRS; other keys are ignored.
    def initialize(kwargs)
      ATTRS.each {|kw| instance_variable_set("@#{kw}", kwargs[kw]) }
    end

    # True when a departure time was recorded for this entry.
    def has_next?
      not @time.nil?
    end

    # Converts every non-nil attribute in place from EUC to +out_code+.
    def kconv(out_code)
      ATTRS.each do |kw|
        val = instance_variable_get("@#{kw}") or next
        instance_variable_set("@#{kw}", Kconv.kconv(val, out_code, Kconv::EUC))
      end
    end

    # Hash of the attributes that are set; nil attributes are left out.
    def to_hash
      hash = {}
      ATTRS.each do |kw|
        val = instance_variable_get("@#{kw}")
        hash[kw] = val if val
      end
      return hash
    end
  end

  attr_reader :header, :paths

  # Yields the Header and Path classes; the block must return
  # [header, paths].
  def initialize
    @header, @paths = yield(Header, Path)
  end

  # Renders the result as a YAML-style document with a "header" mapping and
  # a "paths" sequence. Values are written verbatim (no quoting/escaping).
  def to_yaml
    yaml = ['---']

    yaml << 'header:'
    @header.to_hash.each {|k, v| yaml << " #{k}: #{v}" }

    yaml << 'paths:'
    @paths.each do |path|
      # Keys after the first are indented to line up with the key that
      # follows " - ", so that they stay inside the same sequence entry.
      yaml << ' - ' + path.to_hash.map {|k, v| "#{k}: #{v}" }.join("#{$/}   ")
    end

    yaml.join($/)
  end

  def to_s
    to_yaml
  end
end # class Result
350
+ end # module YahooJpTransit
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ rubygems_version: 0.9.2
3
+ specification_version: 1
4
+ name: yahoo_jp_transit
5
+ version: !ruby/object:Gem::Version
6
+ version: 0.1.0
7
+ date: 2007-10-22 00:00:00 +09:00
8
+ summary: Scraping library for Yahoo! route information.(http://transit.yahoo.co.jp/)
9
+ require_paths:
10
+ - lib
11
+ email: sgwr_dts@yahoo.co.jp
12
+ homepage: http://yahoo_jp_transit.rubyforge.org
13
+ rubyforge_project: yahoo_jp_transit
14
+ description: Scraping library for Yahoo! route information.(http://transit.yahoo.co.jp/)
15
+ autorequire:
16
+ default_executable:
17
+ bindir: bin
18
+ has_rdoc: true
19
+ required_ruby_version: !ruby/object:Gem::Version::Requirement
20
+ requirements:
21
+ - - ">"
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ platform: ruby
26
+ signing_key:
27
+ cert_chain:
28
+ post_install_message:
29
+ authors:
30
+ - winebarrel
31
+ files:
32
+ - README.txt
33
+ - lib/yahoo_jp_transit.rb
34
+ test_files: []
35
+
36
+ rdoc_options:
37
+ - --main
38
+ - README.txt
39
+ extra_rdoc_files:
40
+ - README.txt
41
+ executables: []
42
+
43
+ extensions: []
44
+
45
+ requirements: []
46
+
47
+ dependencies: []
48
+