baidu 1.2.10 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 973532d0e6e0bc141eb17dcd034daccbefe2210a
4
- data.tar.gz: abdd019318edf7956ca07a7dd1bca44c02faaa1b
3
+ metadata.gz: f3ab3b16d4f810561f78ee164b2882eaf8f2bae8
4
+ data.tar.gz: 706fc2deeb2af8c5606eb685f1c85aef45f85127
5
5
  SHA512:
6
- metadata.gz: b74a336d56fd97b365db05d095991d49fa7cbf8098c180635afe2b8e16683db51ffa876a1fd855491b7f331574f25a91c3fe80dbdd6f2685517d1b45b9807034
7
- data.tar.gz: a430a916135a289b1577f3468d43188b3b8c6dc84db7460cd00d3af6f993f913f358d7e8e5d1a87d5a7557db30a1ec85cedec9eb949d3698d17fcd068438c791
6
+ metadata.gz: be21dae1919c0f313870b2b0a508304becc13117072bc0c93d7992f9cb992aaba344b30c9af36eee6fea2ec75d96c79a8a94af72430c36468998363987408811
7
+ data.tar.gz: a3f515b1a9ed7be68eb66437c1038e78ee068304fe1c13f0492c66f23a5bd3bef159f10eb1d2e3c867d946fe921e59375cd4589b6d56ce38d29cf3757e0635c7
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'http://ruby.taobao.org'
2
+
3
+ # Specify your gem's dependencies in baidu.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 刘明
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,82 @@
1
+ # Baidu
2
+
3
+ Baidu SEM Services
4
+ Baidu Ranking Services
5
+ Baidu Map Services
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'baidu'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install baidu
20
+
21
+ ## Knowledge
22
+ camel命名法,用于request数据格式
23
+ snake命名法,用于response数据格式
24
+
25
+ ## Rspec
26
+ 先修改spec/spec_helper.rb
27
+
28
+ ```ruby
29
+ BAIDU_MAP_KEY = ''
30
+ $username = ''
31
+ $password = ''
32
+ $token = ''
33
+
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ SEM
39
+
40
+ ```ruby
41
+ require 'baidu'
42
+
43
+ $auth = Baidu::Auth.new
44
+ $auth.username = 'username'
45
+ $auth.password = 'password'
46
+ $auth.token = 'token'
47
+
48
+ ss = Baidu::SEM::SearchService.new
49
+ res = ss.getKeywordBySearch({:searchWord=>'word',:searchType=>0})
50
+ res = ss.getKeywordBySearch({:searchWord=>'word',:searchType=>0},true) #debug=true
51
+ ```
52
+
53
+ MAP
54
+
55
+ ```ruby
56
+ #返回码 定义 英文返回描述
57
+ #0 正常 ok
58
+ #2 请求参数非法 Parameter Invalid
59
+ #3 权限校验失败 Verify Failure
60
+ #4 配额校验失败 Quota Failure
61
+ #5 ak不存在或者非法 AK Failure
62
+ #2xx 无权限
63
+ #3xx 配额错误
64
+ map = Baidumap.new('key')
65
+ puts map.get_xy_by_poiname('滨海公园','上海')
66
+ require 'awesome_print'
67
+ lat = 40.3377039331399
68
+ lng = 116.647588831718
69
+ ap map.bus.around(lat,lng).info
70
+ puts map.get(39.911031821584,116.44931548023).for(1000).bus
71
+ puts map.geo('22.53','113.38')
72
+ Baidumap.get_baike('北京站')
73
+ Baidumap.get_cityid('北京') #=>131
74
+ ```
75
+
76
+ ## Contributing
77
+
78
+ 1. Fork it
79
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
80
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
81
+ 4. Push to the branch (`git push origin my-new-feature`)
82
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'baidu/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "baidu"
8
+ spec.version = Baidu::VERSION
9
+ spec.authors = ["seoaqua"]
10
+ spec.email = ["seoaqua@me.com"]
11
+ spec.description = %q{Baidu Services Pack,including SEM, Map, Ranking and the others}
12
+ spec.summary = %q{a gem summary}
13
+ spec.homepage = "http://github.com/seoaqua/baidu"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_dependency "httparty"
24
+ spec.add_dependency "awesome_print"
25
+ spec.add_dependency "savon","~> 3.0"
26
+ end
@@ -1,483 +1,61 @@
1
- # encoding: utf-8
2
- require 'nokogiri'
3
- require 'json'
4
- require 'addressable/uri'
5
- require 'httparty'
6
- class SearchEngine
7
- #是否收录
8
- def initialize(pagesize = 100)
9
- @pagesize = pagesize#只允许10或100
10
- end
11
- def indexed?(url)
12
- URI(url)
13
- result = query(url)
14
- return result.has_result?
15
- end
16
- end
17
- class SearchResult
18
- def initialize(page,baseuri,pagenumber=1,pagesize=100)
19
- @page = Nokogiri::HTML page
20
- @baseuri = baseuri
21
- # @host = URI(baseuri).host
22
- @pagenumber = pagenumber
23
- @pagesize = pagesize
24
- end
25
- def whole
26
- {
27
- 'ads_top'=>ads_top,
28
- 'ads_right'=>ads_right,
29
- 'ads_bottom'=>ads_bottom,
30
- 'ranks'=>ranks
31
- }
32
- end
33
- #返回当前页中host满足条件的结果
34
- def ranks_for(specific_host)
35
- host_ranks = Hash.new
36
- ranks.each do |id,line|
37
- if specific_host.class == Regexp
38
- host_ranks[id] = line if line['host'] =~ specific_host
39
- elsif specific_host.class == String
40
- host_ranks[id] = line if line['host'] == specific_host
41
- end
42
- end
43
- host_ranks
44
- end
45
- #return the top rank number from @ranks with the input host
46
- def rank(host)#on base of ranks
47
- ranks.each do |id,line|
48
- id = id.to_i
49
- if host.class == Regexp
50
- return id if line['host'] =~ host
51
- elsif host.class == String
52
- return id if line['host'] == host
53
- end
54
- end
55
- return nil
56
- end
1
+ module Baidu
2
+ module Rank
3
+ end
4
+ module SEM
5
+ end
57
6
  end
58
7
 
59
- class Qihoo < SearchEngine
60
- Host = 'www.so.com'
61
- #基本查询, 相当于在搜索框直接数据关键词查询
62
- def query(wd)
63
- #用原始路径请求
64
- uri = URI.join("http://#{Host}/",URI.encode('s?q='+wd)).to_s
65
- page = HTTParty.get(uri)
66
- #如果请求地址被跳转,重新获取当前页的URI,可避免翻页错误
67
- uri = URI.join("http://#{Host}/",page.request.path).to_s
68
- QihooResult.new(page,uri)
69
- end
70
- def self.related_keywords(wd)
71
- url = "http://rs.so.com/?callback=Search.relate.render&encodein=utf-8&encodeout=utf-8&q="+URI.encode(wd)
72
- # uri = URI.join("http://#{Host}/",URI.encode('s?q='+wd)).to_s
73
- page = HTTParty.get(url)
74
- json_str = page.body.split("(")[1].gsub(/\s\);/) {""}
75
- parsed_json = JSON.parse(json_str)
76
- # each
77
- # parsed_json.map { |q| p q['q']}
78
- @related_keywords = parsed_json.map { |q| q['q'] }
79
- # @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
80
- end
8
+ class String
9
+ def snake_case
10
+ self.gsub(/::/, '/').
11
+ gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
12
+ gsub(/([a-z\d])([A-Z])/,'\1_\2').
13
+ tr("-", "_").
14
+ downcase
15
+ end
81
16
  end
82
-
83
- class QihooResult < SearchResult
84
- Host = 'www.so.com'
85
- #返回所有当前页的排名结果
86
- def ranks
87
- return @ranks unless @ranks.nil?
88
- @ranks = Hash.new
89
- # id = (@pagenumber - 1) * 10
90
- id = 0
91
- @page.search('//li[@class="res-list"]').each do |li|
92
- a = li.search("h3/a").first
93
- url = li.search("cite")
94
- next if a['data-pos'].nil?
95
- id += 1
96
- text = a.text.strip
97
- href = a['href']
98
- url = url.first.text
99
- host = Addressable::URI.parse(URI.encode("http://#{url}")).host
100
- @ranks[id.to_s] = {'href'=>a['href'],'text'=>text,'host'=>host}
101
- end
102
- @ranks
17
+ class Savon
18
+ class Response
19
+ def header
20
+ hash[:envelope][:header]
103
21
  end
104
- def ads_top
105
- id = 0
106
- result = []
107
- @page.search("//ul[@id='djbox']/li").each do |li|
108
- id += 1
109
- title = li.search("a").first.text
110
- href = li.search("cite").first.text.downcase
111
- host = Addressable::URI.parse(URI.encode(href)).host
112
- result[id] = {'title'=>title,'href'=>href,'host'=>host}
113
- end
114
- result
22
+ def res_header
23
+ header[:res_header]
115
24
  end
116
- def ads_bottom
117
- []
25
+ def desc
26
+ res_header[:desc]
118
27
  end
119
- def ads_right
120
- id = 0
121
- result = []
122
- @page.search("//ul[@id='rightbox']/li").each do |li|
123
- id += 1
124
- title = li.search("a").first.text
125
- href = li.search("cite").first.text.downcase
126
- host = Addressable::URI.parse(URI.encode(href)).host
127
- result[id] = {'title'=>title,'href'=>href,'host'=>host}
128
- end
129
- result
28
+ def quota
29
+ res_header[:quota]
130
30
  end
131
- def related_keywords
132
- []
31
+ def rquota
32
+ res_header[:rquota]
133
33
  end
134
- #下一页
135
- def next
136
- next_href = @page.xpath('//a[@id="snext"]')
137
- return false if next_href.empty?
138
- next_href = next_href.first['href']
139
- next_href = URI.join(@baseuri,next_href).to_s
140
- # next_href = URI.join("http://#{@host}",next_href).to_s
141
- next_page = HTTParty.get(next_href).next
142
- return QihooResult.new(next_page,next_href,@pagenumber+1)
143
- #@page = MbaiduResult.new(Mechanize.new.click(@page.link_with(:text=>/下一页/))) unless @page.link_with(:text=>/下一页/).nil?
34
+ def oprs
35
+ res_header[:oprs]
144
36
  end
145
- #有结果
146
- def has_result?
147
- !@page.search('//div[@id="main"]/h3').text().include?'没有找到该URL'
37
+ def oprtime
38
+ res_header[:oprtime]
148
39
  end
149
- end
150
-
151
- class Mbaidu < SearchEngine
152
- BaseUri = 'http://m.baidu.com/s?'
153
- headers = {
154
- "User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'
155
- }
156
- Options = {:headers => headers}
157
-
158
- #基本查询,相当于从搜索框直接输入关键词查询
159
- def query(wd)
160
- queryStr = "word=#{wd}"
161
- uri = URI.encode((BaseUri + queryStr))
162
- begin
163
- res = HTTParty.get(uri,Options)
164
- MbaiduResult.new(res,uri)
165
- rescue Exception => e
166
- warn "#{uri} fetch error: #{e.to_s}"
167
- return false
168
- end
40
+ def failures
41
+ res_header[:failures]
169
42
  end
170
- end
171
- class MbaiduResult < SearchResult
172
- # def initialize(page,baseuri,pagenumber=nil)
173
- # @page= Nokogiri::HTML page
174
- # @baseuri = baseuri
175
- # if pagenumber.nil?
176
- # @pagenumber = 1
177
- # else
178
- # @pagenumber = pagenumber
179
- # end
180
- # end
181
-
182
- #返回当前页所有查询结果
183
- def ranks
184
- #如果已经赋值说明解析过,不需要重新解析,直接返回结果
185
- return @ranks unless @ranks.nil?
186
- @ranks = Hash.new
187
- @page.xpath('//div[@class="result"]').each do |result|
188
- href,text,host,is_mobile = '','','',false
189
- a = result.search("a").first
190
- is_mobile = true unless a.search("img").empty?
191
- host = result.search('[@class="site"]').first
192
- next if host.nil?
193
- host = host.text
194
- href = a['href']
195
- text = a.text
196
- id = href.scan(/&order=(\d+)&/)
197
- if id.empty?
198
- id = nil
199
- else
200
- id = id.first.first.to_i
201
- # id = (@pagenumber-1)*10+id
202
- end
203
- =begin
204
- result.children.each do |elem|
205
- if elem.name == 'a'
206
- href = elem['href']
207
- id = elem.text.match(/^\d+/).to_s.to_i
208
- text = elem.text.sub(/^\d+/,'')
209
- text.sub!(/^\u00A0/,'')
210
- elsif elem['class'] == 'abs'
211
- elem.children.each do |elem2|
212
- if elem2['class'] == 'site'
213
- host = elem2.text
214
- break
215
- end
216
- end
217
- elsif elem['class'] == 'site'
218
- host == elem['href']
219
- end
220
- end
221
- =end
222
-
223
- @ranks[id.to_s] = {'href'=>href,'text'=>text,'is_mobile'=>is_mobile,'host'=>host.sub(/\u00A0/,'')}
224
- end
225
- @ranks
226
- end
227
- def ads_top
228
- id = 0
229
- result = []
230
- @page.search("div[@class='ec_wise_ad']/div").each do |div|
231
- id += 1
232
- href = div.search("span[@class='ec_site']").first.text
233
- href = "http://#{href}"
234
- title = div.search("a/text()").text.strip
235
- host = Addressable::URI.parse(URI.encode(href)).host
236
- result[id] = {'title'=>title,'href'=>href,'host'=>host}
237
- end
238
- result
239
- end
240
- def ads_right
241
- []
242
- end
243
- def ads_bottom
244
- []
245
- end
246
- def related_keywords
247
- @related_keywords ||= @page.search("div[@class='relativewords_info']/a").map{|a|a.text}
248
- end
249
- =begin
250
- #返回当前页中,符合host条件的结果
251
- def ranks_for(specific_host)
252
- host_ranks = Hash.new
253
- ranks.each do |id,line|
254
- if specific_host.class == Regexp
255
- host_ranks[id] = line if line['host'] =~ specific_host
256
- elsif specific_host.class == String
257
- host_ranks[id] = line if line['host'] == specific_host
258
- end
259
- end
260
- host_ranks
43
+ def code
44
+ failures[:code] if failures
261
45
  end
262
- #return the top rank number from @ranks with the input host
263
- def rank(host)#on base of ranks
264
- ranks.each do |id,line|
265
- id = id.to_i
266
- if host.class == Regexp
267
- return id if line['host'] =~ host
268
- elsif host.class == String
269
- return id if line['host'] == host
270
- end
271
- end
272
- return nil
46
+ def message
47
+ failures[:message] if failures
273
48
  end
274
- =end
275
- #下一页
276
- def next
277
- nextbutton = @page.xpath('//a[text()="下一页"]').first
278
- return nil if nextbutton.nil?
279
- url = nextbutton['href']
280
- url = URI.join(@baseuri,url).to_s
281
- page = HTTParty.get(url)
282
- return MbaiduResult.new(page,url,@pagenumber+1)
49
+ def status
50
+ res_header[:status]
283
51
  end
52
+ end
284
53
  end
285
- class Baidu < SearchEngine
286
- BaseUri = 'http://www.baidu.com/s?'
287
- def suggestions(wd)
288
- json = HTTParty.get("http://suggestion.baidu.com/su?wd=#{URI.encode(wd)}&cb=callback").force_encoding('GBK').encode("UTF-8")
289
- m = /\[([^\]]*)\]/.match json
290
- return JSON.parse m[0]
291
- end
292
- #to find out the real url for something like 'www.baidu.com/link?url=7yoYGJqjJ4zBBpC8yDF8xDhctimd_UkfF8AVaJRPKduy2ypxVG18aRB5L6D558y3MjT_Ko0nqFgkMoS'
293
- def url(id)
294
- a = Mechanize.new
295
- a.redirect_ok=false
296
- return a.head("http://www.baidu.com/link?url=#{id}").header['location']
297
- end
298
-
299
- =begin
300
- def extend(words,level=3,sleeptime=1)
301
- level = level.to_i - 1
302
- words = [words] unless words.respond_to? 'each'
303
-
304
- extensions = Array.new
305
- words.each do |word|
306
- self.query(word)
307
- extensions += related_keywords
308
- extensions += suggestions(word)
309
- sleep sleeptime
310
- end
311
- extensions.uniq!
312
- return extensions if level < 1
313
- return extensions + extend(extensions,level)
314
- end
315
- =end
316
-
317
- def popular?(wd)
318
- return HTTParty.get("http://index.baidu.com/main/word.php?word=#{URI.encode(wd.encode("GBK"))}").include?"boxFlash"
319
- end
320
-
321
- def query(wd)
322
- q = Array.new
323
- q << "wd=#{wd}"
324
- q << "rn=#{@perpage}"
325
- queryStr = q.join("&")
326
- #uri = URI.encode((BaseUri + queryStr).encode('GBK'))
327
- uri = URI.encode((BaseUri + queryStr))
328
- begin
329
- # @page = @a.get uri
330
- @page = HTTParty.get uri
331
- BaiduResult.new(@page,uri,1,@pagesize)
332
- rescue Exception => e
333
- warn e.to_s
334
- return false
335
- end
336
- =begin
337
- query = "#{query}"
338
- @uri = BaseUri+URI.encode(query.encode('GBK'))
339
- @page = @a.get @uri
340
- self.clean
341
- @number = self.how_many
342
- @maxpage = (@number / @perpage.to_f).round
343
- @maxpage =10 if @maxpage>10
344
- @currpage =0
345
- =end
346
- end
347
-
348
- #site:xxx.yyy.com
349
- def how_many_pages(host)
350
- query("site:#{host}").how_many
351
- end
352
-
353
- #domain:xxx.yyy.com/path/file.html
354
- def how_many_links(uri)
355
- query("domain:\"#{uri}\"").how_many
356
- end
357
-
358
- #site:xxx.yyy.com inurl:zzz
359
- def how_many_pages_with(host,string)
360
- query("site:#{host} inurl:#{string}").how_many
361
- end
362
- end
363
-
364
- class BaiduResult < SearchResult
365
- # def initialize(page,baseuri,pagenumber=1,pagesize=100)
366
- # @page = Nokogiri::HTML page
367
- # @baseuri = baseuri
368
- # @pagenumber = pagenumber
369
- # @pagesize = pagesize
370
- # # raise ArgumentError 'should be Mechanize::Page' unless page.class == Mechanize::Page
371
- # # @page = page
372
- # end
373
- def ranks
374
- return @ranks unless @ranks.nil?
375
- @ranks = Hash.new
376
- @page.search("//table[@class=\"result\"]|//table[@class=\"result-op\"]").each do |table|
377
- id = table['id']
378
- if @pagesize == 10
379
- id = table['id'][-1,1]
380
- id = '10' if id == '0'
381
- end
382
-
383
- @ranks[id] = Hash.new
384
- url = table.search("[@class=\"g\"]").first
385
- url = url.text unless url.nil?
386
- a = table.search("h3").first
387
- next if a.nil?
388
- @ranks[id]['text'] = a.text
389
- @ranks[id]['href'] = url #a.first['href'].sub('http://www.baidu.com/link?url=','').strip
390
- unless url.nil?
391
- url = url.strip
392
- @ranks[id]['host'] = Addressable::URI.parse(URI.encode("http://#{url}")).host
393
- else
394
- @ranks[id]['host'] = nil
395
- end
396
- end
397
- #@page.search("//table[@class=\"result\"]").map{|table|@page.search("//table[@id=\"#{table['id']}\"]//span[@class=\"g\"]").first}.map{|rank|URI(URI.encode('http://'+rank.text.strip)).host unless rank.nil?}
398
- @ranks
399
- end
400
-
401
- def ads_bottom
402
- return {} if @page.search("//table[@bgcolor='f5f5f5']").empty?
403
- return ads_top
404
- # p @page.search("//table[@bgcolor='f5f5f5']").empty?
405
- end
406
- def ads_top
407
- #灰色底推广,上下都有
408
- ads = Hash.new
409
- @page.search("//table[@bgcolor='#f5f5f5']").each do |table|
410
- id = table['id']
411
- next if id.nil?
412
- id = id[2,3].to_i.to_s
413
- ads[id]= parse_ad(table)
414
- end
415
- #白色底推广,只有上部分
416
- if ads.empty?
417
- @page.search("//table").each do |table|
418
- id = table['id']
419
- next if id.nil? or id.to_i<3000
420
- id = id[2,3].to_i.to_s
421
- ads[id]= parse_ad(table)
422
- end
423
- end
424
- ads
425
- end
426
- def parse_ad(table)
427
- href = table.search("font[@color='#008000']").text.split(/\s/).first.strip
428
- title = table.search("a").first.text.strip
429
- {'title'=>title,'href' => href,'host'=>href}
430
- end
431
- def ads_right
432
- ads = {}
433
- @page.search("//div[@id='ec_im_container']").each do |table|
434
- table.search("div[@id]").each do |div|
435
- id = div['id'][-1,1].to_i+1
436
- title = div.search("a").first
437
- next if title.nil?
438
- title = title.text
439
- url = div.search("font[@color='#008000']").first
440
- next if url.nil?
441
- url = url.text
442
- ads[id.to_s] = {'title'=>title,'href'=>url,'host'=>url}
443
- end
444
- end
445
- ads
446
- end
447
-
448
- #return the top rank number from @ranks with the input host
449
- # def rank(host)#on base of ranks
450
- # ranks.each do |id,line|
451
- # id = id.to_i
452
- # if host.class == Regexp
453
- # return id if line['host'] =~ host
454
- # elsif host.class == String
455
- # return id if line['host'] == host
456
- # end
457
- # end
458
- # return nil
459
- # end
460
-
461
- def how_many
462
- @how_many ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
463
- end
464
-
465
- def related_keywords
466
- @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
467
- end
468
-
469
- def next
470
- url = @page.xpath('//a[text()="下一页>"]').first
471
- return if url.nil?
472
- url = url['href']
473
- url = URI.join(@baseuri,url).to_s
474
- page = HTTParty.get(url)
475
- return BaiduResult.new(page,url,@pagenumber+1,@pagesize)
476
- # @page = BaiduResult.new(Mechanize.new.click(@page.link_with(:text=>/下一页/))) unless @page.link_with(:text=>/下一页/).nil?
477
- end
478
- def has_result?
479
- submit = @page.search('//a[text()="提交网址"]').first
480
- return false if submit and submit['href'].include?'sitesubmit'
481
- return true
482
- end
483
- end
54
+ require "baidu/version"
55
+ require "baidu/map"
56
+ require "baidu/sem"
57
+ require "baidu/rank"
58
+ require "baidu/auth"
59
+ require "baidu/response"
60
+ require "ext"
61
+ require "awesome_print"