baidu 1.2.10 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 973532d0e6e0bc141eb17dcd034daccbefe2210a
4
- data.tar.gz: abdd019318edf7956ca07a7dd1bca44c02faaa1b
3
+ metadata.gz: f3ab3b16d4f810561f78ee164b2882eaf8f2bae8
4
+ data.tar.gz: 706fc2deeb2af8c5606eb685f1c85aef45f85127
5
5
  SHA512:
6
- metadata.gz: b74a336d56fd97b365db05d095991d49fa7cbf8098c180635afe2b8e16683db51ffa876a1fd855491b7f331574f25a91c3fe80dbdd6f2685517d1b45b9807034
7
- data.tar.gz: a430a916135a289b1577f3468d43188b3b8c6dc84db7460cd00d3af6f993f913f358d7e8e5d1a87d5a7557db30a1ec85cedec9eb949d3698d17fcd068438c791
6
+ metadata.gz: be21dae1919c0f313870b2b0a508304becc13117072bc0c93d7992f9cb992aaba344b30c9af36eee6fea2ec75d96c79a8a94af72430c36468998363987408811
7
+ data.tar.gz: a3f515b1a9ed7be68eb66437c1038e78ee068304fe1c13f0492c66f23a5bd3bef159f10eb1d2e3c867d946fe921e59375cd4589b6d56ce38d29cf3757e0635c7
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'http://ruby.taobao.org'
2
+
3
+ # Specify your gem's dependencies in baidu.gemspec
4
+ gemspec
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 刘明
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,82 @@
1
+ # Baidu
2
+
3
+ Baidu SEM Services
4
+ Baidu Ranking Services
5
+ Baidu Map Services
6
+
7
+ ## Installation
8
+
9
+ Add this line to your application's Gemfile:
10
+
11
+ gem 'baidu'
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install baidu
20
+
21
+ ## Knowledge
22
+ camel命名法,用于request数据格式
23
+ snake命名法,用于response数据格式
24
+
25
+ ## Rspec
26
+ 先修改spec/spec_helper.rb
27
+
28
+ ```ruby
29
+ BAIDU_MAP_KEY = ''
30
+ $username = ''
31
+ $password = ''
32
+ $token = ''
33
+
34
+ ```
35
+
36
+ ## Usage
37
+
38
+ SEM
39
+
40
+ ```ruby
41
+ require 'baidu'
42
+
43
+ $auth = Baidu::Auth.new
44
+ $auth.username = 'username'
45
+ $auth.password = 'password'
46
+ $auth.token = 'token'
47
+
48
+ ss = Baidu::SEM::SearchService.new
49
+ res = ss.getKeywordBySearch({:searchWord=>'word',:searchType=>0})
50
+ res = ss.getKeywordBySearch({:searchWord=>'word',:searchType=>0},true) #debug=true
51
+ ```
52
+
53
+ MAP
54
+
55
+ ```ruby
56
+ #返回码 定义 英文返回描述
57
+ #0 正常 ok
58
+ #2 请求参数非法 Parameter Invalid
59
+ #3 权限校验失败 Verify Failure
60
+ #4 配额校验失败 Quota Failure
61
+ #5 ak不存在或者非法 AK Failure
62
+ #2xx 无权限
63
+ #3xx 配额错误
64
+ puts map.get_xy_by_poiname('滨海公园','上海')
65
+ map = Baidumap.new('key')
66
+ require 'awesome_print'
67
+ lat = 40.3377039331399
68
+ lng = 116.647588831718
69
+ ap map.bus.around(lat,lng).info
70
+ puts map.get(39.911031821584,116.44931548023).for(1000).bus
71
+ puts map.geo('22.53','113.38')
72
+ Baidumap.get_baike('北京站')
73
+ Baidumap.get_cityid('北京') #=>131
74
+ ```
75
+
76
+ ## Contributing
77
+
78
+ 1. Fork it
79
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
80
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
81
+ 4. Push to the branch (`git push origin my-new-feature`)
82
+ 5. Create new Pull Request
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,26 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'baidu/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "baidu"
8
+ spec.version = Baidu::VERSION
9
+ spec.authors = ["seoaqua"]
10
+ spec.email = ["seoaqua@me.com"]
11
+ spec.description = %q{Baidu Services Pack,including SEM, Map, Ranking and the others}
12
+ spec.summary = %q{a gem summary}
13
+ spec.homepage = "http://github.com/seoaqua/baidu"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_development_dependency "bundler", "~> 1.3"
22
+ spec.add_development_dependency "rake"
23
+ spec.add_dependency "httparty"
24
+ spec.add_dependency "awesome_print"
25
+ spec.add_dependency "savon","~> 3.0"
26
+ end
@@ -1,483 +1,61 @@
1
- # encoding: utf-8
2
- require 'nokogiri'
3
- require 'json'
4
- require 'addressable/uri'
5
- require 'httparty'
6
- class SearchEngine
7
- #是否收录
8
- def initialize(pagesize = 100)
9
- @pagesize = pagesize#只允许10或100
10
- end
11
- def indexed?(url)
12
- URI(url)
13
- result = query(url)
14
- return result.has_result?
15
- end
16
- end
17
- class SearchResult
18
- def initialize(page,baseuri,pagenumber=1,pagesize=100)
19
- @page = Nokogiri::HTML page
20
- @baseuri = baseuri
21
- # @host = URI(baseuri).host
22
- @pagenumber = pagenumber
23
- @pagesize = pagesize
24
- end
25
- def whole
26
- {
27
- 'ads_top'=>ads_top,
28
- 'ads_right'=>ads_right,
29
- 'ads_bottom'=>ads_bottom,
30
- 'ranks'=>ranks
31
- }
32
- end
33
- #返回当前页中host满足条件的结果
34
- def ranks_for(specific_host)
35
- host_ranks = Hash.new
36
- ranks.each do |id,line|
37
- if specific_host.class == Regexp
38
- host_ranks[id] = line if line['host'] =~ specific_host
39
- elsif specific_host.class == String
40
- host_ranks[id] = line if line['host'] == specific_host
41
- end
42
- end
43
- host_ranks
44
- end
45
- #return the top rank number from @ranks with the input host
46
- def rank(host)#on base of ranks
47
- ranks.each do |id,line|
48
- id = id.to_i
49
- if host.class == Regexp
50
- return id if line['host'] =~ host
51
- elsif host.class == String
52
- return id if line['host'] == host
53
- end
54
- end
55
- return nil
56
- end
1
+ module Baidu
2
+ module Rank
3
+ end
4
+ module SEM
5
+ end
57
6
  end
58
7
 
59
- class Qihoo < SearchEngine
60
- Host = 'www.so.com'
61
- #基本查询, 相当于在搜索框直接数据关键词查询
62
- def query(wd)
63
- #用原始路径请求
64
- uri = URI.join("http://#{Host}/",URI.encode('s?q='+wd)).to_s
65
- page = HTTParty.get(uri)
66
- #如果请求地址被跳转,重新获取当前页的URI,可避免翻页错误
67
- uri = URI.join("http://#{Host}/",page.request.path).to_s
68
- QihooResult.new(page,uri)
69
- end
70
- def self.related_keywords(wd)
71
- url = "http://rs.so.com/?callback=Search.relate.render&encodein=utf-8&encodeout=utf-8&q="+URI.encode(wd)
72
- # uri = URI.join("http://#{Host}/",URI.encode('s?q='+wd)).to_s
73
- page = HTTParty.get(url)
74
- json_str = page.body.split("(")[1].gsub(/\s\);/) {""}
75
- parsed_json = JSON.parse(json_str)
76
- # each
77
- # parsed_json.map { |q| p q['q']}
78
- @related_keywords = parsed_json.map { |q| q['q'] }
79
- # @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
80
- end
8
+ class String
9
+ def snake_case
10
+ self.gsub(/::/, '/').
11
+ gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2').
12
+ gsub(/([a-z\d])([A-Z])/,'\1_\2').
13
+ tr("-", "_").
14
+ downcase
15
+ end
81
16
  end
82
-
83
- class QihooResult < SearchResult
84
- Host = 'www.so.com'
85
- #返回所有当前页的排名结果
86
- def ranks
87
- return @ranks unless @ranks.nil?
88
- @ranks = Hash.new
89
- # id = (@pagenumber - 1) * 10
90
- id = 0
91
- @page.search('//li[@class="res-list"]').each do |li|
92
- a = li.search("h3/a").first
93
- url = li.search("cite")
94
- next if a['data-pos'].nil?
95
- id += 1
96
- text = a.text.strip
97
- href = a['href']
98
- url = url.first.text
99
- host = Addressable::URI.parse(URI.encode("http://#{url}")).host
100
- @ranks[id.to_s] = {'href'=>a['href'],'text'=>text,'host'=>host}
101
- end
102
- @ranks
17
+ class Savon
18
+ class Response
19
+ def header
20
+ hash[:envelope][:header]
103
21
  end
104
- def ads_top
105
- id = 0
106
- result = []
107
- @page.search("//ul[@id='djbox']/li").each do |li|
108
- id += 1
109
- title = li.search("a").first.text
110
- href = li.search("cite").first.text.downcase
111
- host = Addressable::URI.parse(URI.encode(href)).host
112
- result[id] = {'title'=>title,'href'=>href,'host'=>host}
113
- end
114
- result
22
+ def res_header
23
+ header[:res_header]
115
24
  end
116
- def ads_bottom
117
- []
25
+ def desc
26
+ res_header[:desc]
118
27
  end
119
- def ads_right
120
- id = 0
121
- result = []
122
- @page.search("//ul[@id='rightbox']/li").each do |li|
123
- id += 1
124
- title = li.search("a").first.text
125
- href = li.search("cite").first.text.downcase
126
- host = Addressable::URI.parse(URI.encode(href)).host
127
- result[id] = {'title'=>title,'href'=>href,'host'=>host}
128
- end
129
- result
28
+ def quota
29
+ res_header[:quota]
130
30
  end
131
- def related_keywords
132
- []
31
+ def rquota
32
+ res_header[:rquota]
133
33
  end
134
- #下一页
135
- def next
136
- next_href = @page.xpath('//a[@id="snext"]')
137
- return false if next_href.empty?
138
- next_href = next_href.first['href']
139
- next_href = URI.join(@baseuri,next_href).to_s
140
- # next_href = URI.join("http://#{@host}",next_href).to_s
141
- next_page = HTTParty.get(next_href).next
142
- return QihooResult.new(next_page,next_href,@pagenumber+1)
143
- #@page = MbaiduResult.new(Mechanize.new.click(@page.link_with(:text=>/下一页/))) unless @page.link_with(:text=>/下一页/).nil?
34
+ def oprs
35
+ res_header[:oprs]
144
36
  end
145
- #有结果
146
- def has_result?
147
- !@page.search('//div[@id="main"]/h3').text().include?'没有找到该URL'
37
+ def oprtime
38
+ res_header[:oprtime]
148
39
  end
149
- end
150
-
151
- class Mbaidu < SearchEngine
152
- BaseUri = 'http://m.baidu.com/s?'
153
- headers = {
154
- "User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'
155
- }
156
- Options = {:headers => headers}
157
-
158
- #基本查询,相当于从搜索框直接输入关键词查询
159
- def query(wd)
160
- queryStr = "word=#{wd}"
161
- uri = URI.encode((BaseUri + queryStr))
162
- begin
163
- res = HTTParty.get(uri,Options)
164
- MbaiduResult.new(res,uri)
165
- rescue Exception => e
166
- warn "#{uri} fetch error: #{e.to_s}"
167
- return false
168
- end
40
+ def failures
41
+ res_header[:failures]
169
42
  end
170
- end
171
- class MbaiduResult < SearchResult
172
- # def initialize(page,baseuri,pagenumber=nil)
173
- # @page= Nokogiri::HTML page
174
- # @baseuri = baseuri
175
- # if pagenumber.nil?
176
- # @pagenumber = 1
177
- # else
178
- # @pagenumber = pagenumber
179
- # end
180
- # end
181
-
182
- #返回当前页所有查询结果
183
- def ranks
184
- #如果已经赋值说明解析过,不需要重新解析,直接返回结果
185
- return @ranks unless @ranks.nil?
186
- @ranks = Hash.new
187
- @page.xpath('//div[@class="result"]').each do |result|
188
- href,text,host,is_mobile = '','','',false
189
- a = result.search("a").first
190
- is_mobile = true unless a.search("img").empty?
191
- host = result.search('[@class="site"]').first
192
- next if host.nil?
193
- host = host.text
194
- href = a['href']
195
- text = a.text
196
- id = href.scan(/&order=(\d+)&/)
197
- if id.empty?
198
- id = nil
199
- else
200
- id = id.first.first.to_i
201
- # id = (@pagenumber-1)*10+id
202
- end
203
- =begin
204
- result.children.each do |elem|
205
- if elem.name == 'a'
206
- href = elem['href']
207
- id = elem.text.match(/^\d+/).to_s.to_i
208
- text = elem.text.sub(/^\d+/,'')
209
- text.sub!(/^\u00A0/,'')
210
- elsif elem['class'] == 'abs'
211
- elem.children.each do |elem2|
212
- if elem2['class'] == 'site'
213
- host = elem2.text
214
- break
215
- end
216
- end
217
- elsif elem['class'] == 'site'
218
- host == elem['href']
219
- end
220
- end
221
- =end
222
-
223
- @ranks[id.to_s] = {'href'=>href,'text'=>text,'is_mobile'=>is_mobile,'host'=>host.sub(/\u00A0/,'')}
224
- end
225
- @ranks
226
- end
227
- def ads_top
228
- id = 0
229
- result = []
230
- @page.search("div[@class='ec_wise_ad']/div").each do |div|
231
- id += 1
232
- href = div.search("span[@class='ec_site']").first.text
233
- href = "http://#{href}"
234
- title = div.search("a/text()").text.strip
235
- host = Addressable::URI.parse(URI.encode(href)).host
236
- result[id] = {'title'=>title,'href'=>href,'host'=>host}
237
- end
238
- result
239
- end
240
- def ads_right
241
- []
242
- end
243
- def ads_bottom
244
- []
245
- end
246
- def related_keywords
247
- @related_keywords ||= @page.search("div[@class='relativewords_info']/a").map{|a|a.text}
248
- end
249
- =begin
250
- #返回当前页中,符合host条件的结果
251
- def ranks_for(specific_host)
252
- host_ranks = Hash.new
253
- ranks.each do |id,line|
254
- if specific_host.class == Regexp
255
- host_ranks[id] = line if line['host'] =~ specific_host
256
- elsif specific_host.class == String
257
- host_ranks[id] = line if line['host'] == specific_host
258
- end
259
- end
260
- host_ranks
43
+ def code
44
+ failures[:code] if failures
261
45
  end
262
- #return the top rank number from @ranks with the input host
263
- def rank(host)#on base of ranks
264
- ranks.each do |id,line|
265
- id = id.to_i
266
- if host.class == Regexp
267
- return id if line['host'] =~ host
268
- elsif host.class == String
269
- return id if line['host'] == host
270
- end
271
- end
272
- return nil
46
+ def message
47
+ failures[:message] if failures
273
48
  end
274
- =end
275
- #下一页
276
- def next
277
- nextbutton = @page.xpath('//a[text()="下一页"]').first
278
- return nil if nextbutton.nil?
279
- url = nextbutton['href']
280
- url = URI.join(@baseuri,url).to_s
281
- page = HTTParty.get(url)
282
- return MbaiduResult.new(page,url,@pagenumber+1)
49
+ def status
50
+ res_header[:status]
283
51
  end
52
+ end
284
53
  end
285
- class Baidu < SearchEngine
286
- BaseUri = 'http://www.baidu.com/s?'
287
- def suggestions(wd)
288
- json = HTTParty.get("http://suggestion.baidu.com/su?wd=#{URI.encode(wd)}&cb=callback").force_encoding('GBK').encode("UTF-8")
289
- m = /\[([^\]]*)\]/.match json
290
- return JSON.parse m[0]
291
- end
292
- #to find out the real url for something lik 'www.baidu.com/link?url=7yoYGJqjJ4zBBpC8yDF8xDhctimd_UkfF8AVaJRPKduy2ypxVG18aRB5L6D558y3MjT_Ko0nqFgkMoS'
293
- def url(id)
294
- a = Mechanize.new
295
- a.redirect_ok=false
296
- return a.head("http://www.baidu.com/link?url=#{id}").header['location']
297
- end
298
-
299
- =begin
300
- def extend(words,level=3,sleeptime=1)
301
- level = level.to_i - 1
302
- words = [words] unless words.respond_to? 'each'
303
-
304
- extensions = Array.new
305
- words.each do |word|
306
- self.query(word)
307
- extensions += related_keywords
308
- extensions += suggestions(word)
309
- sleep sleeptime
310
- end
311
- extensions.uniq!
312
- return extensions if level < 1
313
- return extensions + extend(extensions,level)
314
- end
315
- =end
316
-
317
- def popular?(wd)
318
- return HTTParty.get("http://index.baidu.com/main/word.php?word=#{URI.encode(wd.encode("GBK"))}").include?"boxFlash"
319
- end
320
-
321
- def query(wd)
322
- q = Array.new
323
- q << "wd=#{wd}"
324
- q << "rn=#{@perpage}"
325
- queryStr = q.join("&")
326
- #uri = URI.encode((BaseUri + queryStr).encode('GBK'))
327
- uri = URI.encode((BaseUri + queryStr))
328
- begin
329
- # @page = @a.get uri
330
- @page = HTTParty.get uri
331
- BaiduResult.new(@page,uri,1,@pagesize)
332
- rescue Exception => e
333
- warn e.to_s
334
- return false
335
- end
336
- =begin
337
- query = "#{query}"
338
- @uri = BaseUri+URI.encode(query.encode('GBK'))
339
- @page = @a.get @uri
340
- self.clean
341
- @number = self.how_many
342
- @maxpage = (@number / @perpage.to_f).round
343
- @maxpage =10 if @maxpage>10
344
- @currpage =0
345
- =end
346
- end
347
-
348
- #site:xxx.yyy.com
349
- def how_many_pages(host)
350
- query("site:#{host}").how_many
351
- end
352
-
353
- #domain:xxx.yyy.com/path/file.html
354
- def how_many_links(uri)
355
- query("domain:\"#{uri}\"").how_many
356
- end
357
-
358
- #site:xxx.yyy.com inurl:zzz
359
- def how_many_pages_with(host,string)
360
- query("site:#{host} inurl:#{string}").how_many
361
- end
362
- end
363
-
364
- class BaiduResult < SearchResult
365
- # def initialize(page,baseuri,pagenumber=1,pagesize=100)
366
- # @page = Nokogiri::HTML page
367
- # @baseuri = baseuri
368
- # @pagenumber = pagenumber
369
- # @pagesize = pagesize
370
- # # raise ArgumentError 'should be Mechanize::Page' unless page.class == Mechanize::Page
371
- # # @page = page
372
- # end
373
- def ranks
374
- return @ranks unless @ranks.nil?
375
- @ranks = Hash.new
376
- @page.search("//table[@class=\"result\"]|//table[@class=\"result-op\"]").each do |table|
377
- id = table['id']
378
- if @pagesize == 10
379
- id = table['id'][-1,1]
380
- id = '10' if id == '0'
381
- end
382
-
383
- @ranks[id] = Hash.new
384
- url = table.search("[@class=\"g\"]").first
385
- url = url.text unless url.nil?
386
- a = table.search("h3").first
387
- next if a.nil?
388
- @ranks[id]['text'] = a.text
389
- @ranks[id]['href'] = url #a.first['href'].sub('http://www.baidu.com/link?url=','').strip
390
- unless url.nil?
391
- url = url.strip
392
- @ranks[id]['host'] = Addressable::URI.parse(URI.encode("http://#{url}")).host
393
- else
394
- @ranks[id]['host'] = nil
395
- end
396
- end
397
- #@page.search("//table[@class=\"result\"]").map{|table|@page.search("//table[@id=\"#{table['id']}\"]//span[@class=\"g\"]").first}.map{|rank|URI(URI.encode('http://'+rank.text.strip)).host unless rank.nil?}
398
- @ranks
399
- end
400
-
401
- def ads_bottom
402
- return {} if @page.search("//table[@bgcolor='f5f5f5']").empty?
403
- return ads_top
404
- # p @page.search("//table[@bgcolor='f5f5f5']").empty?
405
- end
406
- def ads_top
407
- #灰色底推广,上下都有
408
- ads = Hash.new
409
- @page.search("//table[@bgcolor='#f5f5f5']").each do |table|
410
- id = table['id']
411
- next if id.nil?
412
- id = id[2,3].to_i.to_s
413
- ads[id]= parse_ad(table)
414
- end
415
- #白色底推广,只有上部分
416
- if ads.empty?
417
- @page.search("//table").each do |table|
418
- id = table['id']
419
- next if id.nil? or id.to_i<3000
420
- id = id[2,3].to_i.to_s
421
- ads[id]= parse_ad(table)
422
- end
423
- end
424
- ads
425
- end
426
- def parse_ad(table)
427
- href = table.search("font[@color='#008000']").text.split(/\s/).first.strip
428
- title = table.search("a").first.text.strip
429
- {'title'=>title,'href' => href,'host'=>href}
430
- end
431
- def ads_right
432
- ads = {}
433
- @page.search("//div[@id='ec_im_container']").each do |table|
434
- table.search("div[@id]").each do |div|
435
- id = div['id'][-1,1].to_i+1
436
- title = div.search("a").first
437
- next if title.nil?
438
- title = title.text
439
- url = div.search("font[@color='#008000']").first
440
- next if url.nil?
441
- url = url.text
442
- ads[id.to_s] = {'title'=>title,'href'=>url,'host'=>url}
443
- end
444
- end
445
- ads
446
- end
447
-
448
- #return the top rank number from @ranks with the input host
449
- # def rank(host)#on base of ranks
450
- # ranks.each do |id,line|
451
- # id = id.to_i
452
- # if host.class == Regexp
453
- # return id if line['host'] =~ host
454
- # elsif host.class == String
455
- # return id if line['host'] == host
456
- # end
457
- # end
458
- # return nil
459
- # end
460
-
461
- def how_many
462
- @how_many ||= @page.search("//span[@class='nums']").map{|num|num.content.gsub(/\D/,'').to_i unless num.nil?}.first
463
- end
464
-
465
- def related_keywords
466
- @related_keywords ||= @page.search("//div[@id=\"rs\"]//tr//a").map{|keyword| keyword.text}
467
- end
468
-
469
- def next
470
- url = @page.xpath('//a[text()="下一页>"]').first
471
- return if url.nil?
472
- url = url['href']
473
- url = URI.join(@baseuri,url).to_s
474
- page = HTTParty.get(url)
475
- return BaiduResult.new(page,url,@pagenumber+1,@pagesize)
476
- # @page = BaiduResult.new(Mechanize.new.click(@page.link_with(:text=>/下一页/))) unless @page.link_with(:text=>/下一页/).nil?
477
- end
478
- def has_result?
479
- submit = @page.search('//a[text()="提交网址"]').first
480
- return false if submit and submit['href'].include?'sitesubmit'
481
- return true
482
- end
483
- end
54
+ require "baidu/version"
55
+ require "baidu/map"
56
+ require "baidu/sem"
57
+ require "baidu/rank"
58
+ require "baidu/auth"
59
+ require "baidu/response"
60
+ require "ext"
61
+ require "awesome_print"