baidu 1.2.3 → 1.2.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/baidu.rb +24 -26
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 11d7996ee0521e9b1c35fc3e011f6c4267e33892
4
- data.tar.gz: 286f0096efcae4f4db6839ea14b5e7c6878a0c84
3
+ metadata.gz: 4ad277bccec5ed902f59dbaab5e18aaa5fa5af8f
4
+ data.tar.gz: 08a52511a1952bc3d0ab3398cca376d7f8960bed
5
5
  SHA512:
6
- metadata.gz: 69f3944f3506e84c0649489e5bec624f7df4dccfba5665b50a2473835c5d961ea92ab27b150095ae2b90a7495e1135373089944378efa8f493c9a056c47ac1ad
7
- data.tar.gz: eb391f043831d459b0e4e843596213a79d098fe23c48ce838a6c4b10a34f92d8e88e7f10c69838cb6a477996ad238fcb39ad0d1e71fa98d0b55840759ffedd43
6
+ metadata.gz: 8b063ef6a9d4d85dea496b1b28fabe9ce184384452991de9872551cd39535e1bb55084156205bbdafadda7ee4f5be80f28517004d2ec5be683d7949647f4dbd6
7
+ data.tar.gz: b0950ebbb8c6aa2fd49388d92bc21306cbf323f5a2971380194525ee834f07a2abe862da3f10a87b3d375d1728699a087229eaceb221617f265f10b3c9fbd12e
data/lib/baidu.rb CHANGED
@@ -5,6 +5,9 @@ require 'addressable/uri'
5
5
  require 'httparty'
6
6
  class SearchEngine
7
7
  #是否收录
8
+ def initialize(perpage = 100)
9
+ @perpage = perpage
10
+ end
8
11
  def indexed?(url)
9
12
  URI(url)
10
13
  result = query(url)
@@ -22,7 +25,14 @@ class SearchResult
22
25
  @pagenumber = pagenumber
23
26
  end
24
27
  end
25
-
28
+ def whole
29
+ {
30
+ 'ads_top'=>ads_top,
31
+ 'ads_right'=>ads_right,
32
+ 'ads_bottom'=>ads_bottom,
33
+ 'ranks'=>ranks
34
+ }
35
+ end
26
36
  #返回当前页中host满足条件的结果
27
37
  def ranks_for(specific_host)
28
38
  host_ranks = Hash.new
@@ -64,7 +74,6 @@ end
64
74
 
65
75
  class QihooResult < SearchResult
66
76
  Host = 'www.so.com'
67
-
68
77
  #返回所有当前页的排名结果
69
78
  def ranks
70
79
  return @ranks unless @ranks.nil?
@@ -267,15 +276,6 @@ class MbaiduResult < SearchResult
267
276
  end
268
277
  class Baidu < SearchEngine
269
278
  BaseUri = 'http://www.baidu.com/s?'
270
- PerPage = 100
271
-
272
- def initialize
273
- # @a = Mechanize.new {|agent| agent.user_agent_alias = 'Linux Mozilla'}
274
- # @a.idle_timeout = 2
275
- # @a.max_history = 1
276
- @page = nil
277
- end
278
-
279
279
  def suggestions(wd)
280
280
  json = HTTParty.get("http://suggestion.baidu.com/su?wd=#{URI.encode(wd)}&cb=callback").body.force_encoding('GBK').encode("UTF-8")
281
281
  m = /\[([^\]]*)\]/.match json
@@ -313,7 +313,7 @@ class Baidu < SearchEngine
313
313
  def query(wd)
314
314
  q = Array.new
315
315
  q << "wd=#{wd}"
316
- q << "rn=#{PerPage}"
316
+ q << "rn=#{@perpage}"
317
317
  queryStr = q.join("&")
318
318
  #uri = URI.encode((BaseUri + queryStr).encode('GBK'))
319
319
  uri = URI.encode((BaseUri + queryStr))
@@ -364,7 +364,6 @@ end
364
364
 
365
365
  class BaiduResult < SearchResult
366
366
  def initialize(page,baseuri,pagenumber=1)
367
- File.open('/tmp/file','w'){|f|f.puts page}
368
367
  @page = Nokogiri::HTML page
369
368
  @baseuri = baseuri
370
369
  @pagenumber = pagenumber
@@ -396,31 +395,30 @@ class BaiduResult < SearchResult
396
395
  end
397
396
 
398
397
  def ads_bottom
398
+ id = 0
399
399
  ads = {}
400
- id=0
401
- @page.search("//table[@class='EC_mr15']|//table[@class='ec_pp_f']").each do |table|
402
- next if table['id'].nil?
400
+ @page.search("//table[@bgcolor='f5f5f5']").each do |table|
401
+ next unless table['id'].nil?
403
402
  id += 1
404
- href = table.search("font[@color='#008000']").text.split(/\s/).first.strip
405
- title = table.search("a").first.text.strip
406
- ads[id.to_s]= {'title'=>title,'href' => href,'host'=>href}
403
+ ads[id]= parse_ad(table)
407
404
  end
408
405
  ads
409
406
  end
410
407
  def ads_top
411
408
  id = 0
412
409
  ads = {}
413
- @page.search("//table[@class='EC_mr15']|//table[@class='ec_pp_f']").each do |table|
410
+ @page.search("//table[@bgcolor='f5f5f5']").each do |table|
411
+ next if id.nil?
414
412
  id += 1
415
- # id = table['id']
416
- next unless id.nil?
417
- # id = id[-1,1]
418
- href = table.search("font[@color='#008000']").text.split(/\s/).first.strip
419
- title = table.search("a").first.text.strip
420
- ads[id]= {'title'=>title,'href' => href,'host'=>href}
413
+ ads[id]= parse_ad(table)
421
414
  end
422
415
  ads
423
416
  end
417
+ def parse_ad(table)
418
+ href = table.search("font[@color='#008000']").text.split(/\s/).first.strip
419
+ title = table.search("a").first.text.strip
420
+ {'title'=>title,'href' => href,'host'=>href}
421
+ end
424
422
  def ads_right
425
423
  ads = {}
426
424
  @page.search("//div[@id='ec_im_container']").each do |table|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baidu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - seoaqua