baidu 1.2.3 → 1.2.4

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/lib/baidu.rb +24 -26
  3. metadata +1 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 11d7996ee0521e9b1c35fc3e011f6c4267e33892
4
- data.tar.gz: 286f0096efcae4f4db6839ea14b5e7c6878a0c84
3
+ metadata.gz: 4ad277bccec5ed902f59dbaab5e18aaa5fa5af8f
4
+ data.tar.gz: 08a52511a1952bc3d0ab3398cca376d7f8960bed
5
5
  SHA512:
6
- metadata.gz: 69f3944f3506e84c0649489e5bec624f7df4dccfba5665b50a2473835c5d961ea92ab27b150095ae2b90a7495e1135373089944378efa8f493c9a056c47ac1ad
7
- data.tar.gz: eb391f043831d459b0e4e843596213a79d098fe23c48ce838a6c4b10a34f92d8e88e7f10c69838cb6a477996ad238fcb39ad0d1e71fa98d0b55840759ffedd43
6
+ metadata.gz: 8b063ef6a9d4d85dea496b1b28fabe9ce184384452991de9872551cd39535e1bb55084156205bbdafadda7ee4f5be80f28517004d2ec5be683d7949647f4dbd6
7
+ data.tar.gz: b0950ebbb8c6aa2fd49388d92bc21306cbf323f5a2971380194525ee834f07a2abe862da3f10a87b3d375d1728699a087229eaceb221617f265f10b3c9fbd12e
data/lib/baidu.rb CHANGED
@@ -5,6 +5,9 @@ require 'addressable/uri'
5
5
  require 'httparty'
6
6
  class SearchEngine
7
7
  #是否收录
8
+ def initialize(perpage = 100)
9
+ @perpage = perpage
10
+ end
8
11
  def indexed?(url)
9
12
  URI(url)
10
13
  result = query(url)
@@ -22,7 +25,14 @@ class SearchResult
22
25
  @pagenumber = pagenumber
23
26
  end
24
27
  end
25
-
28
+ def whole
29
+ {
30
+ 'ads_top'=>ads_top,
31
+ 'ads_right'=>ads_right,
32
+ 'ads_bottom'=>ads_bottom,
33
+ 'ranks'=>ranks
34
+ }
35
+ end
26
36
  #返回当前页中host满足条件的结果
27
37
  def ranks_for(specific_host)
28
38
  host_ranks = Hash.new
@@ -64,7 +74,6 @@ end
64
74
 
65
75
  class QihooResult < SearchResult
66
76
  Host = 'www.so.com'
67
-
68
77
  #返回所有当前页的排名结果
69
78
  def ranks
70
79
  return @ranks unless @ranks.nil?
@@ -267,15 +276,6 @@ class MbaiduResult < SearchResult
267
276
  end
268
277
  class Baidu < SearchEngine
269
278
  BaseUri = 'http://www.baidu.com/s?'
270
- PerPage = 100
271
-
272
- def initialize
273
- # @a = Mechanize.new {|agent| agent.user_agent_alias = 'Linux Mozilla'}
274
- # @a.idle_timeout = 2
275
- # @a.max_history = 1
276
- @page = nil
277
- end
278
-
279
279
  def suggestions(wd)
280
280
  json = HTTParty.get("http://suggestion.baidu.com/su?wd=#{URI.encode(wd)}&cb=callback").body.force_encoding('GBK').encode("UTF-8")
281
281
  m = /\[([^\]]*)\]/.match json
@@ -313,7 +313,7 @@ class Baidu < SearchEngine
313
313
  def query(wd)
314
314
  q = Array.new
315
315
  q << "wd=#{wd}"
316
- q << "rn=#{PerPage}"
316
+ q << "rn=#{@perpage}"
317
317
  queryStr = q.join("&")
318
318
  #uri = URI.encode((BaseUri + queryStr).encode('GBK'))
319
319
  uri = URI.encode((BaseUri + queryStr))
@@ -364,7 +364,6 @@ end
364
364
 
365
365
  class BaiduResult < SearchResult
366
366
  def initialize(page,baseuri,pagenumber=1)
367
- File.open('/tmp/file','w'){|f|f.puts page}
368
367
  @page = Nokogiri::HTML page
369
368
  @baseuri = baseuri
370
369
  @pagenumber = pagenumber
@@ -396,31 +395,30 @@ class BaiduResult < SearchResult
396
395
  end
397
396
 
398
397
  def ads_bottom
398
+ id = 0
399
399
  ads = {}
400
- id=0
401
- @page.search("//table[@class='EC_mr15']|//table[@class='ec_pp_f']").each do |table|
402
- next if table['id'].nil?
400
+ @page.search("//table[@bgcolor='f5f5f5']").each do |table|
401
+ next unless table['id'].nil?
403
402
  id += 1
404
- href = table.search("font[@color='#008000']").text.split(/\s/).first.strip
405
- title = table.search("a").first.text.strip
406
- ads[id.to_s]= {'title'=>title,'href' => href,'host'=>href}
403
+ ads[id]= parse_ad(table)
407
404
  end
408
405
  ads
409
406
  end
410
407
  def ads_top
411
408
  id = 0
412
409
  ads = {}
413
- @page.search("//table[@class='EC_mr15']|//table[@class='ec_pp_f']").each do |table|
410
+ @page.search("//table[@bgcolor='f5f5f5']").each do |table|
411
+ next if id.nil?
414
412
  id += 1
415
- # id = table['id']
416
- next unless id.nil?
417
- # id = id[-1,1]
418
- href = table.search("font[@color='#008000']").text.split(/\s/).first.strip
419
- title = table.search("a").first.text.strip
420
- ads[id]= {'title'=>title,'href' => href,'host'=>href}
413
+ ads[id]= parse_ad(table)
421
414
  end
422
415
  ads
423
416
  end
417
+ def parse_ad(table)
418
+ href = table.search("font[@color='#008000']").text.split(/\s/).first.strip
419
+ title = table.search("a").first.text.strip
420
+ {'title'=>title,'href' => href,'host'=>href}
421
+ end
424
422
  def ads_right
425
423
  ads = {}
426
424
  @page.search("//div[@id='ec_im_container']").each do |table|
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baidu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.3
4
+ version: 1.2.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - seoaqua