baidu 0.2.2 → 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/baidu.rb +11 -6
  2. metadata +2 -2
@@ -3,30 +3,33 @@ require 'mechanize'
3
3
  class Baidu
4
4
  def initialize(offset100=false)
5
5
  @a = Mechanize.new {|agent| agent.user_agent_alias = 'Linux Mozilla'}
6
+ @a.max_history = 1
6
7
  @perpage = 10
7
8
  @perpage = 100 if offset100==true
8
9
  @baseuri = "http://www.baidu.com/s?rn=#{@perpage}&wd="
9
10
  end
10
11
  def query(query)
12
+ query = "#{query}"
11
13
  @uri = @baseuri+URI.encode(query.encode('GBK'))
12
14
  @page = @a.get @uri
15
+ #File.open('/tmp/testpage','w'){|f|f.puts @page.body} if query=='1458 Italia'
13
16
  self.clean
14
- @number = self.number
17
+ @number = self.how_many
15
18
  @maxpage = (@number / @perpage.to_f).round
16
19
  @maxpage =10 if @maxpage>10
17
20
  @currpage =0
18
21
  end
19
22
  def how_many_pages(uri)
20
23
  self.query("site:#{uri}")
21
- return self.number
24
+ return self.how_many
22
25
  end
23
26
  def how_many_links(uri)
24
27
  self.query("domain:\"#{uri}\"")
25
- return self.number
28
+ return self.how_many
26
29
  end
27
30
  def how_many_pages_with(url,string)
28
31
  self.query("site:#{url} inurl:#{string}")
29
- return self.number
32
+ return self.how_many
30
33
  end
31
34
  def rank(host)
32
35
  @page.search("//table[@class=\"result\"]").each do |table|
@@ -39,7 +42,8 @@ class Baidu
39
42
  end
40
43
  return false
41
44
  end
42
- def number
45
+ def how_many
46
+ return false if @page.search("//span[@class='nums']").first.nil?
43
47
  return @page.search("//span[@class='nums']").first.content.gsub(/\D/,'').to_i
44
48
  end
45
49
  def next
@@ -50,7 +54,8 @@ class Baidu
50
54
  return true
51
55
  end
52
56
  def clean
53
- @page.body.encode!('UTF-8','GBK')
57
+ @page.body.force_encoding('GBK')
58
+ @page.body.encode!('UTF-8',:invalid => :replace, :undef => :replace, :replace => "")
54
59
  @page.body.gsub! ("[\U0080-\U2C77]+") #mechanize will be confuzed without removing the few characters
55
60
  end
56
61
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baidu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.2
4
+ version: 0.2.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -39,7 +39,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
39
39
  version: '0'
40
40
  requirements: []
41
41
  rubyforge_project:
42
- rubygems_version: 1.8.11
42
+ rubygems_version: 1.8.15
43
43
  signing_key:
44
44
  specification_version: 3
45
45
  summary: to get data from www.baidu.com. this is built by a newbie, so please be careful