baidu 0.2.4 → 0.2.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/baidu.rb +29 -8
  2. metadata +5 -4
data/lib/baidu.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  #coding:UTF-8
2
2
  require 'mechanize'
3
+ require 'json'
4
+ require 'uri'
3
5
  class Baidu
4
6
  attr_accessor :perpage,:pagenumber,:debug
5
7
  attr_reader :page,:wd,:data
@@ -16,6 +18,16 @@ class Baidu
16
18
  end
17
19
 
18
20
  public
21
# Fetch Baidu's search-box suggestions for a keyword.
#
# The suggestion endpoint answers with a JSONP-style payload
# (callback({... s:[...] ...})); the body is decoded from GBK to UTF-8 and
# the first bracketed array found in it is parsed as JSON.
#
# wd      - the keyword (String) to ask suggestions for.
# returns - Array of suggestion strings; empty when the response contains no
#           bracketed array.
def suggestions(wd)
  # encode_www_form_component escapes every reserved character (including
  # "&" and "="), so the keyword cannot spill into other query parameters.
  # URI.encode is no longer available on Ruby 3.0+.
  url = "http://suggestion.baidu.com/su?wd=#{URI.encode_www_form_component(wd)}&cb=callback"
  body = @a.get(url).body.force_encoding('GBK').encode("UTF-8")
  list = body[/\[[^\]]*\]/]
  list ? JSON.parse(list) : []
end
26
+
27
# Tell whether the Baidu Index page for a keyword contains the "boxFlash"
# marker.
# NOTE(review): presumably that marker is only rendered when Baidu Index has
# trend data for the word — confirm against the live page.
#
# wd      - the keyword (String); it is transcoded to GBK before being
#           percent-encoded into the query string.
# returns - true when the response body includes "boxFlash", false otherwise.
def popular?(wd)
  # URI.encode is no longer available on Ruby 3.0+; encode_www_form_component
  # percent-encodes the raw GBK bytes and also escapes "&" and "=".
  word = URI.encode_www_form_component(wd.encode("GBK"))
  @a.get("http://index.baidu.com/main/word.php?word=#{word}").body.include?("boxFlash")
end
30
+
19
31
  def query(wd)
20
32
  @data.clear
21
33
  @wd = wd
@@ -25,11 +37,7 @@ class Baidu
25
37
  q << "rn=#{@perpage}"
26
38
  queryStr = q.join("&")
27
39
  uri = URI.encode((BaseUri + queryStr).encode('GBK'))
28
- begin
29
40
  @page = @a.get uri
30
- rescue SocketError => e
31
- puts e
32
- end
33
41
  clean
34
42
  @number = self.how_many
35
43
  @maxpage = (@number / @perpage.to_f).round
@@ -69,10 +77,10 @@ class Baidu
69
77
  ########################################################################################################################
70
78
  #look up a word and get the rank of a uri with $host
71
79
  def rank(host)#on base of ranks
72
- return @data['rank'][host] if @data.has_key?'rank' and @data['rank'].has_key?host
80
+ return @data[:rank][host] if @data.has_key?:rank and @data[:rank].has_key?host
73
81
  ranks.each_with_index do |uri,index|
74
82
  if URI.parse(URI.encode(uri).host)
75
- @data << {'rank'=>{host=>index+1}}
83
+ @data << {:rank=>{host=>index+1}}
76
84
  return index+1
77
85
  end
78
86
  end
@@ -90,17 +98,30 @@ class Baidu
90
98
  end
91
99
 
92
100
  def ranks#(keyword=false)
93
- return @data['ranks'] if @data.has_key?'ranks'
101
+ return @data[:ranks] if @data.has_key?:ranks
94
102
  raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
95
103
  #self.query(keyword) if keyword
96
104
  ranks = Array.new
97
105
  @page.search("//table[@class=\"result\"]").each do |table|
98
106
  ranks << @page.search("//table[@id=\"#{table['id']}\"]//a").first['href']
99
107
  end
100
- @data['ranks'] = ranks
108
+ @data[:ranks] = ranks
101
109
  return ranks
102
110
  end
103
111
 
112
# Collect the text of every link found under the div with id "rs" on the
# current page.
# NOTE(review): presumably that div is Baidu's "related searches" box —
# confirm against the page markup.
#
# The list is cached in @data[:related_keywords]. The earlier cache key was
# misspelled and never written, so every call re-scanned the page.
#
# returns - Array of keyword strings; empty when the page has no such links.
# raises  - StandardError when nothing is cached and @page is not a
#           Mechanize::Page.
def related_keywords
  return @data[:related_keywords] if @data.has_key?(:related_keywords)
  raise StandardError, 'wrong with @page' unless @page.instance_of? Mechanize::Page

  # search returns a (possibly empty) node collection, never nil, so no
  # nil guard is needed before mapping.
  @data[:related_keywords] = @page.search("//div[@id=\"rs\"]//tr//a").map(&:text)
end
124
+
104
125
  def how_many
105
126
  return @data['how_many'] if @data.has_key?'how_many'
106
127
  raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baidu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,8 +11,8 @@ bindir: bin
11
11
  cert_chain: []
12
12
  date: 2011-11-11 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: to get data from www.baidu.com. this is built by a newbie, so please
15
- be careful
14
+ description: to get keyword ranking,related queries and popularity from baidu.com.
15
+ this is built by a newbie, so please be careful
16
16
  email: seoaqua@qq.com
17
17
  executables: []
18
18
  extensions: []
@@ -42,5 +42,6 @@ rubyforge_project:
42
42
  rubygems_version: 1.8.17
43
43
  signing_key:
44
44
  specification_version: 3
45
- summary: to get data from www.baidu.com. this is built by a newbie, so please be careful
45
+ summary: to get keyword ranking,related queries and popularity from baidu.com. this
46
+ is built by a newbie, so please be careful
46
47
  test_files: []