baidu 0.2.4 → 0.2.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/baidu.rb +29 -8
  2. metadata +5 -4
data/lib/baidu.rb CHANGED
@@ -1,5 +1,7 @@
1
1
  #coding:UTF-8
2
2
  require 'mechanize'
3
+ require 'json'
4
+ require 'uri'
3
5
  class Baidu
4
6
  attr_accessor :perpage,:pagenumber,:debug
5
7
  attr_reader :page,:wd,:data
@@ -16,6 +18,16 @@ class Baidu
16
18
  end
17
19
 
18
20
  public
21
+ def suggestions(wd)
22
+ json = @a.get("http://suggestion.baidu.com/su?wd=#{URI.encode(wd)}&cb=callback").body.force_encoding('GBK').encode("UTF-8")
23
+ m = /\[([^\]]*)\]/.match json
24
+ return JSON.parse m[0]
25
+ end
26
+
27
+ def popular?(wd)
28
+ return @a.get("http://index.baidu.com/main/word.php?word=#{URI.encode(wd.encode("GBK"))}").body.include?"boxFlash"
29
+ end
30
+
19
31
  def query(wd)
20
32
  @data.clear
21
33
  @wd = wd
@@ -25,11 +37,7 @@ class Baidu
25
37
  q << "rn=#{@perpage}"
26
38
  queryStr = q.join("&")
27
39
  uri = URI.encode((BaseUri + queryStr).encode('GBK'))
28
- begin
29
40
  @page = @a.get uri
30
- rescue SocketError => e
31
- puts e
32
- end
33
41
  clean
34
42
  @number = self.how_many
35
43
  @maxpage = (@number / @perpage.to_f).round
@@ -69,10 +77,10 @@ class Baidu
69
77
  ########################################################################################################################
70
78
  #look up a word and get the rank of a uri with $host
71
79
  def rank(host)#on base of ranks
72
- return @data['rank'][host] if @data.has_key?'rank' and @data['rank'].has_key?host
80
+ return @data[:rank][host] if @data.has_key?:rank and @data[:rank].has_key?host
73
81
  ranks.each_with_index do |uri,index|
74
82
  if URI.parse(URI.encode(uri).host)
75
- @data << {'rank'=>{host=>index+1}}
83
+ @data << {:rank=>{host=>index+1}}
76
84
  return index+1
77
85
  end
78
86
  end
@@ -90,17 +98,30 @@ class Baidu
90
98
  end
91
99
 
92
100
  def ranks#(keyword=false)
93
- return @data['ranks'] if @data.has_key?'ranks'
101
+ return @data[:ranks] if @data.has_key?:ranks
94
102
  raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
95
103
  #self.query(keyword) if keyword
96
104
  ranks = Array.new
97
105
  @page.search("//table[@class=\"result\"]").each do |table|
98
106
  ranks << @page.search("//table[@id=\"#{table['id']}\"]//a").first['href']
99
107
  end
100
- @data['ranks'] = ranks
108
+ @data[:ranks] = ranks
101
109
  return ranks
102
110
  end
103
111
 
112
+ def related_keywords
113
+ return @data[:realated_keywords] if @data.has_key?:realated_keywords
114
+ raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
115
+ keywords = Array.new
116
+ div = @page.search("//div[@id=\"rs\"]//tr//a")
117
+ return false if div.nil?
118
+ div.each do |keyword|
119
+ keywords << keyword.text
120
+ end
121
+ return keywords
122
+ #m = /href="[^"]+">([^<]+)<\/a>/.match(related.content)
123
+ end
124
+
104
125
  def how_many
105
126
  return @data['how_many'] if @data.has_key?'how_many'
106
127
  raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baidu
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.4
4
+ version: 0.2.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -11,8 +11,8 @@ bindir: bin
11
11
  cert_chain: []
12
12
  date: 2011-11-11 00:00:00.000000000 Z
13
13
  dependencies: []
14
- description: to get data from www.baidu.com. this is built by a newbie, so please
15
- be careful
14
+ description: to get keyword ranking,related queries and popularity from baidu.com.
15
+ this is built by a newbie, so please be careful
16
16
  email: seoaqua@qq.com
17
17
  executables: []
18
18
  extensions: []
@@ -42,5 +42,6 @@ rubyforge_project:
42
42
  rubygems_version: 1.8.17
43
43
  signing_key:
44
44
  specification_version: 3
45
- summary: to get data from www.baidu.com. this is built by a newbie, so please be careful
45
+ summary: to get keyword ranking,related queries and popularity from baidu.com. this
46
+ is built by a newbie, so please be careful
46
47
  test_files: []