baidu 0.2.4 → 0.2.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/baidu.rb +29 -8
- metadata +5 -4
data/lib/baidu.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#coding:UTF-8
|
2
2
|
require 'mechanize'
|
3
|
+
require 'json'
|
4
|
+
require 'uri'
|
3
5
|
class Baidu
|
4
6
|
attr_accessor :perpage,:pagenumber,:debug
|
5
7
|
attr_reader :page,:wd,:data
|
@@ -16,6 +18,16 @@ class Baidu
|
|
16
18
|
end
|
17
19
|
|
18
20
|
public
|
21
|
+
def suggestions(wd)
|
22
|
+
json = @a.get("http://suggestion.baidu.com/su?wd=#{URI.encode(wd)}&cb=callback").body.force_encoding('GBK').encode("UTF-8")
|
23
|
+
m = /\[([^\]]*)\]/.match json
|
24
|
+
return JSON.parse m[0]
|
25
|
+
end
|
26
|
+
|
27
|
+
def popular?(wd)
|
28
|
+
return @a.get("http://index.baidu.com/main/word.php?word=#{URI.encode(wd.encode("GBK"))}").body.include?"boxFlash"
|
29
|
+
end
|
30
|
+
|
19
31
|
def query(wd)
|
20
32
|
@data.clear
|
21
33
|
@wd = wd
|
@@ -25,11 +37,7 @@ class Baidu
|
|
25
37
|
q << "rn=#{@perpage}"
|
26
38
|
queryStr = q.join("&")
|
27
39
|
uri = URI.encode((BaseUri + queryStr).encode('GBK'))
|
28
|
-
begin
|
29
40
|
@page = @a.get uri
|
30
|
-
rescue SocketError => e
|
31
|
-
puts e
|
32
|
-
end
|
33
41
|
clean
|
34
42
|
@number = self.how_many
|
35
43
|
@maxpage = (@number / @perpage.to_f).round
|
@@ -69,10 +77,10 @@ class Baidu
|
|
69
77
|
########################################################################################################################
|
70
78
|
#look up a word and get the rank of a uri with $host
|
71
79
|
def rank(host)#on base of ranks
|
72
|
-
return @data[
|
80
|
+
return @data[:rank][host] if @data.has_key?:rank and @data[:rank].has_key?host
|
73
81
|
ranks.each_with_index do |uri,index|
|
74
82
|
if URI.parse(URI.encode(uri).host)
|
75
|
-
@data << {
|
83
|
+
@data << {:rank=>{host=>index+1}}
|
76
84
|
return index+1
|
77
85
|
end
|
78
86
|
end
|
@@ -90,17 +98,30 @@ class Baidu
|
|
90
98
|
end
|
91
99
|
|
92
100
|
def ranks#(keyword=false)
|
93
|
-
return @data[
|
101
|
+
return @data[:ranks] if @data.has_key?:ranks
|
94
102
|
raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
|
95
103
|
#self.query(keyword) if keyword
|
96
104
|
ranks = Array.new
|
97
105
|
@page.search("//table[@class=\"result\"]").each do |table|
|
98
106
|
ranks << @page.search("//table[@id=\"#{table['id']}\"]//a").first['href']
|
99
107
|
end
|
100
|
-
@data[
|
108
|
+
@data[:ranks] = ranks
|
101
109
|
return ranks
|
102
110
|
end
|
103
111
|
|
112
|
+
def related_keywords
|
113
|
+
return @data[:realated_keywords] if @data.has_key?:realated_keywords
|
114
|
+
raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
|
115
|
+
keywords = Array.new
|
116
|
+
div = @page.search("//div[@id=\"rs\"]//tr//a")
|
117
|
+
return false if div.nil?
|
118
|
+
div.each do |keyword|
|
119
|
+
keywords << keyword.text
|
120
|
+
end
|
121
|
+
return keywords
|
122
|
+
#m = /href="[^"]+">([^<]+)<\/a>/.match(related.content)
|
123
|
+
end
|
124
|
+
|
104
125
|
def how_many
|
105
126
|
return @data['how_many'] if @data.has_key?'how_many'
|
106
127
|
raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baidu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,8 +11,8 @@ bindir: bin
|
|
11
11
|
cert_chain: []
|
12
12
|
date: 2011-11-11 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description: to get
|
15
|
-
be careful
|
14
|
+
description: to get keyword ranking,related queries and popularity from baidu.com.
|
15
|
+
this is built by a newbie, so please be careful
|
16
16
|
email: seoaqua@qq.com
|
17
17
|
executables: []
|
18
18
|
extensions: []
|
@@ -42,5 +42,6 @@ rubyforge_project:
|
|
42
42
|
rubygems_version: 1.8.17
|
43
43
|
signing_key:
|
44
44
|
specification_version: 3
|
45
|
-
summary: to get
|
45
|
+
summary: to get keyword ranking,related queries and popularity from baidu.com. this
|
46
|
+
is built by a newbie, so please be careful
|
46
47
|
test_files: []
|