baidu 0.2.4 → 0.2.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/baidu.rb +29 -8
- metadata +5 -4
data/lib/baidu.rb
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#coding:UTF-8
|
2
2
|
require 'mechanize'
|
3
|
+
require 'json'
|
4
|
+
require 'uri'
|
3
5
|
class Baidu
|
4
6
|
attr_accessor :perpage,:pagenumber,:debug
|
5
7
|
attr_reader :page,:wd,:data
|
@@ -16,6 +18,16 @@ class Baidu
|
|
16
18
|
end
|
17
19
|
|
18
20
|
public
|
21
|
+
# Fetches Baidu's search suggestions for a keyword.
#
# Requests the suggestion endpoint through the agent held in @a (presumably
# a Mechanize instance -- confirm against the constructor), treats the
# response body as GBK, converts it to UTF-8, and parses the first
# bracketed list found in the JSONP payload.
#
# @param wd [String] the keyword to look up
# @return [Array] the parsed suggestion list; empty when the response
#   contains no bracketed list
# @raise [JSON::ParserError] if the bracketed text is not valid JSON
def suggestions(wd)
  # URI.encode was removed in Ruby 3.0 and never escaped reserved characters
  # such as "&"; form-component encoding keeps the whole keyword inside the
  # wd parameter.
  keyword = URI.encode_www_form_component(wd)
  url = "http://suggestion.baidu.com/su?wd=#{keyword}&cb=callback"
  payload = @a.get(url).body.force_encoding('GBK').encode('UTF-8')

  # The payload is a JSONP call, not JSON; only the first [...] segment is parsed.
  list = payload[/\[[^\]]*\]/]
  return [] if list.nil?

  JSON.parse(list)
end
|
26
|
+
|
27
|
+
# Reports whether Baidu Index shows a chart for the keyword.
#
# The keyword is converted to GBK and percent-encoded before being sent to
# index.baidu.com through the agent held in @a. The keyword counts as
# popular when the returned body contains the marker text "boxFlash".
#
# @param wd [String] the keyword to check
# @return [Boolean] true when the response body includes "boxFlash"
# @raise [Encoding::UndefinedConversionError] if wd has characters that
#   cannot be represented in GBK
def popular?(wd)
  # URI.encode was removed in Ruby 3.0 and left "&"/"=" unescaped; this
  # percent-encodes the raw GBK bytes instead.
  keyword = URI.encode_www_form_component(wd.encode('GBK'))
  @a.get("http://index.baidu.com/main/word.php?word=#{keyword}").body.include?('boxFlash')
end
|
30
|
+
|
19
31
|
def query(wd)
|
20
32
|
@data.clear
|
21
33
|
@wd = wd
|
@@ -25,11 +37,7 @@ class Baidu
|
|
25
37
|
q << "rn=#{@perpage}"
|
26
38
|
queryStr = q.join("&")
|
27
39
|
uri = URI.encode((BaseUri + queryStr).encode('GBK'))
|
28
|
-
begin
|
29
40
|
@page = @a.get uri
|
30
|
-
rescue SocketError => e
|
31
|
-
puts e
|
32
|
-
end
|
33
41
|
clean
|
34
42
|
@number = self.how_many
|
35
43
|
@maxpage = (@number / @perpage.to_f).round
|
@@ -69,10 +77,10 @@ class Baidu
|
|
69
77
|
########################################################################################################################
|
70
78
|
#look up a word and get the rank of a uri with $host
|
71
79
|
def rank(host)#on base of ranks
|
72
|
-
return @data[
|
80
|
+
return @data[:rank][host] if @data.has_key?:rank and @data[:rank].has_key?host
|
73
81
|
ranks.each_with_index do |uri,index|
|
74
82
|
if URI.parse(URI.encode(uri).host)
|
75
|
-
@data << {
|
83
|
+
@data << {:rank=>{host=>index+1}}
|
76
84
|
return index+1
|
77
85
|
end
|
78
86
|
end
|
@@ -90,17 +98,30 @@ class Baidu
|
|
90
98
|
end
|
91
99
|
|
92
100
|
# Collects the first link of every result table on the current page.
#
# The list is cached in @data[:ranks], so later calls return the stored
# array without touching @page again.
#
# @return [Array] href values in the order the result tables appear;
#   tables that contain no link are skipped
# @raise [StandardError] when @page is not a Mechanize::Page
def ranks#(keyword=false)
  return @data[:ranks] if @data.key?(:ranks)
  raise StandardError, 'wrong with @page' unless @page.instance_of? Mechanize::Page

  links = []
  @page.search("//table[@class=\"result\"]").each do |table|
    anchor = @page.search("//table[@id=\"#{table['id']}\"]//a").first
    # A result table without any link used to raise NoMethodError on nil.
    links << anchor['href'] unless anchor.nil?
  end
  @data[:ranks] = links
end
|
103
111
|
|
112
|
+
# Lists the related-search keywords shown on the current result page.
#
# Reads the text of every link inside the rows of the element with id "rs"
# and caches the list in @data[:related_keywords].
#
# @return [Array<String>, false] the keyword texts, or false when the page
#   search yields nil
# @raise [StandardError] when @page is not a Mechanize::Page
def related_keywords
  # The cache key was misspelled (:realated_keywords) and never written,
  # so the lookup could not hit; the result is now stored as `ranks` does.
  return @data[:related_keywords] if @data.key?(:related_keywords)
  raise StandardError, 'wrong with @page' unless @page.instance_of? Mechanize::Page

  anchors = @page.search("//div[@id=\"rs\"]//tr//a")
  return false if anchors.nil?

  @data[:related_keywords] = anchors.map(&:text)
end
|
124
|
+
|
104
125
|
def how_many
|
105
126
|
return @data['how_many'] if @data.has_key?'how_many'
|
106
127
|
raise StandardError,'wrong with @page' unless @page.instance_of? Mechanize::Page
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baidu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.4
|
4
|
+
version: 0.2.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -11,8 +11,8 @@ bindir: bin
|
|
11
11
|
cert_chain: []
|
12
12
|
date: 2011-11-11 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
|
-
description: to get
|
15
|
-
be careful
|
14
|
+
description: to get keyword ranking,related queries and popularity from baidu.com.
|
15
|
+
this is built by a newbie, so please be careful
|
16
16
|
email: seoaqua@qq.com
|
17
17
|
executables: []
|
18
18
|
extensions: []
|
@@ -42,5 +42,6 @@ rubyforge_project:
|
|
42
42
|
rubygems_version: 1.8.17
|
43
43
|
signing_key:
|
44
44
|
specification_version: 3
|
45
|
-
summary: to get
|
45
|
+
summary: to get keyword ranking,related queries and popularity from baidu.com. this
|
46
|
+
is built by a newbie, so please be careful
|
46
47
|
test_files: []
|