baidu 1.1.2 → 1.1.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/baidu.rb +19 -25
- metadata +1 -1
data/lib/baidu.rb
CHANGED
@@ -4,7 +4,14 @@ require 'nokogiri'
|
|
4
4
|
require 'json'
|
5
5
|
require 'addressable/uri'
|
6
6
|
require 'httparty'
|
7
|
-
|
7
|
+
# Common base for the search-engine clients; subclasses supply #query.
class SearchEngine
  # Reports whether +url+ is indexed by the concrete search engine.
  #
  # @param url [String] the URL to look up
  # @return [Object] whatever the query result's +has_result?+ returns,
  #   or +false+ when #query yielded no result object
  # @raise [URI::InvalidURIError] if +url+ cannot be parsed as a URI
  def indexed?(url)
    # Parsed purely for validation: a malformed URL raises before any
    # request is issued.
    URI(url)
    result = query(url)
    # A subclass's #query may return false instead of a result object when
    # the fetch fails (e.g. on a timeout); calling has_result? on that
    # would raise NoMethodError.
    return false unless result

    result.has_result?
  end
end
|
8
15
|
class SearchResult
|
9
16
|
def initialize(body,baseuri,pagenumber=nil)
|
10
17
|
@body = Nokogiri::HTML body
|
@@ -42,26 +49,17 @@ class SearchResult
|
|
42
49
|
return nil
|
43
50
|
end
|
44
51
|
end
|
45
|
-
|
52
|
+
|
53
|
+
class Qihoo < SearchEngine
|
46
54
|
Host = 'www.so.com'
|
47
55
|
#基本查询, 相当于在搜索框直接数据关键词查询
|
48
56
|
def query(wd)
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
return QihooResult.new(body,uri)
|
56
|
-
rescue Exception => e
|
57
|
-
warn "#{uri} fetch error: #{e.to_s}"
|
58
|
-
return false
|
59
|
-
end
|
60
|
-
end
|
61
|
-
#是否收录
|
62
|
-
def indexed?(url)
|
63
|
-
URI(url)
|
64
|
-
query(url).has_result?
|
57
|
+
#用原始路径请求
|
58
|
+
uri = URI.join("http://#{Host}/",URI.encode('s?q='+wd)).to_s
|
59
|
+
body = HTTParty.get(uri)
|
60
|
+
#如果请求地址被跳转,重新获取当前页的URI,可避免翻页错误
|
61
|
+
uri = URI.join("http://#{Host}/",body.request.path).to_s
|
62
|
+
QihooResult.new(body,uri)
|
65
63
|
end
|
66
64
|
end
|
67
65
|
|
@@ -101,7 +99,7 @@ class QihooResult < SearchResult
|
|
101
99
|
end
|
102
100
|
end
|
103
101
|
|
104
|
-
class Mbaidu
|
102
|
+
class Mbaidu < SearchEngine
|
105
103
|
BaseUri = 'http://m.baidu.com/s?'
|
106
104
|
headers = {
|
107
105
|
"User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'
|
@@ -210,7 +208,7 @@ class MbaiduResult < SearchResult
|
|
210
208
|
end
|
211
209
|
|
212
210
|
end
|
213
|
-
class Baidu
|
211
|
+
class Baidu < SearchEngine
|
214
212
|
BaseUri = 'http://www.baidu.com/s?'
|
215
213
|
PerPage = 100
|
216
214
|
|
@@ -266,7 +264,7 @@ class Baidu
|
|
266
264
|
@page = @a.get uri
|
267
265
|
BaiduResult.new(@page)
|
268
266
|
rescue Net::HTTP::Persistent::Error
|
269
|
-
warn "#{uri}
|
267
|
+
warn "[timeout] #{uri}"
|
270
268
|
return false
|
271
269
|
end
|
272
270
|
=begin
|
@@ -295,10 +293,6 @@ class Baidu
|
|
295
293
|
def how_many_pages_with(host,string)
|
296
294
|
query("site:#{host} inurl:#{string}").how_many
|
297
295
|
end
|
298
|
-
#是否收录
|
299
|
-
def indexed?(url)
|
300
|
-
query(url).has_result?
|
301
|
-
end
|
302
296
|
|
303
297
|
=begin
|
304
298
|
private
|