baidu 1.1.2 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/baidu.rb +19 -25
  2. metadata +1 -1
@@ -4,7 +4,14 @@ require 'nokogiri'
4
4
  require 'json'
5
5
  require 'addressable/uri'
6
6
  require 'httparty'
7
-
7
+ class SearchEngine
8
+ #是否收录
9
+ def indexed?(url)
10
+ URI(url)
11
+ result = query(url)
12
+ return result.has_result?
13
+ end
14
+ end
8
15
  class SearchResult
9
16
  def initialize(body,baseuri,pagenumber=nil)
10
17
  @body = Nokogiri::HTML body
@@ -42,26 +49,17 @@ class SearchResult
42
49
  return nil
43
50
  end
44
51
  end
45
- class Qihoo
52
+
53
+ class Qihoo < SearchEngine
46
54
  Host = 'www.so.com'
47
55
  #基本查询, 相当于在搜索框直接数据关键词查询
48
56
  def query(wd)
49
- begin
50
- #用原始路径请求
51
- uri = URI.encode(URI.join("http://#{Host}/",'s?q='+wd).to_s)
52
- body = HTTParty.get(uri)
53
- #如果请求地址被跳转,重新获取当前页的URI
54
- uri = URI.join("http://#{Host}/",body.request.path).to_s
55
- return QihooResult.new(body,uri)
56
- rescue Exception => e
57
- warn "#{uri} fetch error: #{e.to_s}"
58
- return false
59
- end
60
- end
61
- #是否收录
62
- def indexed?(url)
63
- URI(url)
64
- query(url).has_result?
57
+ #用原始路径请求
58
+ uri = URI.join("http://#{Host}/",URI.encode('s?q='+wd)).to_s
59
+ body = HTTParty.get(uri)
60
+ #如果请求地址被跳转,重新获取当前页的URI,可避免翻页错误
61
+ uri = URI.join("http://#{Host}/",body.request.path).to_s
62
+ QihooResult.new(body,uri)
65
63
  end
66
64
  end
67
65
 
@@ -101,7 +99,7 @@ class QihooResult < SearchResult
101
99
  end
102
100
  end
103
101
 
104
- class Mbaidu
102
+ class Mbaidu < SearchEngine
105
103
  BaseUri = 'http://m.baidu.com/s?'
106
104
  headers = {
107
105
  "User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'
@@ -210,7 +208,7 @@ class MbaiduResult < SearchResult
210
208
  end
211
209
 
212
210
  end
213
- class Baidu
211
+ class Baidu < SearchEngine
214
212
  BaseUri = 'http://www.baidu.com/s?'
215
213
  PerPage = 100
216
214
 
@@ -266,7 +264,7 @@ class Baidu
266
264
  @page = @a.get uri
267
265
  BaiduResult.new(@page)
268
266
  rescue Net::HTTP::Persistent::Error
269
- warn "#{uri}timeout"
267
+ warn "[timeout] #{uri}"
270
268
  return false
271
269
  end
272
270
  =begin
@@ -295,10 +293,6 @@ class Baidu
295
293
  def how_many_pages_with(host,string)
296
294
  query("site:#{host} inurl:#{string}").how_many
297
295
  end
298
- #是否收录
299
- def indexed?(url)
300
- query(url).has_result?
301
- end
302
296
 
303
297
  =begin
304
298
  private
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baidu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: