baidu 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/baidu.rb +19 -25
  2. metadata +1 -1
@@ -4,7 +4,14 @@ require 'nokogiri'
4
4
  require 'json'
5
5
  require 'addressable/uri'
6
6
  require 'httparty'
7
-
7
+ class SearchEngine
8
+ #是否收录
9
+ def indexed?(url)
10
+ URI(url)
11
+ result = query(url)
12
+ return result.has_result?
13
+ end
14
+ end
8
15
  class SearchResult
9
16
  def initialize(body,baseuri,pagenumber=nil)
10
17
  @body = Nokogiri::HTML body
@@ -42,26 +49,17 @@ class SearchResult
42
49
  return nil
43
50
  end
44
51
  end
45
- class Qihoo
52
+
53
+ class Qihoo < SearchEngine
46
54
  Host = 'www.so.com'
47
55
  #基本查询, 相当于在搜索框直接数据关键词查询
48
56
  def query(wd)
49
- begin
50
- #用原始路径请求
51
- uri = URI.encode(URI.join("http://#{Host}/",'s?q='+wd).to_s)
52
- body = HTTParty.get(uri)
53
- #如果请求地址被跳转,重新获取当前页的URI
54
- uri = URI.join("http://#{Host}/",body.request.path).to_s
55
- return QihooResult.new(body,uri)
56
- rescue Exception => e
57
- warn "#{uri} fetch error: #{e.to_s}"
58
- return false
59
- end
60
- end
61
- #是否收录
62
- def indexed?(url)
63
- URI(url)
64
- query(url).has_result?
57
+ #用原始路径请求
58
+ uri = URI.join("http://#{Host}/",URI.encode('s?q='+wd)).to_s
59
+ body = HTTParty.get(uri)
60
+ #如果请求地址被跳转,重新获取当前页的URI,可避免翻页错误
61
+ uri = URI.join("http://#{Host}/",body.request.path).to_s
62
+ QihooResult.new(body,uri)
65
63
  end
66
64
  end
67
65
 
@@ -101,7 +99,7 @@ class QihooResult < SearchResult
101
99
  end
102
100
  end
103
101
 
104
- class Mbaidu
102
+ class Mbaidu < SearchEngine
105
103
  BaseUri = 'http://m.baidu.com/s?'
106
104
  headers = {
107
105
  "User-Agent" => 'Mozilla/5.0 (iPhone; U; CPU iPhone OS 4_3_2 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8H7 Safari/6533.18.5'
@@ -210,7 +208,7 @@ class MbaiduResult < SearchResult
210
208
  end
211
209
 
212
210
  end
213
- class Baidu
211
+ class Baidu < SearchEngine
214
212
  BaseUri = 'http://www.baidu.com/s?'
215
213
  PerPage = 100
216
214
 
@@ -266,7 +264,7 @@ class Baidu
266
264
  @page = @a.get uri
267
265
  BaiduResult.new(@page)
268
266
  rescue Net::HTTP::Persistent::Error
269
- warn "#{uri}timeout"
267
+ warn "[timeout] #{uri}"
270
268
  return false
271
269
  end
272
270
  =begin
@@ -295,10 +293,6 @@ class Baidu
295
293
  def how_many_pages_with(host,string)
296
294
  query("site:#{host} inurl:#{string}").how_many
297
295
  end
298
- #是否收录
299
- def indexed?(url)
300
- query(url).has_result?
301
- end
302
296
 
303
297
  =begin
304
298
  private
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baidu
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.1.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: