baiduserp 2.0.10 → 2.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9741036cb242d023eb31fadd169a9e4c902b4d85
4
- data.tar.gz: 01b875b94d402e09fbaa46f1c30e27fac3eb9cb2
3
+ metadata.gz: c411628080652da333ee3833dc986e0ede74639c
4
+ data.tar.gz: 555ff4417a419c469459883781a2e01fc7e2c5e4
5
5
  SHA512:
6
- metadata.gz: fdaa9048b0fe7108a6e6b4e76eedc11d640c76a912d87c969158c573454649698f2112b0c63f7e8796e8d8f2c4c7aeec72d2728febedba36967cc85cd5671637
7
- data.tar.gz: 3a05c17ff239191325a5806a1ca0fad23e0d037da150804a4a69cc3bf9d8541e028920cf1a0ef4da56f6119ae5420801970eb47784d66dee708099a8782a47a0
6
+ metadata.gz: a7aa377b0ad8b77909eb916394554994bb3e5649b215ae68bf8ff5e794c1297313e9fef5464ece6f85db6af0638358d141f2e749d119dcd574b4f6dccfbe8d27
7
+ data.tar.gz: 7b1a2a7e8f3c56bd60eef8e148c15aebd913ee27bae78fcf8719923973932f2eda10bd46656818ac10118ab05afd8ebed53eab405d08c9abeb1b5308e1daf7ed
@@ -15,7 +15,14 @@ module Baiduserp
15
15
 
16
16
  def self.get_serp(url, retries = 6)
17
17
  if retries > 0
18
- response = self.get(url)
18
+ begin
19
+ response = self.get(url)
20
+ rescue Timeout::Error => e
21
+ puts e.class
22
+ puts e.message
23
+ sleep(10)
24
+ retry
25
+ end
19
26
  if response.code == 301
20
27
  sleep(rand(60)+60)
21
28
  response = self.get_serp(url,retries - 1)
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
  require 'uri'
4
4
  require 'baiduserp/client'
5
5
  require 'baiduserp/helper'
6
+ require 'baiduserp/result'
6
7
 
7
8
  module Baiduserp
8
9
  class Parser
@@ -11,7 +12,7 @@ module Baiduserp
11
12
  def parse(html)
12
13
  html = html.encode!('UTF-8','UTF-8',:invalid => :replace)
13
14
  @file = Hash.new
14
- @serp = Hash.new
15
+ @serp = Baiduserp::Result.new
15
16
 
16
17
  @file[:html] = html
17
18
  @file[:doc] = Nokogiri::HTML(html)
@@ -0,0 +1,5 @@
1
+ module Baiduserp
2
+ class Result < Hash
3
+
4
+ end
5
+ end
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.0.10"
2
+ VERSION = "2.1.1"
3
3
  end
@@ -1,6 +1,5 @@
1
1
  class Baiduserp::Parser
2
2
  def _parse_pinpaizhuanqu(file)
3
- html = file[:html]
4
- html.include?('bs.baidu.com/adcoup-mat') && html.include?('http://clkmk.baidu.com/clkmk-rcv/lnk')
3
+ file[:doc].search("div[@id='content_left']").first.children[2].name == 'script'
5
4
  end
6
5
  end
@@ -0,0 +1,9 @@
1
+ class Baiduserp::Parser
2
+ def _parse_zhixin(file)
3
+ result = []
4
+ file[:doc].search("div#content_left .result-zxl").each do |zxl|
5
+ result << {:id => zxl['id'], :tpl => zxl['tpl'], :mu => zxl['mu'] }
6
+ end
7
+ result
8
+ end
9
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.10
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - MingQian Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-04 00:00:00.000000000 Z
11
+ date: 2013-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -49,6 +49,7 @@ files:
49
49
  - lib/baiduserp/client.rb
50
50
  - lib/baiduserp/helper.rb
51
51
  - lib/baiduserp/parser.rb
52
+ - lib/baiduserp/result.rb
52
53
  - lib/baiduserp/version.rb
53
54
  - lib/baiduserp.rb
54
55
  - lib/parsers/ads_right.rb
@@ -61,6 +62,7 @@ files:
61
62
  - lib/parsers/right_personinfo.rb
62
63
  - lib/parsers/right_relaperson.rb
63
64
  - lib/parsers/right_weather.rb
65
+ - lib/parsers/zhixin.rb
64
66
  - bin/baiduserp
65
67
  - README.md
66
68
  - lib/baiduserp/user_agents.yml