baiduserp 2.0.10 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9741036cb242d023eb31fadd169a9e4c902b4d85
4
- data.tar.gz: 01b875b94d402e09fbaa46f1c30e27fac3eb9cb2
3
+ metadata.gz: c411628080652da333ee3833dc986e0ede74639c
4
+ data.tar.gz: 555ff4417a419c469459883781a2e01fc7e2c5e4
5
5
  SHA512:
6
- metadata.gz: fdaa9048b0fe7108a6e6b4e76eedc11d640c76a912d87c969158c573454649698f2112b0c63f7e8796e8d8f2c4c7aeec72d2728febedba36967cc85cd5671637
7
- data.tar.gz: 3a05c17ff239191325a5806a1ca0fad23e0d037da150804a4a69cc3bf9d8541e028920cf1a0ef4da56f6119ae5420801970eb47784d66dee708099a8782a47a0
6
+ metadata.gz: a7aa377b0ad8b77909eb916394554994bb3e5649b215ae68bf8ff5e794c1297313e9fef5464ece6f85db6af0638358d141f2e749d119dcd574b4f6dccfbe8d27
7
+ data.tar.gz: 7b1a2a7e8f3c56bd60eef8e148c15aebd913ee27bae78fcf8719923973932f2eda10bd46656818ac10118ab05afd8ebed53eab405d08c9abeb1b5308e1daf7ed
@@ -15,7 +15,14 @@ module Baiduserp
15
15
 
16
16
  def self.get_serp(url, retries = 6)
17
17
  if retries > 0
18
- response = self.get(url)
18
+ begin
19
+ response = self.get(url)
20
+ rescue Timeout::Error => e
21
+ puts e.class
22
+ puts e.message
23
+ sleep(10)
24
+ retry
25
+ end
19
26
  if response.code == 301
20
27
  sleep(rand(60)+60)
21
28
  response = self.get_serp(url,retries - 1)
@@ -3,6 +3,7 @@ require 'nokogiri'
3
3
  require 'uri'
4
4
  require 'baiduserp/client'
5
5
  require 'baiduserp/helper'
6
+ require 'baiduserp/result'
6
7
 
7
8
  module Baiduserp
8
9
  class Parser
@@ -11,7 +12,7 @@ module Baiduserp
11
12
  def parse(html)
12
13
  html = html.encode!('UTF-8','UTF-8',:invalid => :replace)
13
14
  @file = Hash.new
14
- @serp = Hash.new
15
+ @serp = Baiduserp::Result.new
15
16
 
16
17
  @file[:html] = html
17
18
  @file[:doc] = Nokogiri::HTML(html)
@@ -0,0 +1,5 @@
1
+ module Baiduserp
2
+ class Result < Hash
3
+
4
+ end
5
+ end
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.0.10"
2
+ VERSION = "2.1.1"
3
3
  end
@@ -1,6 +1,5 @@
1
1
  class Baiduserp::Parser
2
2
  def _parse_pinpaizhuanqu(file)
3
- html = file[:html]
4
- html.include?('bs.baidu.com/adcoup-mat') && html.include?('http://clkmk.baidu.com/clkmk-rcv/lnk')
3
+ file[:doc].search("div[@id='content_left']").first.children[2].name == 'script'
5
4
  end
6
5
  end
@@ -0,0 +1,9 @@
1
+ class Baiduserp::Parser
2
+ def _parse_zhixin(file)
3
+ result = []
4
+ file[:doc].search("div#content_left .result-zxl").each do |zxl|
5
+ result << {:id => zxl['id'], :tpl => zxl['tpl'], :mu => zxl['mu'] }
6
+ end
7
+ result
8
+ end
9
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.10
4
+ version: 2.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - MingQian Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-11-04 00:00:00.000000000 Z
11
+ date: 2013-11-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -49,6 +49,7 @@ files:
49
49
  - lib/baiduserp/client.rb
50
50
  - lib/baiduserp/helper.rb
51
51
  - lib/baiduserp/parser.rb
52
+ - lib/baiduserp/result.rb
52
53
  - lib/baiduserp/version.rb
53
54
  - lib/baiduserp.rb
54
55
  - lib/parsers/ads_right.rb
@@ -61,6 +62,7 @@ files:
61
62
  - lib/parsers/right_personinfo.rb
62
63
  - lib/parsers/right_relaperson.rb
63
64
  - lib/parsers/right_weather.rb
65
+ - lib/parsers/zhixin.rb
64
66
  - bin/baiduserp
65
67
  - README.md
66
68
  - lib/baiduserp/user_agents.yml