baiduserp 2.0.2 → 2.0.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 925a7067129b157540753444a3e54c78676d2a9f
4
+ data.tar.gz: 908b484a96fbbed83429804b09d50ff1b7828ff5
5
+ SHA512:
6
+ metadata.gz: 0c7fb5fd7f6fa734934522cfe738f0725be40c4f1504f0c87fa0e1d08bff5fd8d8c19580701a2dd17d014f02df9b2b850aef752f535a614d4fd62ba7044b001d
7
+ data.tar.gz: 793333893a7e9c51f936d21a861066dfa47237349976b1bdee59a1046c4641e059b829530d5dbaffc51322cf3b2a035c1e71b8323e8b0cfecc7803695a892a54
@@ -6,7 +6,7 @@ require 'baiduserp/helper'
6
6
 
7
7
  module Baiduserp
8
8
  class Parser
9
- Dir[File.expand_path('../parser/*.rb', __FILE__)].each{|f| require f}
9
+ Dir[File.expand_path('../../parsers/*.rb', __FILE__)].each{|f| require f}
10
10
 
11
11
  def parse(html)
12
12
  @file = Hash.new
@@ -15,9 +15,10 @@ module Baiduserp
15
15
  @file[:html] = html
16
16
  @file[:doc] = Nokogiri::HTML(html)
17
17
 
18
- self.class.constants.each do |m|
19
- #puts m
20
- eval "@serp[:#{m.downcase}] = #{m}.parse @file"
18
+ self.methods.each do |m|
19
+ next unless m =~ /^_parse_/
20
+ #p m
21
+ @serp[m.to_s.sub('_parse_','').to_sym] = self.send m,@file
21
22
  #p @serp.keys
22
23
  end
23
24
 
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.0.2"
2
+ VERSION = "2.0.3"
3
3
  end
@@ -1,5 +1,5 @@
1
- module Baiduserp::Parser::Ads_Right
2
- def self.parse(file)
1
+ class Baiduserp::Parser
2
+ def _parse_ads_right(file)
3
3
  result = []
4
4
  file[:doc].search('div.EC_im').each do |div|
5
5
  r = {}
@@ -1,5 +1,5 @@
1
- module Baiduserp::Parser::Ads_Top
2
- def self.parse(file)
1
+ class Baiduserp::Parser
2
+ def _parse_ads_top(file)
3
3
  result = []
4
4
  file[:doc].search('div.ec_pp_f').each do |div|
5
5
  id = div['id'].to_i
@@ -0,0 +1,5 @@
1
+ class Baiduserp::Parser
2
+ def _parse_pinpaizhuanqu(file)
3
+ file[:html].include? 'bs.baidu.com/adcoup-mat'
4
+ end
5
+ end
@@ -1,13 +1,17 @@
1
- module Baiduserp::Parser::Ranks
2
- def self.parse(file)
1
+ class Baiduserp::Parser
2
+ def _parse_ranks(file)
3
3
  result = []
4
4
  file[:doc].search("//table").each do |table|
5
+ next if table.nil?
5
6
  id = table['id'].to_i
6
7
  next unless id > 0
7
8
  r = {:rank => id}
8
9
 
9
- url = table.search('h3/a').first['href']
10
- url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
10
+ url = table.search('h3/a').first
11
+ unless url.nil?
12
+ url = url['href']
13
+ url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
14
+ end
11
15
  r[:url] = url
12
16
 
13
17
  r[:title] = Baiduserp::Helper.get_content_safe(table.search('h3'))
@@ -1,5 +1,5 @@
1
- module Baiduserp::Parser::Related_Keywords
2
- def self.parse(file)
1
+ class Baiduserp::Parser
2
+ def _parse_related_keywords(file)
3
3
  result = []
4
4
  file[:doc].search('div[@id="rs"]').each do |rs|
5
5
  rs.css('a').each do |link|
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
2
 
3
- module Baiduserp::Parser::Result_Num
4
- def self.parse(file)
3
+ class Baiduserp::Parser
4
+ def _parse_result_num(file)
5
5
  html = file[:html]
6
6
  str = html.scan(/找到相关结果(.*)个/).join
7
7
  str = str.gsub('约','')
@@ -0,0 +1,12 @@
1
+ class Baiduserp::Parser
2
+ def _parse_right_hotel(file)
3
+ rh = file[:doc].search('div[@tpl="right_hotel"]')
4
+ return nil if rh.nil?
5
+
6
+ rh = rh.first
7
+ return nil if rh.nil?
8
+ title = Baiduserp::Helper.get_content_safe(rh.search('div.opr-hotel-title'))
9
+
10
+ {:title => title}
11
+ end
12
+ end
@@ -0,0 +1,13 @@
1
+ class Baiduserp::Parser
2
+ def _parse_right_personinfo(file)
3
+ rp = file[:doc].search('div[@tpl="right_personinfo"]')
4
+ return nil if rp.nil?
5
+
6
+ title = Baiduserp::Helper.get_content_safe rp.search('span.opr-personinfo-subtitle-large')
7
+ info = Baiduserp::Helper.get_content_safe rp.search('div.opr-personinfo-info')
8
+ source = Baiduserp::Helper.get_content_safe rp.search('div.opr-personinfo-source a')
9
+
10
+ return nil if title.nil? && info.nil? && source.nil?
11
+ {:title => title, :info => info, :source => source}
12
+ end
13
+ end
@@ -0,0 +1,17 @@
1
+ class Baiduserp::Parser
2
+ def _parse_right_relaperson(file)
3
+ relapersons = file[:doc].search('div[@tpl="right_relaperson"]')
4
+ return nil if relapersons.nil?
5
+
6
+ result = []
7
+ relapersons.each do |rr|
8
+ title = rr.search('span.opr-relaperson-subtitle-tip').first.content
9
+ r = []
10
+ rr.search('p.opr-relaperson-name').each do |p|
11
+ r << p.content
12
+ end
13
+ result << {:title => title, :names => r}
14
+ end
15
+ result
16
+ end
17
+ end
@@ -0,0 +1,14 @@
1
+ class Baiduserp::Parser
2
+ def _parse_right_weather(file)
3
+ rw = file[:doc].search('div[@tpl="right_weather"]')
4
+ return nil if rw.nil?
5
+
6
+ rw = rw.first
7
+ return nil if rw.nil?
8
+
9
+ title = Baiduserp::Helper.get_content_safe(rw.search('div.opr-weather-title'))
10
+ week = rw.search('a.opr-weather-week').first['href']
11
+
12
+ {:title => title, :week => week}
13
+ end
14
+ end
metadata CHANGED
@@ -1,46 +1,41 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
5
- prerelease:
4
+ version: 2.0.3
6
5
  platform: ruby
7
6
  authors:
8
7
  - MingQian Zhang
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-06-20 00:00:00.000000000 Z
11
+ date: 2013-06-25 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: nokogiri
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - '>='
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: httparty
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - '>='
36
32
  - !ruby/object:Gem::Version
37
33
  version: '0'
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - '>='
44
39
  - !ruby/object:Gem::Version
45
40
  version: '0'
46
41
  description: Parse Baidu SERP result page.
@@ -53,39 +48,42 @@ extra_rdoc_files: []
53
48
  files:
54
49
  - lib/baiduserp/client.rb
55
50
  - lib/baiduserp/helper.rb
56
- - lib/baiduserp/parser/ads_right.rb
57
- - lib/baiduserp/parser/ads_top.rb
58
- - lib/baiduserp/parser/pinpaizhuanqu.rb
59
- - lib/baiduserp/parser/ranks.rb
60
- - lib/baiduserp/parser/related_keywords.rb
61
- - lib/baiduserp/parser/result_num.rb
62
51
  - lib/baiduserp/parser.rb
63
52
  - lib/baiduserp/version.rb
64
53
  - lib/baiduserp.rb
54
+ - lib/parsers/ads_right.rb
55
+ - lib/parsers/ads_top.rb
56
+ - lib/parsers/pinpaizhuanqu.rb
57
+ - lib/parsers/ranks.rb
58
+ - lib/parsers/related_keywords.rb
59
+ - lib/parsers/result_num.rb
60
+ - lib/parsers/right_hotel.rb
61
+ - lib/parsers/right_personinfo.rb
62
+ - lib/parsers/right_relaperson.rb
63
+ - lib/parsers/right_weather.rb
65
64
  - bin/baiduserp
66
65
  - README.md
67
66
  homepage: https://github.com/mqzhang/baiduserp
68
67
  licenses: []
68
+ metadata: {}
69
69
  post_install_message:
70
70
  rdoc_options: []
71
71
  require_paths:
72
72
  - lib
73
73
  required_ruby_version: !ruby/object:Gem::Requirement
74
- none: false
75
74
  requirements:
76
- - - ! '>='
75
+ - - '>='
77
76
  - !ruby/object:Gem::Version
78
77
  version: '0'
79
78
  required_rubygems_version: !ruby/object:Gem::Requirement
80
- none: false
81
79
  requirements:
82
- - - ! '>='
80
+ - - '>='
83
81
  - !ruby/object:Gem::Version
84
82
  version: '0'
85
83
  requirements: []
86
84
  rubyforge_project:
87
- rubygems_version: 1.8.25
85
+ rubygems_version: 2.0.0
88
86
  signing_key:
89
- specification_version: 3
87
+ specification_version: 4
90
88
  summary: Baidu SERP
91
89
  test_files: []
@@ -1,5 +0,0 @@
1
- module Baiduserp::Parser::PinPaiZhuanQu
2
- def self.parse(file)
3
- file[:html].include? 'bs.baidu.com/adcoup-mat'
4
- end
5
- end