baiduserp 2.0.2 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 925a7067129b157540753444a3e54c78676d2a9f
4
+ data.tar.gz: 908b484a96fbbed83429804b09d50ff1b7828ff5
5
+ SHA512:
6
+ metadata.gz: 0c7fb5fd7f6fa734934522cfe738f0725be40c4f1504f0c87fa0e1d08bff5fd8d8c19580701a2dd17d014f02df9b2b850aef752f535a614d4fd62ba7044b001d
7
+ data.tar.gz: 793333893a7e9c51f936d21a861066dfa47237349976b1bdee59a1046c4641e059b829530d5dbaffc51322cf3b2a035c1e71b8323e8b0cfecc7803695a892a54
@@ -6,7 +6,7 @@ require 'baiduserp/helper'
6
6
 
7
7
  module Baiduserp
8
8
  class Parser
9
- Dir[File.expand_path('../parser/*.rb', __FILE__)].each{|f| require f}
9
+ Dir[File.expand_path('../../parsers/*.rb', __FILE__)].each{|f| require f}
10
10
 
11
11
  def parse(html)
12
12
  @file = Hash.new
@@ -15,9 +15,10 @@ module Baiduserp
15
15
  @file[:html] = html
16
16
  @file[:doc] = Nokogiri::HTML(html)
17
17
 
18
- self.class.constants.each do |m|
19
- #puts m
20
- eval "@serp[:#{m.downcase}] = #{m}.parse @file"
18
+ self.methods.each do |m|
19
+ next unless m =~ /^_parse_/
20
+ #p m
21
+ @serp[m.to_s.sub('_parse_','').to_sym] = self.send m,@file
21
22
  #p @serp.keys
22
23
  end
23
24
 
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.0.2"
2
+ VERSION = "2.0.3"
3
3
  end
@@ -1,5 +1,5 @@
1
- module Baiduserp::Parser::Ads_Right
2
- def self.parse(file)
1
+ class Baiduserp::Parser
2
+ def _parse_ads_right(file)
3
3
  result = []
4
4
  file[:doc].search('div.EC_im').each do |div|
5
5
  r = {}
@@ -1,5 +1,5 @@
1
- module Baiduserp::Parser::Ads_Top
2
- def self.parse(file)
1
+ class Baiduserp::Parser
2
+ def _parse_ads_top(file)
3
3
  result = []
4
4
  file[:doc].search('div.ec_pp_f').each do |div|
5
5
  id = div['id'].to_i
@@ -0,0 +1,5 @@
1
+ class Baiduserp::Parser
2
+ def _parse_pinpaizhuanqu(file)
3
+ file[:html].include? 'bs.baidu.com/adcoup-mat'
4
+ end
5
+ end
@@ -1,13 +1,17 @@
1
- module Baiduserp::Parser::Ranks
2
- def self.parse(file)
1
+ class Baiduserp::Parser
2
+ def _parse_ranks(file)
3
3
  result = []
4
4
  file[:doc].search("//table").each do |table|
5
+ next if table.nil?
5
6
  id = table['id'].to_i
6
7
  next unless id > 0
7
8
  r = {:rank => id}
8
9
 
9
- url = table.search('h3/a').first['href']
10
- url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
10
+ url = table.search('h3/a').first
11
+ unless url.nil?
12
+ url = url['href']
13
+ url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
14
+ end
11
15
  r[:url] = url
12
16
 
13
17
  r[:title] = Baiduserp::Helper.get_content_safe(table.search('h3'))
@@ -1,5 +1,5 @@
1
- module Baiduserp::Parser::Related_Keywords
2
- def self.parse(file)
1
+ class Baiduserp::Parser
2
+ def _parse_related_keywords(file)
3
3
  result = []
4
4
  file[:doc].search('div[@id="rs"]').each do |rs|
5
5
  rs.css('a').each do |link|
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
2
 
3
- module Baiduserp::Parser::Result_Num
4
- def self.parse(file)
3
+ class Baiduserp::Parser
4
+ def _parse_result_num(file)
5
5
  html = file[:html]
6
6
  str = html.scan(/找到相关结果(.*)个/).join
7
7
  str = str.gsub('约','')
@@ -0,0 +1,12 @@
1
+ class Baiduserp::Parser
2
+ def _parse_right_hotel(file)
3
+ rh = file[:doc].search('div[@tpl="right_hotel"]')
4
+ return nil if rh.nil?
5
+
6
+ rh = rh.first
7
+ return nil if rh.nil?
8
+ title = Baiduserp::Helper.get_content_safe(rh.search('div.opr-hotel-title'))
9
+
10
+ {:title => title}
11
+ end
12
+ end
@@ -0,0 +1,13 @@
1
+ class Baiduserp::Parser
2
+ def _parse_right_personinfo(file)
3
+ rp = file[:doc].search('div[@tpl="right_personinfo"]')
4
+ return nil if rp.nil?
5
+
6
+ title = Baiduserp::Helper.get_content_safe rp.search('span.opr-personinfo-subtitle-large')
7
+ info = Baiduserp::Helper.get_content_safe rp.search('div.opr-personinfo-info')
8
+ source = Baiduserp::Helper.get_content_safe rp.search('div.opr-personinfo-source a')
9
+
10
+ return nil if title.nil? && info.nil? && source.nil?
11
+ {:title => title, :info => info, :source => source}
12
+ end
13
+ end
@@ -0,0 +1,17 @@
1
+ class Baiduserp::Parser
2
+ def _parse_right_relaperson(file)
3
+ relapersons = file[:doc].search('div[@tpl="right_relaperson"]')
4
+ return nil if relapersons.nil?
5
+
6
+ result = []
7
+ relapersons.each do |rr|
8
+ title = rr.search('span.opr-relaperson-subtitle-tip').first.content
9
+ r = []
10
+ rr.search('p.opr-relaperson-name').each do |p|
11
+ r << p.content
12
+ end
13
+ result << {:title => title, :names => r}
14
+ end
15
+ result
16
+ end
17
+ end
@@ -0,0 +1,14 @@
1
+ class Baiduserp::Parser
2
+ def _parse_right_weather(file)
3
+ rw = file[:doc].search('div[@tpl="right_weather"]')
4
+ return nil if rw.nil?
5
+
6
+ rw = rw.first
7
+ return nil if rw.nil?
8
+
9
+ title = Baiduserp::Helper.get_content_safe(rw.search('div.opr-weather-title'))
10
+ week = rw.search('a.opr-weather-week').first['href']
11
+
12
+ {:title => title, :week => week}
13
+ end
14
+ end
metadata CHANGED
@@ -1,46 +1,41 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.2
5
- prerelease:
4
+ version: 2.0.3
6
5
  platform: ruby
7
6
  authors:
8
7
  - MingQian Zhang
9
8
  autorequire:
10
9
  bindir: bin
11
10
  cert_chain: []
12
- date: 2013-06-20 00:00:00.000000000 Z
11
+ date: 2013-06-25 00:00:00.000000000 Z
13
12
  dependencies:
14
13
  - !ruby/object:Gem::Dependency
15
14
  name: nokogiri
16
15
  requirement: !ruby/object:Gem::Requirement
17
- none: false
18
16
  requirements:
19
- - - ! '>='
17
+ - - '>='
20
18
  - !ruby/object:Gem::Version
21
19
  version: '0'
22
20
  type: :runtime
23
21
  prerelease: false
24
22
  version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
23
  requirements:
27
- - - ! '>='
24
+ - - '>='
28
25
  - !ruby/object:Gem::Version
29
26
  version: '0'
30
27
  - !ruby/object:Gem::Dependency
31
28
  name: httparty
32
29
  requirement: !ruby/object:Gem::Requirement
33
- none: false
34
30
  requirements:
35
- - - ! '>='
31
+ - - '>='
36
32
  - !ruby/object:Gem::Version
37
33
  version: '0'
38
34
  type: :runtime
39
35
  prerelease: false
40
36
  version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
37
  requirements:
43
- - - ! '>='
38
+ - - '>='
44
39
  - !ruby/object:Gem::Version
45
40
  version: '0'
46
41
  description: Parse Baidu SERP result page.
@@ -53,39 +48,42 @@ extra_rdoc_files: []
53
48
  files:
54
49
  - lib/baiduserp/client.rb
55
50
  - lib/baiduserp/helper.rb
56
- - lib/baiduserp/parser/ads_right.rb
57
- - lib/baiduserp/parser/ads_top.rb
58
- - lib/baiduserp/parser/pinpaizhuanqu.rb
59
- - lib/baiduserp/parser/ranks.rb
60
- - lib/baiduserp/parser/related_keywords.rb
61
- - lib/baiduserp/parser/result_num.rb
62
51
  - lib/baiduserp/parser.rb
63
52
  - lib/baiduserp/version.rb
64
53
  - lib/baiduserp.rb
54
+ - lib/parsers/ads_right.rb
55
+ - lib/parsers/ads_top.rb
56
+ - lib/parsers/pinpaizhuanqu.rb
57
+ - lib/parsers/ranks.rb
58
+ - lib/parsers/related_keywords.rb
59
+ - lib/parsers/result_num.rb
60
+ - lib/parsers/right_hotel.rb
61
+ - lib/parsers/right_personinfo.rb
62
+ - lib/parsers/right_relaperson.rb
63
+ - lib/parsers/right_weather.rb
65
64
  - bin/baiduserp
66
65
  - README.md
67
66
  homepage: https://github.com/mqzhang/baiduserp
68
67
  licenses: []
68
+ metadata: {}
69
69
  post_install_message:
70
70
  rdoc_options: []
71
71
  require_paths:
72
72
  - lib
73
73
  required_ruby_version: !ruby/object:Gem::Requirement
74
- none: false
75
74
  requirements:
76
- - - ! '>='
75
+ - - '>='
77
76
  - !ruby/object:Gem::Version
78
77
  version: '0'
79
78
  required_rubygems_version: !ruby/object:Gem::Requirement
80
- none: false
81
79
  requirements:
82
- - - ! '>='
80
+ - - '>='
83
81
  - !ruby/object:Gem::Version
84
82
  version: '0'
85
83
  requirements: []
86
84
  rubyforge_project:
87
- rubygems_version: 1.8.25
85
+ rubygems_version: 2.0.0
88
86
  signing_key:
89
- specification_version: 3
87
+ specification_version: 4
90
88
  summary: Baidu SERP
91
89
  test_files: []
@@ -1,5 +0,0 @@
1
- module Baiduserp::Parser::PinPaiZhuanQu
2
- def self.parse(file)
3
- file[:html].include? 'bs.baidu.com/adcoup-mat'
4
- end
5
- end