baiduserp 2.0.2 → 2.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/baiduserp/parser.rb +5 -4
- data/lib/baiduserp/version.rb +1 -1
- data/lib/{baiduserp/parser → parsers}/ads_right.rb +2 -2
- data/lib/{baiduserp/parser → parsers}/ads_top.rb +2 -2
- data/lib/parsers/pinpaizhuanqu.rb +5 -0
- data/lib/{baiduserp/parser → parsers}/ranks.rb +8 -4
- data/lib/{baiduserp/parser → parsers}/related_keywords.rb +2 -2
- data/lib/{baiduserp/parser → parsers}/result_num.rb +2 -2
- data/lib/parsers/right_hotel.rb +12 -0
- data/lib/parsers/right_personinfo.rb +13 -0
- data/lib/parsers/right_relaperson.rb +17 -0
- data/lib/parsers/right_weather.rb +14 -0
- metadata +21 -23
- data/lib/baiduserp/parser/pinpaizhuanqu.rb +0 -5
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 925a7067129b157540753444a3e54c78676d2a9f
|
4
|
+
data.tar.gz: 908b484a96fbbed83429804b09d50ff1b7828ff5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0c7fb5fd7f6fa734934522cfe738f0725be40c4f1504f0c87fa0e1d08bff5fd8d8c19580701a2dd17d014f02df9b2b850aef752f535a614d4fd62ba7044b001d
|
7
|
+
data.tar.gz: 793333893a7e9c51f936d21a861066dfa47237349976b1bdee59a1046c4641e059b829530d5dbaffc51322cf3b2a035c1e71b8323e8b0cfecc7803695a892a54
|
data/lib/baiduserp/parser.rb
CHANGED
@@ -6,7 +6,7 @@ require 'baiduserp/helper'
|
|
6
6
|
|
7
7
|
module Baiduserp
|
8
8
|
class Parser
|
9
|
-
Dir[File.expand_path('
|
9
|
+
Dir[File.expand_path('../../parsers/*.rb', __FILE__)].each{|f| require f}
|
10
10
|
|
11
11
|
def parse(html)
|
12
12
|
@file = Hash.new
|
@@ -15,9 +15,10 @@ module Baiduserp
|
|
15
15
|
@file[:html] = html
|
16
16
|
@file[:doc] = Nokogiri::HTML(html)
|
17
17
|
|
18
|
-
self.
|
19
|
-
|
20
|
-
|
18
|
+
self.methods.each do |m|
|
19
|
+
next unless m =~ /^_parse_/
|
20
|
+
#p m
|
21
|
+
@serp[m.to_s.sub('_parse_','').to_sym] = self.send m,@file
|
21
22
|
#p @serp.keys
|
22
23
|
end
|
23
24
|
|
data/lib/baiduserp/version.rb
CHANGED
data/lib/{baiduserp/parser → parsers}/ranks.rb
CHANGED
@@ -1,13 +1,17 @@
|
|
1
|
-
|
2
|
-
def
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_ranks(file)
|
3
3
|
result = []
|
4
4
|
file[:doc].search("//table").each do |table|
|
5
|
+
next if table.nil?
|
5
6
|
id = table['id'].to_i
|
6
7
|
next unless id > 0
|
7
8
|
r = {:rank => id}
|
8
9
|
|
9
|
-
url = table.search('h3/a').first
|
10
|
-
|
10
|
+
url = table.search('h3/a').first
|
11
|
+
unless url.nil?
|
12
|
+
url = url['href']
|
13
|
+
url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
|
14
|
+
end
|
11
15
|
r[:url] = url
|
12
16
|
|
13
17
|
r[:title] = Baiduserp::Helper.get_content_safe(table.search('h3'))
|
data/lib/parsers/right_hotel.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_right_hotel(file)
|
3
|
+
rh = file[:doc].search('div[@tpl="right_hotel"]')
|
4
|
+
return nil if rh.nil?
|
5
|
+
|
6
|
+
rh = rh.first
|
7
|
+
return nil if rh.nil?
|
8
|
+
title = Baiduserp::Helper.get_content_safe(rh.search('div.opr-hotel-title'))
|
9
|
+
|
10
|
+
{:title => title}
|
11
|
+
end
|
12
|
+
end
|
data/lib/parsers/right_personinfo.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_right_personinfo(file)
|
3
|
+
rp = file[:doc].search('div[@tpl="right_personinfo"]')
|
4
|
+
return nil if rp.nil?
|
5
|
+
|
6
|
+
title = Baiduserp::Helper.get_content_safe rp.search('span.opr-personinfo-subtitle-large')
|
7
|
+
info = Baiduserp::Helper.get_content_safe rp.search('div.opr-personinfo-info')
|
8
|
+
source = Baiduserp::Helper.get_content_safe rp.search('div.opr-personinfo-source a')
|
9
|
+
|
10
|
+
return nil if title.nil? && info.nil? && source.nil?
|
11
|
+
{:title => title, :info => info, :source => source}
|
12
|
+
end
|
13
|
+
end
|
data/lib/parsers/right_relaperson.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_right_relaperson(file)
|
3
|
+
relapersons = file[:doc].search('div[@tpl="right_relaperson"]')
|
4
|
+
return nil if relapersons.nil?
|
5
|
+
|
6
|
+
result = []
|
7
|
+
relapersons.each do |rr|
|
8
|
+
title = rr.search('span.opr-relaperson-subtitle-tip').first.content
|
9
|
+
r = []
|
10
|
+
rr.search('p.opr-relaperson-name').each do |p|
|
11
|
+
r << p.content
|
12
|
+
end
|
13
|
+
result << {:title => title, :names => r}
|
14
|
+
end
|
15
|
+
result
|
16
|
+
end
|
17
|
+
end
|
data/lib/parsers/right_weather.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_right_weather(file)
|
3
|
+
rw = file[:doc].search('div[@tpl="right_weather"]')
|
4
|
+
return nil if rw.nil?
|
5
|
+
|
6
|
+
rw = rw.first
|
7
|
+
return nil if rw.nil?
|
8
|
+
|
9
|
+
title = Baiduserp::Helper.get_content_safe(rw.search('div.opr-weather-title'))
|
10
|
+
week = rw.search('a.opr-weather-week').first['href']
|
11
|
+
|
12
|
+
{:title => title, :week => week}
|
13
|
+
end
|
14
|
+
end
|
metadata
CHANGED
@@ -1,46 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baiduserp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.2
|
5
|
-
prerelease:
|
4
|
+
version: 2.0.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- MingQian Zhang
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
11
|
+
date: 2013-06-25 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '0'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: httparty
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - '>='
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
description: Parse Baidu SERP result page.
|
@@ -53,39 +48,42 @@ extra_rdoc_files: []
|
|
53
48
|
files:
|
54
49
|
- lib/baiduserp/client.rb
|
55
50
|
- lib/baiduserp/helper.rb
|
56
|
-
- lib/baiduserp/parser/ads_right.rb
|
57
|
-
- lib/baiduserp/parser/ads_top.rb
|
58
|
-
- lib/baiduserp/parser/pinpaizhuanqu.rb
|
59
|
-
- lib/baiduserp/parser/ranks.rb
|
60
|
-
- lib/baiduserp/parser/related_keywords.rb
|
61
|
-
- lib/baiduserp/parser/result_num.rb
|
62
51
|
- lib/baiduserp/parser.rb
|
63
52
|
- lib/baiduserp/version.rb
|
64
53
|
- lib/baiduserp.rb
|
54
|
+
- lib/parsers/ads_right.rb
|
55
|
+
- lib/parsers/ads_top.rb
|
56
|
+
- lib/parsers/pinpaizhuanqu.rb
|
57
|
+
- lib/parsers/ranks.rb
|
58
|
+
- lib/parsers/related_keywords.rb
|
59
|
+
- lib/parsers/result_num.rb
|
60
|
+
- lib/parsers/right_hotel.rb
|
61
|
+
- lib/parsers/right_personinfo.rb
|
62
|
+
- lib/parsers/right_relaperson.rb
|
63
|
+
- lib/parsers/right_weather.rb
|
65
64
|
- bin/baiduserp
|
66
65
|
- README.md
|
67
66
|
homepage: https://github.com/mqzhang/baiduserp
|
68
67
|
licenses: []
|
68
|
+
metadata: {}
|
69
69
|
post_install_message:
|
70
70
|
rdoc_options: []
|
71
71
|
require_paths:
|
72
72
|
- lib
|
73
73
|
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
74
|
requirements:
|
76
|
-
- -
|
75
|
+
- - '>='
|
77
76
|
- !ruby/object:Gem::Version
|
78
77
|
version: '0'
|
79
78
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
-
none: false
|
81
79
|
requirements:
|
82
|
-
- -
|
80
|
+
- - '>='
|
83
81
|
- !ruby/object:Gem::Version
|
84
82
|
version: '0'
|
85
83
|
requirements: []
|
86
84
|
rubyforge_project:
|
87
|
-
rubygems_version:
|
85
|
+
rubygems_version: 2.0.0
|
88
86
|
signing_key:
|
89
|
-
specification_version:
|
87
|
+
specification_version: 4
|
90
88
|
summary: Baidu SERP
|
91
89
|
test_files: []
|