baiduserp 2.0.2 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/baiduserp/parser.rb +5 -4
- data/lib/baiduserp/version.rb +1 -1
- data/lib/{baiduserp/parser → parsers}/ads_right.rb +2 -2
- data/lib/{baiduserp/parser → parsers}/ads_top.rb +2 -2
- data/lib/parsers/pinpaizhuanqu.rb +5 -0
- data/lib/{baiduserp/parser → parsers}/ranks.rb +8 -4
- data/lib/{baiduserp/parser → parsers}/related_keywords.rb +2 -2
- data/lib/{baiduserp/parser → parsers}/result_num.rb +2 -2
- data/lib/parsers/right_hotel.rb +12 -0
- data/lib/parsers/right_personinfo.rb +13 -0
- data/lib/parsers/right_relaperson.rb +17 -0
- data/lib/parsers/right_weather.rb +14 -0
- metadata +21 -23
- data/lib/baiduserp/parser/pinpaizhuanqu.rb +0 -5
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 925a7067129b157540753444a3e54c78676d2a9f
|
4
|
+
data.tar.gz: 908b484a96fbbed83429804b09d50ff1b7828ff5
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 0c7fb5fd7f6fa734934522cfe738f0725be40c4f1504f0c87fa0e1d08bff5fd8d8c19580701a2dd17d014f02df9b2b850aef752f535a614d4fd62ba7044b001d
|
7
|
+
data.tar.gz: 793333893a7e9c51f936d21a861066dfa47237349976b1bdee59a1046c4641e059b829530d5dbaffc51322cf3b2a035c1e71b8323e8b0cfecc7803695a892a54
|
data/lib/baiduserp/parser.rb
CHANGED
@@ -6,7 +6,7 @@ require 'baiduserp/helper'
|
|
6
6
|
|
7
7
|
module Baiduserp
|
8
8
|
class Parser
|
9
|
-
Dir[File.expand_path('
|
9
|
+
Dir[File.expand_path('../../parsers/*.rb', __FILE__)].each{|f| require f}
|
10
10
|
|
11
11
|
def parse(html)
|
12
12
|
@file = Hash.new
|
@@ -15,9 +15,10 @@ module Baiduserp
|
|
15
15
|
@file[:html] = html
|
16
16
|
@file[:doc] = Nokogiri::HTML(html)
|
17
17
|
|
18
|
-
self.
|
19
|
-
|
20
|
-
|
18
|
+
self.methods.each do |m|
|
19
|
+
next unless m =~ /^_parse_/
|
20
|
+
#p m
|
21
|
+
@serp[m.to_s.sub('_parse_','').to_sym] = self.send m,@file
|
21
22
|
#p @serp.keys
|
22
23
|
end
|
23
24
|
|
data/lib/baiduserp/version.rb
CHANGED
(hunk for this file not captured)
data/lib/{baiduserp/parser → parsers}/ranks.rb
CHANGED
@@ -1,13 +1,17 @@
|
|
1
|
-
|
2
|
-
def
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_ranks(file)
|
3
3
|
result = []
|
4
4
|
file[:doc].search("//table").each do |table|
|
5
|
+
next if table.nil?
|
5
6
|
id = table['id'].to_i
|
6
7
|
next unless id > 0
|
7
8
|
r = {:rank => id}
|
8
9
|
|
9
|
-
url = table.search('h3/a').first
|
10
|
-
|
10
|
+
url = table.search('h3/a').first
|
11
|
+
unless url.nil?
|
12
|
+
url = url['href']
|
13
|
+
url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
|
14
|
+
end
|
11
15
|
r[:url] = url
|
12
16
|
|
13
17
|
r[:title] = Baiduserp::Helper.get_content_safe(table.search('h3'))
|
data/lib/parsers/right_hotel.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_right_hotel(file)
|
3
|
+
rh = file[:doc].search('div[@tpl="right_hotel"]')
|
4
|
+
return nil if rh.nil?
|
5
|
+
|
6
|
+
rh = rh.first
|
7
|
+
return nil if rh.nil?
|
8
|
+
title = Baiduserp::Helper.get_content_safe(rh.search('div.opr-hotel-title'))
|
9
|
+
|
10
|
+
{:title => title}
|
11
|
+
end
|
12
|
+
end
|
data/lib/parsers/right_personinfo.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_right_personinfo(file)
|
3
|
+
rp = file[:doc].search('div[@tpl="right_personinfo"]')
|
4
|
+
return nil if rp.nil?
|
5
|
+
|
6
|
+
title = Baiduserp::Helper.get_content_safe rp.search('span.opr-personinfo-subtitle-large')
|
7
|
+
info = Baiduserp::Helper.get_content_safe rp.search('div.opr-personinfo-info')
|
8
|
+
source = Baiduserp::Helper.get_content_safe rp.search('div.opr-personinfo-source a')
|
9
|
+
|
10
|
+
return nil if title.nil? && info.nil? && source.nil?
|
11
|
+
{:title => title, :info => info, :source => source}
|
12
|
+
end
|
13
|
+
end
|
data/lib/parsers/right_relaperson.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_right_relaperson(file)
|
3
|
+
relapersons = file[:doc].search('div[@tpl="right_relaperson"]')
|
4
|
+
return nil if relapersons.nil?
|
5
|
+
|
6
|
+
result = []
|
7
|
+
relapersons.each do |rr|
|
8
|
+
title = rr.search('span.opr-relaperson-subtitle-tip').first.content
|
9
|
+
r = []
|
10
|
+
rr.search('p.opr-relaperson-name').each do |p|
|
11
|
+
r << p.content
|
12
|
+
end
|
13
|
+
result << {:title => title, :names => r}
|
14
|
+
end
|
15
|
+
result
|
16
|
+
end
|
17
|
+
end
|
data/lib/parsers/right_weather.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
class Baiduserp::Parser
|
2
|
+
def _parse_right_weather(file)
|
3
|
+
rw = file[:doc].search('div[@tpl="right_weather"]')
|
4
|
+
return nil if rw.nil?
|
5
|
+
|
6
|
+
rw = rw.first
|
7
|
+
return nil if rw.nil?
|
8
|
+
|
9
|
+
title = Baiduserp::Helper.get_content_safe(rw.search('div.opr-weather-title'))
|
10
|
+
week = rw.search('a.opr-weather-week').first['href']
|
11
|
+
|
12
|
+
{:title => title, :week => week}
|
13
|
+
end
|
14
|
+
end
|
metadata
CHANGED
@@ -1,46 +1,41 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baiduserp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.2
|
5
|
-
prerelease:
|
4
|
+
version: 2.0.3
|
6
5
|
platform: ruby
|
7
6
|
authors:
|
8
7
|
- MingQian Zhang
|
9
8
|
autorequire:
|
10
9
|
bindir: bin
|
11
10
|
cert_chain: []
|
12
|
-
date: 2013-06-
|
11
|
+
date: 2013-06-25 00:00:00.000000000 Z
|
13
12
|
dependencies:
|
14
13
|
- !ruby/object:Gem::Dependency
|
15
14
|
name: nokogiri
|
16
15
|
requirement: !ruby/object:Gem::Requirement
|
17
|
-
none: false
|
18
16
|
requirements:
|
19
|
-
- -
|
17
|
+
- - '>='
|
20
18
|
- !ruby/object:Gem::Version
|
21
19
|
version: '0'
|
22
20
|
type: :runtime
|
23
21
|
prerelease: false
|
24
22
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
-
none: false
|
26
23
|
requirements:
|
27
|
-
- -
|
24
|
+
- - '>='
|
28
25
|
- !ruby/object:Gem::Version
|
29
26
|
version: '0'
|
30
27
|
- !ruby/object:Gem::Dependency
|
31
28
|
name: httparty
|
32
29
|
requirement: !ruby/object:Gem::Requirement
|
33
|
-
none: false
|
34
30
|
requirements:
|
35
|
-
- -
|
31
|
+
- - '>='
|
36
32
|
- !ruby/object:Gem::Version
|
37
33
|
version: '0'
|
38
34
|
type: :runtime
|
39
35
|
prerelease: false
|
40
36
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
-
none: false
|
42
37
|
requirements:
|
43
|
-
- -
|
38
|
+
- - '>='
|
44
39
|
- !ruby/object:Gem::Version
|
45
40
|
version: '0'
|
46
41
|
description: Parse Baidu SERP result page.
|
@@ -53,39 +48,42 @@ extra_rdoc_files: []
|
|
53
48
|
files:
|
54
49
|
- lib/baiduserp/client.rb
|
55
50
|
- lib/baiduserp/helper.rb
|
56
|
-
- lib/baiduserp/parser/ads_right.rb
|
57
|
-
- lib/baiduserp/parser/ads_top.rb
|
58
|
-
- lib/baiduserp/parser/pinpaizhuanqu.rb
|
59
|
-
- lib/baiduserp/parser/ranks.rb
|
60
|
-
- lib/baiduserp/parser/related_keywords.rb
|
61
|
-
- lib/baiduserp/parser/result_num.rb
|
62
51
|
- lib/baiduserp/parser.rb
|
63
52
|
- lib/baiduserp/version.rb
|
64
53
|
- lib/baiduserp.rb
|
54
|
+
- lib/parsers/ads_right.rb
|
55
|
+
- lib/parsers/ads_top.rb
|
56
|
+
- lib/parsers/pinpaizhuanqu.rb
|
57
|
+
- lib/parsers/ranks.rb
|
58
|
+
- lib/parsers/related_keywords.rb
|
59
|
+
- lib/parsers/result_num.rb
|
60
|
+
- lib/parsers/right_hotel.rb
|
61
|
+
- lib/parsers/right_personinfo.rb
|
62
|
+
- lib/parsers/right_relaperson.rb
|
63
|
+
- lib/parsers/right_weather.rb
|
65
64
|
- bin/baiduserp
|
66
65
|
- README.md
|
67
66
|
homepage: https://github.com/mqzhang/baiduserp
|
68
67
|
licenses: []
|
68
|
+
metadata: {}
|
69
69
|
post_install_message:
|
70
70
|
rdoc_options: []
|
71
71
|
require_paths:
|
72
72
|
- lib
|
73
73
|
required_ruby_version: !ruby/object:Gem::Requirement
|
74
|
-
none: false
|
75
74
|
requirements:
|
76
|
-
- -
|
75
|
+
- - '>='
|
77
76
|
- !ruby/object:Gem::Version
|
78
77
|
version: '0'
|
79
78
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
80
|
-
none: false
|
81
79
|
requirements:
|
82
|
-
- -
|
80
|
+
- - '>='
|
83
81
|
- !ruby/object:Gem::Version
|
84
82
|
version: '0'
|
85
83
|
requirements: []
|
86
84
|
rubyforge_project:
|
87
|
-
rubygems_version:
|
85
|
+
rubygems_version: 2.0.0
|
88
86
|
signing_key:
|
89
|
-
specification_version:
|
87
|
+
specification_version: 4
|
90
88
|
summary: Baidu SERP
|
91
89
|
test_files: []
|