baiduserp 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
data/lib/baiduserp/helper.rb
CHANGED
@@ -1,5 +1,35 @@
|
|
1
1
|
# Parser for the ad entries matched by the `ec_pp_f` selectors.
module Baiduserp::Parser::Ads_Left
  # Collects ad entries from the parsed document stored under file[:doc].
  #
  # Two markups are tried in order:
  # 1. `div.ec_pp_f` elements whose numeric `id` is at least 3000; the id
  #    itself is used as the rank.
  # 2. Only when the first pass finds nothing: `table.ec_pp_f` elements,
  #    ranked by their 1-based position among the matches.
  #
  # file - Hash-like object whose :doc entry responds to `search`
  #        (presumably a Nokogiri document -- confirm against the caller).
  #
  # Returns an Array of Hashes with the keys :rank, :title, :content, :site.
  def self.parse(file)
    doc = file[:doc]
    ads = []

    doc.search('div.ec_pp_f').each do |node|
      rank = node['id'].to_i
      # A missing or non-numeric id becomes 0 and is filtered out here as well.
      next if rank < 3000

      ads << {
        rank: rank,
        title: Baiduserp::Helper.get_content_safe(node.search('div.ec_title')),
        content: Baiduserp::Helper.get_content_safe(node.search('div.ec_desc')),
        site: Baiduserp::Helper.get_content_safe(node.search('span.ec_url'))
      }
    end
    return ads unless ads.empty?

    # Fallback markup: table-based ads carry no usable id, so rank by position.
    doc.search('table.ec_pp_f').each_with_index do |node, index|
      ads << {
        rank: index + 1,
        title: Baiduserp::Helper.get_content_safe(node.search('td.EC_header/a')),
        content: Baiduserp::Helper.get_content_safe(node.search('a.EC_desc')),
        site: Baiduserp::Helper.get_content_safe(node.search('a.EC_url'))
      }
    end

    ads
  end
end
|
@@ -1,5 +1,19 @@
|
|
1
1
|
# Parser for the ad entries matched by the `div.EC_im` selector.
module Baiduserp::Parser::Ads_Right
  # Collects ad entries from the parsed document stored under file[:doc].
  #
  # Each `div.EC_im` element contributes one entry. Its rank is derived from
  # the element id: the `bdfs` prefix is stripped, the remainder is converted
  # with `to_i`, and 1 is added (so `bdfs0` yields rank 1).
  #
  # Elements without an `id` attribute are skipped. Previously they raised
  # NoMethodError (`nil.sub`) and aborted the whole parse.
  #
  # file - Hash-like object whose :doc entry responds to `search`
  #        (presumably a Nokogiri document -- confirm against the caller).
  #
  # Returns an Array of Hashes with the keys :rank, :title, :content, :site.
  def self.parse(file)
    result = []

    file[:doc].search('div.EC_im').each do |div|
      id = div['id']
      # No id means no rank can be derived; skip instead of crashing.
      next unless id

      result << {
        rank: id.sub('bdfs', '').to_i + 1,
        title: Baiduserp::Helper.get_content_safe(div.search('a.EC_t')),
        content: Baiduserp::Helper.get_content_safe(div.search('a.EC_desc/font')),
        site: Baiduserp::Helper.get_content_safe(div.search('font.EC_url'))
      }
    end

    result
  end
end
|
@@ -4,15 +4,15 @@ module Baiduserp::Parser::Organic
|
|
4
4
|
file[:doc].search("//table").each do |table|
|
5
5
|
id = table['id'].to_i
|
6
6
|
next unless id > 0
|
7
|
-
r =
|
7
|
+
r = {:rank => id}
|
8
8
|
|
9
|
-
url = table.search(
|
9
|
+
url = table.search('h3/a').first['href']
|
10
10
|
url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
|
11
11
|
r[:url] = url
|
12
12
|
|
13
13
|
r[:title] = Baiduserp::Helper.get_content_safe(table.search('h3'))
|
14
14
|
|
15
|
-
r[:content] = Baiduserp::Helper.get_content_safe(table.search(
|
15
|
+
r[:content] = Baiduserp::Helper.get_content_safe(table.search('div.c-abstract'))
|
16
16
|
|
17
17
|
r[:mu] = table['mu']
|
18
18
|
|
data/lib/baiduserp/version.rb
CHANGED