baiduserp 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -5,7 +5,7 @@ module Baiduserp
5
5
  def get_content_safe(noko)
6
6
  return nil if noko.nil?
7
7
  return nil if noko.empty?
8
- noko.first.content
8
+ noko.first.content.strip
9
9
  end
10
10
 
11
11
 
@@ -1,5 +1,35 @@
1
1
  module Baiduserp::Parser::Ads_Left
2
2
  def self.parse(file)
3
-
3
+ result = []
4
+ file[:doc].search('div.ec_pp_f').each do |div|
5
+ id = div['id'].to_i
6
+ next unless id >= 3000
7
+ r = {rank: id}
8
+
9
+ r[:title] = Baiduserp::Helper.get_content_safe(div.search('div.ec_title'))
10
+
11
+ r[:content] = Baiduserp::Helper.get_content_safe(div.search('div.ec_desc'))
12
+
13
+ r[:site] = Baiduserp::Helper.get_content_safe(div.search('span.ec_url'))
14
+
15
+ result << r
16
+ end
17
+
18
+ if result.empty?
19
+ file[:doc].search('table.ec_pp_f').each_with_index do |table,i|
20
+ r = {rank: i + 1}
21
+
22
+ r[:title] = Baiduserp::Helper.get_content_safe(table.search('td.EC_header/a'))
23
+
24
+ r[:content] = Baiduserp::Helper.get_content_safe(table.search('a.EC_desc'))
25
+
26
+ r[:site] = Baiduserp::Helper.get_content_safe(table.search('a.EC_url'))
27
+
28
+ result << r
29
+
30
+ end
31
+ end
32
+
33
+ result
4
34
  end
5
35
  end
@@ -1,5 +1,19 @@
1
1
  module Baiduserp::Parser::Ads_Right
2
2
  def self.parse(file)
3
-
3
+ result = []
4
+ file[:doc].search('div.EC_im').each do |div|
5
+ r = {}
6
+
7
+ r[:rank] = div['id'].sub('bdfs','').to_i + 1
8
+
9
+ r[:title] = Baiduserp::Helper.get_content_safe(div.search('a.EC_t'))
10
+
11
+ r[:content] = Baiduserp::Helper.get_content_safe(div.search('a.EC_desc/font'))
12
+
13
+ r[:site] = Baiduserp::Helper.get_content_safe(div.search('font.EC_url'))
14
+
15
+ result << r
16
+ end
17
+ result
4
18
  end
5
19
  end
@@ -4,15 +4,15 @@ module Baiduserp::Parser::Organic
4
4
  file[:doc].search("//table").each do |table|
5
5
  id = table['id'].to_i
6
6
  next unless id > 0
7
- r = Hash.new
7
+ r = {:rank => id}
8
8
 
9
- url = table.search("h3/a").first['href']
9
+ url = table.search('h3/a').first['href']
10
10
  url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
11
11
  r[:url] = url
12
12
 
13
13
  r[:title] = Baiduserp::Helper.get_content_safe(table.search('h3'))
14
14
 
15
- r[:content] = Baiduserp::Helper.get_content_safe(table.search("div[@class='c-abstract']"))
15
+ r[:content] = Baiduserp::Helper.get_content_safe(table.search('div.c-abstract'))
16
16
 
17
17
  r[:mu] = table['mu']
18
18
 
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.0.0"
2
+ VERSION = "2.0.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: