baiduserp 2.0.0 → 2.0.1

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
@@ -5,7 +5,7 @@ module Baiduserp
5
5
  def get_content_safe(noko)
6
6
  return nil if noko.nil?
7
7
  return nil if noko.empty?
8
- noko.first.content
8
+ noko.first.content.strip
9
9
  end
10
10
 
11
11
 
@@ -1,5 +1,35 @@
1
1
  module Baiduserp::Parser::Ads_Left
2
2
  def self.parse(file)
3
-
3
+ result = []
4
+ file[:doc].search('div.ec_pp_f').each do |div|
5
+ id = div['id'].to_i
6
+ next unless id >= 3000
7
+ r = {rank: id}
8
+
9
+ r[:title] = Baiduserp::Helper.get_content_safe(div.search('div.ec_title'))
10
+
11
+ r[:content] = Baiduserp::Helper.get_content_safe(div.search('div.ec_desc'))
12
+
13
+ r[:site] = Baiduserp::Helper.get_content_safe(div.search('span.ec_url'))
14
+
15
+ result << r
16
+ end
17
+
18
+ if result.empty?
19
+ file[:doc].search('table.ec_pp_f').each_with_index do |table,i|
20
+ r = {rank: i + 1}
21
+
22
+ r[:title] = Baiduserp::Helper.get_content_safe(table.search('td.EC_header/a'))
23
+
24
+ r[:content] = Baiduserp::Helper.get_content_safe(table.search('a.EC_desc'))
25
+
26
+ r[:site] = Baiduserp::Helper.get_content_safe(table.search('a.EC_url'))
27
+
28
+ result << r
29
+
30
+ end
31
+ end
32
+
33
+ result
4
34
  end
5
35
  end
@@ -1,5 +1,19 @@
1
1
  module Baiduserp::Parser::Ads_Right
2
2
  def self.parse(file)
3
-
3
+ result = []
4
+ file[:doc].search('div.EC_im').each do |div|
5
+ r = {}
6
+
7
+ r[:rank] = div['id'].sub('bdfs','').to_i + 1
8
+
9
+ r[:title] = Baiduserp::Helper.get_content_safe(div.search('a.EC_t'))
10
+
11
+ r[:content] = Baiduserp::Helper.get_content_safe(div.search('a.EC_desc/font'))
12
+
13
+ r[:site] = Baiduserp::Helper.get_content_safe(div.search('font.EC_url'))
14
+
15
+ result << r
16
+ end
17
+ result
4
18
  end
5
19
  end
@@ -4,15 +4,15 @@ module Baiduserp::Parser::Organic
4
4
  file[:doc].search("//table").each do |table|
5
5
  id = table['id'].to_i
6
6
  next unless id > 0
7
- r = Hash.new
7
+ r = {:rank => id}
8
8
 
9
- url = table.search("h3/a").first['href']
9
+ url = table.search('h3/a').first['href']
10
10
  url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
11
11
  r[:url] = url
12
12
 
13
13
  r[:title] = Baiduserp::Helper.get_content_safe(table.search('h3'))
14
14
 
15
- r[:content] = Baiduserp::Helper.get_content_safe(table.search("div[@class='c-abstract']"))
15
+ r[:content] = Baiduserp::Helper.get_content_safe(table.search('div.c-abstract'))
16
16
 
17
17
  r[:mu] = table['mu']
18
18
 
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.0.0"
2
+ VERSION = "2.0.1"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.0
4
+ version: 2.0.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: