baiduserp 2.3.3 → 2.3.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fefc5d8f8028b48f1557fdc0bacc9ea655fb5140
4
- data.tar.gz: b078c04a225b413fb1e6f70d01d89c63b285c930
3
+ metadata.gz: c4f925a3ac00d8faedd0ce3c596b957cd76f2323
4
+ data.tar.gz: 4d3f836b54ed03bfd70603cb82425f78e96c3cb1
5
5
  SHA512:
6
- metadata.gz: 9e56e95afb223c96499f99ee38a5bb209954361944ebfea43550a93a802c416ad7198b8a672b01d4ff99accc0c1f4591de69c0db58013f4691d967e2cd21ad77
7
- data.tar.gz: 1f476da432c0efca2536001b79e5cdd88748da2fd0bc4d7ace4aff97c39fdf79009dfd161f80292b5af45014ca42c5a168e022be0d0d20060681455e15bbbdf9
6
+ metadata.gz: abf62c6dcb480558c39f58b8e4f7b27b7146ccfe2d380225d7541ac32def7575f37628b1df86ba0d73da574ecb040a758169736dc3ceddb29167c9ea83b89fa8
7
+ data.tar.gz: 5192e5a8175459b121f46413d3d6701b5ffba45f615e9dcfbee360af151daf82b9b68e4b930c99639eb465553f45e49aaba3d4d73129d2452263c178d4cfeac0
@@ -81,6 +81,8 @@ module Baiduserp
81
81
  type = w[:type]
82
82
  name = w[:name]
83
83
  site = w[:site]
84
+ subdomain = w[:subdomain]
85
+ url = w[:url]
84
86
  side_rank = w[:side_rank]
85
87
  weight = w[:weight]
86
88
  side = w[:side]
@@ -91,6 +93,8 @@ module Baiduserp
91
93
  r.type = type
92
94
  r.name = name
93
95
  r.site = site
96
+ r.subdomain = subdomain
97
+ r.url = url
94
98
  r.weight = weight
95
99
  r.part = part
96
100
  r.normalized_weight = normalized_weight
@@ -33,8 +33,31 @@ module Baiduserp
33
33
  end
34
34
 
35
35
  def parse_site(url)
36
- url = Domainatrix.parse(url.to_s)
37
- url.domain + '.' + url.public_suffix
36
+ begin
37
+ url = Domainatrix.parse(url.to_s)
38
+ site = url.domain + '.' + url.public_suffix
39
+ rescue Exception => e
40
+ puts "parse_site from url error:"
41
+ puts url
42
+ puts e.class
43
+ puts e.message
44
+ site = ''
45
+ end
46
+ site
47
+ end
48
+
49
+ def parse_subdomain(url)
50
+ begin
51
+ url = Domainatrix.parse(url.to_s)
52
+ subdomain = url.subdomain
53
+ rescue Exception => e
54
+ puts "parse_site from url error:"
55
+ puts url
56
+ puts e.class
57
+ puts e.message
58
+ subdomain = ''
59
+ end
60
+ subdomain
38
61
  end
39
62
 
40
63
  end
@@ -0,0 +1,11 @@
1
+ Sequel.migration do
2
+ up do
3
+ add_column :weights, :subdomain, String
4
+ add_column :weights, :url, String
5
+ end
6
+
7
+ down do
8
+ drop_column :weights, :subdomain
9
+ drop_column :weights, :url
10
+ end
11
+ end
@@ -10,7 +10,7 @@ class Baiduserp::Parser
10
10
 
11
11
  r[:content] = Baiduserp::Helper.get_content_safe(div.search('a.EC_desc/font'))
12
12
 
13
- r[:site] = Baiduserp::Helper.get_content_safe(div.search('font.EC_url'))
13
+ r[:url] = Baiduserp::Helper.get_content_safe(div.search('font.EC_url'))
14
14
 
15
15
  result << r
16
16
  end
@@ -19,7 +19,7 @@ class Baiduserp::Parser
19
19
 
20
20
  r[:content] = Baiduserp::Helper.get_content_safe(div.search('div.ec_desc'))
21
21
 
22
- r[:site] = Baiduserp::Helper.get_content_safe(div.search('span.ec_url'))
22
+ r[:url] = Baiduserp::Helper.get_content_safe(div.search('span.ec_url'))
23
23
 
24
24
  result << r
25
25
 
@@ -30,7 +30,7 @@ class Baiduserp::Parser
30
30
 
31
31
  r[:content] = Baiduserp::Helper.get_content_safe(div.search('a.EC_desc'))
32
32
 
33
- r[:site] = Baiduserp::Helper.get_content_safe(div.search('a.EC_url'))
33
+ r[:url] = Baiduserp::Helper.get_content_safe(div.search('a.EC_url'))
34
34
 
35
35
  result << r
36
36
 
@@ -56,13 +56,21 @@ module Baiduserp
56
56
  self[:ranks].each do |rank|
57
57
  side_rank += 1
58
58
 
59
+ url = rank[:url].to_s
60
+ mu = rank[:mu].to_s
61
+
59
62
  type = 'SEO'
60
63
  type = 'Special' if rank[:baiduopen]
61
- type = 'Special' unless rank[:mu].to_s.empty?
62
64
 
63
- name = rank[:tpl].to_s
65
+ unless mu.empty?
66
+ url = mu
67
+ type = 'Special'
68
+ end
64
69
 
65
- site = Baiduserp::Helper.parse_site(rank[:url])
70
+ site = Baiduserp::Helper.parse_site(url)
71
+ subdomain = Baiduserp::Helper.parse_subdomain(url)
72
+
73
+ name = rank[:tpl].to_s
66
74
 
67
75
  weight = 1.0/side_rank.to_f
68
76
  if type == 'Special'
@@ -73,25 +81,23 @@ module Baiduserp
73
81
  end
74
82
  end
75
83
 
76
- result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
84
+ result << {type: type, name: name, site: site, subdomain: subdomain, url: url, mu: mu, side_rank: side_rank, weight: weight}
77
85
  end
78
86
  [result, side_rank]
79
87
  end
80
88
 
81
- # def weight_of_pinpaizhuanqu(side_rank)
82
- # [[{ type: 'Special', name: 'pinpaizhuanqu', site: '', side_rank: side_rank, weight: 1.0}], (side_rank + 1)]
83
- # end
84
-
85
89
  def weight_of_ads_top(side_rank)
86
90
  result = []
87
91
  self[:ads_top].each do |ad|
88
92
  side_rank += 1
89
93
 
94
+ url = ad[:url].to_s
90
95
  type = 'SEM'
91
96
  name = ''
92
- site = Baiduserp::Helper.parse_site(ad[:site])
97
+ site = Baiduserp::Helper.parse_site(url)
98
+ subdomain = Baiduserp::Helper.parse_subdomain(url)
93
99
  weight = 1.0/side_rank.to_f
94
- result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
100
+ result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
95
101
  end
96
102
  [result, side_rank]
97
103
  end
@@ -101,11 +107,13 @@ module Baiduserp
101
107
  self[:ads_right].each do |ad|
102
108
  side_rank += 1
103
109
 
110
+ url = ad[:url].to_s
104
111
  type = 'SEM'
105
112
  name = ''
106
- site = Baiduserp::Helper.parse_site(ad[:site])
113
+ site = Baiduserp::Helper.parse_site(url)
114
+ subdomain = Baiduserp::Helper.parse_subdomain(url)
107
115
  weight = 1.0/side_rank.to_f
108
- result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
116
+ result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
109
117
  end
110
118
  [result, side_rank]
111
119
  end
@@ -115,11 +123,13 @@ module Baiduserp
115
123
  self[:con_ar].each do |con|
116
124
  side_rank += 1
117
125
 
126
+ url = con[:data_click]['mu'].to_s
118
127
  type = 'Special'
119
128
  name = con[:tpl]
120
- site = Baiduserp::Helper.parse_site(con[:data_click]['mu'])
129
+ site = Baiduserp::Helper.parse_site(url)
130
+ subdomain = Baiduserp::Helper.parse_subdomain(url)
121
131
  weight = 1.0 * @config[:con_ar_weight]
122
- result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
132
+ result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
123
133
  end
124
134
  [result, side_rank]
125
135
  end
@@ -129,11 +139,13 @@ module Baiduserp
129
139
  self[:zhixin].each do |zhixin|
130
140
  side_rank += 1
131
141
 
142
+ url = zhixin[:mu].to_s
132
143
  type = 'Special'
133
144
  name = zhixin[:tpl]
134
- site = Baiduserp::Helper.parse_site(zhixin[:mu])
145
+ site = Baiduserp::Helper.parse_site(url)
146
+ subdomain = Baiduserp::Helper.parse_subdomain(url)
135
147
  weight = 1.0 * @config[:zhixin_weight]
136
- result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
148
+ result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
137
149
  end
138
150
  [result, side_rank]
139
151
  end
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.3.3"
2
+ VERSION = "2.3.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.3
4
+ version: 2.3.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - MingQian Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-10 00:00:00.000000000 Z
11
+ date: 2013-12-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -95,6 +95,7 @@ files:
95
95
  - lib/baiduserp/migrations/002_create_htmls_table.rb
96
96
  - lib/baiduserp/migrations/003_create_serps_table.rb
97
97
  - lib/baiduserp/migrations/004_create_weights_table.rb
98
+ - lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb
98
99
  - lib/baiduserp/parser/ads_right.rb
99
100
  - lib/baiduserp/parser/ads_top.rb
100
101
  - lib/baiduserp/parser/con_ar.rb