baiduserp 2.3.3 → 2.3.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/baiduserp/analyser.rb +4 -0
- data/lib/baiduserp/helper.rb +25 -2
- data/lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb +11 -0
- data/lib/baiduserp/parser/ads_right.rb +1 -1
- data/lib/baiduserp/parser/ads_top.rb +2 -2
- data/lib/baiduserp/result.rb +28 -16
- data/lib/baiduserp/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c4f925a3ac00d8faedd0ce3c596b957cd76f2323
|
4
|
+
data.tar.gz: 4d3f836b54ed03bfd70603cb82425f78e96c3cb1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: abf62c6dcb480558c39f58b8e4f7b27b7146ccfe2d380225d7541ac32def7575f37628b1df86ba0d73da574ecb040a758169736dc3ceddb29167c9ea83b89fa8
|
7
|
+
data.tar.gz: 5192e5a8175459b121f46413d3d6701b5ffba45f615e9dcfbee360af151daf82b9b68e4b930c99639eb465553f45e49aaba3d4d73129d2452263c178d4cfeac0
|
data/lib/baiduserp/analyser.rb
CHANGED
@@ -81,6 +81,8 @@ module Baiduserp
|
|
81
81
|
type = w[:type]
|
82
82
|
name = w[:name]
|
83
83
|
site = w[:site]
|
84
|
+
subdomain = w[:subdomain]
|
85
|
+
url = w[:url]
|
84
86
|
side_rank = w[:side_rank]
|
85
87
|
weight = w[:weight]
|
86
88
|
side = w[:side]
|
@@ -91,6 +93,8 @@ module Baiduserp
|
|
91
93
|
r.type = type
|
92
94
|
r.name = name
|
93
95
|
r.site = site
|
96
|
+
r.subdomain = subdomain
|
97
|
+
r.url = url
|
94
98
|
r.weight = weight
|
95
99
|
r.part = part
|
96
100
|
r.normalized_weight = normalized_weight
|
data/lib/baiduserp/helper.rb
CHANGED
@@ -33,8 +33,31 @@ module Baiduserp
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def parse_site(url)
|
36
|
-
|
37
|
-
|
36
|
+
begin
|
37
|
+
url = Domainatrix.parse(url.to_s)
|
38
|
+
site = url.domain + '.' + url.public_suffix
|
39
|
+
rescue Exception => e
|
40
|
+
puts "parse_site from url error:"
|
41
|
+
puts url
|
42
|
+
puts e.class
|
43
|
+
puts e.message
|
44
|
+
site = ''
|
45
|
+
end
|
46
|
+
site
|
47
|
+
end
|
48
|
+
|
49
|
+
def parse_subdomain(url)
|
50
|
+
begin
|
51
|
+
url = Domainatrix.parse(url.to_s)
|
52
|
+
subdomain = url.subdomain
|
53
|
+
rescue Exception => e
|
54
|
+
puts "parse_site from url error:"
|
55
|
+
puts url
|
56
|
+
puts e.class
|
57
|
+
puts e.message
|
58
|
+
subdomain = ''
|
59
|
+
end
|
60
|
+
subdomain
|
38
61
|
end
|
39
62
|
|
40
63
|
end
|
data/lib/baiduserp/parser/ads_right.rb
CHANGED
@@ -10,7 +10,7 @@ class Baiduserp::Parser
|
|
10
10
|
|
11
11
|
r[:content] = Baiduserp::Helper.get_content_safe(div.search('a.EC_desc/font'))
|
12
12
|
|
13
|
-
r[:
|
13
|
+
r[:url] = Baiduserp::Helper.get_content_safe(div.search('font.EC_url'))
|
14
14
|
|
15
15
|
result << r
|
16
16
|
end
|
data/lib/baiduserp/parser/ads_top.rb
CHANGED
@@ -19,7 +19,7 @@ class Baiduserp::Parser
|
|
19
19
|
|
20
20
|
r[:content] = Baiduserp::Helper.get_content_safe(div.search('div.ec_desc'))
|
21
21
|
|
22
|
-
r[:
|
22
|
+
r[:url] = Baiduserp::Helper.get_content_safe(div.search('span.ec_url'))
|
23
23
|
|
24
24
|
result << r
|
25
25
|
|
@@ -30,7 +30,7 @@ class Baiduserp::Parser
|
|
30
30
|
|
31
31
|
r[:content] = Baiduserp::Helper.get_content_safe(div.search('a.EC_desc'))
|
32
32
|
|
33
|
-
r[:
|
33
|
+
r[:url] = Baiduserp::Helper.get_content_safe(div.search('a.EC_url'))
|
34
34
|
|
35
35
|
result << r
|
36
36
|
|
data/lib/baiduserp/result.rb
CHANGED
@@ -56,13 +56,21 @@ module Baiduserp
|
|
56
56
|
self[:ranks].each do |rank|
|
57
57
|
side_rank += 1
|
58
58
|
|
59
|
+
url = rank[:url].to_s
|
60
|
+
mu = rank[:mu].to_s
|
61
|
+
|
59
62
|
type = 'SEO'
|
60
63
|
type = 'Special' if rank[:baiduopen]
|
61
|
-
type = 'Special' unless rank[:mu].to_s.empty?
|
62
64
|
|
63
|
-
|
65
|
+
unless mu.empty?
|
66
|
+
url = mu
|
67
|
+
type = 'Special'
|
68
|
+
end
|
64
69
|
|
65
|
-
site = Baiduserp::Helper.parse_site(
|
70
|
+
site = Baiduserp::Helper.parse_site(url)
|
71
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
72
|
+
|
73
|
+
name = rank[:tpl].to_s
|
66
74
|
|
67
75
|
weight = 1.0/side_rank.to_f
|
68
76
|
if type == 'Special'
|
@@ -73,25 +81,23 @@ module Baiduserp
|
|
73
81
|
end
|
74
82
|
end
|
75
83
|
|
76
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
84
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, mu: mu, side_rank: side_rank, weight: weight}
|
77
85
|
end
|
78
86
|
[result, side_rank]
|
79
87
|
end
|
80
88
|
|
81
|
-
# def weight_of_pinpaizhuanqu(side_rank)
|
82
|
-
# [[{ type: 'Special', name: 'pinpaizhuanqu', site: '', side_rank: side_rank, weight: 1.0}], (side_rank + 1)]
|
83
|
-
# end
|
84
|
-
|
85
89
|
def weight_of_ads_top(side_rank)
|
86
90
|
result = []
|
87
91
|
self[:ads_top].each do |ad|
|
88
92
|
side_rank += 1
|
89
93
|
|
94
|
+
url = ad[:url].to_s
|
90
95
|
type = 'SEM'
|
91
96
|
name = ''
|
92
|
-
site = Baiduserp::Helper.parse_site(
|
97
|
+
site = Baiduserp::Helper.parse_site(url)
|
98
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
93
99
|
weight = 1.0/side_rank.to_f
|
94
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
100
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
|
95
101
|
end
|
96
102
|
[result, side_rank]
|
97
103
|
end
|
@@ -101,11 +107,13 @@ module Baiduserp
|
|
101
107
|
self[:ads_right].each do |ad|
|
102
108
|
side_rank += 1
|
103
109
|
|
110
|
+
url = ad[:url].to_s
|
104
111
|
type = 'SEM'
|
105
112
|
name = ''
|
106
|
-
site = Baiduserp::Helper.parse_site(
|
113
|
+
site = Baiduserp::Helper.parse_site(url)
|
114
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
107
115
|
weight = 1.0/side_rank.to_f
|
108
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
116
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
|
109
117
|
end
|
110
118
|
[result, side_rank]
|
111
119
|
end
|
@@ -115,11 +123,13 @@ module Baiduserp
|
|
115
123
|
self[:con_ar].each do |con|
|
116
124
|
side_rank += 1
|
117
125
|
|
126
|
+
url = con[:data_click]['mu'].to_s
|
118
127
|
type = 'Special'
|
119
128
|
name = con[:tpl]
|
120
|
-
site = Baiduserp::Helper.parse_site(
|
129
|
+
site = Baiduserp::Helper.parse_site(url)
|
130
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
121
131
|
weight = 1.0 * @config[:con_ar_weight]
|
122
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
132
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
|
123
133
|
end
|
124
134
|
[result, side_rank]
|
125
135
|
end
|
@@ -129,11 +139,13 @@ module Baiduserp
|
|
129
139
|
self[:zhixin].each do |zhixin|
|
130
140
|
side_rank += 1
|
131
141
|
|
142
|
+
url = zhixin[:mu].to_s
|
132
143
|
type = 'Special'
|
133
144
|
name = zhixin[:tpl]
|
134
|
-
site = Baiduserp::Helper.parse_site(
|
145
|
+
site = Baiduserp::Helper.parse_site(url)
|
146
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
135
147
|
weight = 1.0 * @config[:zhixin_weight]
|
136
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
148
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
|
137
149
|
end
|
138
150
|
[result, side_rank]
|
139
151
|
end
|
data/lib/baiduserp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baiduserp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.3
|
4
|
+
version: 2.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MingQian Zhang
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/baiduserp/migrations/002_create_htmls_table.rb
|
96
96
|
- lib/baiduserp/migrations/003_create_serps_table.rb
|
97
97
|
- lib/baiduserp/migrations/004_create_weights_table.rb
|
98
|
+
- lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb
|
98
99
|
- lib/baiduserp/parser/ads_right.rb
|
99
100
|
- lib/baiduserp/parser/ads_top.rb
|
100
101
|
- lib/baiduserp/parser/con_ar.rb
|