baiduserp 2.3.3 → 2.3.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/baiduserp/analyser.rb +4 -0
- data/lib/baiduserp/helper.rb +25 -2
- data/lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb +11 -0
- data/lib/baiduserp/parser/ads_right.rb +1 -1
- data/lib/baiduserp/parser/ads_top.rb +2 -2
- data/lib/baiduserp/result.rb +28 -16
- data/lib/baiduserp/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c4f925a3ac00d8faedd0ce3c596b957cd76f2323
|
4
|
+
data.tar.gz: 4d3f836b54ed03bfd70603cb82425f78e96c3cb1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: abf62c6dcb480558c39f58b8e4f7b27b7146ccfe2d380225d7541ac32def7575f37628b1df86ba0d73da574ecb040a758169736dc3ceddb29167c9ea83b89fa8
|
7
|
+
data.tar.gz: 5192e5a8175459b121f46413d3d6701b5ffba45f615e9dcfbee360af151daf82b9b68e4b930c99639eb465553f45e49aaba3d4d73129d2452263c178d4cfeac0
|
data/lib/baiduserp/analyser.rb
CHANGED
@@ -81,6 +81,8 @@ module Baiduserp
|
|
81
81
|
type = w[:type]
|
82
82
|
name = w[:name]
|
83
83
|
site = w[:site]
|
84
|
+
subdomain = w[:subdomain]
|
85
|
+
url = w[:url]
|
84
86
|
side_rank = w[:side_rank]
|
85
87
|
weight = w[:weight]
|
86
88
|
side = w[:side]
|
@@ -91,6 +93,8 @@ module Baiduserp
|
|
91
93
|
r.type = type
|
92
94
|
r.name = name
|
93
95
|
r.site = site
|
96
|
+
r.subdomain = subdomain
|
97
|
+
r.url = url
|
94
98
|
r.weight = weight
|
95
99
|
r.part = part
|
96
100
|
r.normalized_weight = normalized_weight
|
data/lib/baiduserp/helper.rb
CHANGED
@@ -33,8 +33,31 @@ module Baiduserp
|
|
33
33
|
end
|
34
34
|
|
35
35
|
def parse_site(url)
|
36
|
-
|
37
|
-
|
36
|
+
begin
|
37
|
+
url = Domainatrix.parse(url.to_s)
|
38
|
+
site = url.domain + '.' + url.public_suffix
|
39
|
+
rescue Exception => e
|
40
|
+
puts "parse_site from url error:"
|
41
|
+
puts url
|
42
|
+
puts e.class
|
43
|
+
puts e.message
|
44
|
+
site = ''
|
45
|
+
end
|
46
|
+
site
|
47
|
+
end
|
48
|
+
|
49
|
+
def parse_subdomain(url)
|
50
|
+
begin
|
51
|
+
url = Domainatrix.parse(url.to_s)
|
52
|
+
subdomain = url.subdomain
|
53
|
+
rescue Exception => e
|
54
|
+
puts "parse_site from url error:"
|
55
|
+
puts url
|
56
|
+
puts e.class
|
57
|
+
puts e.message
|
58
|
+
subdomain = ''
|
59
|
+
end
|
60
|
+
subdomain
|
38
61
|
end
|
39
62
|
|
40
63
|
end
|
@@ -10,7 +10,7 @@ class Baiduserp::Parser
|
|
10
10
|
|
11
11
|
r[:content] = Baiduserp::Helper.get_content_safe(div.search('a.EC_desc/font'))
|
12
12
|
|
13
|
-
r[:
|
13
|
+
r[:url] = Baiduserp::Helper.get_content_safe(div.search('font.EC_url'))
|
14
14
|
|
15
15
|
result << r
|
16
16
|
end
|
@@ -19,7 +19,7 @@ class Baiduserp::Parser
|
|
19
19
|
|
20
20
|
r[:content] = Baiduserp::Helper.get_content_safe(div.search('div.ec_desc'))
|
21
21
|
|
22
|
-
r[:
|
22
|
+
r[:url] = Baiduserp::Helper.get_content_safe(div.search('span.ec_url'))
|
23
23
|
|
24
24
|
result << r
|
25
25
|
|
@@ -30,7 +30,7 @@ class Baiduserp::Parser
|
|
30
30
|
|
31
31
|
r[:content] = Baiduserp::Helper.get_content_safe(div.search('a.EC_desc'))
|
32
32
|
|
33
|
-
r[:
|
33
|
+
r[:url] = Baiduserp::Helper.get_content_safe(div.search('a.EC_url'))
|
34
34
|
|
35
35
|
result << r
|
36
36
|
|
data/lib/baiduserp/result.rb
CHANGED
@@ -56,13 +56,21 @@ module Baiduserp
|
|
56
56
|
self[:ranks].each do |rank|
|
57
57
|
side_rank += 1
|
58
58
|
|
59
|
+
url = rank[:url].to_s
|
60
|
+
mu = rank[:mu].to_s
|
61
|
+
|
59
62
|
type = 'SEO'
|
60
63
|
type = 'Special' if rank[:baiduopen]
|
61
|
-
type = 'Special' unless rank[:mu].to_s.empty?
|
62
64
|
|
63
|
-
|
65
|
+
unless mu.empty?
|
66
|
+
url = mu
|
67
|
+
type = 'Special'
|
68
|
+
end
|
64
69
|
|
65
|
-
site = Baiduserp::Helper.parse_site(
|
70
|
+
site = Baiduserp::Helper.parse_site(url)
|
71
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
72
|
+
|
73
|
+
name = rank[:tpl].to_s
|
66
74
|
|
67
75
|
weight = 1.0/side_rank.to_f
|
68
76
|
if type == 'Special'
|
@@ -73,25 +81,23 @@ module Baiduserp
|
|
73
81
|
end
|
74
82
|
end
|
75
83
|
|
76
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
84
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, mu: mu, side_rank: side_rank, weight: weight}
|
77
85
|
end
|
78
86
|
[result, side_rank]
|
79
87
|
end
|
80
88
|
|
81
|
-
# def weight_of_pinpaizhuanqu(side_rank)
|
82
|
-
# [[{ type: 'Special', name: 'pinpaizhuanqu', site: '', side_rank: side_rank, weight: 1.0}], (side_rank + 1)]
|
83
|
-
# end
|
84
|
-
|
85
89
|
def weight_of_ads_top(side_rank)
|
86
90
|
result = []
|
87
91
|
self[:ads_top].each do |ad|
|
88
92
|
side_rank += 1
|
89
93
|
|
94
|
+
url = ad[:url].to_s
|
90
95
|
type = 'SEM'
|
91
96
|
name = ''
|
92
|
-
site = Baiduserp::Helper.parse_site(
|
97
|
+
site = Baiduserp::Helper.parse_site(url)
|
98
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
93
99
|
weight = 1.0/side_rank.to_f
|
94
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
100
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
|
95
101
|
end
|
96
102
|
[result, side_rank]
|
97
103
|
end
|
@@ -101,11 +107,13 @@ module Baiduserp
|
|
101
107
|
self[:ads_right].each do |ad|
|
102
108
|
side_rank += 1
|
103
109
|
|
110
|
+
url = ad[:url].to_s
|
104
111
|
type = 'SEM'
|
105
112
|
name = ''
|
106
|
-
site = Baiduserp::Helper.parse_site(
|
113
|
+
site = Baiduserp::Helper.parse_site(url)
|
114
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
107
115
|
weight = 1.0/side_rank.to_f
|
108
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
116
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
|
109
117
|
end
|
110
118
|
[result, side_rank]
|
111
119
|
end
|
@@ -115,11 +123,13 @@ module Baiduserp
|
|
115
123
|
self[:con_ar].each do |con|
|
116
124
|
side_rank += 1
|
117
125
|
|
126
|
+
url = con[:data_click]['mu'].to_s
|
118
127
|
type = 'Special'
|
119
128
|
name = con[:tpl]
|
120
|
-
site = Baiduserp::Helper.parse_site(
|
129
|
+
site = Baiduserp::Helper.parse_site(url)
|
130
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
121
131
|
weight = 1.0 * @config[:con_ar_weight]
|
122
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
132
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
|
123
133
|
end
|
124
134
|
[result, side_rank]
|
125
135
|
end
|
@@ -129,11 +139,13 @@ module Baiduserp
|
|
129
139
|
self[:zhixin].each do |zhixin|
|
130
140
|
side_rank += 1
|
131
141
|
|
142
|
+
url = zhixin[:mu].to_s
|
132
143
|
type = 'Special'
|
133
144
|
name = zhixin[:tpl]
|
134
|
-
site = Baiduserp::Helper.parse_site(
|
145
|
+
site = Baiduserp::Helper.parse_site(url)
|
146
|
+
subdomain = Baiduserp::Helper.parse_subdomain(url)
|
135
147
|
weight = 1.0 * @config[:zhixin_weight]
|
136
|
-
result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
|
148
|
+
result << {type: type, name: name, site: site, subdomain: subdomain, url: url, side_rank: side_rank, weight: weight}
|
137
149
|
end
|
138
150
|
[result, side_rank]
|
139
151
|
end
|
data/lib/baiduserp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baiduserp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.3
|
4
|
+
version: 2.3.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MingQian Zhang
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -95,6 +95,7 @@ files:
|
|
95
95
|
- lib/baiduserp/migrations/002_create_htmls_table.rb
|
96
96
|
- lib/baiduserp/migrations/003_create_serps_table.rb
|
97
97
|
- lib/baiduserp/migrations/004_create_weights_table.rb
|
98
|
+
- lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb
|
98
99
|
- lib/baiduserp/parser/ads_right.rb
|
99
100
|
- lib/baiduserp/parser/ads_top.rb
|
100
101
|
- lib/baiduserp/parser/con_ar.rb
|