baiduserp 2.2.9 → 2.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/baiduserp/analyser.rb +46 -4
- data/lib/baiduserp/helper.rb +21 -0
- data/lib/baiduserp/{analyser-migrations → migrations}/001_create_keywords_table.rb +0 -0
- data/lib/baiduserp/{analyser-migrations → migrations}/002_create_htmls_table.rb +0 -0
- data/lib/baiduserp/{analyser-migrations → migrations}/003_create_serps_table.rb +0 -0
- data/lib/baiduserp/migrations/004_create_weights_table.rb +23 -0
- data/lib/baiduserp/result.rb +139 -2
- data/lib/baiduserp/version.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fefc5d8f8028b48f1557fdc0bacc9ea655fb5140
|
4
|
+
data.tar.gz: b078c04a225b413fb1e6f70d01d89c63b285c930
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e56e95afb223c96499f99ee38a5bb209954361944ebfea43550a93a802c416ad7198b8a672b01d4ff99accc0c1f4591de69c0db58013f4691d967e2cd21ad77
|
7
|
+
data.tar.gz: 1f476da432c0efca2536001b79e5cdd88748da2fd0bc4d7ace4aff97c39fdf79009dfd161f80292b5af45014ca42c5a168e022be0d0d20060681455e15bbbdf9
|
data/lib/baiduserp/analyser.rb
CHANGED
@@ -27,6 +27,10 @@ module Baiduserp
|
|
27
27
|
@serps = Class.new(Sequel::Model) do
|
28
28
|
set_dataset :serps
|
29
29
|
end
|
30
|
+
|
31
|
+
@weights = Class.new(Sequel::Model) do
|
32
|
+
set_dataset :weights
|
33
|
+
end
|
30
34
|
|
31
35
|
import_keywords unless @keywords_imported
|
32
36
|
end
|
@@ -37,12 +41,15 @@ module Baiduserp
|
|
37
41
|
|
38
42
|
def migrate!
|
39
43
|
Sequel.extension :migration, :core_extensions
|
40
|
-
Sequel::Migrator.apply(@db, File.expand_path('../
|
44
|
+
Sequel::Migrator.apply(@db, File.expand_path('../migrations/',__FILE__))
|
41
45
|
end
|
42
46
|
|
43
47
|
def import_keywords(file=@attrs[:keywords])
|
44
48
|
CSV.foreach(file) do |l|
|
45
|
-
@keywords.
|
49
|
+
@keywords.find_or_create(:term => l[0]) do |r|
|
50
|
+
r.weight = l[1]
|
51
|
+
r.category = l[2]
|
52
|
+
end
|
46
53
|
end
|
47
54
|
end
|
48
55
|
|
@@ -52,8 +59,43 @@ module Baiduserp
|
|
52
59
|
puts k.to_hash
|
53
60
|
html = Baiduserp.get_search_html(k[:term])
|
54
61
|
serp = Baiduserp.parse(html)
|
55
|
-
@htmls.
|
56
|
-
@serps.
|
62
|
+
@htmls.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = html}
|
63
|
+
@serps.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = YAML.dump(serp)}
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Builds serps rows from the htmls rows stored for +date+.
#
# For every htmls row with that date, calls find_or_create on the serps
# model with the same date and keyword id. The block handed to
# find_or_create parses the stored page with Baiduserp.parse and assigns
# the YAML dump of the result to the record's content.
#
# date - the date to process (defaults to Date.today).
def generate_serps(date=Date.today)
  @htmls.where(:date => date).each do |row|
    keyword_id = row[:keyword_id]
    page = row[:content]
    @serps.find_or_create(:date => date, :keyword_id => keyword_id) do |record|
      record.content = YAML.dump(Baiduserp.parse(page))
    end
  end
end
|
74
|
+
|
75
|
+
# Stores one weights row per weight entry of every serp saved for +date+.
#
# Each serps row's content is loaded from YAML and asked for its #weights.
# A weights row is looked up (or created) by date, keyword id, side and
# side rank; the remaining fields are assigned inside the block given to
# find_or_create.
#
# NOTE(review): YAML.load revives arbitrary objects; this presumably only
# ever reads content written by this analyser — confirm before pointing
# it at data from another source.
#
# date - the date to process (defaults to Date.today).
def generate_weights(date=Date.today)
  @serps.where(:date => date).each do |row|
    keyword_id = row[:keyword_id]
    serp = YAML.load(row[:content])

    serp.weights.each do |entry|
      @weights.find_or_create(:date => date,
                              :keyword_id => keyword_id,
                              :side => entry[:side],
                              :side_rank => entry[:side_rank]) do |record|
        record.type = entry[:type]
        record.name = entry[:name]
        record.site = entry[:site]
        record.weight = entry[:weight]
        record.part = entry[:part]
        record.normalized_weight = entry[:normalized_weight]
      end
    end
  end
end
|
59
101
|
|
data/lib/baiduserp/helper.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'domainatrix'
|
2
|
+
|
1
3
|
module Baiduserp
|
2
4
|
module Helper
|
3
5
|
class << self
|
@@ -8,6 +10,8 @@ module Baiduserp
|
|
8
10
|
noko.first.content.strip
|
9
11
|
end
|
10
12
|
|
13
|
+
# parse data click value from baidu div property,
|
14
|
+
# which is a JSON like format
|
11
15
|
def parse_data_click(str)
|
12
16
|
JSON.parse(str
|
13
17
|
.gsub("'",'"')
|
@@ -16,6 +20,23 @@ module Baiduserp
|
|
16
20
|
#.gsub(/:'([^(',\")]*)'(,|})/,':"\1"\2')
|
17
21
|
)
|
18
22
|
end
|
23
|
+
|
24
|
+
# normalize weight of given data,
|
25
|
+
# the data must be a hash array structure.
|
26
|
+
# for example : [{a: 1, b: 2}, {a: 2, b: 3}]
|
27
|
+
# Writes each row's share of the total weight back into the row.
#
# data           - Array of Hashes, e.g. [{weight: 1}, {weight: 3}].
# weight_col     - key of the raw weight; values are read with #to_f.
# normalized_col - key the computed share is stored under.
#
# A share is the row's weight divided by the sum of all weights. When that
# sum is zero (e.g. every weight is 0) each share is set to 0.0, because
# 0.0 / 0.0 would otherwise store NaN in every row.
#
# The hashes are modified in place; returns +data+.
def normalize(data,weight_col=:weight,normalized_col=:normalized_weight)
  total_weight = data.reduce(0.0) { |sum, d| sum + d[weight_col].to_f }
  data.each do |d|
    d[normalized_col] = total_weight.zero? ? 0.0 : d[weight_col].to_f / total_weight
  end
  data
end
|
34
|
+
|
35
|
+
# Reduces a URL to "<domain>.<public_suffix>" as reported by Domainatrix.
#
# url - converted with #to_s before it is parsed.
def parse_site(url)
  parsed = Domainatrix.parse(url.to_s)
  domain_with_dot = parsed.domain + '.'
  domain_with_dot + parsed.public_suffix
end
|
39
|
+
|
19
40
|
end
|
20
41
|
end
|
21
42
|
end
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Migration 004: the weights table, one row per weighted SERP entry.
Sequel.migration do
  # Written as a reversible change: applying it creates the table and
  # reverting it drops the table again.
  change do
    create_table(:weights) do
      primary_key :id
      foreign_key :keyword_id, :keywords
      Date    :date
      String  :side
      String  :part
      String  :type
      String  :name
      String  :site
      Integer :side_rank
      Float   :weight
      Float   :normalized_weight

      # Matches the columns the analyser looks rows up by.
      index [:date, :keyword_id, :side, :side_rank]
    end
  end
end
|
data/lib/baiduserp/result.rb
CHANGED
@@ -1,7 +1,143 @@
|
|
1
|
-
require 'domainatrix'
|
2
|
-
|
3
1
|
module Baiduserp
|
4
2
|
class Result < Hash
|
3
|
+
# Sets up the weighting configuration, then defers to Hash#initialize
# with the arguments (and block) that were given.
def initialize(*args)
  @config = {}

  # Which result sections belong to which side of the page, in order.
  @config[:left_parts]  = [:ads_top, :zhixin, :ranks]
  @config[:right_parts] = [:con_ar, :ads_right]

  # Multiplier applied to every entry of a side.
  @config[:left_part_weight]  = 8
  @config[:right_part_weight] = 2

  # Multipliers used by the individual weight_of_* methods.
  @config[:zhixin_weight]       = 3.5
  @config[:baiduopen_weight]    = 3
  @config[:rank_special_weight] = 2
  @config[:con_ar_weight]       = 2

  super
end
|
29
|
+
|
30
|
+
# Collects the weight entries of every configured part, left side first,
# and returns them after Baiduserp::Helper.normalize has processed them.
#
# For each side the rank counter starts at 0 and is threaded through the
# weight_of_<part> methods, each of which returns [entries, new_rank].
# Every entry is tagged with its side and part, and its weight is
# multiplied by that side's configured part weight.
def weights
  rows = [:left, :right].flat_map do |side|
    parts_key  = :"#{side}_parts"
    factor_key = :"#{side}_part_weight"
    position = 0

    @config[parts_key].flat_map do |part|
      entries, position = send("weight_of_#{part}", position)

      entries.each do |entry|
        entry[:side] = side.to_s
        entry[:part] = part
        entry[:weight] = entry[:weight].to_f * @config[factor_key].to_f
      end
    end
  end

  Baiduserp::Helper.normalize(rows)
end
|
49
|
+
|
50
|
+
# weight_of_*** functions
|
51
|
+
# return a hash array
|
52
|
+
# each hash includes: type, name, site, weight
|
53
|
+
|
54
|
+
# Weights the organic results stored under :ranks.
#
# side_rank - rank already used on this side; each result takes the next.
#
# A result is 'Special' when it has a truthy :baiduopen or a non-empty
# :mu, otherwise 'SEO'. The base weight is 1 / side_rank; Special results
# are multiplied by the baiduopen weight (when :baiduopen is set) or by
# the rank_special weight.
#
# A result without a :ranks section yields no entries instead of raising.
#
# Returns [entries, last_side_rank]; each entry has :type, :name, :site,
# :side_rank and :weight.
def weight_of_ranks(side_rank)
  result = (self[:ranks] || []).map do |rank|
    side_rank += 1

    from_open = rank[:baiduopen]
    special = from_open || !rank[:mu].to_s.empty?

    name = rank[:tpl].to_s
    site = Baiduserp::Helper.parse_site(rank[:url])

    weight = 1.0 / side_rank.to_f
    if from_open
      weight *= @config[:baiduopen_weight].to_f
    elsif special
      weight *= @config[:rank_special_weight].to_f
    end

    {type: (special ? 'Special' : 'SEO'), name: name, site: site, side_rank: side_rank, weight: weight}
  end
  [result, side_rank]
end
|
80
|
+
|
81
|
+
# def weight_of_pinpaizhuanqu(side_rank)
|
82
|
+
# [[{ type: 'Special', name: 'pinpaizhuanqu', site: '', side_rank: side_rank, weight: 1.0}], (side_rank + 1)]
|
83
|
+
# end
|
84
|
+
|
85
|
+
# Weights the ads stored under :ads_top.
# Returns [entries, last_side_rank]; see sem_ad_weights.
def weight_of_ads_top(side_rank)
  sem_ad_weights(:ads_top, side_rank)
end

# Weights the ads stored under :ads_right.
# Returns [entries, last_side_rank]; see sem_ad_weights.
def weight_of_ads_right(side_rank)
  sem_ad_weights(:ads_right, side_rank)
end

# Shared implementation for both ad sections, which are weighted the same
# way: every ad takes the next side rank, is typed 'SEM' with an empty
# name, and weighs 1 / side_rank.
#
# section   - key of the ad list (:ads_top or :ads_right).
# side_rank - rank already used on this side.
#
# An absent section yields no entries instead of raising.
def sem_ad_weights(section, side_rank)
  result = (self[section] || []).map do |ad|
    side_rank += 1
    site = Baiduserp::Helper.parse_site(ad[:site])
    {type: 'SEM', name: '', site: site, side_rank: side_rank, weight: 1.0 / side_rank.to_f}
  end
  [result, side_rank]
end
private :sem_ad_weights
|
112
|
+
|
113
|
+
# Weights the entries stored under :con_ar.
#
# side_rank - rank already used on this side; each entry takes the next.
#
# Every entry is typed 'Special', named after its :tpl, and given the
# configured con_ar weight regardless of its rank. The site is derived
# from the 'mu' value of the entry's :data_click.
#
# An absent :con_ar section yields no entries instead of raising.
#
# Returns [entries, last_side_rank].
def weight_of_con_ar(side_rank)
  result = (self[:con_ar] || []).map do |con|
    side_rank += 1
    site = Baiduserp::Helper.parse_site(con[:data_click]['mu'])
    weight = 1.0 * @config[:con_ar_weight]
    {type: 'Special', name: con[:tpl], site: site, side_rank: side_rank, weight: weight}
  end
  [result, side_rank]
end
|
126
|
+
|
127
|
+
# Weights the entries stored under :zhixin.
#
# side_rank - rank already used on this side; each entry takes the next.
#
# Every entry is typed 'Special', named after its :tpl, given the
# configured zhixin weight regardless of its rank, and its site is
# derived from its :mu value.
#
# An absent :zhixin section yields no entries instead of raising.
#
# Returns [entries, last_side_rank].
def weight_of_zhixin(side_rank)
  result = (self[:zhixin] || []).map do |zhixin|
    side_rank += 1
    site = Baiduserp::Helper.parse_site(zhixin[:mu])
    weight = 1.0 * @config[:zhixin_weight]
    {type: 'Special', name: zhixin[:tpl], site: site, side_rank: side_rank, weight: weight}
  end
  [result, side_rank]
end
|
140
|
+
|
5
141
|
# Returns the :url of every entry under :ranks, in order.
def seo_urls
  self[:ranks].map { |rank| rank[:url] }
end
|
@@ -29,5 +165,6 @@ module Baiduserp
|
|
29
165
|
# Returns exactly what #sem_urls returns.
def sem_sites
  urls = sem_urls
  urls
end
|
168
|
+
|
32
169
|
end
|
33
170
|
end
|
data/lib/baiduserp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baiduserp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MingQian Zhang
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -88,12 +88,13 @@ executables:
|
|
88
88
|
extensions: []
|
89
89
|
extra_rdoc_files: []
|
90
90
|
files:
|
91
|
-
- lib/baiduserp/analyser-migrations/001_create_keywords_table.rb
|
92
|
-
- lib/baiduserp/analyser-migrations/002_create_htmls_table.rb
|
93
|
-
- lib/baiduserp/analyser-migrations/003_create_serps_table.rb
|
94
91
|
- lib/baiduserp/analyser.rb
|
95
92
|
- lib/baiduserp/client.rb
|
96
93
|
- lib/baiduserp/helper.rb
|
94
|
+
- lib/baiduserp/migrations/001_create_keywords_table.rb
|
95
|
+
- lib/baiduserp/migrations/002_create_htmls_table.rb
|
96
|
+
- lib/baiduserp/migrations/003_create_serps_table.rb
|
97
|
+
- lib/baiduserp/migrations/004_create_weights_table.rb
|
97
98
|
- lib/baiduserp/parser/ads_right.rb
|
98
99
|
- lib/baiduserp/parser/ads_top.rb
|
99
100
|
- lib/baiduserp/parser/con_ar.rb
|