baiduserp 2.2.9 → 2.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/baiduserp/analyser.rb +46 -4
- data/lib/baiduserp/helper.rb +21 -0
- data/lib/baiduserp/{analyser-migrations → migrations}/001_create_keywords_table.rb +0 -0
- data/lib/baiduserp/{analyser-migrations → migrations}/002_create_htmls_table.rb +0 -0
- data/lib/baiduserp/{analyser-migrations → migrations}/003_create_serps_table.rb +0 -0
- data/lib/baiduserp/migrations/004_create_weights_table.rb +23 -0
- data/lib/baiduserp/result.rb +139 -2
- data/lib/baiduserp/version.rb +1 -1
- metadata +6 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fefc5d8f8028b48f1557fdc0bacc9ea655fb5140
|
4
|
+
data.tar.gz: b078c04a225b413fb1e6f70d01d89c63b285c930
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9e56e95afb223c96499f99ee38a5bb209954361944ebfea43550a93a802c416ad7198b8a672b01d4ff99accc0c1f4591de69c0db58013f4691d967e2cd21ad77
|
7
|
+
data.tar.gz: 1f476da432c0efca2536001b79e5cdd88748da2fd0bc4d7ace4aff97c39fdf79009dfd161f80292b5af45014ca42c5a168e022be0d0d20060681455e15bbbdf9
|
data/lib/baiduserp/analyser.rb
CHANGED
@@ -27,6 +27,10 @@ module Baiduserp
|
|
27
27
|
@serps = Class.new(Sequel::Model) do
|
28
28
|
set_dataset :serps
|
29
29
|
end
|
30
|
+
|
31
|
+
@weights = Class.new(Sequel::Model) do
|
32
|
+
set_dataset :weights
|
33
|
+
end
|
30
34
|
|
31
35
|
import_keywords unless @keywords_imported
|
32
36
|
end
|
@@ -37,12 +41,15 @@ module Baiduserp
|
|
37
41
|
|
38
42
|
# Load Sequel's migration support and apply the migration files found in
# the "migrations" directory next to this file to @db.
def migrate!
  Sequel.extension(:migration, :core_extensions)
  migrations_dir = File.expand_path('../migrations/', __FILE__)
  Sequel::Migrator.apply(@db, migrations_dir)
end
|
42
46
|
|
43
47
|
# Read keywords from a CSV file whose columns are, in order:
# term, weight, category.
#
# Each row's term is handed to @keywords.find_or_create; the weight and
# category are assigned inside the block given to that call.
#
# file - path of the CSV file (defaults to @attrs[:keywords])
def import_keywords(file=@attrs[:keywords])
  CSV.foreach(file) do |term, weight, category|
    @keywords.find_or_create(:term => term) do |keyword|
      keyword.weight = weight
      keyword.category = category
    end
  end
end
|
48
55
|
|
@@ -52,8 +59,43 @@ module Baiduserp
|
|
52
59
|
puts k.to_hash
|
53
60
|
html = Baiduserp.get_search_html(k[:term])
|
54
61
|
serp = Baiduserp.parse(html)
|
55
|
-
@htmls.
|
56
|
-
@serps.
|
62
|
+
@htmls.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = html}
|
63
|
+
@serps.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = YAML.dump(serp)}
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# Build the parsed SERP records for +date+ from the HTML rows already stored
# for that date: each row's content is parsed with Baiduserp.parse and the
# YAML dump of the result is assigned inside the find_or_create block.
#
# date - the day to process (defaults to Date.today)
def generate_serps(date=Date.today)
  @htmls.where(:date => date).each do |row|
    lookup = {:date => date, :keyword_id => row[:keyword_id]}
    @serps.find_or_create(lookup) do |record|
      record.content = YAML.dump(Baiduserp.parse(row[:content]))
    end
  end
end
|
74
|
+
|
75
|
+
# Derive weight rows for +date+ from the SERPs stored for that date.
#
# Every stored SERP is rebuilt from its YAML content, its #weights are
# computed, and each weight entry is handed to @weights.find_or_create keyed
# by (date, keyword_id, side, side_rank); the remaining columns are assigned
# inside the block given to that call.
#
# date - the day to process (defaults to Date.today)
def generate_weights(date=Date.today)
  @serps.where(:date => date).each do |s|
    keyword_id = s[:keyword_id]

    # The content is a dumped Ruby object (it must respond to #weights).
    # Psych 4+ makes YAML.load a safe load that refuses such objects, so use
    # the explicit full-deserialization API where it exists.
    # NOTE(review): this trusts the database contents; only run it against
    # data written by this tool.
    serp = if YAML.respond_to?(:unsafe_load)
             YAML.unsafe_load(s[:content])
           else
             YAML.load(s[:content])
           end

    serp.weights.each do |w|
      key = {:date => date, :keyword_id => keyword_id, :side => w[:side], :side_rank => w[:side_rank]}
      @weights.find_or_create(key) do |r|
        r.type = w[:type]
        r.name = w[:name]
        r.site = w[:site]
        r.weight = w[:weight]
        r.part = w[:part]
        r.normalized_weight = w[:normalized_weight]
      end
    end
  end
end
|
59
101
|
|
data/lib/baiduserp/helper.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'domainatrix'
|
2
|
+
|
1
3
|
module Baiduserp
|
2
4
|
module Helper
|
3
5
|
class << self
|
@@ -8,6 +10,8 @@ module Baiduserp
|
|
8
10
|
noko.first.content.strip
|
9
11
|
end
|
10
12
|
|
13
|
+
# parse data click value from baidu div property,
|
14
|
+
# which is a JSON like format
|
11
15
|
def parse_data_click(str)
|
12
16
|
JSON.parse(str
|
13
17
|
.gsub("'",'"')
|
@@ -16,6 +20,23 @@ module Baiduserp
|
|
16
20
|
#.gsub(/:'([^(',\")]*)'(,|})/,':"\1"\2')
|
17
21
|
)
|
18
22
|
end
|
23
|
+
|
24
|
+
# normalize the weights of the given data in place so the shares sum to 1.
# the data must be a hash array structure,
# for example : [{a: 1, b: 2}, {a: 2, b: 3}]
#
# data           - array of hashes to normalize
# weight_col     - key holding each entry's raw weight (read with #to_f)
# normalized_col - key that receives the entry's share of the total weight
#
# when the total weight is zero every share is set to 0.0, because
# 0.0/0.0 would otherwise store NaN in every entry.
#
# returns data itself, with normalized_col set on every entry.
def normalize(data,weight_col=:weight,normalized_col=:normalized_weight)
  total_weight = data.reduce(0.0) { |sum, d| sum + d[weight_col].to_f }
  data.each do |d|
    d[normalized_col] = total_weight.zero? ? 0.0 : d[weight_col].to_f / total_weight
  end
  data
end
|
34
|
+
|
35
|
+
# reduce a url to its site: the domain and public suffix reported by
# Domainatrix, joined with a dot.
def parse_site(url)
  parsed = Domainatrix.parse(url.to_s)
  site = parsed.domain + '.'
  site + parsed.public_suffix
end
|
39
|
+
|
19
40
|
end
|
20
41
|
end
|
21
42
|
end
|
File without changes
|
File without changes
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
# Migration 004: create (and, on the way down, drop) the weights table.
Sequel.migration do
  up do
    create_table(:weights) do
      primary_key :id
      foreign_key :keyword_id, :keywords

      # explicit column form of the generic-type shorthand (String :side, ...)
      column :date, Date
      column :side, String
      column :part, String
      column :type, String
      column :name, String
      column :site, String
      column :side_rank, Integer
      column :weight, Float
      column :normalized_weight, Float

      index [:date, :keyword_id, :side, :side_rank]
    end
  end

  down do
    drop_table(:weights)
  end
end
|
data/lib/baiduserp/result.rb
CHANGED
@@ -1,7 +1,143 @@
|
|
1
|
-
require 'domainatrix'
|
2
|
-
|
3
1
|
module Baiduserp
|
4
2
|
class Result < Hash
|
3
|
+
# Store the weighting configuration in @config, then hand the caller's
# original arguments on to the superclass constructor.
def initialize(*args)
  left_parts  = [:ads_top, :zhixin, :ranks]
  right_parts = [:con_ar, :ads_right]

  @config = {
    left_parts: left_parts,
    right_parts: right_parts,
    # multipliers applied to every entry of a side
    left_part_weight: 8,
    right_part_weight: 2,
    # multipliers used by the individual weight_of_* methods
    zhixin_weight: 3.5,
    baiduopen_weight: 3,
    rank_special_weight: 2,
    con_ar_weight: 2
  }

  super
end
|
29
|
+
|
30
|
+
# Collect the weight entries of every configured part, left side first.
#
# For each side the side rank starts at 0 and is threaded through the
# weight_of_<part> calls in configured order. Every entry is tagged with its
# side (as a string) and part, and its weight is multiplied by the side's
# <side>_part_weight. The combined list is passed through
# Baiduserp::Helper.normalize and that result is returned.
def weights
  entries = [:left, :right].flat_map do |side|
    side_factor = @config[:"#{side}_part_weight"].to_f
    position = 0

    @config[:"#{side}_parts"].flat_map do |part|
      part_entries, position = send("weight_of_#{part}", position)

      part_entries.each do |entry|
        entry[:side] = side.to_s
        entry[:part] = part
        entry[:weight] = entry[:weight].to_f * side_factor
      end
    end
  end

  Baiduserp::Helper.normalize(entries)
end
|
49
|
+
|
50
|
+
# weight_of_*** functions
|
51
|
+
# return a hash array
|
52
|
+
# each hash includes: type, name, site, weight
|
53
|
+
|
54
|
+
# Weight entries for the organic results in self[:ranks].
#
# A rank counts as 'Special' when it has a truthy :baiduopen or a non-empty
# :mu, otherwise as 'SEO'. The base weight is 1/side_rank; Special ranks are
# multiplied by :baiduopen_weight (when :baiduopen is set) or by
# :rank_special_weight.
#
# Returns [entries, side_rank], where side_rank is the last rank used.
def weight_of_ranks(side_rank)
  entries = self[:ranks].map do |rank|
    side_rank += 1

    from_open = rank[:baiduopen]
    special = from_open || !rank[:mu].to_s.empty?

    name = rank[:tpl].to_s
    site = Baiduserp::Helper.parse_site(rank[:url])

    weight = 1.0 / side_rank.to_f
    if from_open
      weight *= @config[:baiduopen_weight].to_f
    elsif special
      weight *= @config[:rank_special_weight].to_f
    end

    {type: (special ? 'Special' : 'SEO'), name: name, site: site, side_rank: side_rank, weight: weight}
  end

  [entries, side_rank]
end
|
80
|
+
|
81
|
+
# def weight_of_pinpaizhuanqu(side_rank)
|
82
|
+
# [[{ type: 'Special', name: 'pinpaizhuanqu', site: '', side_rank: side_rank, weight: 1.0}], (side_rank + 1)]
|
83
|
+
# end
|
84
|
+
|
85
|
+
# Weight entries for the ads in self[:ads_top]: each ad is an 'SEM' entry
# with an empty name and a weight of 1/side_rank.
#
# Returns [entries, side_rank], where side_rank is the last rank used.
def weight_of_ads_top(side_rank)
  entries = self[:ads_top].map do |ad|
    side_rank += 1
    {
      type: 'SEM',
      name: '',
      site: Baiduserp::Helper.parse_site(ad[:site]),
      side_rank: side_rank,
      weight: 1.0 / side_rank.to_f
    }
  end

  [entries, side_rank]
end
|
98
|
+
|
99
|
+
# Weight entries for the ads in self[:ads_right]: each ad is an 'SEM' entry
# with an empty name and a weight of 1/side_rank.
#
# Returns [entries, side_rank], where side_rank is the last rank used.
def weight_of_ads_right(side_rank)
  entries = self[:ads_right].map do |ad|
    side_rank += 1
    {
      type: 'SEM',
      name: '',
      site: Baiduserp::Helper.parse_site(ad[:site]),
      side_rank: side_rank,
      weight: 1.0 / side_rank.to_f
    }
  end

  [entries, side_rank]
end
|
112
|
+
|
113
|
+
# Weight entries for the items in self[:con_ar]: each item is a 'Special'
# entry named after its :tpl, sited by the 'mu' value of its :data_click,
# with a fixed weight of 1.0 * :con_ar_weight (the rank does not affect it).
#
# Returns [entries, side_rank], where side_rank is the last rank used.
def weight_of_con_ar(side_rank)
  entries = self[:con_ar].map do |con|
    side_rank += 1

    label = con[:tpl]
    site = Baiduserp::Helper.parse_site(con[:data_click]['mu'])

    {type: 'Special', name: label, site: site, side_rank: side_rank, weight: 1.0 * @config[:con_ar_weight]}
  end

  [entries, side_rank]
end
|
126
|
+
|
127
|
+
# Weight entries for the items in self[:zhixin]: each item is a 'Special'
# entry named after its :tpl, sited by its :mu, with a fixed weight of
# 1.0 * :zhixin_weight (the rank does not affect it).
#
# Returns [entries, side_rank], where side_rank is the last rank used.
def weight_of_zhixin(side_rank)
  entries = self[:zhixin].map do |item|
    side_rank += 1

    label = item[:tpl]
    site = Baiduserp::Helper.parse_site(item[:mu])

    {type: 'Special', name: label, site: site, side_rank: side_rank, weight: 1.0 * @config[:zhixin_weight]}
  end

  [entries, side_rank]
end
|
140
|
+
|
5
141
|
# The :url of every entry in self[:ranks], in order.
def seo_urls
  self[:ranks].map { |rank| rank[:url] }
end
|
@@ -29,5 +165,6 @@ module Baiduserp
|
|
29
165
|
# Delegates to #sem_urls and returns whatever it returns.
def sem_sites
  sem_urls
end
|
168
|
+
|
32
169
|
end
|
33
170
|
end
|
data/lib/baiduserp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baiduserp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MingQian Zhang
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -88,12 +88,13 @@ executables:
|
|
88
88
|
extensions: []
|
89
89
|
extra_rdoc_files: []
|
90
90
|
files:
|
91
|
-
- lib/baiduserp/analyser-migrations/001_create_keywords_table.rb
|
92
|
-
- lib/baiduserp/analyser-migrations/002_create_htmls_table.rb
|
93
|
-
- lib/baiduserp/analyser-migrations/003_create_serps_table.rb
|
94
91
|
- lib/baiduserp/analyser.rb
|
95
92
|
- lib/baiduserp/client.rb
|
96
93
|
- lib/baiduserp/helper.rb
|
94
|
+
- lib/baiduserp/migrations/001_create_keywords_table.rb
|
95
|
+
- lib/baiduserp/migrations/002_create_htmls_table.rb
|
96
|
+
- lib/baiduserp/migrations/003_create_serps_table.rb
|
97
|
+
- lib/baiduserp/migrations/004_create_weights_table.rb
|
97
98
|
- lib/baiduserp/parser/ads_right.rb
|
98
99
|
- lib/baiduserp/parser/ads_top.rb
|
99
100
|
- lib/baiduserp/parser/con_ar.rb
|