baiduserp 2.2.9 → 2.3.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d1c2a61259d5e3134d0566e002ba551a791b06c
4
- data.tar.gz: 8b3f78ad2190a17a2f530019c0b0719bb9abfb93
3
+ metadata.gz: fefc5d8f8028b48f1557fdc0bacc9ea655fb5140
4
+ data.tar.gz: b078c04a225b413fb1e6f70d01d89c63b285c930
5
5
  SHA512:
6
- metadata.gz: 859d3c01765741cfe50eed92a05f5cbec93c68c003d13d2a2dad2e6b85e22d63d2171ded85fa3cd2271d7b830c522bfa95b3b27f91a269af621f811f23c3a87a
7
- data.tar.gz: 5847dd8a81f4c61a0f72d2d8a2e6cd91e4aa1e9eed32f3c495453e31fe006a5c8162f902ebf2e831adb16731114fcb6b7d4a4867ed8ce575aecc39e2feca14c4
6
+ metadata.gz: 9e56e95afb223c96499f99ee38a5bb209954361944ebfea43550a93a802c416ad7198b8a672b01d4ff99accc0c1f4591de69c0db58013f4691d967e2cd21ad77
7
+ data.tar.gz: 1f476da432c0efca2536001b79e5cdd88748da2fd0bc4d7ace4aff97c39fdf79009dfd161f80292b5af45014ca42c5a168e022be0d0d20060681455e15bbbdf9
@@ -27,6 +27,10 @@ module Baiduserp
27
27
  @serps = Class.new(Sequel::Model) do
28
28
  set_dataset :serps
29
29
  end
30
+
31
+ @weights = Class.new(Sequel::Model) do
32
+ set_dataset :weights
33
+ end
30
34
 
31
35
  import_keywords unless @keywords_imported
32
36
  end
@@ -37,12 +41,15 @@ module Baiduserp
37
41
 
38
42
  def migrate!
39
43
  Sequel.extension :migration, :core_extensions
40
- Sequel::Migrator.apply(@db, File.expand_path('../analyser-migrations/',__FILE__))
44
+ Sequel::Migrator.apply(@db, File.expand_path('../migrations/',__FILE__))
41
45
  end
42
46
 
43
47
  def import_keywords(file=@attrs[:keywords])
44
48
  CSV.foreach(file) do |l|
45
- @keywords.insert(:term => l[0], :weight => l[1], :category => l[2])
49
+ @keywords.find_or_create(:term => l[0]) do |r|
50
+ r.weight = l[1]
51
+ r.category = l[2]
52
+ end
46
53
  end
47
54
  end
48
55
 
@@ -52,8 +59,43 @@ module Baiduserp
52
59
  puts k.to_hash
53
60
  html = Baiduserp.get_search_html(k[:term])
54
61
  serp = Baiduserp.parse(html)
55
- @htmls.insert(:keyword_id => k[:id], :date => date, :content => html)
56
- @serps.insert(:keyword_id => k[:id], :date => date, :content => YAML.dump(serp))
62
+ @htmls.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = html}
63
+ @serps.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = YAML.dump(serp)}
64
+ end
65
+ end
66
+
67
# Build SERP records for the given date from the raw HTML already stored.
#
# date - the day to process (defaults to today).
#
# For every stored HTML row of that date, a serps row keyed by
# (date, keyword_id) is looked up via find_or_create; the page is parsed and
# YAML-dumped only inside the block passed to find_or_create.
def generate_serps(date=Date.today)
  @htmls.where(:date => date).each do |row|
    lookup = {:date => date, :keyword_id => row[:keyword_id]}
    page = row[:content]

    @serps.find_or_create(lookup) do |record|
      record.content = YAML.dump(Baiduserp.parse(page))
    end
  end
end
74
+
75
# Store one weights row per weighted SERP entry for the given date.
#
# date - the day to process (defaults to today).
#
# Each stored SERP of that date is loaded from YAML and asked for its
# #weights; every entry is looked up by (date, keyword_id, side, side_rank)
# and the remaining attributes are assigned inside the find_or_create block.
def generate_weights(date=Date.today)
  @serps.where(:date => date).each do |row|
    keyword_id = row[:keyword_id]
    # NOTE(review): YAML.load can instantiate arbitrary objects; this is only
    # acceptable while the serps table holds content written by this gem.
    serp = YAML.load(row[:content])

    serp.weights.each do |entry|
      identity = {
        :date => date,
        :keyword_id => keyword_id,
        :side => entry[:side],
        :side_rank => entry[:side_rank]
      }

      @weights.find_or_create(identity) do |record|
        record.type = entry[:type]
        record.name = entry[:name]
        record.site = entry[:site]
        record.weight = entry[:weight]
        record.part = entry[:part]
        record.normalized_weight = entry[:normalized_weight]
      end
    end
  end
end
59
101
 
@@ -1,3 +1,5 @@
1
+ require 'domainatrix'
2
+
1
3
  module Baiduserp
2
4
  module Helper
3
5
  class << self
@@ -8,6 +10,8 @@ module Baiduserp
8
10
  noko.first.content.strip
9
11
  end
10
12
 
13
+ # Parse the data-click value taken from a Baidu div attribute,
14
+ # which is in a JSON-like format.
11
15
  def parse_data_click(str)
12
16
  JSON.parse(str
13
17
  .gsub("'",'"')
@@ -16,6 +20,23 @@ module Baiduserp
16
20
  #.gsub(/:'([^(',\")]*)'(,|})/,':"\1"\2')
17
21
  )
18
22
  end
23
+
24
# Normalize the weights of the given records, in place, so the shares sum to 1.
#
# data           - Array of Hashes, for example [{a: 1, b: 2}, {a: 2, b: 3}].
# weight_col     - key holding the raw weight (converted with #to_f).
# normalized_col - key that receives the normalized share.
#
# Returns the same array. When the total weight is zero every share is set to
# 0.0 instead of dividing by zero (which would yield NaN for each record).
def normalize(data, weight_col = :weight, normalized_col = :normalized_weight)
  total_weight = data.reduce(0.0) { |sum, d| sum + d[weight_col].to_f }

  data.each do |d|
    d[normalized_col] = total_weight.zero? ? 0.0 : d[weight_col].to_f / total_weight
  end
  data
end
34
+
35
# Reduce a URL to "<domain>.<public_suffix>" as reported by Domainatrix.
#
# url - anything responding to #to_s.
def parse_site(url)
  parsed = Domainatrix.parse(url.to_s)
  domain = parsed.domain
  suffix = parsed.public_suffix
  domain + '.' + suffix
end
39
+
19
40
  end
20
41
  end
21
42
  end
@@ -0,0 +1,23 @@
1
# Creates the weights table: one row per weighted SERP entry, addressed by
# (date, keyword_id, side, side_rank).
#
# Written as a reversible `change` block: migrating up creates the table,
# migrating down drops it.
Sequel.migration do
  change do
    create_table(:weights) do
      primary_key :id
      foreign_key :keyword_id, :keywords

      Date    :date
      String  :side
      String  :part
      String  :type
      String  :name
      String  :site
      Integer :side_rank
      Float   :weight
      Float   :normalized_weight

      index [:date, :keyword_id, :side, :side_rank]
    end
  end
end
@@ -1,7 +1,143 @@
1
- require 'domainatrix'
2
-
3
1
  module Baiduserp
4
2
  class Result < Hash
3
# Set up the weighting configuration, then defer to Hash#initialize.
def initialize(*args)
  @config = default_weight_config
  super
end

# Weight every entry of this SERP.
#
# Walks the left and right side of the page; within a side the parts are
# visited in configured order and share one running side_rank counter.
# Each entry's weight is scaled by the weight of its side, and the complete
# list is handed to Baiduserp::Helper.normalize.
#
# Returns whatever Baiduserp::Helper.normalize returns for the array of
# entry hashes (:type, :name, :site, :side_rank, :weight, :side, :part).
def weights
  settings = weight_config
  result = []

  [:left, :right].each do |side|
    side_factor = settings["#{side}_part_weight".to_sym].to_f
    side_rank = 0

    settings["#{side}_parts".to_sym].each do |part|
      rs, side_rank = self.send("weight_of_#{part}", side_rank)

      rs.each do |r|
        r[:side] = side.to_s
        r[:part] = part
        r[:weight] = r[:weight].to_f * side_factor
        result << r
      end
    end
  end

  Baiduserp::Helper.normalize(result)
end

# weight_of_*** functions
# take the side_rank reached so far and return [entries, new_side_rank];
# each entry is a hash with: type, name, site, side_rank, weight.
# A section that is absent from the SERP contributes no entries.

# Organic results: weight decays as 1/side_rank; "Special" results (Baidu
# Open entries, or entries carrying a non-empty :mu) get an extra multiplier.
def weight_of_ranks(side_rank)
  settings = weight_config

  result = (self[:ranks] || []).map do |rank|
    side_rank += 1

    opened = rank[:baiduopen]
    special = opened || !rank[:mu].to_s.empty?

    weight = 1.0 / side_rank
    if opened
      weight *= settings[:baiduopen_weight].to_f
    elsif special
      weight *= settings[:rank_special_weight].to_f
    end

    {
      type: (special ? 'Special' : 'SEO'),
      name: rank[:tpl].to_s,
      site: Baiduserp::Helper.parse_site(rank[:url]),
      side_rank: side_rank,
      weight: weight
    }
  end

  [result, side_rank]
end

# def weight_of_pinpaizhuanqu(side_rank)
# [[{ type: 'Special', name: 'pinpaizhuanqu', site: '', side_rank: side_rank, weight: 1.0}], (side_rank + 1)]
# end

# Ads above the organic results.
def weight_of_ads_top(side_rank)
  sem_weights_for(:ads_top, side_rank)
end

# Ads in the right-hand column.
def weight_of_ads_right(side_rank)
  sem_weights_for(:ads_right, side_rank)
end

# Right-hand column content blocks: fixed weight, site taken from the
# 'mu' entry of the block's data_click.
def weight_of_con_ar(side_rank)
  fixed = 1.0 * weight_config[:con_ar_weight]

  result = (self[:con_ar] || []).map do |con|
    side_rank += 1
    {
      type: 'Special',
      name: con[:tpl],
      site: Baiduserp::Helper.parse_site(con[:data_click]['mu']),
      side_rank: side_rank,
      weight: fixed
    }
  end

  [result, side_rank]
end

# Zhixin blocks: fixed weight, site taken from the block's :mu.
def weight_of_zhixin(side_rank)
  fixed = 1.0 * weight_config[:zhixin_weight]

  result = (self[:zhixin] || []).map do |zhixin|
    side_rank += 1
    {
      type: 'Special',
      name: zhixin[:tpl],
      site: Baiduserp::Helper.parse_site(zhixin[:mu]),
      side_rank: side_rank,
      weight: fixed
    }
  end

  [result, side_rank]
end

# Shared implementation of the two ad sections: every ad is an 'SEM' entry
# with an empty name whose weight decays as 1/side_rank.
def sem_weights_for(key, side_rank)
  result = (self[key] || []).map do |ad|
    side_rank += 1
    {
      type: 'SEM',
      name: '',
      site: Baiduserp::Helper.parse_site(ad[:site]),
      side_rank: side_rank,
      weight: 1.0 / side_rank
    }
  end

  [result, side_rank]
end

# The configuration in effect. Falls back to the defaults when @config was
# never set, i.e. when the instance was not built through #initialize
# (for example when it was allocated by a deserializer).
def weight_config
  @config || default_weight_config
end

# A fresh copy of the default weighting configuration:
# which parts make up each side (in ranking order), the weight of each side,
# and the multipliers for the special entry kinds.
def default_weight_config
  {
    :left_parts => [:ads_top, :zhixin, :ranks],
    :right_parts => [:con_ar, :ads_right],
    :left_part_weight => 8,
    :right_part_weight => 2,
    :zhixin_weight => 3.5,
    :baiduopen_weight => 3,
    :rank_special_weight => 2,
    :con_ar_weight => 2
  }
end

# Explicit symbols only, so methods defined after this point keep their
# visibility.
private :sem_weights_for, :weight_config, :default_weight_config
140
+
5
141
  def seo_urls
6
142
  self[:ranks].reduce([]) {|result,rank| result << rank[:url]}
7
143
  end
@@ -29,5 +165,6 @@ module Baiduserp
29
165
  def sem_sites
30
166
  sem_urls
31
167
  end
168
+
32
169
  end
33
170
  end
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.2.9"
2
+ VERSION = "2.3.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.9
4
+ version: 2.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - MingQian Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-02 00:00:00.000000000 Z
11
+ date: 2013-12-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -88,12 +88,13 @@ executables:
88
88
  extensions: []
89
89
  extra_rdoc_files: []
90
90
  files:
91
- - lib/baiduserp/analyser-migrations/001_create_keywords_table.rb
92
- - lib/baiduserp/analyser-migrations/002_create_htmls_table.rb
93
- - lib/baiduserp/analyser-migrations/003_create_serps_table.rb
94
91
  - lib/baiduserp/analyser.rb
95
92
  - lib/baiduserp/client.rb
96
93
  - lib/baiduserp/helper.rb
94
+ - lib/baiduserp/migrations/001_create_keywords_table.rb
95
+ - lib/baiduserp/migrations/002_create_htmls_table.rb
96
+ - lib/baiduserp/migrations/003_create_serps_table.rb
97
+ - lib/baiduserp/migrations/004_create_weights_table.rb
97
98
  - lib/baiduserp/parser/ads_right.rb
98
99
  - lib/baiduserp/parser/ads_top.rb
99
100
  - lib/baiduserp/parser/con_ar.rb