baiduserp 2.2.9 → 2.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1d1c2a61259d5e3134d0566e002ba551a791b06c
4
- data.tar.gz: 8b3f78ad2190a17a2f530019c0b0719bb9abfb93
3
+ metadata.gz: fefc5d8f8028b48f1557fdc0bacc9ea655fb5140
4
+ data.tar.gz: b078c04a225b413fb1e6f70d01d89c63b285c930
5
5
  SHA512:
6
- metadata.gz: 859d3c01765741cfe50eed92a05f5cbec93c68c003d13d2a2dad2e6b85e22d63d2171ded85fa3cd2271d7b830c522bfa95b3b27f91a269af621f811f23c3a87a
7
- data.tar.gz: 5847dd8a81f4c61a0f72d2d8a2e6cd91e4aa1e9eed32f3c495453e31fe006a5c8162f902ebf2e831adb16731114fcb6b7d4a4867ed8ce575aecc39e2feca14c4
6
+ metadata.gz: 9e56e95afb223c96499f99ee38a5bb209954361944ebfea43550a93a802c416ad7198b8a672b01d4ff99accc0c1f4591de69c0db58013f4691d967e2cd21ad77
7
+ data.tar.gz: 1f476da432c0efca2536001b79e5cdd88748da2fd0bc4d7ace4aff97c39fdf79009dfd161f80292b5af45014ca42c5a168e022be0d0d20060681455e15bbbdf9
@@ -27,6 +27,10 @@ module Baiduserp
27
27
  @serps = Class.new(Sequel::Model) do
28
28
  set_dataset :serps
29
29
  end
30
+
31
+ @weights = Class.new(Sequel::Model) do
32
+ set_dataset :weights
33
+ end
30
34
 
31
35
  import_keywords unless @keywords_imported
32
36
  end
@@ -37,12 +41,15 @@ module Baiduserp
37
41
 
38
42
  def migrate!
39
43
  Sequel.extension :migration, :core_extensions
40
- Sequel::Migrator.apply(@db, File.expand_path('../analyser-migrations/',__FILE__))
44
+ Sequel::Migrator.apply(@db, File.expand_path('../migrations/',__FILE__))
41
45
  end
42
46
 
43
47
  def import_keywords(file=@attrs[:keywords])
44
48
  CSV.foreach(file) do |l|
45
- @keywords.insert(:term => l[0], :weight => l[1], :category => l[2])
49
+ @keywords.find_or_create(:term => l[0]) do |r|
50
+ r.weight = l[1]
51
+ r.category = l[2]
52
+ end
46
53
  end
47
54
  end
48
55
 
@@ -52,8 +59,43 @@ module Baiduserp
52
59
  puts k.to_hash
53
60
  html = Baiduserp.get_search_html(k[:term])
54
61
  serp = Baiduserp.parse(html)
55
- @htmls.insert(:keyword_id => k[:id], :date => date, :content => html)
56
- @serps.insert(:keyword_id => k[:id], :date => date, :content => YAML.dump(serp))
62
+ @htmls.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = html}
63
+ @serps.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = YAML.dump(serp)}
64
+ end
65
+ end
66
+
67
+ def generate_serps(date=Date.today)
68
+ @htmls.where(:date => date).each do |html|
69
+ keyword_id = html[:keyword_id]
70
+ html = html[:content]
71
+ @serps.find_or_create(:date => date, :keyword_id => keyword_id) {|r| r.content = YAML.dump(Baiduserp.parse(html))}
72
+ end
73
+ end
74
+
75
+ def generate_weights(date=Date.today)
76
+ @serps.where(:date => date).each do |s|
77
+ keyword_id = s[:keyword_id]
78
+ serp = YAML.load(s[:content])
79
+
80
+ serp.weights.each do |w|
81
+ type = w[:type]
82
+ name = w[:name]
83
+ site = w[:site]
84
+ side_rank = w[:side_rank]
85
+ weight = w[:weight]
86
+ side = w[:side]
87
+ part = w[:part]
88
+ normalized_weight = w[:normalized_weight]
89
+
90
+ @weights.find_or_create(:date => date, :keyword_id => keyword_id, :side => side, :side_rank => side_rank) do |r|
91
+ r.type = type
92
+ r.name = name
93
+ r.site = site
94
+ r.weight = weight
95
+ r.part = part
96
+ r.normalized_weight = normalized_weight
97
+ end
98
+ end
57
99
  end
58
100
  end
59
101
 
@@ -1,3 +1,5 @@
1
+ require 'domainatrix'
2
+
1
3
  module Baiduserp
2
4
  module Helper
3
5
  class << self
@@ -8,6 +10,8 @@ module Baiduserp
8
10
  noko.first.content.strip
9
11
  end
10
12
 
13
+ # parse data click value from baidu div property,
14
+ # which is a JSON like format
11
15
  def parse_data_click(str)
12
16
  JSON.parse(str
13
17
  .gsub("'",'"')
@@ -16,6 +20,23 @@ module Baiduserp
16
20
  #.gsub(/:'([^(',\")]*)'(,|})/,':"\1"\2')
17
21
  )
18
22
  end
23
+
24
+ # normalize weight of given data,
25
+ # the data must be a hash array structure.
26
+ # for example : [{a: 1, b: 2}, {a: 2, b: 3}]
27
+ def normalize(data,weight_col=:weight,normalized_col=:normalized_weight)
28
+ total_weight = data.reduce(0.0) {|sum,d| sum += d[weight_col].to_f}
29
+ data.each do|d|
30
+ d[normalized_col] = d[weight_col].to_f/total_weight
31
+ end
32
+ data
33
+ end
34
+
35
+ def parse_site(url)
36
+ url = Domainatrix.parse(url.to_s)
37
+ url.domain + '.' + url.public_suffix
38
+ end
39
+
19
40
  end
20
41
  end
21
42
  end
@@ -0,0 +1,23 @@
1
+ Sequel.migration do
2
+ up do
3
+ create_table :weights do
4
+ primary_key :id
5
+ foreign_key :keyword_id, :keywords
6
+ Date :date
7
+ String :side
8
+ String :part
9
+ String :type
10
+ String :name
11
+ String :site
12
+ Integer :side_rank
13
+ Float :weight
14
+ Float :normalized_weight
15
+
16
+ index [:date, :keyword_id, :side, :side_rank]
17
+ end
18
+ end
19
+
20
+ down do
21
+ drop_table :weights
22
+ end
23
+ end
@@ -1,7 +1,143 @@
1
- require 'domainatrix'
2
-
3
1
  module Baiduserp
4
2
  class Result < Hash
3
+ def initialize(*args)
4
+ @config = {
5
+ :left_parts => [:ads_top,
6
+ :zhixin,
7
+ :ranks
8
+ ],
9
+
10
+ :right_parts => [:con_ar,
11
+ :ads_right
12
+ ],
13
+
14
+ :left_part_weight => 8,
15
+
16
+ :right_part_weight => 2,
17
+
18
+ :zhixin_weight => 3.5,
19
+
20
+ :baiduopen_weight => 3,
21
+
22
+ :rank_special_weight => 2,
23
+
24
+ :con_ar_weight => 2
25
+ }
26
+
27
+ super
28
+ end
29
+
30
+ def weights
31
+ result = []
32
+ [:left,:right].each do |side|
33
+ side_rank = 0
34
+
35
+ @config["#{side}_parts".to_sym].each do |part|
36
+ rs,side_rank = self.send("weight_of_#{part}",side_rank)
37
+
38
+ rs.each do |r|
39
+ r[:side] = side.to_s
40
+ r[:part] = part
41
+
42
+ r[:weight] = r[:weight].to_f * @config["#{side}_part_weight".to_sym].to_f
43
+ result << r
44
+ end
45
+ end
46
+ end
47
+ Baiduserp::Helper.normalize(result)
48
+ end
49
+
50
+ # weight_of_*** functions
51
+ # return a hash array
52
+ # each hash includes: type, name, site, weight
53
+
54
+ def weight_of_ranks(side_rank)
55
+ result = []
56
+ self[:ranks].each do |rank|
57
+ side_rank += 1
58
+
59
+ type = 'SEO'
60
+ type = 'Special' if rank[:baiduopen]
61
+ type = 'Special' unless rank[:mu].to_s.empty?
62
+
63
+ name = rank[:tpl].to_s
64
+
65
+ site = Baiduserp::Helper.parse_site(rank[:url])
66
+
67
+ weight = 1.0/side_rank.to_f
68
+ if type == 'Special'
69
+ if rank[:baiduopen]
70
+ weight = weight * @config[:baiduopen_weight].to_f
71
+ else
72
+ weight = weight * @config[:rank_special_weight].to_f
73
+ end
74
+ end
75
+
76
+ result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
77
+ end
78
+ [result, side_rank]
79
+ end
80
+
81
+ # def weight_of_pinpaizhuanqu(side_rank)
82
+ # [[{ type: 'Special', name: 'pinpaizhuanqu', site: '', side_rank: side_rank, weight: 1.0}], (side_rank + 1)]
83
+ # end
84
+
85
+ def weight_of_ads_top(side_rank)
86
+ result = []
87
+ self[:ads_top].each do |ad|
88
+ side_rank += 1
89
+
90
+ type = 'SEM'
91
+ name = ''
92
+ site = Baiduserp::Helper.parse_site(ad[:site])
93
+ weight = 1.0/side_rank.to_f
94
+ result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
95
+ end
96
+ [result, side_rank]
97
+ end
98
+
99
+ def weight_of_ads_right(side_rank)
100
+ result = []
101
+ self[:ads_right].each do |ad|
102
+ side_rank += 1
103
+
104
+ type = 'SEM'
105
+ name = ''
106
+ site = Baiduserp::Helper.parse_site(ad[:site])
107
+ weight = 1.0/side_rank.to_f
108
+ result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
109
+ end
110
+ [result, side_rank]
111
+ end
112
+
113
+ def weight_of_con_ar(side_rank)
114
+ result = []
115
+ self[:con_ar].each do |con|
116
+ side_rank += 1
117
+
118
+ type = 'Special'
119
+ name = con[:tpl]
120
+ site = Baiduserp::Helper.parse_site(con[:data_click]['mu'])
121
+ weight = 1.0 * @config[:con_ar_weight]
122
+ result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
123
+ end
124
+ [result, side_rank]
125
+ end
126
+
127
+ def weight_of_zhixin(side_rank)
128
+ result = []
129
+ self[:zhixin].each do |zhixin|
130
+ side_rank += 1
131
+
132
+ type = 'Special'
133
+ name = zhixin[:tpl]
134
+ site = Baiduserp::Helper.parse_site(zhixin[:mu])
135
+ weight = 1.0 * @config[:zhixin_weight]
136
+ result << {type: type, name: name, site: site, side_rank: side_rank, weight: weight}
137
+ end
138
+ [result, side_rank]
139
+ end
140
+
5
141
  def seo_urls
6
142
  self[:ranks].reduce([]) {|result,rank| result << rank[:url]}
7
143
  end
@@ -29,5 +165,6 @@ module Baiduserp
29
165
  def sem_sites
30
166
  sem_urls
31
167
  end
168
+
32
169
  end
33
170
  end
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.2.9"
2
+ VERSION = "2.3.3"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.9
4
+ version: 2.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - MingQian Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-02 00:00:00.000000000 Z
11
+ date: 2013-12-10 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -88,12 +88,13 @@ executables:
88
88
  extensions: []
89
89
  extra_rdoc_files: []
90
90
  files:
91
- - lib/baiduserp/analyser-migrations/001_create_keywords_table.rb
92
- - lib/baiduserp/analyser-migrations/002_create_htmls_table.rb
93
- - lib/baiduserp/analyser-migrations/003_create_serps_table.rb
94
91
  - lib/baiduserp/analyser.rb
95
92
  - lib/baiduserp/client.rb
96
93
  - lib/baiduserp/helper.rb
94
+ - lib/baiduserp/migrations/001_create_keywords_table.rb
95
+ - lib/baiduserp/migrations/002_create_htmls_table.rb
96
+ - lib/baiduserp/migrations/003_create_serps_table.rb
97
+ - lib/baiduserp/migrations/004_create_weights_table.rb
97
98
  - lib/baiduserp/parser/ads_right.rb
98
99
  - lib/baiduserp/parser/ads_top.rb
99
100
  - lib/baiduserp/parser/con_ar.rb