baiduserp 2.3.7 → 2.5.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2886ad6a97fe01a9cc1ba3ab6f9893a7b37495d3
4
- data.tar.gz: 3e78db82105ef932cf65253655fc49f01f2f7118
3
+ metadata.gz: 89ad01defa5be250a6f32c0e32d2ad9cb64044ac
4
+ data.tar.gz: 06d8a6914183c1630f10b036d0f1dacc70f67899
5
5
  SHA512:
6
- metadata.gz: 83192c563fb0ca280cbfccf647c52474bc0e4b286923e4d06060de574a146260476162b126cc98a006357928e8b4f2d96ab4607f5cef7fcf774427d21a244f56
7
- data.tar.gz: d99d993c78e32fe16f66768c684cb441123250a27f774296e4dcf4189db1bc0b04ac87a7fc57b847ae20ee964e1bf2ce13b59063f0b0a763cf66979032e268e3
6
+ metadata.gz: 0c52c60cd473b2b7dba88f0c1039311d1a811d6f27690c6c0ceb4350c5e608c995e56d5bc69e94982a43404aaf10dad756749bcad96af2163258f08818a3a3c2
7
+ data.tar.gz: f1b21e87c779f24d2b9bd5780e68330071c8f59463d5dfebd369831036f8f909532442092dc656074140540bd5b69c2c6d9e078f2b7347ba698601685b724f75
@@ -9,72 +9,76 @@ module Baiduserp
9
9
  # Dir[File.expand_path('../analyser/*.rb', __FILE__)].each{|f| require f}
10
10
 
11
11
  def initialize(name,attrs={})
12
- @db_file = name + ".db"
12
+ @name = name
13
+ Dir.mkdir @name unless Dir.exists? @name # store htmls and serps data under the dir
14
+
15
+ @db_file = @name + ".sqlite"
13
16
  @attrs = attrs
14
17
  @keywords_imported = File.exists?(@db_file)
15
18
 
16
- @db = Sequel.connect("sqlite://" + @db_file)
19
+ @db = Sequel.sqlite(@db_file)
17
20
 
18
21
  migrate!
19
22
 
20
- @keywords = Class.new(Sequel::Model) do
21
- set_dataset :keywords
22
- end
23
-
24
- @htmls = Class.new(Sequel::Model) do
25
- set_dataset :htmls
26
- end
27
-
28
- @serps = Class.new(Sequel::Model) do
29
- set_dataset :serps
30
- end
31
-
32
- @weights = Class.new(Sequel::Model) do
33
- set_dataset :weights
34
- end
23
+ @keywords = Class.new(Sequel::Model(@db[:keywords]))
24
+ @weights = Class.new(Sequel::Model(@db[:weights]))
35
25
 
36
26
  import_keywords unless @keywords_imported
37
27
  end
38
28
 
39
- def run
40
-
29
+ def run(date=Date.today)
30
+ search(date)
31
+ generate_weights(date)
41
32
  end
42
33
 
43
- def migrate!
34
+ def migrate!(db = @db, schema = 'weights')
44
35
  Sequel.extension :migration, :core_extensions
45
- Sequel::Migrator.apply(@db, File.expand_path('../migrations/',__FILE__))
36
+ Sequel::Migrator.apply(db, File.expand_path("../migrations/#{schema}/",__FILE__))
46
37
  end
47
38
 
48
39
  def import_keywords(file=@attrs[:keywords])
49
40
  CSV.foreach(file) do |l|
50
41
  @keywords.find_or_create(:term => l[0]) do |r|
51
- r.weight = l[1]
42
+ r.search_volume = l[1]
52
43
  r.category = l[2]
53
44
  end
54
45
  end
55
46
  end
56
47
 
48
+ def model_htmls(date=Date.today)
49
+ db = Sequel.sqlite("#{@name}/htmls_#{date}.sqlite")
50
+ migrate!(db, 'htmls')
51
+ Class.new(Sequel::Model(db[:htmls]))
52
+ end
53
+
54
+ def model_serps(date=Date.today)
55
+ db = Sequel.sqlite("#{@name}/serps_#{date}.sqlite")
56
+ migrate!(db, 'serps')
57
+ Class.new(Sequel::Model(db[:serps]))
58
+ end
59
+
60
+ # Search Keywords -> Store Html -> Parse SERP
57
61
  def search(date=Date.today)
58
- p = ProgressBar.create(:title => "Search Keywords", :total => @keywords.all.count)
62
+ htmls = model_htmls(date)
63
+ serps = model_serps(date)
64
+ p = ProgressBar.create(:title => "Searching Keywords", :total => @keywords.all.count)
59
65
  @keywords.each do |k|
60
- if @htmls.where(:date => date, :keyword_id => k[:id]).count > 0
61
- p.increment
62
- next
63
- end
66
+ htmls.find_or_create(:keyword_id => k[:id]) {|r| r.content = Baiduserp.get_search_html(k[:term]) }
67
+ serps.find_or_create(:keyword_id => k[:id]) {|r| r.content = YAML.dump(Baiduserp.parse(htmls.where(:keyword_id => k[:id]).first[:content])) }
64
68
  p.log k.to_hash
65
- html = Baiduserp.get_search_html(k[:term])
66
- @htmls.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = html}
67
69
  p.increment
68
70
  end
69
71
  end
70
72
 
71
- def generate_serps(date=Date.today)
72
- htmls = @htmls.where(:date => date)
73
- p = ProgressBar.create(:title => "Generating SERPS", :total => htmls.count)
73
+ def regenerate_serps(date=Date.today)
74
+ htmls = model_htmls(date)
75
+ serps = model_serps(date)
76
+ p = ProgressBar.create(:title => "ReGenerating SERPS", :total => htmls.count)
74
77
  htmls.each do |html|
75
78
  keyword_id = html[:keyword_id]
76
79
  html = html[:content]
77
- @serps.find_or_create(:date => date, :keyword_id => keyword_id) {|r| r.content = YAML.dump(Baiduserp.parse(html))}
80
+ r = serps.find_or_create(:keyword_id => keyword_id)
81
+ r.update(:content => YAML.dump(Baiduserp.parse(html)))
78
82
 
79
83
  p.log keyword_id
80
84
  p.increment
@@ -82,9 +86,9 @@ module Baiduserp
82
86
  end
83
87
 
84
88
  def generate_weights(date=Date.today)
85
- serps = @serps.where(:date => date)
89
+ serps = model_serps(date)
86
90
  p = ProgressBar.create(:title => "Generating Weights", :total => serps.count)
87
- serps.where(:date => date).each do |s|
91
+ serps.each do |s|
88
92
  keyword_id = s[:keyword_id]
89
93
  serp = YAML.load(s[:content])
90
94
 
@@ -17,6 +17,22 @@ module Baiduserp
17
17
  self.new.get_serp(url,retries)
18
18
  end
19
19
 
20
+ def self.get_rank_url(url)
21
+ self.new.get_rank_url(url)
22
+ end
23
+
24
+ def get_rank_url(url)
25
+ begin
26
+ response = self.class.get(url)
27
+ rescue StandardError => e
28
+ puts e.class
29
+ puts e.message
30
+ sleep(10)
31
+ retry
32
+ end
33
+ response
34
+ end
35
+
20
36
  def get_serp(url, retries = 3)
21
37
  if retries > 0
22
38
  begin
@@ -2,11 +2,10 @@ Sequel.migration do
2
2
  up do
3
3
  create_table :htmls do
4
4
  primary_key :id
5
- foreign_key :keyword_id, :keywords
6
- Date :date
5
+ Integer :keyword_id
7
6
  String :content, :text => true
8
7
 
9
- index :date
8
+ index :keyword_id
10
9
  end
11
10
  end
12
11
 
@@ -2,11 +2,10 @@ Sequel.migration do
2
2
  up do
3
3
  create_table :serps do
4
4
  primary_key :id
5
- foreign_key :keyword_id, :keywords
6
- Date :date
5
+ Integer :keyword_id
7
6
  String :content, :text => true
8
-
9
- index :date
7
+
8
+ index :keyword_id
10
9
  end
11
10
  end
12
11
 
@@ -3,7 +3,7 @@ Sequel.migration do
3
3
  create_table :keywords do
4
4
  primary_key :id
5
5
  String :term
6
- Integer :weight
6
+ Integer :search_volume
7
7
  String :category
8
8
 
9
9
  index :term
@@ -9,6 +9,8 @@ Sequel.migration do
9
9
  String :type
10
10
  String :name
11
11
  String :site
12
+ String :subdomain
13
+ String :url
12
14
  Integer :side_rank
13
15
  Float :weight
14
16
  Float :normalized_weight
@@ -24,12 +24,8 @@ class Baiduserp::Parser
24
24
  url = table.search('h3/a').first
25
25
  unless url.nil?
26
26
  url = url['href']
27
- begin
28
- url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
29
- rescue Exception
30
- sleep(10)
31
- retry
32
- end
27
+ sleep(rand)
28
+ url = Baiduserp::Client.get_rank_url(url).headers['location'] if url.include?('http://www.baidu.com/link?')
33
29
  end
34
30
  r[:url] = url
35
31
 
@@ -92,6 +92,7 @@ module Baiduserp
92
92
  side_rank += 1
93
93
 
94
94
  url = ad[:url].to_s
95
+ url = ad[:site].to_s if url.empty? # patch to be compatible with older versions of baiduserp
95
96
  type = 'SEM'
96
97
  name = ''
97
98
  site = Baiduserp::Helper.parse_site(url)
@@ -108,6 +109,7 @@ module Baiduserp
108
109
  side_rank += 1
109
110
 
110
111
  url = ad[:url].to_s
112
+ url = ad[:site].to_s if url.empty? # patch to be compatible with older versions of baiduserp
111
113
  type = 'SEM'
112
114
  name = ''
113
115
  site = Baiduserp::Helper.parse_site(url)
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.3.7"
2
+ VERSION = "2.5.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.7
4
+ version: 2.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - MingQian Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-12 00:00:00.000000000 Z
11
+ date: 2013-12-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -105,11 +105,10 @@ files:
105
105
  - lib/baiduserp/analyser.rb
106
106
  - lib/baiduserp/client.rb
107
107
  - lib/baiduserp/helper.rb
108
- - lib/baiduserp/migrations/001_create_keywords_table.rb
109
- - lib/baiduserp/migrations/002_create_htmls_table.rb
110
- - lib/baiduserp/migrations/003_create_serps_table.rb
111
- - lib/baiduserp/migrations/004_create_weights_table.rb
112
- - lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb
108
+ - lib/baiduserp/migrations/htmls/001_create_htmls_table.rb
109
+ - lib/baiduserp/migrations/serps/001_create_serps_table.rb
110
+ - lib/baiduserp/migrations/weights/001_create_keywords_table.rb
111
+ - lib/baiduserp/migrations/weights/002_create_weights_table.rb
113
112
  - lib/baiduserp/parser/ads_right.rb
114
113
  - lib/baiduserp/parser/ads_top.rb
115
114
  - lib/baiduserp/parser/con_ar.rb
@@ -1,11 +0,0 @@
1
- Sequel.migration do
2
- up do
3
- add_column :weights, :subdomain, String
4
- add_column :weights, :url, String
5
- end
6
-
7
- down do
8
- drop_column :weights, :subdomain
9
- drop_column :weights, :url
10
- end
11
- end