baiduserp 2.3.7 → 2.5.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2886ad6a97fe01a9cc1ba3ab6f9893a7b37495d3
4
- data.tar.gz: 3e78db82105ef932cf65253655fc49f01f2f7118
3
+ metadata.gz: 89ad01defa5be250a6f32c0e32d2ad9cb64044ac
4
+ data.tar.gz: 06d8a6914183c1630f10b036d0f1dacc70f67899
5
5
  SHA512:
6
- metadata.gz: 83192c563fb0ca280cbfccf647c52474bc0e4b286923e4d06060de574a146260476162b126cc98a006357928e8b4f2d96ab4607f5cef7fcf774427d21a244f56
7
- data.tar.gz: d99d993c78e32fe16f66768c684cb441123250a27f774296e4dcf4189db1bc0b04ac87a7fc57b847ae20ee964e1bf2ce13b59063f0b0a763cf66979032e268e3
6
+ metadata.gz: 0c52c60cd473b2b7dba88f0c1039311d1a811d6f27690c6c0ceb4350c5e608c995e56d5bc69e94982a43404aaf10dad756749bcad96af2163258f08818a3a3c2
7
+ data.tar.gz: f1b21e87c779f24d2b9bd5780e68330071c8f59463d5dfebd369831036f8f909532442092dc656074140540bd5b69c2c6d9e078f2b7347ba698601685b724f75
@@ -9,72 +9,76 @@ module Baiduserp
9
9
  # Dir[File.expand_path('../analyser/*.rb', __FILE__)].each{|f| require f}
10
10
 
11
11
  def initialize(name,attrs={})
12
- @db_file = name + ".db"
12
+ @name = name
13
+ Dir.mkdir @name unless Dir.exists? @name # store htmls and serps data under the dir
14
+
15
+ @db_file = @name + ".sqlite"
13
16
  @attrs = attrs
14
17
  @keywords_imported = File.exists?(@db_file)
15
18
 
16
- @db = Sequel.connect("sqlite://" + @db_file)
19
+ @db = Sequel.sqlite(@db_file)
17
20
 
18
21
  migrate!
19
22
 
20
- @keywords = Class.new(Sequel::Model) do
21
- set_dataset :keywords
22
- end
23
-
24
- @htmls = Class.new(Sequel::Model) do
25
- set_dataset :htmls
26
- end
27
-
28
- @serps = Class.new(Sequel::Model) do
29
- set_dataset :serps
30
- end
31
-
32
- @weights = Class.new(Sequel::Model) do
33
- set_dataset :weights
34
- end
23
+ @keywords = Class.new(Sequel::Model(@db[:keywords]))
24
+ @weights = Class.new(Sequel::Model(@db[:weights]))
35
25
 
36
26
  import_keywords unless @keywords_imported
37
27
  end
38
28
 
39
- def run
40
-
29
+ def run(date=Date.today)
30
+ search(date)
31
+ generate_weights(date)
41
32
  end
42
33
 
43
- def migrate!
34
+ def migrate!(db = @db, schema = 'weights')
44
35
  Sequel.extension :migration, :core_extensions
45
- Sequel::Migrator.apply(@db, File.expand_path('../migrations/',__FILE__))
36
+ Sequel::Migrator.apply(db, File.expand_path("../migrations/#{schema}/",__FILE__))
46
37
  end
47
38
 
48
39
  def import_keywords(file=@attrs[:keywords])
49
40
  CSV.foreach(file) do |l|
50
41
  @keywords.find_or_create(:term => l[0]) do |r|
51
- r.weight = l[1]
42
+ r.search_volume = l[1]
52
43
  r.category = l[2]
53
44
  end
54
45
  end
55
46
  end
56
47
 
48
+ def model_htmls(date=Date.today)
49
+ db = Sequel.sqlite("#{@name}/htmls_#{date}.sqlite")
50
+ migrate!(db, 'htmls')
51
+ Class.new(Sequel::Model(db[:htmls]))
52
+ end
53
+
54
+ def model_serps(date=Date.today)
55
+ db = Sequel.sqlite("#{@name}/serps_#{date}.sqlite")
56
+ migrate!(db, 'serps')
57
+ Class.new(Sequel::Model(db[:serps]))
58
+ end
59
+
60
+ # Search Keywords -> Store Html -> Parse SERP
57
61
  def search(date=Date.today)
58
- p = ProgressBar.create(:title => "Search Keywords", :total => @keywords.all.count)
62
+ htmls = model_htmls(date)
63
+ serps = model_serps(date)
64
+ p = ProgressBar.create(:title => "Searching Keywords", :total => @keywords.all.count)
59
65
  @keywords.each do |k|
60
- if @htmls.where(:date => date, :keyword_id => k[:id]).count > 0
61
- p.increment
62
- next
63
- end
66
+ htmls.find_or_create(:keyword_id => k[:id]) {|r| r.content = Baiduserp.get_search_html(k[:term]) }
67
+ serps.find_or_create(:keyword_id => k[:id]) {|r| r.content = YAML.dump(Baiduserp.parse(htmls.where(:keyword_id => k[:id]).first[:content])) }
64
68
  p.log k.to_hash
65
- html = Baiduserp.get_search_html(k[:term])
66
- @htmls.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = html}
67
69
  p.increment
68
70
  end
69
71
  end
70
72
 
71
- def generate_serps(date=Date.today)
72
- htmls = @htmls.where(:date => date)
73
- p = ProgressBar.create(:title => "Generating SERPS", :total => htmls.count)
73
+ def regenerate_serps(date=Date.today)
74
+ htmls = model_htmls(date)
75
+ serps = model_serps(date)
76
+ p = ProgressBar.create(:title => "ReGenerating SERPS", :total => htmls.count)
74
77
  htmls.each do |html|
75
78
  keyword_id = html[:keyword_id]
76
79
  html = html[:content]
77
- @serps.find_or_create(:date => date, :keyword_id => keyword_id) {|r| r.content = YAML.dump(Baiduserp.parse(html))}
80
+ r = serps.find_or_create(:keyword_id => keyword_id)
81
+ r.update(:content => YAML.dump(Baiduserp.parse(html)))
78
82
 
79
83
  p.log keyword_id
80
84
  p.increment
@@ -82,9 +86,9 @@ module Baiduserp
82
86
  end
83
87
 
84
88
  def generate_weights(date=Date.today)
85
- serps = @serps.where(:date => date)
89
+ serps = model_serps(date)
86
90
  p = ProgressBar.create(:title => "Generating Weights", :total => serps.count)
87
- serps.where(:date => date).each do |s|
91
+ serps.each do |s|
88
92
  keyword_id = s[:keyword_id]
89
93
  serp = YAML.load(s[:content])
90
94
 
@@ -17,6 +17,22 @@ module Baiduserp
17
17
  self.new.get_serp(url,retries)
18
18
  end
19
19
 
20
+ def self.get_rank_url(url)
21
+ self.new.get_rank_url(url)
22
+ end
23
+
24
+ def get_rank_url(url)
25
+ begin
26
+ response = self.class.get(url)
27
+ rescue StandardError => e
28
+ puts e.class
29
+ puts e.message
30
+ sleep(10)
31
+ retry
32
+ end
33
+ response
34
+ end
35
+
20
36
  def get_serp(url, retries = 3)
21
37
  if retries > 0
22
38
  begin
@@ -2,11 +2,10 @@ Sequel.migration do
2
2
  up do
3
3
  create_table :htmls do
4
4
  primary_key :id
5
- foreign_key :keyword_id, :keywords
6
- Date :date
5
+ Integer :keyword_id
7
6
  String :content, :text => true
8
7
 
9
- index :date
8
+ index :keyword_id
10
9
  end
11
10
  end
12
11
 
@@ -2,11 +2,10 @@ Sequel.migration do
2
2
  up do
3
3
  create_table :serps do
4
4
  primary_key :id
5
- foreign_key :keyword_id, :keywords
6
- Date :date
5
+ Integer :keyword_id
7
6
  String :content, :text => true
8
-
9
- index :date
7
+
8
+ index :keyword_id
10
9
  end
11
10
  end
12
11
 
@@ -3,7 +3,7 @@ Sequel.migration do
3
3
  create_table :keywords do
4
4
  primary_key :id
5
5
  String :term
6
- Integer :weight
6
+ Integer :search_volume
7
7
  String :category
8
8
 
9
9
  index :term
@@ -9,6 +9,8 @@ Sequel.migration do
9
9
  String :type
10
10
  String :name
11
11
  String :site
12
+ String :subdomain
13
+ String :url
12
14
  Integer :side_rank
13
15
  Float :weight
14
16
  Float :normalized_weight
@@ -24,12 +24,8 @@ class Baiduserp::Parser
24
24
  url = table.search('h3/a').first
25
25
  unless url.nil?
26
26
  url = url['href']
27
- begin
28
- url = Baiduserp::Client.get(url).headers['location'] if url.include?('http://www.baidu.com/link?')
29
- rescue Exception
30
- sleep(10)
31
- retry
32
- end
27
+ sleep(rand)
28
+ url = Baiduserp::Client.get_rank_url(url).headers['location'] if url.include?('http://www.baidu.com/link?')
33
29
  end
34
30
  r[:url] = url
35
31
 
@@ -92,6 +92,7 @@ module Baiduserp
92
92
  side_rank += 1
93
93
 
94
94
  url = ad[:url].to_s
95
+ url = ad[:site].to_s if url.empty? # patch to stay compatible with older versions of baiduserp
95
96
  type = 'SEM'
96
97
  name = ''
97
98
  site = Baiduserp::Helper.parse_site(url)
@@ -108,6 +109,7 @@ module Baiduserp
108
109
  side_rank += 1
109
110
 
110
111
  url = ad[:url].to_s
112
+ url = ad[:site].to_s if url.empty? # patch to stay compatible with older versions of baiduserp
111
113
  type = 'SEM'
112
114
  name = ''
113
115
  site = Baiduserp::Helper.parse_site(url)
@@ -1,3 +1,3 @@
1
1
  module Baiduserp
2
- VERSION = "2.3.7"
2
+ VERSION = "2.5.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: baiduserp
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.3.7
4
+ version: 2.5.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - MingQian Zhang
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-12 00:00:00.000000000 Z
11
+ date: 2013-12-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -105,11 +105,10 @@ files:
105
105
  - lib/baiduserp/analyser.rb
106
106
  - lib/baiduserp/client.rb
107
107
  - lib/baiduserp/helper.rb
108
- - lib/baiduserp/migrations/001_create_keywords_table.rb
109
- - lib/baiduserp/migrations/002_create_htmls_table.rb
110
- - lib/baiduserp/migrations/003_create_serps_table.rb
111
- - lib/baiduserp/migrations/004_create_weights_table.rb
112
- - lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb
108
+ - lib/baiduserp/migrations/htmls/001_create_htmls_table.rb
109
+ - lib/baiduserp/migrations/serps/001_create_serps_table.rb
110
+ - lib/baiduserp/migrations/weights/001_create_keywords_table.rb
111
+ - lib/baiduserp/migrations/weights/002_create_weights_table.rb
113
112
  - lib/baiduserp/parser/ads_right.rb
114
113
  - lib/baiduserp/parser/ads_top.rb
115
114
  - lib/baiduserp/parser/con_ar.rb
@@ -1,11 +0,0 @@
1
- Sequel.migration do
2
- up do
3
- add_column :weights, :subdomain, String
4
- add_column :weights, :url, String
5
- end
6
-
7
- down do
8
- drop_column :weights, :subdomain
9
- drop_column :weights, :url
10
- end
11
- end