baiduserp 2.3.7 → 2.5.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/baiduserp/analyser.rb +39 -35
- data/lib/baiduserp/client.rb +16 -0
- data/lib/baiduserp/migrations/{002_create_htmls_table.rb → htmls/001_create_htmls_table.rb} +2 -3
- data/lib/baiduserp/migrations/{003_create_serps_table.rb → serps/001_create_serps_table.rb} +3 -4
- data/lib/baiduserp/migrations/{001_create_keywords_table.rb → weights/001_create_keywords_table.rb} +1 -1
- data/lib/baiduserp/migrations/{004_create_weights_table.rb → weights/002_create_weights_table.rb} +2 -0
- data/lib/baiduserp/parser/ranks.rb +2 -6
- data/lib/baiduserp/result.rb +2 -0
- data/lib/baiduserp/version.rb +1 -1
- metadata +6 -7
- data/lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb +0 -11
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 89ad01defa5be250a6f32c0e32d2ad9cb64044ac
|
4
|
+
data.tar.gz: 06d8a6914183c1630f10b036d0f1dacc70f67899
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0c52c60cd473b2b7dba88f0c1039311d1a811d6f27690c6c0ceb4350c5e608c995e56d5bc69e94982a43404aaf10dad756749bcad96af2163258f08818a3a3c2
|
7
|
+
data.tar.gz: f1b21e87c779f24d2b9bd5780e68330071c8f59463d5dfebd369831036f8f909532442092dc656074140540bd5b69c2c6d9e078f2b7347ba698601685b724f75
|
data/lib/baiduserp/analyser.rb
CHANGED
@@ -9,72 +9,76 @@ module Baiduserp
|
|
9
9
|
# Dir[File.expand_path('../analyser/*.rb', __FILE__)].each{|f| require f}
|
10
10
|
|
11
11
|
def initialize(name,attrs={})
|
12
|
-
@
|
12
|
+
@name = name
|
13
|
+
Dir.mkdir @name unless Dir.exists? @name # store htmls and serps data under the dir
|
14
|
+
|
15
|
+
@db_file = @name + ".sqlite"
|
13
16
|
@attrs = attrs
|
14
17
|
@keywords_imported = File.exists?(@db_file)
|
15
18
|
|
16
|
-
@db = Sequel.
|
19
|
+
@db = Sequel.sqlite(@db_file)
|
17
20
|
|
18
21
|
migrate!
|
19
22
|
|
20
|
-
@keywords = Class.new(Sequel::Model)
|
21
|
-
|
22
|
-
end
|
23
|
-
|
24
|
-
@htmls = Class.new(Sequel::Model) do
|
25
|
-
set_dataset :htmls
|
26
|
-
end
|
27
|
-
|
28
|
-
@serps = Class.new(Sequel::Model) do
|
29
|
-
set_dataset :serps
|
30
|
-
end
|
31
|
-
|
32
|
-
@weights = Class.new(Sequel::Model) do
|
33
|
-
set_dataset :weights
|
34
|
-
end
|
23
|
+
@keywords = Class.new(Sequel::Model(@db[:keywords]))
|
24
|
+
@weights = Class.new(Sequel::Model(@db[:weights]))
|
35
25
|
|
36
26
|
import_keywords unless @keywords_imported
|
37
27
|
end
|
38
28
|
|
39
|
-
def run
|
40
|
-
|
29
|
+
def run(date=Date.today)
|
30
|
+
search(date)
|
31
|
+
generate_weights(date)
|
41
32
|
end
|
42
33
|
|
43
|
-
def migrate!
|
34
|
+
def migrate!(db = @db, schema = 'weights')
|
44
35
|
Sequel.extension :migration, :core_extensions
|
45
|
-
Sequel::Migrator.apply(
|
36
|
+
Sequel::Migrator.apply(db, File.expand_path("../migrations/#{schema}/",__FILE__))
|
46
37
|
end
|
47
38
|
|
48
39
|
def import_keywords(file=@attrs[:keywords])
|
49
40
|
CSV.foreach(file) do |l|
|
50
41
|
@keywords.find_or_create(:term => l[0]) do |r|
|
51
|
-
r.
|
42
|
+
r.search_volume = l[1]
|
52
43
|
r.category = l[2]
|
53
44
|
end
|
54
45
|
end
|
55
46
|
end
|
56
47
|
|
48
|
+
def model_htmls(date=Date.today)
|
49
|
+
db = Sequel.sqlite("#{@name}/htmls_#{date}.sqlite")
|
50
|
+
migrate!(db, 'htmls')
|
51
|
+
Class.new(Sequel::Model(db[:htmls]))
|
52
|
+
end
|
53
|
+
|
54
|
+
def model_serps(date=Date.today)
|
55
|
+
db = Sequel.sqlite("#{@name}/serps_#{date}.sqlite")
|
56
|
+
migrate!(db, 'serps')
|
57
|
+
Class.new(Sequel::Model(db[:serps]))
|
58
|
+
end
|
59
|
+
|
60
|
+
# Search Keywords -> Store Html -> Parse SERP
|
57
61
|
def search(date=Date.today)
|
58
|
-
|
62
|
+
htmls = model_htmls(date)
|
63
|
+
serps = model_serps(date)
|
64
|
+
p = ProgressBar.create(:title => "Searching Keywords", :total => @keywords.all.count)
|
59
65
|
@keywords.each do |k|
|
60
|
-
|
61
|
-
|
62
|
-
next
|
63
|
-
end
|
66
|
+
htmls.find_or_create(:keyword_id => k[:id]) {|r| r.content = Baiduserp.get_search_html(k[:term]) }
|
67
|
+
serps.find_or_create(:keyword_id => k[:id]) {|r| r.content = YAML.dump(Baiduserp.parse(htmls.where(:keyword_id => k[:id]).first[:content])) }
|
64
68
|
p.log k.to_hash
|
65
|
-
html = Baiduserp.get_search_html(k[:term])
|
66
|
-
@htmls.find_or_create(:keyword_id => k[:id], :date => date) {|r| r.content = html}
|
67
69
|
p.increment
|
68
70
|
end
|
69
71
|
end
|
70
72
|
|
71
|
-
def
|
72
|
-
htmls =
|
73
|
-
|
73
|
+
def regenerate_serps(date=Date.today)
|
74
|
+
htmls = model_htmls(date)
|
75
|
+
serps = model_serps(date)
|
76
|
+
p = ProgressBar.create(:title => "ReGenerating SERPS", :total => htmls.count)
|
74
77
|
htmls.each do |html|
|
75
78
|
keyword_id = html[:keyword_id]
|
76
79
|
html = html[:content]
|
77
|
-
|
80
|
+
r = serps.find_or_create(:keyword_id => keyword_id)
|
81
|
+
r.update(:content => YAML.dump(Baiduserp.parse(html)))
|
78
82
|
|
79
83
|
p.log keyword_id
|
80
84
|
p.increment
|
@@ -82,9 +86,9 @@ module Baiduserp
|
|
82
86
|
end
|
83
87
|
|
84
88
|
def generate_weights(date=Date.today)
|
85
|
-
serps =
|
89
|
+
serps = model_serps(date)
|
86
90
|
p = ProgressBar.create(:title => "Generating Weights", :total => serps.count)
|
87
|
-
serps.
|
91
|
+
serps.each do |s|
|
88
92
|
keyword_id = s[:keyword_id]
|
89
93
|
serp = YAML.load(s[:content])
|
90
94
|
|
data/lib/baiduserp/client.rb
CHANGED
@@ -17,6 +17,22 @@ module Baiduserp
|
|
17
17
|
self.new.get_serp(url,retries)
|
18
18
|
end
|
19
19
|
|
20
|
+
def self.get_rank_url(url)
|
21
|
+
self.new.get_rank_url(url)
|
22
|
+
end
|
23
|
+
|
24
|
+
def get_rank_url(url)
|
25
|
+
begin
|
26
|
+
response = self.class.get(url)
|
27
|
+
rescue StandardError => e
|
28
|
+
puts e.class
|
29
|
+
puts e.message
|
30
|
+
sleep(10)
|
31
|
+
retry
|
32
|
+
end
|
33
|
+
response
|
34
|
+
end
|
35
|
+
|
20
36
|
def get_serp(url, retries = 3)
|
21
37
|
if retries > 0
|
22
38
|
begin
|
@@ -24,12 +24,8 @@ class Baiduserp::Parser
|
|
24
24
|
url = table.search('h3/a').first
|
25
25
|
unless url.nil?
|
26
26
|
url = url['href']
|
27
|
-
|
28
|
-
|
29
|
-
rescue Exception
|
30
|
-
sleep(10)
|
31
|
-
retry
|
32
|
-
end
|
27
|
+
sleep(rand)
|
28
|
+
url = Baiduserp::Client.get_rank_url(url).headers['location'] if url.include?('http://www.baidu.com/link?')
|
33
29
|
end
|
34
30
|
r[:url] = url
|
35
31
|
|
data/lib/baiduserp/result.rb
CHANGED
@@ -92,6 +92,7 @@ module Baiduserp
|
|
92
92
|
side_rank += 1
|
93
93
|
|
94
94
|
url = ad[:url].to_s
|
95
|
+
url = ad[:site].to_s if url.empty? # patch to stay compatible with older versions of baiduserp
|
95
96
|
type = 'SEM'
|
96
97
|
name = ''
|
97
98
|
site = Baiduserp::Helper.parse_site(url)
|
@@ -108,6 +109,7 @@ module Baiduserp
|
|
108
109
|
side_rank += 1
|
109
110
|
|
110
111
|
url = ad[:url].to_s
|
112
|
+
url = ad[:site].to_s if url.empty? # patch to stay compatible with older versions of baiduserp
|
111
113
|
type = 'SEM'
|
112
114
|
name = ''
|
113
115
|
site = Baiduserp::Helper.parse_site(url)
|
data/lib/baiduserp/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: baiduserp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.
|
4
|
+
version: 2.5.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- MingQian Zhang
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-17 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -105,11 +105,10 @@ files:
|
|
105
105
|
- lib/baiduserp/analyser.rb
|
106
106
|
- lib/baiduserp/client.rb
|
107
107
|
- lib/baiduserp/helper.rb
|
108
|
-
- lib/baiduserp/migrations/
|
109
|
-
- lib/baiduserp/migrations/
|
110
|
-
- lib/baiduserp/migrations/
|
111
|
-
- lib/baiduserp/migrations/
|
112
|
-
- lib/baiduserp/migrations/005_add_subdomain_url_to_weights.rb
|
108
|
+
- lib/baiduserp/migrations/htmls/001_create_htmls_table.rb
|
109
|
+
- lib/baiduserp/migrations/serps/001_create_serps_table.rb
|
110
|
+
- lib/baiduserp/migrations/weights/001_create_keywords_table.rb
|
111
|
+
- lib/baiduserp/migrations/weights/002_create_weights_table.rb
|
113
112
|
- lib/baiduserp/parser/ads_right.rb
|
114
113
|
- lib/baiduserp/parser/ads_top.rb
|
115
114
|
- lib/baiduserp/parser/con_ar.rb
|