serper 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 149badc447dec8ed55714a9ed6768e3f85a9b94e
4
+ data.tar.gz: 8204acacee0b068b61421c0bc59507ea30717895
5
+ SHA512:
6
+ metadata.gz: d329ccb1dbf584a4a100bf945740a5ff9ffc391a79f34aa598776cc5db901fdefdf0e04e26920060784c302fd9eb935dcf0a2a623a0acdda8b76bdc61c4235b2
7
+ data.tar.gz: 36a41f00ff396bde0e3c18587bd58359300591572ff346135f340520b16145272c622f228709d0fe72d770a2174f5bf48cc6c9e98e75fc3b22eef78e8ea14f41
@@ -0,0 +1 @@
1
+ 解析百度的搜索结果页面, 并返回结构化数据以进行后续分析.
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'serper'
4
+ require 'optparse'
5
+ require 'json'
6
+ require 'pp'
7
+ require 'docopt'
8
+
9
# Command line front end for the serper gem: searches a keyword, parses a
# saved result page, or runs a full analysis, then prints the outcome.

cmd = File.basename(__FILE__)

# Serper.search and Serper::Parser.new both need an engine name; :baidu is
# the only key registered in Serper::ENGINES.
engine = :baidu

# docopt separates an option from its description by at least two spaces,
# so the column layout below is significant.
doc = <<DOCOPT
1. serper -s 'keyword'                  # search 'keyword' and print parse result
2. serper -s 'keyword' -o output.json   # -o means save result to a file
3. serper -f 'file path'                # parse html source code from file
4. serper -s 'keyword' -j               # search 'keyword' and print parse result in JSON format

Usage:
  #{cmd} [options]

Options:
  -h --help            show this help message and exit
  -v --version         show version and exit
  -a --analyse Name    analyse as the given name
  --keywords File      uses with -a, import give keywords File before search
  -s --search Keyword  search Keyword and show result
  -f --file File       parse local file or given url
  -j --json            print JSON output
  -o --output File     output JSON result to File

DOCOPT

begin
  options = Docopt::docopt(doc, version: Serper::VERSION)
rescue Docopt::Exit => e
  # docopt raises for --help, --version and usage errors alike; without
  # leaving here `options` would be nil and the code below would crash.
  puts e.message
  exit
end

result = ''
if options['--analyse']
  analyse = Serper.analyse(options['--analyse'])
  # --keywords is optional; only import when a file was actually given.
  analyse.import_keywords(options['--keywords']) if options['--keywords']
  analyse.run
  result = 'Analyse finished!'
elsif options['--search']
  # Serper.search returns the parser object; its #result is the parsed hash.
  result = Serper.search(engine, options['--search']).result
elsif options['--file']
  # NOTE(review): the help text also mentions URLs, but only local files are
  # read here - confirm whether URL support is still wanted.
  result = Serper::Parser.new(engine, nil).parse(File.read(options['--file']))
else
  puts "At least given one of -a/-s/-f"
  exit 1
end

if options['--json']
  puts result.to_json
else
  pp result
end

# The block form closes (and therefore flushes) the output file.
File.open(options['--output'], 'w') { |f| f.puts result.to_json } if options['--output']
@@ -0,0 +1,26 @@
1
+ require "serper/version"
2
+ require "serper/parser"
3
+ require "serper/analyser"
4
+
5
+ [:baidu].each do |engine_name|
6
+ %w{crawler parser weight}.each do |part|
7
+ require File.expand_path("../serper/#{engine_name}/#{part}.rb",__FILE__)
8
+ end
9
+ end
10
+
11
# Top-level namespace of the gem and its two entry points.
module Serper
  # Engine name => class implementing that engine.
  ENGINES = { :baidu => Baidu }

  # Creates a Parser for the given engine/keyword/page, runs its search and
  # returns the parser itself (not the parsed hash).
  def self.search(engine_name, keyword, page = 1)
    Parser.new(engine_name, keyword, page).tap { |serp| serp.search }
  end

  # Returns an Analyser built with the given connection argument.
  def self.analyse(connection)
    Analyser.new(connection)
  end
end
26
+
@@ -0,0 +1,112 @@
1
+ require 'active_record'
2
+ require 'csv'
3
+ require 'date'
4
+ require 'yaml'
5
+ require 'ruby-progressbar'
6
+
7
module Serper
  # Searches every stored keyword on every registered engine and stores the
  # resulting weight rows through ActiveRecord.
  class Analyser
    # @param connection handed unchanged to ActiveRecord::Base.establish_connection
    def initialize(connection)
      ActiveRecord::Base.establish_connection(connection)
    end

    # Imports keywords from a CSV file. Columns are read by position:
    # term, pv, category, url_type, url_id. Rows whose term already exists
    # are left untouched (the block only runs for newly created records).
    def import_keywords(file)
      CSV.foreach(file) do |row|
        Keyword.find_or_create_by(:term => row[0]) do |record|
          record.pv = row[1]
          record.category = row[2]
          record.url_type = row[3]
          record.url_id = row[4]
        end
      end
    end

    # Runs #search_engine for every key of Serper::ENGINES.
    #
    # @param date the date stored on the created weight rows
    # @param skip forwarded to #search_engine
    def run(date=Date.today,skip=true)
      puts "Serper Analyser on #{date}"
      ENGINES.keys.each do |engine_name|
        puts engine_name
        search_engine(engine_name,date,skip)
      end
    end

    # Searches every keyword on one engine and stores one Weight row per
    # entry returned by the parser's #weights.
    #
    # @param skip when true, keywords that already have rows for this
    #   engine/date are left alone; when false, those rows are destroyed
    #   and rebuilt
    def search_engine(engine_name,date,skip=true)
      progress = ProgressBar.create(:title => "Searching #{engine_name} - #{date}",
                                    :total => Keyword.count,
                                    :format => '%t (%c/%C) %a %E |%w')

      # find_each loads keywords in batches instead of all at once.
      Keyword.find_each do |keyword|
        existing = Weight.where(:engine => engine_name, :date => date, :keyword_id => keyword.id)
        if existing.any?
          if skip
            # Skipped keywords still count towards the total, otherwise the
            # bar can never reach 100%.
            progress.increment
            next
          end
          existing.destroy_all
        end

        serp = Serper.search(engine_name, keyword.term)
        serp.weights.each do |w|
          Weight.create(:date => date,
                        :keyword_id => keyword.id,
                        :engine => engine_name,
                        :side => w[:side],
                        :part => w[:part],
                        :source => w[:type],
                        :name => w[:name],
                        :site => w[:site],
                        :subdomain => w[:subdomain],
                        :path => w[:path],
                        :part_rank => w[:part_rank],
                        :side_rank => w[:side_rank],
                        :side_weight => w[:side_weight],
                        :weight => w[:weight]
          )
        end

        progress.increment
      end
    end

    # Creates the serper_keywords and serper_weights tables on the current
    # connection.
    def migrate!
      ActiveRecord::Schema.define do
        create_table :serper_keywords do |t|
          t.string :term
          t.integer :pv
          t.string :category
          t.string :url_type
          t.integer :url_id

          t.timestamps

          t.index :term
        end

        create_table :serper_weights do |t|
          t.date :date
          t.string :engine
          t.integer :keyword_id
          t.string :side # Left Right
          t.string :part
          t.string :source # SEO SEM Special
          t.string :name
          t.string :site
          t.string :subdomain
          t.string :path
          t.integer :part_rank
          t.integer :side_rank
          t.float :side_weight
          t.float :weight

          t.timestamps

          t.index [:date, :engine, :keyword_id, :side, :side_rank], name: 'weights_pk_index'
        end
      end
    end

    # ActiveRecord model backed by the serper_keywords table.
    class Keyword < ActiveRecord::Base
      self.table_name = 'serper_keywords'
    end

    # ActiveRecord model backed by the serper_weights table.
    class Weight < ActiveRecord::Base
      self.table_name = 'serper_weights'
    end
  end
end
@@ -0,0 +1,7 @@
1
+ class Serper::Baidu
2
# Builds the Baidu result page URL for a keyword.
#
# The keyword is form-encoded (spaces become "+", reserved characters such
# as "&", "=" and "#" are percent-escaped) so it cannot break the query
# string. URI.escape, used previously, left those characters untouched and
# no longer exists in Ruby 3.
#
# @param keyword [String] search term
# @param page [Integer, String] page number; pages above 1 add "&pn=<page-1>0"
# @return [String] URL including an "inputT" value between 1000 and 1999
def serp_url(keyword,page)
  query = URI.encode_www_form_component(keyword.to_s)
  offset = page.to_i > 1 ? "&pn=#{page.to_i-1}0" : ''
  "http://www.baidu.com/s?wd=#{query}#{offset}&ie=utf-8&inputT=#{1000+rand(1000)}"
end
7
+ end
@@ -0,0 +1,185 @@
1
+ class Serper::Baidu
2
# Lists the links matched by 'div#ec_im_container span a.c-icon.efc-cert'.
#
# Each entry is {url:, rank:}. rank is the 1-based position in match order.
# url is the 'wd' query value of the identity.a.url field found in the
# link's 'data-renzheng' attribute with '@v' removed, or '' when that value
# cannot be extracted for any reason.
def _parse_ads_right(file)
  badges = file[:doc].search('div#ec_im_container span a.c-icon.efc-cert')

  badges.each_with_index.map do |badge, index|
    landing =
      begin
        cert = Serper::Helper.parse_data_click(badge['data-renzheng'])
        Addressable::URI.parse(cert['identity']['a']['url']).query_values['wd'].to_s.sub('@v','')
      rescue StandardError
        ''
      end

    {url: landing, rank: index + 1}
  end
end
13
+
14
# Lists the 'span a.c-icon.efc-cert' links found in the children of
# div#content_left that come before the first child with a numeric id
# greater than zero.
#
# Each entry is {url:, rank:}; rank counts matches across all scanned
# children starting at 1. url is extracted from the link's 'data-renzheng'
# attribute and is '' whenever that extraction fails.
#
# Returns [] when the page has no div#content_left, consistent with
# _parse_ranks (previously this raised NoMethodError on nil).
def _parse_ads_top(file)
  container = file[:doc].search('div#content_left').first
  return [] if container.nil?

  result = []
  container.children.each do |child|
    break if child['id'].to_i > 0

    child.search('span a.c-icon.efc-cert').each do |badge|
      url =
        begin
          cert = Serper::Helper.parse_data_click(badge['data-renzheng'])
          Addressable::URI.parse(cert['identity']['a']['url']).query_values['wd'].to_s.sub('@v', '')
        rescue StandardError
          ''
        end
      result << {url: url, rank: result.size + 1}
    end
  end
  result
end
28
+
29
# Describes the children of 'div#content_right div#con-ar' whose class
# attribute contains 'result-op'.
#
# Each entry holds the child's 'tpl' attribute and its parsed 'data-click'
# attribute. Returns [] when the container is not on the page.
def _parse_con_ar(file)
  panel = file[:doc].search("div#content_right div#con-ar").first
  return [] if panel.nil?

  cards = panel.children.select { |node| node['class'].to_s.include?('result-op') }
  cards.map do |card|
    {
      :tpl => card['tpl'],
      :data_click => Serper::Helper.parse_data_click(card['data-click'])
    }
  end
end
41
+
42
+ # def _parse_pinpaizhuanqu(file)
43
+ # part = file[:doc].search("div[@id='content_left']").first
44
+ # return false if part.nil?
45
+ #
46
+ # part.children[2].name == 'script'
47
+ # end
48
+
49
# Extracts the result entries from div#content_left.
#
# Only children whose 'id' attribute is a number between 1 and 2999 are
# used. Each entry contains:
#   :rank      - the numeric id
#   :result_op - whether the class attribute contains 'result-op'
#   :fk, :srcid, :tpl, :mu - the attributes of the same name
#   :url       - href of the first 'h3/a' link (nil when there is none);
#                links through www.baidu.com/link? are replaced by the
#                'location' header returned for them
#   :title, :content - text of 'h3' and 'div.c-abstract'
#   :baiduopen - whether any link in the entry points to open.baidu.com
#
# Returns [] when the page has no div#content_left.
def _parse_ranks(file)
  container = file[:doc].search("div[@id='content_left']").first
  return [] if container.nil?

  container.children.each_with_object([]) do |node, ranks|
    id = node['id'].to_i
    next unless id > 0 && id < 3000

    anchor = node.search('h3/a').first
    url = anchor && anchor['href']
    if url && url.include?('//www.baidu.com/link?')
      # Random pause of up to one second before each redirect lookup; no
      # pause is needed when no request is made.
      sleep(rand)
      # Only protocol-relative links need a scheme; prefixing an absolute
      # link produced "http:http://...".
      target = url.start_with?('//') ? "http:#{url}" : url
      url = Serper::Crawler.get_rank_url(target).headers['location']
    end

    ranks << {
      :rank => id,
      :result_op => node['class'].to_s.include?('result-op'),
      :fk => node['fk'],
      :srcid => node['srcid'],
      :tpl => node['tpl'],
      :mu => node['mu'],
      :url => url,
      :title => Serper::Helper.get_content_safe(node.search('h3')),
      :content => Serper::Helper.get_content_safe(node.search('div.c-abstract')),
      :baiduopen => node.search('a').any? { |link| link['href'].to_s.include?('open.baidu.com') }
    }
  end
end
92
+
93
+ # def _parse_related_keywords(file)
94
+ # result = []
95
+ # file[:doc].search('div[@id="rs"]').each do |rs|
96
+ # rs.css('a').each do |link|
97
+ # result << link.content
98
+ # end
99
+ # end
100
+ # result
101
+ # end
102
+
103
+ # def _parse_result_num(file)
104
+ # html = file[:html]
105
+ # str = html.scan(/百度为您找到相关结果(.*)个/).join
106
+ # str = str.gsub('约','')
107
+ # if str.include?('万')
108
+ # parts = str.split('万')
109
+ # result = parts[0].to_i * 10000 + parts[1].to_i
110
+ # else
111
+ # result = str.gsub(',', '').to_i
112
+ # end
113
+ #
114
+ # result
115
+ # end
116
+
117
+ # def _parse_right_hotel(file)
118
+ # rh = file[:doc].search('div[@tpl="right_hotel"]')
119
+ # return nil if rh.nil?
120
+ #
121
+ # rh = rh.first
122
+ # return nil if rh.nil?
123
+ # title = Serper::Helper.get_content_safe(rh.search('div.opr-hotel-title'))
124
+ #
125
+ # {:title => title}
126
+ # end
127
+
128
+ # def _parse_right_personinfo(file)
129
+ # rp = file[:doc].search('div[@tpl="right_personinfo"]')
130
+ # return nil if rp.nil?
131
+ #
132
+ # title = Serper::Helper.get_content_safe rp.search('span.opr-personinfo-subtitle-large')
133
+ # info_summary = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-summary')
134
+ # info = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-info')
135
+ # source = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-source a')
136
+ #
137
+ # return nil if title.nil? && info.nil? && source.nil?
138
+ # {:title => title, :info_summary => info_summary, :info => info, :source => source}
139
+ # end
140
+
141
+ # def _parse_right_relaperson(file)
142
+ # relapersons = file[:doc].search('div[@tpl="right_relaperson"]')
143
+ # return nil if relapersons.nil?
144
+ #
145
+ # result = []
146
+ # relapersons.each do |rr|
147
+ # title = rr.search('div.cr-title/span').first
148
+ # title = title.content unless title.nil?
149
+ # r = []
150
+ # rr.search('p.opr-relaperson-name/a').each do |p|
151
+ # r << p['title']
152
+ # end
153
+ # result << {:title => title, :names => r}
154
+ # end
155
+ # result
156
+ # end
157
+
158
+ # def _parse_right_weather(file)
159
+ # rw = file[:doc].search('div[@tpl="right_weather"]')
160
+ # return nil if rw.nil?
161
+ #
162
+ # rw = rw.first
163
+ # return nil if rw.nil?
164
+ #
165
+ # title = Serper::Helper.get_content_safe(rw.search('div.opr-weather-title'))
166
+ # week = rw.search('a.opr-weather-week').first['href']
167
+ #
168
+ # {:title => title, :week => week}
169
+ # end
170
+
171
# Describes every element matching "div#content_left .result-zxl".
#
# Each entry copies the element's id, srcid, fk, tpl and mu attributes and
# adds its parsed 'data-click' attribute under :data_click.
def _parse_zhixin(file)
  file[:doc].search("div#content_left .result-zxl").map do |node|
    entry = {}
    [:id, :srcid, :fk, :tpl, :mu].each { |key| entry[key] = node[key.to_s] }
    entry[:data_click] = Serper::Helper.parse_data_click(node['data-click'])
    entry
  end
end
184
+
185
+ end
@@ -0,0 +1,144 @@
1
module Serper
  # Weight calculation for Baidu result pages.
  #
  # Every _weight_of_* method takes the parsed result hash plus the running
  # rank counter of the current page side and returns [entries, side_rank]:
  # entries is an array of hashes (type, name, site, subdomain, path,
  # side_rank, part_rank, side_weight) and side_rank is the counter after
  # the last entry.
  class Baidu
    # Parts that make up each side of the page and the multipliers used by
    # the weight methods. A new hash is built on every call.
    def weight_config
      {
        :left_parts => [:ads_top,
                        :zhixin,
                        :ranks
        ],

        :right_parts => [:con_ar,
                         :ads_right
        ],

        :left_part_weight => 8,

        :right_part_weight => 2,

        :zhixin_weight => 3.5,

        :baiduopen_weight => 3,

        :rank_special_weight => 2,

        :con_ar_weight => 2
      }
    end

    # Weights of the :ranks entries.
    #
    # An entry is 'Special' when it is flagged :baiduopen or carries a
    # non-empty :mu (which then replaces its url), otherwise 'SEO'. The base
    # weight is 1/side_rank; Special entries are multiplied by
    # :baiduopen_weight or :rank_special_weight. Entries also carry :mu.
    def _weight_of_ranks(serp_result,side_rank)
      entries = serp_result[:ranks].map do |rank|
        side_rank += 1

        url = rank[:url].to_s
        mu = rank[:mu].to_s
        type = rank[:baiduopen] ? 'Special' : 'SEO'
        unless mu.empty?
          url = mu
          type = 'Special'
        end

        weight = 1.0/side_rank.to_f
        if type == 'Special'
          factor = rank[:baiduopen] ? :baiduopen_weight : :rank_special_weight
          weight = weight * weight_config[factor].to_f
        end

        {type: type, name: rank[:tpl].to_s}
          .merge(url_parts(url))
          .merge(mu: mu, side_rank: side_rank, part_rank: rank[:rank], side_weight: weight)
      end
      [entries, side_rank]
    end

    # Weights of the :ads_top entries: type 'SEM', weight 1/side_rank.
    def _weight_of_ads_top(serp_result,side_rank)
      weigh_ads(serp_result[:ads_top], side_rank)
    end

    # Weights of the :ads_right entries: type 'SEM', weight 1/side_rank.
    def _weight_of_ads_right(serp_result,side_rank)
      weigh_ads(serp_result[:ads_right], side_rank)
    end

    # Weights of the :con_ar entries: type 'Special', fixed weight
    # :con_ar_weight, url taken from the 'mu' field of the data_click hash.
    def _weight_of_con_ar(serp_result,side_rank)
      weigh_special(serp_result[:con_ar], side_rank, :con_ar_weight) { |con| con[:data_click]['mu'] }
    end

    # Weights of the :zhixin entries: type 'Special', fixed weight
    # :zhixin_weight, url taken from the entry's :mu.
    def _weight_of_zhixin(serp_result,side_rank)
      weigh_special(serp_result[:zhixin], side_rank, :zhixin_weight) { |zhixin| zhixin[:mu] }
    end

    private

    # Splits a url into the site/subdomain/path fields shared by all entries.
    def url_parts(url)
      {
        site: Serper::Helper.parse_site(url),
        subdomain: Serper::Helper.parse_subdomain(url),
        path: Serper::Helper.parse_path(url)
      }
    end

    # Shared body of the two ad weight methods.
    def weigh_ads(ads, side_rank)
      entries = ads.map do |ad|
        side_rank += 1
        {type: 'SEM', name: ''}
          .merge(url_parts(ad[:url].to_s))
          .merge(side_rank: side_rank, part_rank: ad[:rank], side_weight: 1.0/side_rank.to_f)
      end
      [entries, side_rank]
    end

    # Shared body of the fixed-weight 'Special' parts; the block returns the
    # url of an item, part_rank is the item's 1-based position.
    def weigh_special(items, side_rank, weight_key)
      weight = 1.0 * weight_config[weight_key]
      entries = items.each_with_index.map do |item, index|
        side_rank += 1
        {type: 'Special', name: item[:tpl]}
          .merge(url_parts(yield(item).to_s))
          .merge(side_rank: side_rank, part_rank: index + 1, side_weight: weight)
      end
      [entries, side_rank]
    end
  end
end
@@ -0,0 +1,84 @@
1
+ require 'httparty'
2
+
3
require 'yaml' # YAML is used below; do not rely on another file loading it first

module Serper
  # HTTP client for Baidu built on HTTParty. Redirects are not followed, so
  # callers can read the 'location' header themselves.
  class Crawler
    # User agent strings read from user_agents.yml next to this file.
    # load_file closes the file; YAML.load(open(...)) left the handle open.
    AllUserAgents = YAML.load_file(File.expand_path('../user_agents.yml',__FILE__))

    # Returns one randomly chosen user agent string.
    def self.rand_ua
      AllUserAgents[rand(AllUserAgents.size)]
    end

    include HTTParty
    base_uri 'www.baidu.com'
    follow_redirects false
    # NOTE(review): rand_ua is evaluated once, when the class is loaded, so
    # one process keeps the same User-Agent for all requests - confirm that
    # this is intended.
    headers "User-Agent" => self.rand_ua, "Referer" => 'http://www.baidu.com/'

    # Class-level shortcut for #get_serp on a fresh instance.
    def self.get_serp(url,retries = 3)
      self.new.get_serp(url,retries)
    end

    # Class-level shortcut for #get_rank_url on a fresh instance.
    def self.get_rank_url(url)
      self.new.get_rank_url(url)
    end

    # Requests url and returns the response. Errors raised by the request
    # are printed and the request is repeated after 10 seconds, without a
    # limit on the number of attempts.
    def get_rank_url(url)
      get_with_retry(url)
    end

    # Requests a result page and returns the response.
    #
    # A response whose code is not 200 is printed and retried through a new
    # call with retries - 1 after a pause of 1200-1259 seconds. Once the
    # retries are used up (nil comes back), the method sleeps for an hour
    # and starts over with the default number of retries.
    #
    # @param retries [Integer] remaining attempts; nil is returned at once
    #   when it is not positive
    def get_serp(url, retries = 3)
      return nil unless retries > 0

      response = get_with_retry(url)

      if response.code != 200
        puts response
        puts "Retry on URL: #{url}"
        sleep(rand(60)+1200)
        response = self.class.get_serp(url,retries - 1)
      end

      if response.nil?
        puts "Still error after 3 tries, sleep 3600s now."
        sleep(3600)
        response = self.class.get_serp(url)
      end

      # A check of the Content-Length header against the body size used to
      # follow here; it was disabled because Baidu stopped sending that
      # header.
      response
    end

    private

    # GET shared by both public request methods: prints the error class and
    # message, waits 10 seconds and tries again for as long as the request
    # raises a StandardError.
    def get_with_retry(url)
      self.class.get(url)
    rescue StandardError => e
      puts e.class
      puts e.message
      sleep(10)
      retry
    end
  end
end
@@ -0,0 +1,79 @@
1
+ require 'domainatrix'
2
+
3
module Serper
  # Stateless helpers shared by the parser and the weight code.
  module Helper
    class << self
      # Returns the stripped text content of the first node of a search
      # result, or nil when the result is nil or empty.
      def get_content_safe(noko)
        return nil if noko.nil? || noko.empty?
        noko.first.content.strip
      end

      # Parses the JSON-like value of a Baidu "data-click" style attribute:
      # single quotes are turned into double quotes and bare keys directly
      # after "{" or "," are quoted before the text is handed to JSON.parse.
      #
      # Returns {} for a nil or blank attribute (elements without the
      # attribute used to raise NoMethodError). Text that still is not valid
      # JSON raises JSON::ParserError.
      def parse_data_click(str)
        return {} if str.nil? || str.strip.empty?

        JSON.parse(str
                    .gsub("'",'"')
                    .gsub(/([{,])([a-zA-Z0-9_]+):/, '\1"\2":')
        )
      end

      # Normalizes the weights of an array of hashes in place, for example
      # [{a: 1, b: 2}, {a: 2, b: 3}]: every hash gets
      # normalized_col = weight_col / sum of weight_col.
      #
      # When the sum is zero every normalized value is 0.0 instead of NaN.
      #
      # @return the same array
      def normalize(data,weight_col=:weight,normalized_col=:normalized_weight)
        total_weight = data.reduce(0.0) { |sum, d| sum + d[weight_col].to_f }
        data.each do |d|
          d[normalized_col] = total_weight.zero? ? 0.0 : d[weight_col].to_f/total_weight
        end
        data
      end

      # Returns "<domain>.<public suffix>" of url, or '' when it cannot be
      # parsed.
      def parse_site(url)
        url_part('parse_site', url) { |parsed| parsed.domain + '.' + parsed.public_suffix }
      end

      # Returns the subdomain of url, or '' when it cannot be parsed.
      def parse_subdomain(url)
        url_part('parse_subdomain', url) { |parsed| parsed.subdomain }
      end

      # Returns the path of url, or '' when it cannot be parsed.
      def parse_path(url)
        url_part('parse_path', url) { |parsed| parsed.path }
      end

      private

      # Parses url with Domainatrix and yields the parsed object. Any
      # StandardError is reported on stdout under the calling helper's name
      # and turned into ''. Only StandardError is rescued so that interrupts
      # and exit requests are not swallowed.
      def url_part(label, url)
        yield Domainatrix.parse(url.to_s)
      rescue StandardError => e
        puts "#{label} from url error:"
        puts url
        puts e.class
        puts e.message
        ''
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'nokogiri'
3
+ require 'uri'
4
+ require 'json'
5
+ require 'serper/crawler'
6
+ require 'serper/helper'
7
+
8
module Serper
  # Fetches one result page for a keyword and turns it into structured
  # data with the help of the engine class registered in ENGINES.
  class Parser
    # NOTE(review): nothing in this class assigns @html or @doc, so those
    # two readers return nil - confirm whether they are still needed.
    attr_reader :engine_name, :keyword, :page, :html, :doc, :result

    # @param engine_name key of Serper::ENGINES
    # @param keyword search term
    # @param page result page number
    def initialize(engine_name,keyword,page=1)
      @engine_name = engine_name
      @engine = ENGINES[@engine_name].new
      @keyword = keyword
      @page = page
    end

    # URL of the result page, as built by the engine.
    def serp_url
      @engine.serp_url(@keyword,@page)
    end

    # Downloads the result page and parses it; returns the parsed hash.
    def search
      html = Crawler.get_serp(serp_url).body
      parse html
    end

    # Parses html with every public engine method named _parse_*.
    #
    # The result hash maps the method name without its "_parse_" prefix to
    # that method's return value; it is stored in @result and returned.
    # Note that html is re-encoded in place (encode!).
    #
    # @raise [RuntimeError] when an engine method fails; the page is saved
    #   under /tmp and the error details are printed first
    def parse(html)
      html = html.encode!('UTF-8','UTF-8',:invalid => :replace)
      @file = Hash.new
      @result = Hash.new

      @file[:html] = html
      @file[:doc] = Nokogiri::HTML(html)

      @engine.methods.each do |m|
        next unless m =~ /^_parse_/
        begin
          @result[m.to_s.sub('_parse_','').to_sym] = @engine.send m,@file
        rescue StandardError => e
          # StandardError instead of Exception: interrupts and exit
          # requests must not be reported as parser bugs.
          report_parse_failure(html, e)
          raise "Serper Parser Get An Error!"
        end
      end

      @result
    end

    # Builds the weight entries of the last parsed page.
    #
    # For each side (left, right) the engine's _weight_of_<part> methods
    # are called in the order given by weight_config; every entry is tagged
    # with :side and :part. Finally :weight is set to the entry's
    # :side_weight divided by the sum of all :side_weight values.
    def weights
      result = []
      [:left,:right].each do |side|
        side_rank = 0

        @engine.weight_config["#{side}_parts".to_sym].each do |part|
          rs,side_rank = @engine.send("_weight_of_#{part}",@result,side_rank)

          rs.each do |r|
            r[:side] = side.to_s
            r[:part] = part

            # NOTE(review): :weight is overwritten by normalize below, so
            # the <side>_part_weight factor has no effect on the result -
            # confirm whether :side_weight was meant to be scaled instead.
            r[:weight] = r[:weight].to_f * @engine.weight_config["#{side}_part_weight".to_sym].to_f
            result << r
          end
        end
      end
      Serper::Helper.normalize(result,:side_weight,:weight)
    end

    private

    # Saves the page that could not be parsed and prints a bug report
    # request. The block form of File.open closes the file afterwards.
    def report_parse_failure(html, error)
      issue_file = "/tmp/serper_issue_#{Time.now.strftime("%Y%m%d%H%M%S")}.html"
      File.open(issue_file,'w') { |f| f.puts(html) }
      puts "Notice:"
      puts "Serper gem have a bug, please email to zmingqian@qq.com to report it."
      puts "Please attach file #{issue_file} in the email and the error information below, thanks!"
      puts error.message
      puts error.inspect
      puts error.backtrace
    end
  end
end
@@ -0,0 +1,183 @@
1
+ ---
2
+ - 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/527 (KHTML, like Gecko,
3
+ Safari/419.3) Arora/0.6 (Change: )'
4
+ - Mozilla/5.0 (Windows; U; ; en-NZ) AppleWebKit/527 (KHTML, like Gecko, Safari/419.3)
5
+ Arora/0.8.0
6
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser; Avant Browser;
7
+ .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0; .NET CLR 2.0.50727; .NET
8
+ CLR 3.0.04506.30)
9
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.8 (KHTML, like Gecko) Beamrise/17.2.0.9
10
+ Chrome/17.0.939.0 Safari/535.8
11
+ - Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/18.6.872.0
12
+ Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0
13
+ - Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1
14
+ Safari/536.3
15
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0
16
+ Safari/536.6
17
+ - Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0
18
+ Safari/536.6
19
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1
20
+ Safari/537.1
21
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/28.0.1469.0
22
+ Safari/537.36
23
+ - Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/28.0.1469.0
24
+ Safari/537.36
25
+ - Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20120403211507 Firefox/12.0
26
+ - Mozilla/5.0 (Windows NT 6.0; rv:14.0) Gecko/20100101 Firefox/14.0.1
27
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1
28
+ - Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0
29
+ - Mozilla/5.0 (Windows NT 6.2; rv:19.0) Gecko/20121129 Firefox/19.0
30
+ - Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/20.0
31
+ - Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0
32
+ - Mozilla/5.0 (compatible; Konqueror/4.5; Windows) KHTML/4.5.4 (like Gecko)
33
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR
34
+ 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Maxthon
35
+ 2.0)
36
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.1 (KHTML, like Gecko)
37
+ Maxthon/3.0.8.2 Safari/533.1
38
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML like Gecko) Maxthon/4.0.0.2000
39
+ Chrome/22.0.1229.79 Safari/537.1
40
+ - Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
41
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)
42
+ - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727;
43
+ .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)
44
+ - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)
45
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/4.0)
46
+ - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)
47
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)
48
+ - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)
49
+ - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.2; Trident/5.0)
50
+ - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.2; WOW64; Trident/5.0)
51
+ - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media
52
+ Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)
53
+ - Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)
54
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0)
55
+ - Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1;
56
+ .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0
57
+ - Opera/9.25 (Windows NT 6.0; U; en)
58
+ - Opera/9.80 (Windows NT 5.2; U; en) Presto/2.2.15 Version/10.10
59
+ - Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.7.39 Version/11.00
60
+ - Opera/9.80 (Windows NT 6.1; U; en) Presto/2.7.62 Version/11.01
61
+ - Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10
62
+ - Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00
63
+ - Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14
64
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like
65
+ Gecko) Version/4.0.4 Safari/531.21.10
66
+ - Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like
67
+ Gecko) Version/5.0.1 Safari/533.17.8
68
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like
69
+ Gecko) Version/5.0.2 Safari/533.18.5
70
+ - Mozilla/5.0 (Windows; U; Windows NT 6.2; es-US ) AppleWebKit/540.0 (KHTML like Gecko)
71
+ Version/6.0 Safari/8900.00
72
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like
73
+ Firefox/3.x) SeaMonkey/2.0.12
74
+ - Mozilla/5.0 (Windows NT 5.2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1 SeaMonkey/2.7.1
75
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20120422 Firefox/12.0 SeaMonkey/2.9
76
+ - Avant Browser/1.2.789rel1 (http://www.avantbrowser.com)
77
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko)
78
+ Chrome/4.0.249.0 Safari/532.5
79
+ - Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko)
80
+ Chrome/5.0.310.0 Safari/532.9
81
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko)
82
+ Chrome/7.0.514.0 Safari/534.7
83
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like
84
+ Gecko) Chrome/9.0.601.0 Safari/534.14
85
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like
86
+ Gecko) Chrome/10.0.601.0 Safari/534.14
87
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like
88
+ Gecko) Chrome/11.0.672.2 Safari/534.20
89
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0
90
+ Safari/534.27
91
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24
92
+ Safari/535.1
93
+ - Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120
94
+ Safari/535.2
95
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36
96
+ Safari/535.7
97
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421
98
+ Minefield/3.0.2pre
99
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10
100
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11
101
+ (.NET CLR 3.5.30729)
102
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6
103
+ GTB5
104
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; tr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8
105
+ ( .NET CLR 3.5.30729; .NET4.0E)
106
+ - Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
107
+ - Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
108
+ - Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0
109
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110622 Firefox/6.0a2
110
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1
111
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
112
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre
113
+ - Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0 )
114
+ - Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90)
115
+ - Mozilla/5.0 (Windows; U; Windows XP) Gecko MultiZilla/1.6.1.0a
116
+ - Mozilla/2.02E (Win95; U)
117
+ - Mozilla/3.01Gold (Win95; I)
118
+ - Mozilla/4.8 [en] (Windows NT 5.1; U)
119
+ - Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.4) Gecko Netscape/7.1 (ax)
120
+ - Opera/7.50 (Windows XP; U)
121
+ - Opera/7.50 (Windows ME; U) [en]
122
+ - Opera/7.51 (Windows NT 5.1; U) [en]
123
+ - Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; en) Opera 8.0
124
+ - Mozilla/5.0 (Windows; U; WinNT4.0; en-US; rv:1.2b) Gecko/20021001 Phoenix/0.2
125
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.23) Gecko/20090825 SeaMonkey/1.1.18
126
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
127
+ Camino/2.2.1
128
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b6pre) Gecko/20100907 Firefox/4.0b6pre
129
+ Camino/2.2a1pre
130
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko)
131
+ Chrome/19.0.1063.0 Safari/536.3
132
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.4 (KHTML like Gecko)
133
+ Chrome/22.0.1229.79 Safari/537.4
134
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.31 (KHTML like Gecko)
135
+ Chrome/26.0.1410.63 Safari/537.31
136
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 1083) AppleWebKit/537.36 (KHTML like Gecko)
137
+ Chrome/28.0.1469.0 Safari/537.36
138
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
139
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:16.0) Gecko/20120813 Firefox/16.0
140
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:20.0) Gecko/20100101 Firefox/20.0
141
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0
142
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US) AppleWebKit/528.16 (KHTML, like
143
+ Gecko, Safari/528.16) OmniWeb/v622.8.0.112941
144
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/528.16 (KHTML,
145
+ like Gecko, Safari/528.16) OmniWeb/v622.8.0
146
+ - Opera/9.20 (Macintosh; Intel Mac OS X; U; en)
147
+ - Opera/9.80 (Macintosh; Intel Mac OS X; U; en) Presto/2.6.30 Version/10.61
148
+ - Opera/9.80 (Macintosh; Intel Mac OS X 10.4.11; U; en) Presto/2.7.62 Version/11.00
149
+ - Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52
150
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-us) AppleWebKit/531.21.8 (KHTML,
151
+ like Gecko) Version/4.0.4 Safari/531.21.10
152
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; de-de) AppleWebKit/534.15 (KHTML,
153
+ like Gecko) Version/5.0.3 Safari/533.19.4
154
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.20.25 (KHTML,
155
+ like Gecko) Version/5.0.4 Safari/533.20.27
156
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7; en-us) AppleWebKit/534.20.8 (KHTML,
157
+ like Gecko) Version/5.1 Safari/534.20.8
158
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like
159
+ Gecko) Version/5.1.3 Safari/534.53.10
160
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/536.26.17 (KHTML like
161
+ Gecko) Version/6.0.2 Safari/536.26.17
162
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.5; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
163
+ SeaMonkey/2.7.1
164
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.8 (KHTML,
165
+ like Gecko) Chrome/4.0.302.2 Safari/532.8
166
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML,
167
+ like Gecko) Chrome/6.0.464.0 Safari/534.3
168
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML,
169
+ like Gecko) Chrome/9.0.597.15 Safari/534.13
170
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko)
171
+ Chrome/14.0.835.186 Safari/535.1
172
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko)
173
+ Chrome/15.0.874.54 Safari/535.2
174
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko)
175
+ Chrome/16.0.912.36 Safari/535.7
176
+ - 'Mozilla/5.0 (Macintosh; U; Mac OS X Mach-O; en-US; rv:2.0a) Gecko/20040614 Firefox/3.0.0 '
177
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1) Gecko/20090624
178
+ Firefox/3.5
179
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.14) Gecko/20110218
180
+ AlexaToolbar/alxf-2.0 Firefox/3.6.14
181
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
182
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:5.0) Gecko/20100101 Firefox/5.0
183
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0) Gecko/20100101 Firefox/9.0
@@ -0,0 +1,3 @@
1
+ module Serper
2
+ VERSION = "0.1.0"
3
+ end
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: serper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - MingQian Zhang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: httparty
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: domainatrix
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: activerecord
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: docopt
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: ruby-progressbar
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Parse SERP result page.
98
+ email:
99
+ - zmingqian@qq.com
100
+ executables:
101
+ - serper
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - lib/serper/analyser.rb
106
+ - lib/serper/baidu/crawler.rb
107
+ - lib/serper/baidu/parser.rb
108
+ - lib/serper/baidu/weight.rb
109
+ - lib/serper/crawler.rb
110
+ - lib/serper/helper.rb
111
+ - lib/serper/parser.rb
112
+ - lib/serper/version.rb
113
+ - lib/serper.rb
114
+ - bin/serper
115
+ - README.md
116
+ - lib/serper/user_agents.yml
117
+ homepage: https://github.com/semseo/serper
118
+ licenses:
119
+ - MIT
120
+ metadata: {}
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - '>='
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - '>='
133
+ - !ruby/object:Gem::Version
134
+ version: '0'
135
+ requirements: []
136
+ rubyforge_project:
137
+ rubygems_version: 2.0.0
138
+ signing_key:
139
+ specification_version: 4
140
+ summary: SERP
141
+ test_files: []