serper 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 149badc447dec8ed55714a9ed6768e3f85a9b94e
4
+ data.tar.gz: 8204acacee0b068b61421c0bc59507ea30717895
5
+ SHA512:
6
+ metadata.gz: d329ccb1dbf584a4a100bf945740a5ff9ffc391a79f34aa598776cc5db901fdefdf0e04e26920060784c302fd9eb935dcf0a2a623a0acdda8b76bdc61c4235b2
7
+ data.tar.gz: 36a41f00ff396bde0e3c18587bd58359300591572ff346135f340520b16145272c622f228709d0fe72d770a2174f5bf48cc6c9e98e75fc3b22eef78e8ea14f41
@@ -0,0 +1 @@
1
+ 解析百度的搜索结果页面, 并返回结构化数据以进行后续分析.
@@ -0,0 +1,59 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'serper'
4
+ require 'optparse'
5
+ require 'json'
6
+ require 'pp'
7
+ require 'docopt'
8
+
9
+ cmd = File.basename(__FILE__)
10
+
11
+ doc = <<DOCOPT
12
+ 1. serper -s 'keyword' # search 'keyword' and print parse result
13
+ 2. serper -s 'keyword' -o output.json # -o means save result to a file
14
+ 3. serper -f 'file path' # parse html source code from file
15
+ 4. serper -s 'keyword' -j # search 'keyword' and print parse result in JSON format
16
+
17
+ Usage:
18
+ #{cmd} [options]
19
+
20
+ Options:
21
+ -h --help show this help message and exit
22
+ -v --version show version and exit
23
+ -a --analyse Name analyse as the given name
24
+ --keywords File uses with -a, import give keywords File before search
25
+ -s --search Keyword search Keyword and show result
26
+ -f --file File parse local file or given url
27
+ -j --json print JSON output
28
+ -o --output File output JSON result to File
29
+
30
+ DOCOPT
31
+
32
# Parse the command line. Docopt raises Docopt::Exit for --help, --version and
# for arguments it cannot match against the usage text; once its message has
# been printed there is nothing left to run, so stop here instead of carrying
# on with `options` unset.
begin
  options = Docopt::docopt(doc, version: Serper::VERSION)
rescue Docopt::Exit => e
  puts e.message
  exit
end

# Exactly one action is executed, picked in the order -a, -s, -f.
result =
  if options['--analyse']
    analyse = Serper.analyse(options['--analyse'])
    # --keywords is optional: only import when a file was actually given.
    analyse.import_keywords(options['--keywords']) if options['--keywords']
    # Serper::Analyser exposes #run (there is no #search on it).
    analyse.run
    'Analyse finished!'
  elsif options['--search']
    # Serper.search needs the engine name as well as the keyword; :baidu is
    # the only key registered in Serper::ENGINES. It returns the parser
    # object, whose #result holds the parsed data.
    Serper.search(:baidu, options['--search']).result
  elsif options['--file']
    if Serper.respond_to?(:parse_file)
      Serper.parse_file(options['--file'])
    else
      # NOTE(review): no Serper.parse_file is defined in the library files of
      # this version, so fall back to parsing the file contents directly.
      # Only local paths are handled by this fallback, not URLs.
      Serper::Parser.new(:baidu, nil).parse(File.read(options['--file']))
    end
  else
    puts "At least given one of -a/-s/-f"
    exit 1
  end

if options['--json']
  puts result.to_json
else
  pp result
end

# The block form closes the file even if serialisation raises.
if options['--output']
  File.open(options['--output'], 'w') { |out| out.puts result.to_json }
end
@@ -0,0 +1,26 @@
1
+ require "serper/version"
2
+ require "serper/parser"
3
+ require "serper/analyser"
4
+
5
# Require the crawler, parser and weight files of every supported engine.
# They live in serper/<engine>/ next to this file.
engine_root = File.expand_path('../serper', __FILE__)

[:baidu].each do |engine|
  %w[crawler parser weight].each do |component|
    require File.join(engine_root, engine.to_s, "#{component}.rb")
  end
end
10
+
11
module Serper
  # Engine name => class implementing that engine's URL building, parsing
  # and weighting.
  ENGINES = { :baidu => Baidu }

  class << self
    # Fetches and parses one result page.
    #
    # engine_name - key of ENGINES
    # keyword     - search term
    # page        - result page number (defaults to 1)
    #
    # Returns the Parser after its #search has run.
    def search(engine_name, keyword, page = 1)
      Parser.new(engine_name, keyword, page).tap(&:search)
    end

    # Builds an Analyser; `connection` is handed to Analyser.new unchanged.
    def analyse(connection)
      Analyser.new(connection)
    end
  end
end
26
+
@@ -0,0 +1,112 @@
1
+ require 'active_record'
2
+ require 'csv'
3
+ require 'date'
4
+ require 'yaml'
5
+ require 'ruby-progressbar'
6
+
7
module Serper
  # Stores keywords and the per-day weights computed from their result pages
  # in a database reached through ActiveRecord.
  class Analyser
    # connection - passed unchanged to ActiveRecord::Base.establish_connection
    def initialize(connection)
      ActiveRecord::Base.establish_connection(connection)
    end

    # Imports keywords from a CSV file. Columns are read by position:
    # term, pv, category, url_type, url_id. A row whose term already exists
    # is left as it is; the remaining columns are only set on newly created
    # records.
    def import_keywords(file)
      CSV.foreach(file) do |row|
        Keyword.find_or_create_by(:term => row[0]) do |record|
          record.pv       = row[1]
          record.category = row[2]
          record.url_type = row[3]
          record.url_id   = row[4]
        end
      end
    end

    # Runs search_engine for every engine registered in ENGINES.
    #
    # date - date the weights are recorded under (defaults to today)
    # skip - forwarded to search_engine
    def run(date=Date.today, skip=true)
      puts "Serper Analyser on #{date}"
      ENGINES.each_key do |engine_name|
        puts engine_name
        search_engine(engine_name, date, skip)
      end
    end

    # Searches every stored keyword on one engine and saves one Weight row per
    # weighted result.
    #
    # engine_name - key of ENGINES
    # date        - date the rows are recorded under
    # skip        - when true, keywords that already have rows for this
    #               engine and date are left alone; when false those rows are
    #               deleted and the keyword is searched again
    def search_engine(engine_name, date, skip=true)
      # Named `progress` so that Kernel#p is not shadowed inside this method.
      progress = ProgressBar.create(:title => "Searching #{engine_name} - #{date}",
                                    :total => Keyword.count,
                                    :format => '%t (%c/%C) %a %E |%w')

      # find_each walks the table in batches instead of loading every keyword.
      Keyword.find_each do |keyword|
        existing = Weight.where(:engine => engine_name, :date => date, :keyword_id => keyword.id)
        if existing.any?
          if skip
            # Skipped keywords still count as processed; without this the bar
            # never reaches its total on a resumed run.
            progress.increment
            next
          end
          existing.destroy_all
        end

        serp = Serper.search(engine_name, keyword.term)
        serp.weights.each do |w|
          Weight.create(:date => date,
                        :keyword_id => keyword.id,
                        :engine => engine_name,
                        :side => w[:side],
                        :part => w[:part],
                        :source => w[:type],
                        :name => w[:name],
                        :site => w[:site],
                        :subdomain => w[:subdomain],
                        :path => w[:path],
                        :part_rank => w[:part_rank],
                        :side_rank => w[:side_rank],
                        :side_weight => w[:side_weight],
                        :weight => w[:weight])
        end

        progress.increment
      end
    end

    # Creates the serper_keywords and serper_weights tables on the current
    # connection.
    def migrate!
      ActiveRecord::Schema.define do
        create_table :serper_keywords do |t|
          t.string :term
          t.integer :pv
          t.string :category
          t.string :url_type
          t.integer :url_id

          t.timestamps

          t.index :term
        end

        create_table :serper_weights do |t|
          t.date :date
          t.string :engine
          t.integer :keyword_id
          t.string :side # Left Right
          t.string :part
          t.string :source # SEO SEM Special
          t.string :name
          t.string :site
          t.string :subdomain
          t.string :path
          t.integer :part_rank
          t.integer :side_rank
          t.float :side_weight
          t.float :weight

          t.timestamps

          t.index [:date, :engine, :keyword_id, :side, :side_rank], name: 'weights_pk_index'
        end
      end
    end

    # Row of the serper_keywords table.
    class Keyword < ActiveRecord::Base
      self.table_name = 'serper_keywords'
    end

    # Row of the serper_weights table.
    class Weight < ActiveRecord::Base
      self.table_name = 'serper_weights'
    end
  end
end
@@ -0,0 +1,7 @@
1
require 'uri'

module Serper
  class Baidu
    # Builds the Baidu result-page URL for a keyword.
    #
    # keyword - search term; spaces become "+", every other reserved or
    #           non-ASCII character is percent-encoded
    # page    - result page number; pages after the first add a "pn"
    #           parameter of (page - 1) followed by "0" (page 2 => pn=10)
    #
    # The "inputT" parameter is a random integer from 1000 to 1999.
    #
    # Returns the URL as a String.
    def serp_url(keyword, page)
      params = [['wd', keyword.to_s]]

      page_number = page.to_i
      params << ['pn', "#{page_number - 1}0"] if page_number > 1

      params << ['ie', 'utf-8']
      params << ['inputT', 1000 + rand(1000)]

      # URI.escape no longer exists in Ruby 3.0+ and left "&", "+" and "#"
      # in the keyword unescaped; encode_www_form escapes each value.
      "http://www.baidu.com/s?#{URI.encode_www_form(params)}"
    end
  end
end
@@ -0,0 +1,185 @@
1
+ class Serper::Baidu
2
# Lists the ads found under div#ec_im_container.
#
# file - Hash whose :doc entry is the parsed document
#
# Returns an Array of {url:, rank:} hashes, rank counting from 1 in document
# order. url is '' whenever it cannot be extracted from the element's
# data-renzheng attribute.
def _parse_ads_right(file)
  badges = file[:doc].search('div#ec_im_container span a.c-icon.efc-cert')

  badges.each_with_index.map do |badge, index|
    url = begin
      click = Serper::Helper.parse_data_click(badge['data-renzheng'])
      Addressable::URI.parse(click['identity']['a']['url']).query_values['wd'].to_s.sub('@v','')
    rescue StandardError
      ''
    end

    {url: url, rank: index + 1}
  end
end
13
+
14
# Lists the ads that appear in div#content_left ahead of the first child
# with a positive numeric id.
#
# file - Hash whose :doc entry is the parsed document
#
# Returns an Array of {url:, rank:} hashes, rank counting from 1; an empty
# Array when the page has no div#content_left. url is '' whenever it cannot
# be extracted from the element's data-renzheng attribute.
def _parse_ads_top(file)
  container = file[:doc].search('div#content_left').first
  # Without this guard a page lacking the container raised NoMethodError.
  return [] if container.nil?

  ads = []
  container.children.each do |child|
    # Children with a positive numeric id are what _parse_ranks reads as
    # ranked results; ads are only collected before the first of them.
    break if child['id'].to_i > 0

    child.search('span a.c-icon.efc-cert').each do |badge|
      url = begin
        click = Serper::Helper.parse_data_click(badge['data-renzheng'])
        Addressable::URI.parse(click['identity']['a']['url']).query_values['wd'].to_s.sub('@v', '')
      rescue StandardError
        ''
      end

      ads << {url: url, rank: ads.size + 1}
    end
  end
  ads
end
28
+
29
# Collects the "result-op" children of div#content_right div#con-ar.
#
# file - Hash whose :doc entry is the parsed document
#
# Returns an Array of {:tpl, :data_click} hashes (tpl attribute and decoded
# data-click attribute); an empty Array when the container is absent.
def _parse_con_ar(file)
  container = file[:doc].search("div#content_right div#con-ar").first
  return [] if container.nil?

  container.children.each_with_object([]) do |node, found|
    next unless node['class'].to_s.include?('result-op')

    found << {
      :tpl => node['tpl'],
      :data_click => Serper::Helper.parse_data_click(node['data-click'])
    }
  end
end
41
+
42
+ # def _parse_pinpaizhuanqu(file)
43
+ # part = file[:doc].search("div[@id='content_left']").first
44
+ # return false if part.nil?
45
+ #
46
+ # part.children[2].name == 'script'
47
+ # end
48
+
49
# Extracts the ranked results from div#content_left: every direct child
# whose id attribute is an integer between 1 and 2999.
#
# file - Hash whose :doc entry is the parsed document
#
# Returns an Array of hashes with the keys :rank, :result_op, :fk, :srcid,
# :tpl, :mu, :url, :title, :content and :baiduopen; an empty Array when the
# container is absent.
def _parse_ranks(file)
  container = file[:doc].search("div[@id='content_left']").first
  return [] if container.nil?

  container.children.each_with_object([]) do |node, ranks|
    position = node['id'].to_i
    next unless position > 0 && position < 3000

    heading_link = node.search('h3/a').first
    url = heading_link && heading_link['href']
    if url && url.include?('//www.baidu.com/link?')
      # Pause only when a request is really made.
      sleep(rand)
      # Add a scheme only to protocol-relative links; prefixing a link that
      # already has one produced "http:http://...".
      target = url.start_with?('//') ? "http:#{url}" : url
      # The Location header of the redirect response is stored as the URL.
      url = Serper::Crawler.get_rank_url(target).headers['location']
    end

    ranks << {
      :rank => position,
      :result_op => node['class'].to_s.include?('result-op'),
      :fk => node['fk'],
      :srcid => node['srcid'],
      :tpl => node['tpl'],
      :mu => node['mu'],
      :url => url,
      :title => Serper::Helper.get_content_safe(node.search('h3')),
      :content => Serper::Helper.get_content_safe(node.search('div.c-abstract')),
      # true when any link inside the result points at open.baidu.com
      :baiduopen => node.search('a').any? { |link| link['href'].to_s.include?('open.baidu.com') }
    }
  end
end
92
+
93
+ # def _parse_related_keywords(file)
94
+ # result = []
95
+ # file[:doc].search('div[@id="rs"]').each do |rs|
96
+ # rs.css('a').each do |link|
97
+ # result << link.content
98
+ # end
99
+ # end
100
+ # result
101
+ # end
102
+
103
+ # def _parse_result_num(file)
104
+ # html = file[:html]
105
+ # str = html.scan(/百度为您找到相关结果(.*)个/).join
106
+ # str = str.gsub('约','')
107
+ # if str.include?('万')
108
+ # parts = str.split('万')
109
+ # result = parts[0].to_i * 10000 + parts[1].to_i
110
+ # else
111
+ # result = str.gsub(',', '').to_i
112
+ # end
113
+ #
114
+ # result
115
+ # end
116
+
117
+ # def _parse_right_hotel(file)
118
+ # rh = file[:doc].search('div[@tpl="right_hotel"]')
119
+ # return nil if rh.nil?
120
+ #
121
+ # rh = rh.first
122
+ # return nil if rh.nil?
123
+ # title = Serper::Helper.get_content_safe(rh.search('div.opr-hotel-title'))
124
+ #
125
+ # {:title => title}
126
+ # end
127
+
128
+ # def _parse_right_personinfo(file)
129
+ # rp = file[:doc].search('div[@tpl="right_personinfo"]')
130
+ # return nil if rp.nil?
131
+ #
132
+ # title = Serper::Helper.get_content_safe rp.search('span.opr-personinfo-subtitle-large')
133
+ # info_summary = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-summary')
134
+ # info = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-info')
135
+ # source = Serper::Helper.get_content_safe rp.search('div.opr-personinfo-source a')
136
+ #
137
+ # return nil if title.nil? && info.nil? && source.nil?
138
+ # {:title => title, :info_summary => info_summary, :info => info, :source => source}
139
+ # end
140
+
141
+ # def _parse_right_relaperson(file)
142
+ # relapersons = file[:doc].search('div[@tpl="right_relaperson"]')
143
+ # return nil if relapersons.nil?
144
+ #
145
+ # result = []
146
+ # relapersons.each do |rr|
147
+ # title = rr.search('div.cr-title/span').first
148
+ # title = title.content unless title.nil?
149
+ # r = []
150
+ # rr.search('p.opr-relaperson-name/a').each do |p|
151
+ # r << p['title']
152
+ # end
153
+ # result << {:title => title, :names => r}
154
+ # end
155
+ # result
156
+ # end
157
+
158
+ # def _parse_right_weather(file)
159
+ # rw = file[:doc].search('div[@tpl="right_weather"]')
160
+ # return nil if rw.nil?
161
+ #
162
+ # rw = rw.first
163
+ # return nil if rw.nil?
164
+ #
165
+ # title = Serper::Helper.get_content_safe(rw.search('div.opr-weather-title'))
166
+ # week = rw.search('a.opr-weather-week').first['href']
167
+ #
168
+ # {:title => title, :week => week}
169
+ # end
170
+
171
# Collects every .result-zxl element inside div#content_left.
#
# file - Hash whose :doc entry is the parsed document
#
# Returns an Array with one hash per element, holding its id, srcid, fk, tpl
# and mu attributes plus the decoded data-click attribute.
def _parse_zhixin(file)
  file[:doc].search("div#content_left .result-zxl").map do |node|
    {
      :id => node['id'],
      :srcid => node['srcid'],
      :fk => node['fk'],
      :tpl => node['tpl'],
      :mu => node['mu'],
      :data_click => Serper::Helper.parse_data_click(node['data-click'])
    }
  end
end
184
+
185
+ end
@@ -0,0 +1,144 @@
1
module Serper
  class Baidu
    # Weighting parameters. Built once and frozen: weight_config is called
    # for every result, so rebuilding the hash on each call was wasted work.
    WEIGHT_CONFIG = {
      # parts making up each side of the page, in ranking order
      :left_parts => [:ads_top, :zhixin, :ranks].freeze,
      :right_parts => [:con_ar, :ads_right].freeze,

      :left_part_weight => 8,
      :right_part_weight => 2,

      :zhixin_weight => 3.5,
      :baiduopen_weight => 3,
      :rank_special_weight => 2,
      :con_ar_weight => 2
    }.freeze

    # Returns the (frozen) weighting parameters.
    def weight_config
      WEIGHT_CONFIG
    end

    # Every _weight_of_* method takes the parsed result hash and the number of
    # entries already ranked on the same side of the page, and returns
    # [entries, new_side_rank]. Each entry is a hash with the keys :type,
    # :name, :site, :subdomain, :path, :side_rank, :part_rank and
    # :side_weight.

    # Weights the ranked results. An entry is 'Special' when it is flagged
    # :baiduopen or carries a non-empty :mu (which then replaces its URL),
    # otherwise 'SEO'. The base weight 1/side_rank is multiplied by
    # :baiduopen_weight or :rank_special_weight for Special entries. Entries
    # here also carry a :mu key.
    def _weight_of_ranks(serp_result, side_rank)
      entries = serp_result[:ranks].map do |rank|
        side_rank += 1

        mu = rank[:mu].to_s
        url = mu.empty? ? rank[:url].to_s : mu
        special = rank[:baiduopen] || !mu.empty?

        weight = 1.0 / side_rank
        if special
          factor = rank[:baiduopen] ? :baiduopen_weight : :rank_special_weight
          weight *= weight_config[factor].to_f
        end

        site, subdomain, path = url_parts(url)
        {type: special ? 'Special' : 'SEO', name: rank[:tpl].to_s,
         site: site, subdomain: subdomain, path: path, mu: mu,
         side_rank: side_rank, part_rank: rank[:rank], side_weight: weight}
      end
      [entries, side_rank]
    end

    # Weights the ads listed under :ads_top; see weigh_ads.
    def _weight_of_ads_top(serp_result, side_rank)
      weigh_ads(serp_result[:ads_top], side_rank)
    end

    # Weights the ads listed under :ads_right; see weigh_ads.
    def _weight_of_ads_right(serp_result, side_rank)
      weigh_ads(serp_result[:ads_right], side_rank)
    end

    # Weights the :con_ar entries: each gets the flat :con_ar_weight and the
    # URL found under 'mu' in its decoded data-click hash.
    def _weight_of_con_ar(serp_result, side_rank)
      weigh_special(serp_result[:con_ar], side_rank, 1.0 * weight_config[:con_ar_weight]) do |con|
        con[:data_click]['mu']
      end
    end

    # Weights the :zhixin entries: each gets the flat :zhixin_weight and the
    # URL stored in its :mu.
    def _weight_of_zhixin(serp_result, side_rank)
      weigh_special(serp_result[:zhixin], side_rank, 1.0 * weight_config[:zhixin_weight]) do |zhixin|
        zhixin[:mu]
      end
    end

    private

    # Splits a URL into [site, subdomain, path] through Serper::Helper.
    def url_parts(url)
      [Serper::Helper.parse_site(url),
       Serper::Helper.parse_subdomain(url),
       Serper::Helper.parse_path(url)]
    end

    # Builds 'SEM' entries weighted 1/side_rank from {url:, rank:} hashes.
    def weigh_ads(ads, side_rank)
      entries = ads.map do |ad|
        side_rank += 1
        site, subdomain, path = url_parts(ad[:url].to_s)
        {type: 'SEM', name: '', site: site, subdomain: subdomain, path: path,
         side_rank: side_rank, part_rank: ad[:rank], side_weight: 1.0 / side_rank}
      end
      [entries, side_rank]
    end

    # Builds 'Special' entries that all share one fixed weight. The block
    # returns the URL of an item; :part_rank is the item's 1-based position.
    def weigh_special(items, side_rank, weight)
      entries = items.each_with_index.map do |item, index|
        side_rank += 1
        site, subdomain, path = url_parts(yield(item).to_s)
        {type: 'Special', name: item[:tpl], site: site, subdomain: subdomain, path: path,
         side_rank: side_rank, part_rank: index + 1, side_weight: weight}
      end
      [entries, side_rank]
    end
  end
end
@@ -0,0 +1,84 @@
1
require 'yaml'

require 'httparty'
2
+
3
module Serper
  # HTTP client used to download result pages and to look up redirect
  # targets.
  class Crawler
    # User-Agent strings read from user_agents.yml next to this file.
    # load_file opens and closes the file itself; the previous
    # YAML.load(open(...)) never closed the handle.
    AllUserAgents = YAML.load_file(File.expand_path('../user_agents.yml', __FILE__))

    # Returns one randomly chosen User-Agent string.
    def self.rand_ua
      AllUserAgents.sample
    end

    include HTTParty
    base_uri 'www.baidu.com'
    follow_redirects false
    # NOTE(review): rand_ua is evaluated once, while this class body is
    # loaded, so every request of the process sends the same User-Agent.
    headers "User-Agent" => self.rand_ua, "Referer" => 'http://www.baidu.com/'

    # Convenience wrapper around #get_serp on a fresh instance.
    def self.get_serp(url,retries = 3)
      self.new.get_serp(url,retries)
    end

    # Convenience wrapper around #get_rank_url on a fresh instance.
    def self.get_rank_url(url)
      self.new.get_rank_url(url)
    end

    # Requests url once (redirects are not followed) and returns the
    # response. See request_until_answered for the error handling.
    def get_rank_url(url)
      request_until_answered(url)
    end

    # Downloads a result page.
    #
    # url     - page to fetch
    # retries - attempts left for non-200 answers
    #
    # A non-200 answer is printed and, after a pause of 1200 to 1259 seconds,
    # retried with one attempt fewer. When the attempts are used up, the
    # method sleeps for an hour and starts again with the default budget.
    #
    # Returns the response, or nil when called with retries <= 0.
    def get_serp(url, retries = 3)
      return nil unless retries > 0

      response = request_until_answered(url)

      if response.code != 200
        puts response
        puts "Retry on URL: #{url}"
        sleep(rand(60)+1200)
        response = self.class.get_serp(url,retries - 1)
      end

      if response.nil?
        puts "Still error after 3 tries, sleep 3600s now."
        sleep(3600)
        response = self.class.get_serp(url)
      end

      # A Content-Length versus body-size check used to follow here; it was
      # disabled because Baidu stopped sending Content-Length in its headers.
      response
    end

    private

    # Issues the GET request; on any StandardError prints the error class and
    # message, waits 10 seconds and tries again.
    # NOTE(review): the retry is unbounded, as before, so an error that can
    # never succeed (for example a malformed URL) loops forever.
    def request_until_answered(url)
      self.class.get(url)
    rescue StandardError => e
      puts e.class
      puts e.message
      sleep(10)
      retry
    end
  end
end
@@ -0,0 +1,79 @@
1
+ require 'domainatrix'
2
+
3
module Serper
  # Stateless helpers shared by the parsers and the weighting code.
  module Helper
    class << self
      # Returns the stripped text content of the first element of a search
      # result, or nil when the result is nil or empty.
      def get_content_safe(noko)
        return nil if noko.nil? || noko.empty?
        noko.first.content.strip
      end

      # Decodes the JSON-like value of a Baidu data-click style attribute:
      # single quotes are turned into double quotes and bare keys are quoted
      # before the text is handed to JSON.parse.
      #
      # Returns the decoded Hash, or {} when str is nil or blank (the
      # attribute is missing on the element).
      # Raises JSON::ParserError when the text still is not valid JSON.
      def parse_data_click(str)
        return {} if str.nil? || str.strip.empty?

        JSON.parse(str.gsub("'", '"').gsub(/([{,])([a-zA-Z0-9_]+):/, '\1"\2":'))
      end

      # Normalises the weights of an Array of hashes so that they sum to 1,
      # e.g. [{a: 1}, {a: 3}] normalised on :a gives 0.25 and 0.75.
      #
      # data           - Array of hashes; modified in place
      # weight_col     - key holding the raw weight
      # normalized_col - key the normalised weight is written to
      #
      # When the raw weights sum to zero every normalised weight is 0.0
      # (dividing would give NaN).
      #
      # Returns data.
      def normalize(data,weight_col=:weight,normalized_col=:normalized_weight)
        total_weight = data.inject(0.0) { |sum, d| sum + d[weight_col].to_f }
        data.each do |d|
          d[normalized_col] = total_weight.zero? ? 0.0 : d[weight_col].to_f / total_weight
        end
        data
      end

      # Returns "<domain>.<public suffix>" of url, or '' when it cannot be
      # parsed.
      def parse_site(url)
        parse_url_part(url, 'parse_site') { |parsed| parsed.domain + '.' + parsed.public_suffix }
      end

      # Returns the subdomain of url, or '' when it cannot be parsed.
      def parse_subdomain(url)
        parse_url_part(url, 'parse_subdomain') { |parsed| parsed.subdomain }
      end

      # Returns the path of url, or '' when it cannot be parsed.
      def parse_path(url)
        parse_url_part(url, 'parse_path') { |parsed| parsed.path }
      end

      private

      # Parses url with Domainatrix and yields the parsed object. On any
      # StandardError the failure is printed under the caller's label and ''
      # is returned. (Signals and exits are no longer swallowed, as they were
      # with `rescue Exception`.)
      def parse_url_part(url, label)
        yield Domainatrix.parse(url.to_s)
      rescue StandardError => e
        puts "#{label} from url error:"
        puts url
        puts e.class
        puts e.message
        ''
      end
    end
  end
end
@@ -0,0 +1,77 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'nokogiri'
3
+ require 'uri'
4
+ require 'json'
5
+ require 'serper/crawler'
6
+ require 'serper/helper'
7
+
8
module Serper
  # Fetches one result page of an engine, runs all of the engine's _parse_*
  # methods over it and turns the parsed data into weights.
  class Parser
    attr_reader :engine_name, :keyword, :page, :html, :doc, :result

    # engine_name - key of Serper::ENGINES
    # keyword     - search term
    # page        - result page number (defaults to 1)
    def initialize(engine_name,keyword,page=1)
      @engine_name = engine_name
      @engine = ENGINES[@engine_name].new
      @keyword = keyword
      @page = page
    end

    # URL of the result page, as built by the engine.
    def serp_url
      @engine.serp_url(@keyword,@page)
    end

    # Downloads the result page and parses it. Returns the parsed result.
    def search
      parse Crawler.get_serp(serp_url).body
    end

    # Parses an HTML result page.
    #
    # Every public engine method whose name starts with "_parse_" is called
    # with {:html, :doc}; its return value is stored under the method name
    # without that prefix.
    #
    # Returns the result Hash, which is also available through #result.
    # Raises RuntimeError when one of the engine methods fails; the page is
    # first written to a file under /tmp and the error is printed.
    def parse(html)
      # encode! works on the string it is given, so the caller's string is
      # modified in place.
      html = html.encode!('UTF-8','UTF-8',:invalid => :replace)

      # Fill the html/doc readers, which used to stay nil.
      @html = html
      @doc = Nokogiri::HTML(html)
      @file = {:html => @html, :doc => @doc}
      @result = {}

      @engine.methods.grep(/^_parse_/).each do |m|
        begin
          @result[m.to_s.sub('_parse_','').to_sym] = @engine.send(m, @file)
        rescue StandardError => e
          # StandardError rather than Exception: Ctrl-C and exit requests
          # must not be reported as parser bugs.
          report_parse_failure(html, e)
          raise "Serper Parser Get An Error!"
        end
      end

      @result
    end

    # Computes the weights of the last parsed page.
    #
    # For each side (:left, :right) the engine's _weight_of_<part> methods are
    # called in the order given by its weight_config. Each entry's
    # :side_weight is multiplied by the side's part weight, and the products
    # are normalised so that the :weight values of the page sum to 1.
    #
    # Returns an Array of hashes: the engine's entries with :side, :part and
    # :weight added.
    def weights
      entries = []
      [:left,:right].each do |side|
        side_rank = 0
        side_factor = @engine.weight_config["#{side}_part_weight".to_sym].to_f

        @engine.weight_config["#{side}_parts".to_sym].each do |part|
          rs,side_rank = @engine.send("_weight_of_#{part}",@result,side_rank)

          rs.each do |r|
            r[:side] = side.to_s
            r[:part] = part
            # The engines fill :side_weight, never :weight. Scaling the unset
            # :weight always gave 0.0 and the value was then overwritten, so
            # the side factor had no effect at all.
            r[:weight] = r[:side_weight].to_f * side_factor
            entries << r
          end
        end
      end
      Serper::Helper.normalize(entries,:weight,:weight)
    end

    private

    # Saves the page that could not be parsed and prints how to report it.
    def report_parse_failure(html, error)
      issue_file = "/tmp/serper_issue_#{Time.now.strftime("%Y%m%d%H%M%S")}.html"
      # The block form closes the file; the handle used to be left open.
      File.open(issue_file,'w') { |f| f.puts(html) }
      puts "Notice:"
      puts "Serper gem have a bug, please email to zmingqian@qq.com to report it."
      puts "Please attach file #{issue_file} in the email and the error information below, thanks!"
      puts error.message
      puts error.inspect
      puts error.backtrace
    end
  end
end
@@ -0,0 +1,183 @@
1
+ ---
2
+ - 'Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/527 (KHTML, like Gecko,
3
+ Safari/419.3) Arora/0.6 (Change: )'
4
+ - Mozilla/5.0 (Windows; U; ; en-NZ) AppleWebKit/527 (KHTML, like Gecko, Safari/419.3)
5
+ Arora/0.8.0
6
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1; Avant Browser; Avant Browser;
7
+ .NET CLR 1.0.3705; .NET CLR 1.1.4322; Media Center PC 4.0; .NET CLR 2.0.50727; .NET
8
+ CLR 3.0.04506.30)
9
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.8 (KHTML, like Gecko) Beamrise/17.2.0.9
10
+ Chrome/17.0.939.0 Safari/535.8
11
+ - Mozilla/5.0 (Windows NT 6.1) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/18.6.872.0
12
+ Safari/535.2 UNTRUSTED/1.0 3gpp-gba UNTRUSTED/1.0
13
+ - Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.3 (KHTML, like Gecko) Chrome/19.0.1061.1
14
+ Safari/536.3
15
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1092.0
16
+ Safari/536.6
17
+ - Mozilla/5.0 (Windows NT 6.2) AppleWebKit/536.6 (KHTML, like Gecko) Chrome/20.0.1090.0
18
+ Safari/536.6
19
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML, like Gecko) Chrome/22.0.1207.1
20
+ Safari/537.1
21
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/28.0.1469.0
22
+ Safari/537.36
23
+ - Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.36 (KHTML like Gecko) Chrome/28.0.1469.0
24
+ Safari/537.36
25
+ - Mozilla/5.0 (Windows NT 6.1; rv:12.0) Gecko/20120403211507 Firefox/12.0
26
+ - Mozilla/5.0 (Windows NT 6.0; rv:14.0) Gecko/20100101 Firefox/14.0.1
27
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:15.0) Gecko/20120427 Firefox/15.0a1
28
+ - Mozilla/5.0 (Windows NT 6.2; Win64; x64; rv:16.0) Gecko/16.0 Firefox/16.0
29
+ - Mozilla/5.0 (Windows NT 6.2; rv:19.0) Gecko/20121129 Firefox/19.0
30
+ - Mozilla/5.0 (Windows NT 6.2; rv:20.0) Gecko/20121202 Firefox/20.0
31
+ - Mozilla/5.0 (Windows NT 6.1; rv:21.0) Gecko/20130401 Firefox/21.0
32
+ - Mozilla/5.0 (compatible; Konqueror/4.5; Windows) KHTML/4.5.4 (like Gecko)
33
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR
34
+ 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; Maxthon
35
+ 2.0)
36
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/533.1 (KHTML, like Gecko)
37
+ Maxthon/3.0.8.2 Safari/533.1
38
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.1 (KHTML like Gecko) Maxthon/4.0.0.2000
39
+ Chrome/22.0.1229.79 Safari/537.1
40
+ - Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1)
41
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0)
42
+ - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 2.0.50727;
43
+ .NET CLR 3.0.04506.648; .NET CLR 3.5.21022; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)
44
+ - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0)
45
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/4.0)
46
+ - Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.0; Trident/4.0)
47
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0; Trident/5.0)
48
+ - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)
49
+ - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.2; Trident/5.0)
50
+ - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.2; WOW64; Trident/5.0)
51
+ - Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; Media
52
+ Center PC 6.0; InfoPath.3; MS-RTC LM 8; Zune 4.7)
53
+ - Mozilla/5.0 (compatible; MSIE 10.0; Windows NT 6.1; WOW64; Trident/6.0)
54
+ - Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; Trident/6.0)
55
+ - Mozilla/5.0 (compatible; MSIE 10.6; Windows NT 6.1; Trident/5.0; InfoPath.2; SLCC1;
56
+ .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729; .NET CLR 2.0.50727) 3gpp-gba UNTRUSTED/1.0
57
+ - Opera/9.25 (Windows NT 6.0; U; en)
58
+ - Opera/9.80 (Windows NT 5.2; U; en) Presto/2.2.15 Version/10.10
59
+ - Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.7.39 Version/11.00
60
+ - Opera/9.80 (Windows NT 6.1; U; en) Presto/2.7.62 Version/11.01
61
+ - Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10
62
+ - Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00
63
+ - Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14
64
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/531.21.8 (KHTML, like
65
+ Gecko) Version/4.0.4 Safari/531.21.10
66
+ - Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/533.17.8 (KHTML, like
67
+ Gecko) Version/5.0.1 Safari/533.17.8
68
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.19.4 (KHTML, like
69
+ Gecko) Version/5.0.2 Safari/533.18.5
70
+ - Mozilla/5.0 (Windows; U; Windows NT 6.2; es-US ) AppleWebKit/540.0 (KHTML like Gecko)
71
+ Version/6.0 Safari/8900.00
72
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-GB; rv:1.9.1.17) Gecko/20110123 (like
73
+ Firefox/3.x) SeaMonkey/2.0.12
74
+ - Mozilla/5.0 (Windows NT 5.2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1 SeaMonkey/2.7.1
75
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:12.0) Gecko/20120422 Firefox/12.0 SeaMonkey/2.9
76
+ - Avant Browser/1.2.789rel1 (http://www.avantbrowser.com)
77
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/532.5 (KHTML, like Gecko)
78
+ Chrome/4.0.249.0 Safari/532.5
79
+ - Mozilla/5.0 (Windows; U; Windows NT 5.2; en-US) AppleWebKit/532.9 (KHTML, like Gecko)
80
+ Chrome/5.0.310.0 Safari/532.9
81
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US) AppleWebKit/534.7 (KHTML, like Gecko)
82
+ Chrome/7.0.514.0 Safari/534.7
83
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US) AppleWebKit/534.14 (KHTML, like
84
+ Gecko) Chrome/9.0.601.0 Safari/534.14
85
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.14 (KHTML, like
86
+ Gecko) Chrome/10.0.601.0 Safari/534.14
87
+ - Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/534.20 (KHTML, like
88
+ Gecko) Chrome/11.0.672.2 Safari/534.20
89
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/534.27 (KHTML, like Gecko) Chrome/12.0.712.0
90
+ Safari/534.27
91
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/13.0.782.24
92
+ Safari/535.1
93
+ - Mozilla/5.0 (Windows NT 6.0) AppleWebKit/535.2 (KHTML, like Gecko) Chrome/15.0.874.120
94
+ Safari/535.2
95
+ - Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.7 (KHTML, like Gecko) Chrome/16.0.912.36
96
+ Safari/535.7
97
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0 x64; en-US; rv:1.9pre) Gecko/2008072421
98
+ Minefield/3.0.2pre
99
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.9.0.10) Gecko/2009042316 Firefox/3.0.10
100
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-GB; rv:1.9.0.11) Gecko/2009060215 Firefox/3.0.11
101
+ (.NET CLR 3.5.30729)
102
+ - Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6
103
+ GTB5
104
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; tr; rv:1.9.2.8) Gecko/20100722 Firefox/3.6.8
105
+ ( .NET CLR 3.5.30729; .NET4.0E)
106
+ - Mozilla/5.0 (Windows NT 6.1; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
107
+ - Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
108
+ - Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/5.0
109
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0a2) Gecko/20110622 Firefox/6.0a2
110
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:7.0.1) Gecko/20100101 Firefox/7.0.1
111
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
112
+ - Mozilla/5.0 (Windows NT 6.1; WOW64; rv:2.0b4pre) Gecko/20100815 Minefield/4.0b4pre
113
+ - Mozilla/4.0 (compatible; MSIE 5.5; Windows NT 5.0 )
114
+ - Mozilla/4.0 (compatible; MSIE 5.5; Windows 98; Win 9x 4.90)
115
+ - Mozilla/5.0 (Windows; U; Windows XP) Gecko MultiZilla/1.6.1.0a
116
+ - Mozilla/2.02E (Win95; U)
117
+ - Mozilla/3.01Gold (Win95; I)
118
+ - Mozilla/4.8 [en] (Windows NT 5.1; U)
119
+ - Mozilla/5.0 (Windows; U; Win98; en-US; rv:1.4) Gecko Netscape/7.1 (ax)
120
+ - Opera/7.50 (Windows XP; U)
121
+ - Opera/7.50 (Windows ME; U) [en]
122
+ - Opera/7.51 (Windows NT 5.1; U) [en]
123
+ - Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0; en) Opera 8.0
124
+ - Mozilla/5.0 (Windows; U; WinNT4.0; en-US; rv:1.2b) Gecko/20021001 Phoenix/0.2
125
+ - Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.23) Gecko/20090825 SeaMonkey/1.1.18
126
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
127
+ Camino/2.2.1
128
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0b6pre) Gecko/20100907 Firefox/4.0b6pre
129
+ Camino/2.2a1pre
130
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_0) AppleWebKit/536.3 (KHTML, like Gecko)
131
+ Chrome/19.0.1063.0 Safari/536.3
132
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_2) AppleWebKit/537.4 (KHTML like Gecko)
133
+ Chrome/22.0.1229.79 Safari/537.4
134
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.31 (KHTML like Gecko)
135
+ Chrome/26.0.1410.63 Safari/537.31
136
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 1083) AppleWebKit/537.36 (KHTML like Gecko)
137
+ Chrome/28.0.1469.0 Safari/537.36
138
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
139
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:16.0) Gecko/20120813 Firefox/16.0
140
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:20.0) Gecko/20100101 Firefox/20.0
141
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:21.0) Gecko/20100101 Firefox/21.0
142
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US) AppleWebKit/528.16 (KHTML, like
143
+ Gecko, Safari/528.16) OmniWeb/v622.8.0.112941
144
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_6; en-US) AppleWebKit/528.16 (KHTML,
145
+ like Gecko, Safari/528.16) OmniWeb/v622.8.0
146
+ - Opera/9.20 (Macintosh; Intel Mac OS X; U; en)
147
+ - Opera/9.80 (Macintosh; Intel Mac OS X; U; en) Presto/2.6.30 Version/10.61
148
+ - Opera/9.80 (Macintosh; Intel Mac OS X 10.4.11; U; en) Presto/2.7.62 Version/11.00
149
+ - Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52
150
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_2; en-us) AppleWebKit/531.21.8 (KHTML,
151
+ like Gecko) Version/4.0.4 Safari/531.21.10
152
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; de-de) AppleWebKit/534.15 (KHTML,
153
+ like Gecko) Version/5.0.3 Safari/533.19.4
154
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_6; en-us) AppleWebKit/533.20.25 (KHTML,
155
+ like Gecko) Version/5.0.4 Safari/533.20.27
156
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_7; en-us) AppleWebKit/534.20.8 (KHTML,
157
+ like Gecko) Version/5.1 Safari/534.20.8
158
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_3) AppleWebKit/534.55.3 (KHTML, like
159
+ Gecko) Version/5.1.3 Safari/534.53.10
160
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/536.26.17 (KHTML like
161
+ Gecko) Version/6.0.2 Safari/536.26.17
162
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.5; rv:10.0.1) Gecko/20100101 Firefox/10.0.1
163
+ SeaMonkey/2.7.1
164
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_5_8; en-US) AppleWebKit/532.8 (KHTML,
165
+ like Gecko) Chrome/4.0.302.2 Safari/532.8
166
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_4; en-US) AppleWebKit/534.3 (KHTML,
167
+ like Gecko) Chrome/6.0.464.0 Safari/534.3
168
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_5; en-US) AppleWebKit/534.13 (KHTML,
169
+ like Gecko) Chrome/9.0.597.15 Safari/534.13
170
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_2) AppleWebKit/535.1 (KHTML, like Gecko)
171
+ Chrome/14.0.835.186 Safari/535.1
172
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.2 (KHTML, like Gecko)
173
+ Chrome/15.0.874.54 Safari/535.2
174
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_8) AppleWebKit/535.7 (KHTML, like Gecko)
175
+ Chrome/16.0.912.36 Safari/535.7
176
+ - 'Mozilla/5.0 (Macintosh; U; Mac OS X Mach-O; en-US; rv:2.0a) Gecko/20040614 Firefox/3.0.0 '
177
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.1) Gecko/20090624
178
+ Firefox/3.5
179
+ - Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.6; en-US; rv:1.9.2.14) Gecko/20110218
180
+ AlexaToolbar/alxf-2.0 Firefox/3.6.14
181
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1
182
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:5.0) Gecko/20100101 Firefox/5.0
183
+ - Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:9.0) Gecko/20100101 Firefox/9.0
@@ -0,0 +1,3 @@
1
+ module Serper
2
+ VERSION = "0.1.0"
3
+ end
metadata ADDED
@@ -0,0 +1,141 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: serper
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - MingQian Zhang
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-07-29 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: httparty
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: domainatrix
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: activerecord
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '>='
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: docopt
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - '>='
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - '>='
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: ruby-progressbar
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ description: Parse SERP result page.
98
+ email:
99
+ - zmingqian@qq.com
100
+ executables:
101
+ - serper
102
+ extensions: []
103
+ extra_rdoc_files: []
104
+ files:
105
+ - lib/serper/analyser.rb
106
+ - lib/serper/baidu/crawler.rb
107
+ - lib/serper/baidu/parser.rb
108
+ - lib/serper/baidu/weight.rb
109
+ - lib/serper/crawler.rb
110
+ - lib/serper/helper.rb
111
+ - lib/serper/parser.rb
112
+ - lib/serper/version.rb
113
+ - lib/serper.rb
114
+ - bin/serper
115
+ - README.md
116
+ - lib/serper/user_agents.yml
117
+ homepage: https://github.com/semseo/serper
118
+ licenses:
119
+ - MIT
120
+ metadata: {}
121
+ post_install_message:
122
+ rdoc_options: []
123
+ require_paths:
124
+ - lib
125
+ required_ruby_version: !ruby/object:Gem::Requirement
126
+ requirements:
127
+ - - '>='
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ required_rubygems_version: !ruby/object:Gem::Requirement
131
+ requirements:
132
+ - - '>='
133
+ - !ruby/object:Gem::Version
134
+ version: '0'
135
+ requirements: []
136
+ rubyforge_project:
137
+ rubygems_version: 2.0.0
138
+ signing_key:
139
+ specification_version: 4
140
+ summary: SERP
141
+ test_files: []