serp_scraper 0.0.0 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1f8b2245714d035db0eeb3853404259057c653cd
4
- data.tar.gz: c3ed1301196c9989edda91bfcdb51b7c6304726c
3
+ metadata.gz: 50907d75fe90b6a6eba27dcccf7da802e3d2b999
4
+ data.tar.gz: 69210b67fa80e1df6774600ffc798e93306a3a70
5
5
  SHA512:
6
- metadata.gz: 7448374ba90679e644e64f1eaec165a2eb5ee0e95f31bfe5654f58bb8218b065735d442a41bce6907340aeead2133e8ab6c0e4d36c9bb918f44d768f86919074
7
- data.tar.gz: 0a92203202b7cc0ef273973f58147bc57a21a84bf600a28f7f18526d7be3c2e24d143ef62d1f8012921aaa6a667079d71c80834305e4828742ee803183d8adde
6
+ metadata.gz: 030bba279587f40d63c258824c32ef6c0bed017e07a9330a008e6df292c556500b293bc01549a181aa4dd22e8a0ab38c4ec258030464848de117208735a788d9
7
+ data.tar.gz: cdffcc183ba38b153fce55257a508500dc0378582dc9547dd1c88e80f8dfed1a0a741712ad3e1a4b70271392e4c8cdabb5d0fe5f3f4984400db6c994b0eb49ac
@@ -0,0 +1 @@
1
+ *.gem
@@ -0,0 +1,25 @@
1
+ # SERP Scraper examples
2
+
3
+ ## Basic search
4
+ ```ruby
5
+ require 'serp_scraper'
6
+
7
+ s = SerpScraper.new(engine: 'google')
8
+ response = s.search('buy cars online')
9
+
10
+ response.results.each do |result|
11
+ puts result
12
+ # => {:position=>1, :title=>"Buying From CarMax", :scheme=>"https", :domain=>"www.carmax.com", :url=>"/car-buying-process", :full_url=>"https://www.carmax.com/car-buying-process"}
13
+ end
14
+ ```
15
+
16
+ ## Country/TLD specific search
17
+ ```ruby
18
+ # Uses google.se for Swedish results
19
+ s = SerpScraper.new(engine: 'google', tld: 'se')
20
+
21
+ # Set language to Swedish
22
+ s.engine.parameter('hl', 'sv')
23
+
24
+ response = s.search('köp bilar online')
25
+ ```
@@ -0,0 +1,48 @@
1
+ # SERP Scraper
2
+ SERP Scraper is a ruby library that extracts keyword rankings from Google.
3
+
4
+ ##### Supported search engines
5
+ * Google
6
+
7
+ ## Installation
8
+ Install 'SERP Scraper' from RubyGems:
9
+ ```sh
10
+ $ gem install serp_scraper
11
+ ```
12
+ Or include it in your project's Gemfile with Bundler:
13
+ ```ruby
14
+ gem 'serp_scraper'
15
+ ```
16
+
17
+ ## Examples
18
+ ```ruby
19
+ s = SerpScraper.new(engine: 'google')
20
+ res = s.search('buy cars online')
21
+ puts res.results[0]
22
+ # => {:position=>1, :title=>"Buying From CarMax", :scheme=>"https", :domain=>"www.carmax.com", :url=>"/car-buying-process", :full_url=>"https://www.carmax.com/car-buying-process"}
23
+ ```
24
+
25
+ If you are just starting, check out the [EXAMPLES](https://github.com/kjellberg/serp_scraper/blob/master/EXAMPLES.md) file for more examples.
26
+
27
+ ## Support
28
+ - [github.com/kjellberg/serp_scraper/issues](https://github.com/kjellberg/serp_scraper/issues)
29
+
30
+ ## Contribute
31
+ - [github.com/kjellberg/serp_scraper/issues](https://github.com/kjellberg/serp_scraper/issues)
32
+
33
+ ### Goals
34
+ - Add more search engines like Bing & Yahoo
35
+ - Add DeathByCaptcha support for captcha solving
36
+
37
+ ## Dependencies
38
+ - [mechanize](https://github.com/sparklemotion/mechanize)
39
+ - [nokogiri](https://github.com/sparklemotion/nokogiri)
40
+ - [addressable/uri](https://github.com/sporkmonger/addressable)
41
+
42
+ ## Credits
43
+ - [github.com/kjellberg](https://github.com/kjellberg)
44
+
45
+ *Make a [pull request](https://github.com/kjellberg/serp_scraper/#contribute) and add your name here :)*
46
+
47
+ ## License
48
+ This library is distributed under the MIT license.
@@ -0,0 +1,89 @@
1
# Scrapes Google search-result pages (SERPs) for a keyword.
# Uses Mechanize for HTTP, Nokogiri for HTML parsing and Addressable for URLs.
class SerpScraper::Google
  attr_accessor :tld
  attr_accessor :user_agent

  # tld - Google country domain suffix, e.g. "com" or "se" (-> www.google.se).
  def initialize(tld)
    self.tld = tld

    @browser = Mechanize.new { |agent|
      agent.user_agent_alias = 'Mac Safari'
    }

    # Default query parameters: basic-HTML mode (gbv=1), autocomplete off,
    # 100 results per page, personalization off (pws=0), UTF-8 in and out.
    @parameters = {
      'gbv'      => 1,
      'complete' => 0,
      'num'      => 100,
      'pws'      => 0,
      'nfrpr'    => 1,
      'ie'       => 'utf-8',
      'oe'       => 'utf-8',
      'site'     => 'webhp',
      'source'   => 'hp'
    }
  end

  # Runs the search. Returns a SerpScraper::SerpResponse on HTTP 200,
  # otherwise logs to stderr and returns nil (likely a captcha interstitial).
  def search(keyword)
    http_response = @browser.get(build_query_url_from_keyword(keyword))

    return build_serp_response(http_response) if http_response.code == "200"

    # @todo: Look for and solve captchas.
    warn "Did not get a 200 response. Maybe a captcha error?"
    nil
  end

  # Wraps a Mechanize page into a SerpResponse with parsed results.
  def build_serp_response(http_response)
    sr = SerpScraper::SerpResponse.new
    sr.keyword = @parameters['q']
    sr.user_agent = @browser.user_agent
    sr.url = http_response.uri.to_s
    sr.html = http_response.content
    sr.results = extract_results(sr.html)

    sr
  end

  # Parses organic result links (h3.r > a) out of the raw HTML.
  # Returns an array of hashes:
  #   {position:, title:, scheme:, domain:, url:, full_url:}
  # Rows that cannot be parsed as result links are skipped.
  def extract_results(html)
    doc = Nokogiri::HTML(html)
    results = []

    doc.css('h3.r > a').each_with_index do |row, i|
      begin
        href = Addressable::URI.parse(row["href"])

        # Google wraps the destination in a redirect URL; the real target
        # sits in the "url" (preferred, matching the original overwrite
        # order) or "q" query parameter.
        query = href.query_values || {}
        external_url = query['url'] || query['q']
        next if external_url.nil?

        url = Addressable::URI.parse(external_url)

        results.push({
          position: i + 1,
          title: row.content,
          scheme: url.scheme,
          domain: url.host,
          url: url.request_uri,
          full_url: url.to_s
        })
      rescue StandardError
        # Not a parseable organic result (ads, snippets, malformed href).
        next
      end
    end

    results
  end

  # Sets/overrides a single Google query parameter, e.g. parameter('hl', 'sv').
  def parameter(key, value)
    @parameters[key] = value
  end

  # Builds the full https://www.google.<tld>/search?... URL for a keyword.
  def build_query_url_from_keyword(keyword)
    @parameters['q'] = keyword

    uri = Addressable::URI.new
    uri.host = "www.google.#{tld}"
    uri.scheme = "https"
    uri.path = "/search"
    uri.query_values = @parameters
    uri.to_s
  end
end
@@ -0,0 +1,8 @@
1
# Plain value object describing one scraped SERP.
#
#   keyword    - the query string that was searched
#   url        - final URL the request resolved to
#   user_agent - UA string used for the request
#   proxy      - proxy used for the request, if any
#   results    - array of parsed result hashes
#   html       - raw HTML body of the response
class SerpScraper::SerpResponse
  attr_accessor :keyword, :url, :user_agent, :proxy, :results, :html
end
@@ -1,5 +1,37 @@
1
1
# Entry point: picks a search engine backend and delegates searches to it.
class SerpScraper
  attr_accessor :engine

  # params:
  #   :engine - search engine name; currently only "google" (the default)
  #   :tld    - country TLD for the engine, e.g. "com" or "se" (default "com")
  #
  # Raises ArgumentError for an unsupported engine instead of leaving
  # @engine nil and crashing with NoMethodError later in #search.
  def initialize(params = {})
    engine = params[:engine] || 'google'
    tld = params[:tld] || 'com'

    case engine
    when "google"
      @engine = Google.new(tld)
    else
      raise ArgumentError, "Unsupported search engine: #{engine}"
    end
  end

  # Delegates to the configured engine; returns a SerpResponse
  # (or nil if the engine could not complete the request).
  def search(keyword)
    @engine.search(keyword)
  end
end
18
+
19
# Manual smoke test: runs a live search on google.se in Swedish and
# returns the parsed results.
#
# NOTE(review): this performs real network I/O and is defined at the top
# level (it mixes into Object for every user of the gem) — it looks like
# leftover debug code and should probably not ship in the library file.
def test
  google = SerpScraper.new(engine: 'google', tld: 'se')

  # Set language to Swedish
  google.engine.parameter('hl', 'sv')

  # GO, FETCH!
  response = google.search("casino faktura")

  # Return search results
  response.results
end
31
+
32
+ require 'uri'
33
+ require 'mechanize'
34
+ require 'addressable/uri'
35
+ require 'nokogiri'
36
+ require 'engines/google'
37
+ require 'serp_response'
@@ -0,0 +1,21 @@
1
# Gem packaging for serp_scraper.
Gem::Specification.new do |s|
  s.name        = 'serp_scraper'
  s.version     = '0.0.2'
  s.date        = '2017-05-26'

  s.homepage    = 'https://github.com/kjellberg'
  s.summary     = %q{Get rankings from Search Engines}
  s.description = "SERP Scraper is a ruby library that extracts keyword rankings from Google."

  s.authors     = ["Rasmus Kjellberg"]
  s.email       = 'rk@youngmedia.se'
  s.license     = 'MIT'

  s.require_paths = ["lib"]
  s.files         = `git ls-files`.split($/)

  s.add_runtime_dependency 'mechanize', '~> 2.7', '>= 2.7.5'
  s.add_runtime_dependency 'addressable', '~> 2.5'
  # Nokogiri has never shipped a 2.x release, so '~> 2.9', '>= 2.9.4' can
  # never resolve; mechanize 2.7 itself depends on nokogiri ~> 1.6.
  s.add_runtime_dependency 'nokogiri', '~> 1.6', '>= 1.6.8'
end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: serp_scraper
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Rasmus Kjellberg
@@ -9,14 +9,74 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
  date: 2017-05-26 00:00:00.000000000 Z
12
- dependencies: []
13
- description: Scrape search engine keyword positions.
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mechanize
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '2.7'
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: 2.7.5
23
+ type: :runtime
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - "~>"
28
+ - !ruby/object:Gem::Version
29
+ version: '2.7'
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 2.7.5
33
+ - !ruby/object:Gem::Dependency
34
+ name: addressable
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '2.5'
40
+ type: :runtime
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '2.5'
47
+ - !ruby/object:Gem::Dependency
48
+ name: nokogiri
49
+ requirement: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '2.9'
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: 2.9.4
57
+ type: :runtime
58
+ prerelease: false
59
+ version_requirements: !ruby/object:Gem::Requirement
60
+ requirements:
61
+ - - "~>"
62
+ - !ruby/object:Gem::Version
63
+ version: '2.9'
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: 2.9.4
67
+ description: SERP Scraper is a ruby library that extracts keyword rankings from Google.
14
68
  email: rk@youngmedia.se
15
69
  executables: []
16
70
  extensions: []
17
71
  extra_rdoc_files: []
18
72
  files:
73
+ - ".gitignore"
74
+ - EXAMPLES.md
75
+ - README.md
76
+ - lib/engines/google.rb
77
+ - lib/serp_response.rb
19
78
  - lib/serp_scraper.rb
79
+ - serp_scraper.gemspec
20
80
  homepage: https://github.com/kjellberg
21
81
  licenses:
22
82
  - MIT
@@ -40,5 +100,5 @@ rubyforge_project:
40
100
  rubygems_version: 2.5.1
41
101
  signing_key:
42
102
  specification_version: 4
43
- summary: Scrape search engine keyword positions.
103
+ summary: Get rankings from Search Engines
44
104
  test_files: []