position_inspector 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ .DS_Store
2
+ *.gem
3
+ *.rbc
4
+ .bundle
5
+ .config
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in position_inspector.gemspec
4
+ gemspec
5
+ gem 'nokogiri'
6
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Samuel Sanchez
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,36 @@
1
+ # PositionInspector
2
+
3
+ Inspect the positions of a domain in Google search engine results pages (SERPs).
4
+
5
+ ## Installation
6
+
7
+ Install it yourself as:
8
+
9
+ $ gem install position_inspector
10
+
11
+ ## Usage
12
+
13
+ Usage:
14
+
15
+ $ position_inspector -d yourdomain.tld -q "your keywords" -e google-tld
16
+
17
+ Examples:
18
+
19
+ $ position_inspector -d pagedegeek.com -q "page geek" -e fr
20
+
21
+ Advanced examples:
22
+
23
+ $ position_inspector -d pagedegeek.com -q "\"page de geek\"" -e fr -p 3 -s 0
24
+
25
+ ## Help
26
+
27
+ $ position_inspector -h
28
+
29
+ ## Contributing
30
+
31
+ 1. Fork it
32
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
33
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
34
+ 4. Push to the branch (`git push origin my-new-feature`)
35
+ 5. Create a new Pull Request
36
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,90 @@
1
#!/usr/bin/env ruby
#encoding: utf-8

# Command-line entry point of the position_inspector gem.
#
# Parses the command-line switches, builds the requested inspector and prints
# one line per search result; results whose :is_domain flag is set are
# prefixed with '*'.
#
# Exit status: 0 on success, 1 on error, 130 when interrupted.

require 'logger'
require 'optparse'

$:.unshift(File.dirname(__FILE__) + "/../lib")
require 'position_inspector'

# default values
options = {}
options[:inspector] = 'google'
options[:extension] = 'com'
options[:sleep_time] = nil # nil means "use the inspector's default pause"
options[:pages] = 5

OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} [options]"

  opts.on("--version", "Version of Position Inspector") do
    options[:print_version] = true
  end

  opts.on("-d", "--domain DOMAIN", String, "Inspect domain") do |domain|
    options[:domain] = domain
  end

  opts.on("-q", "--query KEYWORDS", String, "Keywords") do |keywords|
    options[:keywords] = keywords
  end

  # "--extention" is the historical (misspelled) switch; it stays accepted so
  # existing invocations keep working.
  opts.on("-e", "--extension EXTENSION", "--extention EXTENSION", String,
          "Extension (com, fr, es, etc)") do |ext|
    options[:extension] = ext
  end

  # The block parameter used to be named `ext` while the body read
  # `inspector`, which raised NameError as soon as -i was given.
  opts.on("-i", "--inspector INSPECTOR", String, "Inspector (currently only google)") do |inspector|
    options[:inspector] = inspector
  end

  opts.on("-s", "--sleep SLEEP_TIME", Integer, "Sleep time between request") do |sleep_time|
    options[:sleep_time] = sleep_time
  end

  opts.on("-p", "--pages PAGES_NUMBER", Integer, "Number of pages") do |pages|
    options[:pages] = pages
  end

  opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
    options[:verbose] = v
  end
end.parse!

include PositionInspector

# --version is a terminal action: print and stop instead of falling through
# into an inspection that would fail on the missing domain.
if options[:print_version]
  Cli.print_version
  exit
end

logger = Logger.new(STDOUT)
logger.level = Logger::ERROR
logger.level = Logger::INFO if options[:verbose]
logger.info 'Logger opened'

exit_code = 0

begin
  cli = Cli.new(logger)

  # Fail with a clear message rather than passing a nil inspector along.
  unless options[:inspector].downcase == 'google'
    raise ArgumentError, "inspector invalid (#{options[:inspector]})"
  end

  # An explicit nil would override the constructor default and end up in
  # `sleep nil`, so fall back to the library default here.
  sleep_time = options[:sleep_time] || Inspectors::SLEEP_TIME

  inspector = Inspectors::GoogleInspector.new(options[:domain],
    options[:keywords], options[:extension], logger,
    options[:pages], sleep_time)

  cli.positions(inspector).each do |r|
    marker = r[:is_domain] ? '*' : ' '
    puts "#{marker} [#{r[:position].to_s.rjust(3)}] #{r[:url][0..80]}"
  end
rescue Interrupt
  logger.error "Interrupted"
  exit_code = 130
rescue StandardError => e
  # StandardError only: SystemExit and other low-level exceptions must not be
  # swallowed and reported as application failures.
  logger.fatal "Message: #{e.message}"
  logger.error "Inspect: #{e.inspect}"
  e.backtrace.each do |line|
    logger.error "Backtrace: #{line}"
  end
  exit_code = 1
ensure
  logger.info "Close logger"
  logger.close
end

exit exit_code
90
+
@@ -0,0 +1,29 @@
1
+
2
module PositionInspector
  # Small facade used by the executable: runs an inspector and logs the
  # start, the end and any failure of the run.
  class Cli

    # Prints the gem version on standard output.
    def self.print_version
      puts "version #{PositionInspector::VERSION}"
    end

    # logger - object responding to #info and #error (e.g. a ::Logger).
    def initialize(logger)
      @logger = logger
    end

    # Runs the given inspector and returns whatever its #results returns.
    #
    # inspector - object responding to #results.
    #
    # Any exception raised by the inspector is logged and re-raised
    # unchanged. Only application-level errors are reported as
    # "Inspector error"; process-level exceptions such as SystemExit or
    # NoMemoryError propagate without being logged as inspector failures.
    def positions(inspector)
      @logger.info 'Start inspector'
      begin
        found = inspector.results
      rescue SignalException
        @logger.info 'Inspector stopped by signal'
        raise
      rescue StandardError, NotImplementedError => e
        # NotImplementedError is a ScriptError, not a StandardError; it is
        # listed explicitly so an object whose #results raises it is still
        # reported.
        @logger.error "Inspector error: #{e.message}"
        raise
      end
      @logger.info 'Stop inspector'
      found
    end

  end
end
@@ -0,0 +1,110 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'net/http'
4
+ require 'nokogiri'
5
+
6
+ module PositionInspector
7
+ module Inspectors
8
+ MAX_PAGES=5
9
+ SLEEP_TIME = 2
10
+
11
+ class Inspector
12
+ def results
13
+ raise NotImplementedError
14
+ end
15
+
16
+ def escape(keywords)
17
+ CGI.escape(keywords)
18
+ end
19
+
20
+ def is_equal_host?(host, url)
21
+ URI.parse(url).host.downcase.include? host.downcase
22
+ end
23
+
24
+ end
25
+
26
+ class GoogleInspector < Inspector
27
+ def initialize(domain, keywords, extension, logger, max_pages=MAX_PAGES, sleep_time=SLEEP_TIME)
28
+ @logger = logger
29
+ @domain = domain
30
+ @keywords = keywords
31
+ @extension = extension
32
+ @sleep_time = sleep_time
33
+ @max_pages = max_pages
34
+ @logger.info "Google inspector set domain: #{@domain}"
35
+ @logger.info "Google inspector set keywords: #{@keywords}"
36
+ @logger.info "Google inspector set extension: #{@extension}"
37
+
38
+ raise ArgumentError, "domain invalid (#{@domain})" if @domain.nil? || @domain.length <= 0
39
+ raise ArgumentError, "keywords invalid (#{@keywords})" if @keywords.nil? || @keywords.length <= 0
40
+ raise ArgumentError, "extension invalid (#{@extension})" if @extension.nil? || @extension.length <= 0
41
+ end
42
+
43
+ def results
44
+ positions(@keywords, @extension, @max_pages, @sleep_time)
45
+ end
46
+
47
+ private
48
+ def positions(keywords, extension, max_pages=MAX_PAGES, sleep_time=SLEEP_TIME)
49
+ google_url = "http://www.google.#{extension}"
50
+ @logger.info "Google inspector set google_url: #{google_url}"
51
+ @logger.info "Http query: #{escape(keywords)}"
52
+ search_path = "/search?q=#{escape(keywords)}"
53
+ @logger.info "Google inspector set search_page: #{search_path}"
54
+
55
+ array = []
56
+ max_pages.times do |i|
57
+ print '.'
58
+ page_idx = i
59
+ @logger.info "Google inspector start page: #{page_idx}"
60
+
61
+ sleep sleep_time
62
+
63
+ url = "#{google_url}#{search_path}&pws=0&start=#{(i*10)}"
64
+ @logger.info "Google inspector url: #{url}"
65
+ resp = Net::HTTP.get_response URI.parse(url)
66
+
67
+ doc = Nokogiri::HTML(resp.body)
68
+
69
+ pos_idx = 1
70
+ doc.css('li.g').each do |row|
71
+ @logger.info "Google inspector new entry"
72
+ a = row.search('h3 a').first
73
+
74
+ pos = page_idx * 10 + pos_idx
75
+ @logger.info "Google inspector pos: #{pos}"
76
+
77
+ title = a.text
78
+ @logger.info "Google inspector title: #{title}"
79
+
80
+ # desc = row.search('div.s').first.text
81
+ desc = ''
82
+
83
+ href = a['href']
84
+ if href[0..'/url'.length-1] == '/url'
85
+ gurl = "#{google_url}#{href}"
86
+ @logger.info "Google inspector good gurl: #{gurl}"
87
+ guri = URI.parse gurl
88
+ url = CGI.parse(guri.query)['q'][0]
89
+ @logger.info "Google inspector good url: #{url}"
90
+
91
+ array << {position: pos, url: url, title: title, description: desc, is_domain: is_equal_host?(@domain, url)}
92
+ else
93
+ url = "#{google_url}#{href}"
94
+ @logger.info "Google inspector bad url: #{url}"
95
+ array << {position: pos, url: url, title: '<google image or video>', description: '', is_domain: is_equal_host?(@domain, url)}
96
+ end
97
+ pos_idx += 1
98
+ end
99
+ end
100
+
101
+ puts
102
+ return array
103
+ rescue Exception => e
104
+ raise e
105
+ end
106
+
107
+ end
108
+
109
+ end
110
+ end
@@ -0,0 +1,3 @@
1
module PositionInspector
  # Release number of the gem; read by the gemspec and by Cli.print_version.
  # Frozen so the shared constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,5 @@
1
+
2
+ require "position_inspector/version"
3
+ require "position_inspector/cli"
4
+ require "position_inspector/inspectors"
5
+
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/position_inspector/version', __FILE__)
3
+
4
# Packaging manifest of the position_inspector gem.
Gem::Specification.new do |gem|
  gem.authors       = ["Sam"]
  gem.email         = ["samuel@pagedegeek.com"]
  gem.description   = %q{Inspect domain positions in google SERP}
  gem.summary       = %q{Inspect positions of domain in google SERP}
  gem.homepage      = "http://www.github.com/pagedegeek/position_inspector"
  # The gem ships an MIT LICENSE file; declare it in the metadata as well.
  gem.licenses      = ["MIT"]

  gem.files         = `git ls-files`.split($\)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.name          = "position_inspector"
  gem.require_paths = ["lib"]
  gem.version       = PositionInspector::VERSION

  # lib/position_inspector/inspectors.rb requires nokogiri at load time, so it
  # must be a runtime dependency; listing it only in the Gemfile leaves
  # `gem install position_inspector` with a LoadError.
  gem.add_dependency "nokogiri"
end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: position_inspector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sam
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-31 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Inspect domain positions in google SERP
15
+ email:
16
+ - samuel@pagedegeek.com
17
+ executables:
18
+ - position_inspector
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - Gemfile
24
+ - LICENSE
25
+ - README.markdown
26
+ - Rakefile
27
+ - bin/position_inspector
28
+ - lib/position_inspector.rb
29
+ - lib/position_inspector/cli.rb
30
+ - lib/position_inspector/inspectors.rb
31
+ - lib/position_inspector/version.rb
32
+ - position_inspector.gemspec
33
+ homepage: http://www.github.com/pagedegeek/position_inspector
34
+ licenses: []
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 1.8.24
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: Inspect positions of domain in google SERP
57
+ test_files: []