position_inspector 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ .DS_Store
2
+ *.gem
3
+ *.rbc
4
+ .bundle
5
+ .config
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,6 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in position_inspector.gemspec
4
+ gemspec
5
+ gem 'nokogiri'
6
+
data/LICENSE ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 Samuel Sanchez
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,36 @@
1
+ # PositionInspector
2
+
3
+ Inspect the positions of domains in Google search engine results pages (SERPs).
4
+
5
+ ## Installation
6
+
7
+ Install it yourself as:
8
+
9
+ $ gem install position_inspector
10
+
11
+ ## Usage
12
+
13
+ Usage:
14
+
15
+ $ position_inspector -d yourdomain.tld -q "your keywords" -e google-tld
16
+
17
+ Examples:
18
+
19
+ $ position_inspector -d pagedegeek.com -q "page geek" -e fr
20
+
21
+ Advanced examples:
22
+
23
+ $ position_inspector -d pagedegeek.com -q "\"page de geek\"" -e fr -p 3 -s 0
24
+
25
+ ## Help
26
+
27
+ $ position_inspector -h
28
+
29
+ ## Contributing
30
+
31
+ 1. Fork it
32
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
33
+ 3. Commit your changes (`git commit -am 'Added some feature'`)
34
+ 4. Push to the branch (`git push origin my-new-feature`)
35
+ 5. Create new Pull Request
36
+
data/Rakefile ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env rake
2
+ require "bundler/gem_tasks"
@@ -0,0 +1,90 @@
1
+ #!/usr/bin/env ruby
2
+ #encoding: utf-8
3
+
4
+ require 'logger'
5
+ require 'optparse'
6
+
7
+ $:.unshift(File.dirname(__FILE__) + "/../lib")
8
+ require 'position_inspector'
9
+
10
+ # default values
11
+ options = {}
12
+ options[:inspector] = 'google'
13
+ options[:extension] = 'com'
14
+ options[:sleep_time] = nil
15
+ options[:pages] = 5
16
+
17
+ OptionParser.new do |opts|
18
+ opts.banner = "Usage: #{$0} [options]"
19
+
20
+ opts.on("--version", "Version of Position Inspector") do |version|
21
+ options[:print_version] = true
22
+ end
23
+
24
+ opts.on("-d", "--domain DOMAIN", String, "Inspect domain") do |domain|
25
+ options[:domain] = domain
26
+ end
27
+
28
+ opts.on("-q", "--query KEYWORDS", String, "Keywords") do |keywords|
29
+ options[:keywords] = keywords
30
+ end
31
+
32
+ opts.on("-e", "--extention EXTENSION", String, "Extension (com, fr, es, etc)") do |ext|
33
+ options[:extension] = ext
34
+ end
35
+
36
+ opts.on("-i", "--inspector INSPECTOR", String, "Inspector (google, bing, etc)") do |ext|
37
+ options[:inspector] = inspector
38
+ end
39
+
40
+ opts.on("-s", "--sleep SLEEP_TIME", Integer, "Sleep time between request") do |sleep_time|
41
+ options[:sleep_time] = sleep_time
42
+ end
43
+
44
+ opts.on("-p", "--pages PAGES_NUMBER", Integer, "Number of pages") do |pages|
45
+ options[:pages] = pages
46
+ end
47
+
48
+ opts.on("-v", "--[no-]verbose", "Run verbosely") do |v|
49
+ options[:verbose] = v
50
+ end
51
+ end.parse!
52
+
53
+ include PositionInspector
54
+
55
+ Cli.print_version if options[:print_version]
56
+
57
+
58
+ begin
59
+ logger = Logger.new(STDOUT)
60
+ logger.level = Logger::ERROR
61
+ logger.level = Logger::INFO if options[:verbose]
62
+ logger.info 'Logger opened'
63
+
64
+ cli = Cli.new(logger)
65
+
66
+ if options[:inspector].downcase == 'google'
67
+ inspector = Inspectors::GoogleInspector.new(options[:domain],
68
+ options[:keywords], options[:extension], logger,
69
+ options[:pages], options[:sleep_time])
70
+ end
71
+
72
+ cli.positions(inspector).each do |r|
73
+ s = ' '
74
+ s = '*' if r[:is_domain]
75
+ puts "#{s} [#{r[:position].to_s.rjust(3)}] #{r[:url][0..80]}"
76
+ end
77
+
78
+ rescue Exception => e
79
+ logger.fatal "Message: #{e.message}"
80
+ logger.error "Inspect: #{e.inspect}"
81
+ e.backtrace.each do |line|
82
+ logger.error "Backtrace: #{line}"
83
+ end
84
+ ensure
85
+ unless logger.nil?
86
+ logger.info "Close logger"
87
+ logger.close
88
+ end
89
+ end
90
+
@@ -0,0 +1,29 @@
1
+
2
+ module PositionInspector
3
+ class Cli
4
+
5
+ def self.print_version
6
+ puts "version #{PositionInspector::VERSION}"
7
+ end
8
+
9
+ def initialize(logger)
10
+ @logger = logger
11
+ end
12
+
13
+ def positions(inspector)
14
+ @logger.info 'Start inspector'
15
+ begin
16
+ r = inspector.results
17
+ rescue SignalException => e
18
+ @logger.info 'Inspector stopped by signal'
19
+ raise e
20
+ rescue Exception => e
21
+ @logger.error "Inspector error: #{e.message}"
22
+ raise e
23
+ end
24
+ @logger.info 'Stop inspector'
25
+ return r
26
+ end
27
+
28
+ end
29
+ end
@@ -0,0 +1,110 @@
1
+ require 'cgi'
2
+ require 'uri'
3
+ require 'net/http'
4
+ require 'nokogiri'
5
+
6
+ module PositionInspector
7
+ module Inspectors
8
+ MAX_PAGES=5
9
+ SLEEP_TIME = 2
10
+
11
+ class Inspector
12
+ def results
13
+ raise NotImplementedError
14
+ end
15
+
16
+ def escape(keywords)
17
+ CGI.escape(keywords)
18
+ end
19
+
20
+ def is_equal_host?(host, url)
21
+ URI.parse(url).host.downcase.include? host.downcase
22
+ end
23
+
24
+ end
25
+
26
+ class GoogleInspector < Inspector
27
+ def initialize(domain, keywords, extension, logger, max_pages=MAX_PAGES, sleep_time=SLEEP_TIME)
28
+ @logger = logger
29
+ @domain = domain
30
+ @keywords = keywords
31
+ @extension = extension
32
+ @sleep_time = sleep_time
33
+ @max_pages = max_pages
34
+ @logger.info "Google inspector set domain: #{@domain}"
35
+ @logger.info "Google inspector set keywords: #{@keywords}"
36
+ @logger.info "Google inspector set extension: #{@extension}"
37
+
38
+ raise ArgumentError, "domain invalid (#{@domain})" if @domain.nil? || @domain.length <= 0
39
+ raise ArgumentError, "keywords invalid (#{@keywords})" if @keywords.nil? || @keywords.length <= 0
40
+ raise ArgumentError, "extension invalid (#{@extension})" if @extension.nil? || @extension.length <= 0
41
+ end
42
+
43
+ def results
44
+ positions(@keywords, @extension, @max_pages, @sleep_time)
45
+ end
46
+
47
+ private
48
+ def positions(keywords, extension, max_pages=MAX_PAGES, sleep_time=SLEEP_TIME)
49
+ google_url = "http://www.google.#{extension}"
50
+ @logger.info "Google inspector set google_url: #{google_url}"
51
+ @logger.info "Http query: #{escape(keywords)}"
52
+ search_path = "/search?q=#{escape(keywords)}"
53
+ @logger.info "Google inspector set search_page: #{search_path}"
54
+
55
+ array = []
56
+ max_pages.times do |i|
57
+ print '.'
58
+ page_idx = i
59
+ @logger.info "Google inspector start page: #{page_idx}"
60
+
61
+ sleep sleep_time
62
+
63
+ url = "#{google_url}#{search_path}&pws=0&start=#{(i*10)}"
64
+ @logger.info "Google inspector url: #{url}"
65
+ resp = Net::HTTP.get_response URI.parse(url)
66
+
67
+ doc = Nokogiri::HTML(resp.body)
68
+
69
+ pos_idx = 1
70
+ doc.css('li.g').each do |row|
71
+ @logger.info "Google inspector new entry"
72
+ a = row.search('h3 a').first
73
+
74
+ pos = page_idx * 10 + pos_idx
75
+ @logger.info "Google inspector pos: #{pos}"
76
+
77
+ title = a.text
78
+ @logger.info "Google inspector title: #{title}"
79
+
80
+ # desc = row.search('div.s').first.text
81
+ desc = ''
82
+
83
+ href = a['href']
84
+ if href[0..'/url'.length-1] == '/url'
85
+ gurl = "#{google_url}#{href}"
86
+ @logger.info "Google inspector good gurl: #{gurl}"
87
+ guri = URI.parse gurl
88
+ url = CGI.parse(guri.query)['q'][0]
89
+ @logger.info "Google inspector good url: #{url}"
90
+
91
+ array << {position: pos, url: url, title: title, description: desc, is_domain: is_equal_host?(@domain, url)}
92
+ else
93
+ url = "#{google_url}#{href}"
94
+ @logger.info "Google inspector bad url: #{url}"
95
+ array << {position: pos, url: url, title: '<google image or video>', description: '', is_domain: is_equal_host?(@domain, url)}
96
+ end
97
+ pos_idx += 1
98
+ end
99
+ end
100
+
101
+ puts
102
+ return array
103
+ rescue Exception => e
104
+ raise e
105
+ end
106
+
107
+ end
108
+
109
+ end
110
+ end
@@ -0,0 +1,3 @@
1
+ module PositionInspector
2
+ VERSION = '0.0.1' # gem release number; read by Cli.print_version and the gemspec
3
+ end
@@ -0,0 +1,5 @@
1
+
2
+ require "position_inspector/version"
3
+ require "position_inspector/cli"
4
+ require "position_inspector/inspectors"
5
+
@@ -0,0 +1,17 @@
1
+ # -*- encoding: utf-8 -*-
2
+ require File.expand_path('../lib/position_inspector/version', __FILE__)
3
+
4
+ Gem::Specification.new do |gem|
5
+ gem.authors = ["Sam"]
6
+ gem.email = ["samuel@pagedegeek.com"]
7
+ gem.description = %q{Inspect domain positions in google SERP}
8
+ gem.summary = %q{Inspect positions of domain in google SERP}
9
+ gem.homepage = "http://www.github.com/pagedegeek/position_inspector"
10
+
11
+ gem.files = `git ls-files`.split($\)
12
+ gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
13
+ gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
14
+ gem.name = "position_inspector"
15
+ gem.require_paths = ["lib"]
16
+ gem.version = PositionInspector::VERSION
17
+ end
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: position_inspector
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Sam
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-05-31 00:00:00.000000000 Z
13
+ dependencies: []
14
+ description: Inspect domain positions in google SERP
15
+ email:
16
+ - samuel@pagedegeek.com
17
+ executables:
18
+ - position_inspector
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - .gitignore
23
+ - Gemfile
24
+ - LICENSE
25
+ - README.markdown
26
+ - Rakefile
27
+ - bin/position_inspector
28
+ - lib/position_inspector.rb
29
+ - lib/position_inspector/cli.rb
30
+ - lib/position_inspector/inspectors.rb
31
+ - lib/position_inspector/version.rb
32
+ - position_inspector.gemspec
33
+ homepage: http://www.github.com/pagedegeek/position_inspector
34
+ licenses: []
35
+ post_install_message:
36
+ rdoc_options: []
37
+ require_paths:
38
+ - lib
39
+ required_ruby_version: !ruby/object:Gem::Requirement
40
+ none: false
41
+ requirements:
42
+ - - ! '>='
43
+ - !ruby/object:Gem::Version
44
+ version: '0'
45
+ required_rubygems_version: !ruby/object:Gem::Requirement
46
+ none: false
47
+ requirements:
48
+ - - ! '>='
49
+ - !ruby/object:Gem::Version
50
+ version: '0'
51
+ requirements: []
52
+ rubyforge_project:
53
+ rubygems_version: 1.8.24
54
+ signing_key:
55
+ specification_version: 3
56
+ summary: Inspect positions of domain in google SERP
57
+ test_files: []