PageRankr 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.md
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE.md
@@ -0,0 +1,24 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
22
+ .idea
23
+ doc
24
+ .yardoc
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 blatyo
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,54 @@
1
+ # PageRankr
2
+
3
+ Provides an easy way to retrieve Google Page Rank, Alexa Rank, and backlink counts.
4
+
5
+ ## Examples
6
+
7
+ ### Backlinks
8
+ Backlinks are the result of doing a search with a query like "link:www.google.com". The number of returned results indicates how many sites point to that url.
9
+
10
+ `PageRankr.backlinks('www.google.com', :google, :bing) #=> {:google=>161000, :bing=>208000000}`
11
+
12
+ `PageRankr.backlinks('www.google.com', :yahoo) #=> {:yahoo=>256300062}`
13
+
14
+ Valid search engines are: `:google, :bing, :yahoo, :altavista, :alltheweb, :alexa`.
15
+
16
+ ### Ranks
17
+ `PageRankr.ranks('www.google.com', :alexa, :google) #=> {:alexa=>1, :google=>10}`
18
+
19
+ There are two valid rank trackers supported: `:alexa, :google`.
20
+ Alexa ranks are descending, where 1 is the most popular. If a site has an Alexa rank of 0, then the site is unranked.
21
+ Google page ranks are in the range 0-10 where 10 is the most popular. If a site is unindexed then the rank will be -1.
22
+
23
+
24
+ ## Note on Patches/Pull Requests
25
+
26
+ * Fork the project.
27
+ * Make your feature addition or bug fix.
28
+ * Add tests for it. This is important so I don't break it in a
29
+ future version unintentionally.
30
+ * Commit, do not mess with rakefile, version, or history.
31
+ (If you want to have your own version, that is fine, but bump the version in a commit by itself so I can ignore it when I pull.)
32
+ * Send me a pull request. Bonus points for topic branches.
33
+
34
+ ## TODO
35
+ * <del>Get backlink counts for:</del>
36
+ * <del>Google</del>
37
+ * <del>Bing</del>
38
+ * <del>Yahoo!</del>
39
+ * <del>AltaVista</del>
40
+ * <del>AllTheWeb</del>
41
+ * <del>Alexa</del>
42
+ * <del>Get Google Page Rank</del>
43
+ * <del>Implement Hashing Algorithm</del>
44
+ * <del>Get Alexa ranking</del>
45
+
46
+ ## Shout Out
47
+ Gotta give credit where credit's due!
48
+ * http://github.com/alexmipego/PageRankSharp
49
+ * http://snipplr.com/view/18329/google-page-range-lookup/
50
+ * http://www.sitetoolcenter.com/free-website-scripts/ajax-pr-checker.php
51
+
52
+ ## Copyright
53
+
54
+ Copyright (c) 2010 Allen Madsen. See LICENSE for details.
@@ -0,0 +1,46 @@
1
require 'rubygems'
require 'rake'

# Gem packaging / release tasks. Jeweler is treated as optional: when it (or
# one of its dependencies) cannot be loaded, a hint is printed instead of
# aborting the whole Rakefile.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "PageRankr"
    gem.summary = %Q{Easy way to retrieve Google Page Rank, Alexa Rank, and backlink counts}
    gem.description = %Q{Easy way to retrieve Google Page Rank, Alexa Rank, and backlink counts}
    gem.email = "blatyo@gmail.com"
    gem.homepage = "http://github.com/blatyo/page_rankr"
    gem.authors = ["Allen Madsen"]
    gem.add_dependency "nokogiri", ">= 1.4.1"
    gem.add_development_dependency "rspec", ">= 1.2.9"
    gem.add_development_dependency "yard", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Spec tasks: `rake spec` runs every *_spec.rb under spec/, `rake rcov` runs
# the same files with rcov enabled.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# Nothing in this file defines :check_dependencies itself (presumably it comes
# from the Jeweler tasks above -- confirm). Since loading Jeweler is allowed to
# fail, only add the prerequisite when the task really exists; otherwise
# `rake spec` would abort on an unknown prerequisite.
task(:spec => :check_dependencies) if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

# Documentation task. When YARD is missing, `rake yardoc` still exists but
# aborts with installation instructions.
begin
  require 'yard'
  YARD::Rake::YardocTask.new
rescue LoadError
  task :yardoc do
    abort "YARD is not available. In order to run yardoc, you must: sudo gem install yard"
  end
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 1.0.0
@@ -0,0 +1,25 @@
1
+ path = File.expand_path(File.dirname(__FILE__)) + '/page_rankr/'
2
+ require path + 'backlinks'
3
+ require path + 'alexa'
4
+ require path + 'google'
5
+
6
# Public entry points of the gem: backlink counts and rank lookups.
module PageRankr
  class << self
    # Returns the backlink counts for +site+.
    #
    # site           - the site to look up, passed through unchanged.
    # search_engines - zero or more search engine symbols.
    #
    # Simply forwards to Backlinks.lookup and returns whatever it returns.
    def backlinks(site, *search_engines)
      Backlinks.lookup(site, *search_engines)
    end

    # Returns a Hash mapping each recognised tracker to the rank of +site+.
    #
    # site          - the site to look up, passed through unchanged.
    # rank_trackers - zero or more of :google and :alexa; any other value is
    #                 skipped and leaves no entry in the result.
    def ranks(site, *rank_trackers)
      rank_trackers.inject({}) do |found, tracker|
        if :google == tracker
          found[tracker] = Google.lookup(site)
        elsif :alexa == tracker
          found[tracker] = Alexa.lookup(site)
        end
        found
      end
    end
  end
end
@@ -0,0 +1,14 @@
1
+ require 'open-uri'
2
+ require 'cgi'
3
+ require 'nokogiri'
4
+
5
module PageRankr
  # Looks up the Alexa rank of a site through the data.alexa.com endpoint.
  class Alexa
    class << self
      # Returns the Alexa rank of +site+ as an Integer.
      #
      # site - the site to look up; it is CGI-escaped before being appended
      #        to the query string.
      #
      # The response is parsed with Nokogiri::HTML and the +rank+ attribute of
      # the +reach+ element(s) is converted with +to_s.to_i+, so the result is
      # 0 when no such attribute is present.
      #
      # Errors raised by the HTTP request are not rescued here.
      def lookup(site)
        url = "http://data.alexa.com/data?cli=10&dat=snbamz&url="
        # URI#open (from open-uri) in block form closes the response handle
        # once parsing is done, and keeps working on Rubies where Kernel#open
        # no longer accepts URLs.
        URI.parse(url + CGI.escape(site)).open do |response|
          Nokogiri::HTML(response).search("//reach/@rank").to_s.to_i
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require 'open-uri'
2
+ require 'cgi'
3
+ require 'nokogiri'
4
+
5
module PageRankr
  # Retrieves backlink counts for a site from several search engines.
  class Backlinks
    # Query URL prefix per search engine; the escaped site is appended to it.
    SEARCH_ENGINE_URLS = {
      :google    => "http://www.google.com/search?q=link%3A",
      :bing      => "http://www.bing.com/search?q=link%3A",
      :yahoo     => "http://siteexplorer.search.yahoo.com/search?p=",
      :altavista => "http://www.altavista.com/web/results?q=link%3A",
      :alltheweb => "http://www.alltheweb.com/search?q=link%3A",
      :alexa     => "http://data.alexa.com/data?cli=10&dat=snbamz&url="
    }

    # XPath expression per search engine that selects the node holding the
    # result count in the response document.
    SEARCH_ENGINE_PATHS = {
      :google    => "//p[@id='resultStats']/b[3]/text()",
      :bing      => "//span[@class='sb_count']/text()",
      :yahoo     => "//ol[@id='results-tab']/li[2]/a/text()",
      :altavista => "//a[@class='lbl']/text()",
      :alltheweb => "//span[@class='ofSoMany']/text()",
      :alexa     => "//linksin/@num"
    }

    # Original, misspelled name of SEARCH_ENGINE_PATHS. Kept (pointing at the
    # same Hash) so existing references keep working.
    SEARCH_EGNINE_PATHS = SEARCH_ENGINE_PATHS

    # Returns a Hash mapping each requested search engine to its backlink
    # count (Integer) for +site+.
    #
    # site           - the site to look up; CGI-escaped before being appended
    #                  to the engine's URL prefix.
    # search_engines - zero or more keys of SEARCH_ENGINE_URLS; engines without
    #                  an entry there are skipped and get no key in the result.
    #
    # A count of 0 is reported when the XPath matches nothing or the matched
    # text contains no leading digits after clean-up. Errors raised by the
    # HTTP request are not rescued here.
    def self.lookup(site, *search_engines)
      backlinks = {}
      search_engines.each do |engine|
        base_url = SEARCH_ENGINE_URLS[engine]
        next unless base_url

        # Block form closes the response handle as soon as the count text has
        # been extracted; URI#open (open-uri) also works where Kernel#open no
        # longer accepts URLs.
        count = URI.parse(base_url + CGI.escape(site)).open do |response|
          Nokogiri::HTML(response).at(SEARCH_ENGINE_PATHS[engine]).to_s
        end

        # Drop the literal "1-10" (presumably a "results 1-10 of N" prefix --
        # confirm against the engines' markup), then letters, commas,
        # whitespace and parentheses, leaving the digits of the count.
        count = count.gsub('1-10', '').gsub(/[a-zA-Z,\s\(\)]/, '')
        backlinks[engine] = count.to_i
      end
      backlinks
    end
  end
end
@@ -0,0 +1,24 @@
1
+ path = File.expand_path(File.dirname(__FILE__)) + '/google/'
2
+ require "open-uri"
3
+ require path + 'checksum'
4
+
5
# CGI.escape is used below to build the query string.
require 'cgi'

module PageRankr
  # Looks up the Google PageRank of a site via the toolbar query endpoint.
  module Google
    class << self
      # Returns the Google PageRank of +site+ as an Integer.
      #
      # site - the site to look up.
      #
      # Returns -1 when the response contains no "Rank_x:y:z" entry or when
      # the request raises a StandardError (best-effort lookup). Errors raised
      # while generating the checksum are not rescued.
      def lookup(site)
        checksum = Checksum.generate(site)
        begin
          # URI#open (open-uri) in block form closes the response handle and
          # works on Rubies where Kernel#open no longer accepts URLs.
          body = URI.parse(url(site, checksum)).open { |io| io.read }
          rank = body[/Rank_\d+:\d+:(\d+)/, 1]
          rank ? rank.to_i : -1
        rescue StandardError
          -1
        end
      end

      private

      # Builds the toolbar query URL. The site is escaped so characters such
      # as "&", "#" or spaces cannot break the query string; the checksum is
      # computed from the unescaped site by the caller.
      def url(site, checksum)
        "http://toolbarqueries.google.com/search?client=navclient-auto&ch=#{checksum}&features=Rank&q=info:#{CGI.escape(site)}"
      end
    end
  end
end
@@ -0,0 +1,74 @@
1
module PageRankr
  module Google
    # Computes the checksum string that accompanies a PageRank query.
    class Checksum
      class << self
        # Returns the checksum for +site+ as a String: the character "6"
        # followed by the decimal form of a 32-bit unsigned hash of
        # "info:" + site.
        #
        # site - the site String to hash.
        def generate(site)
          bytes = byte_array('info:' + site)
          length = bytes.length
          a = b = 0x9E3779B9
          c = 0xE6359A60

          # Consume the input twelve bytes (three little-endian 32-bit words)
          # at a time.
          offset = 0
          remaining = length
          until remaining < 12
            a = m(a + word_at(bytes, offset))
            b = m(b + word_at(bytes, offset + 4))
            c = m(c + word_at(bytes, offset + 8))

            a, b, c = mix(a, b, c)
            offset += 12
            remaining -= 12
          end

          # The total length is folded into c before the trailing bytes.
          c += length

          a, b, c = toss(a, b, c, bytes, remaining, offset)
          "6#{mix(a, b, c)[2]}"
        end

        private

        # Returns the bytes of +site+ as an Array of Integers.
        def byte_array(site)
          site.unpack('C*')
        end

        # Combines the four bytes starting at +index+ into one little-endian
        # integer.
        def word_at(bytes, index)
          bytes[index] +
            (bytes[index + 1] << 8) +
            (bytes[index + 2] << 16) +
            (bytes[index + 3] << 24)
        end

        # Need to keep numbers in the unsigned int 32 range
        def m(v)
          v & 0xFFFFFFFF
        end

        # Runs three rounds of subtract/xor/shift over the three state words
        # and returns them as [a, b, c]. Each row below holds the shift
        # amounts for one round: c >> first, a << second, b >> third.
        def mix(a, b, c)
          a, b, c = m(a), m(b), m(c)

          [[13, 8, 13], [12, 16, 5], [3, 10, 15]].each do |c_shift, a_shift, b_shift|
            a = m(a - b - c) ^ m(c >> c_shift)
            b = m(b - c - a) ^ m(a << a_shift)
            c = m(c - a - b) ^ m(b >> b_shift)
          end

          [a, b, c]
        end

        # Adds the last +len+ bytes (those left over after the 12-byte
        # rounds, starting at index +k+) into the state, highest position
        # first: positions 9..11 go into c, 5..8 into b and 1..4 into a.
        # Returns [a, b, c]; a +len+ outside 1..11 leaves the state untouched.
        def toss(a, b, c, bytes, len, k)
          pos = len
          while pos.between?(1, 11)
            byte = bytes[k + pos - 1]
            if pos >= 9
              c += (byte << ((pos % 8) * 8))
            elsif pos >= 5
              b += (byte << ((pos % 5) * 8))
            else
              a += (byte << ((pos - 1) * 8))
            end
            pos -= 1
          end
          [a, b, c]
        end
      end
    end
  end
end
@@ -0,0 +1,7 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Offline examples only: none of them triggers an HTTP request, because every
# lookup below is called either without trackers/engines or with one the
# library does not recognise.
describe "PageRankr" do
  it "returns an empty hash when no rank trackers are requested" do
    PageRankr.ranks('www.google.com').should == {}
  end

  it "skips rank trackers it does not know" do
    PageRankr.ranks('www.google.com', :unknown).should == {}
  end

  it "returns an empty hash when no search engines are requested" do
    PageRankr.backlinks('www.google.com').should == {}
  end

  it "skips search engines it does not know" do
    PageRankr.backlinks('www.google.com', :unknown).should == {}
  end

  it "builds a checksum made of a leading 6 followed by digits" do
    PageRankr::Google::Checksum.generate('www.google.com').should =~ /\A6\d+\z/
  end
end
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'page_rankr'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ end
metadata ADDED
@@ -0,0 +1,116 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: PageRankr
3
+ version: !ruby/object:Gem::Version
4
+ prerelease: false
5
+ segments:
6
+ - 1
7
+ - 0
8
+ - 0
9
+ version: 1.0.0
10
+ platform: ruby
11
+ authors:
12
+ - Allen Madsen
13
+ autorequire:
14
+ bindir: bin
15
+ cert_chain: []
16
+
17
+ date: 2010-04-18 00:00:00 -04:00
18
+ default_executable:
19
+ dependencies:
20
+ - !ruby/object:Gem::Dependency
21
+ name: nokogiri
22
+ prerelease: false
23
+ requirement: &id001 !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ segments:
28
+ - 1
29
+ - 4
30
+ - 1
31
+ version: 1.4.1
32
+ type: :runtime
33
+ version_requirements: *id001
34
+ - !ruby/object:Gem::Dependency
35
+ name: rspec
36
+ prerelease: false
37
+ requirement: &id002 !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - ">="
40
+ - !ruby/object:Gem::Version
41
+ segments:
42
+ - 1
43
+ - 2
44
+ - 9
45
+ version: 1.2.9
46
+ type: :development
47
+ version_requirements: *id002
48
+ - !ruby/object:Gem::Dependency
49
+ name: yard
50
+ prerelease: false
51
+ requirement: &id003 !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ segments:
56
+ - 0
57
+ version: "0"
58
+ type: :development
59
+ version_requirements: *id003
60
+ description: Easy way to retrieve Google Page Rank, Alexa Rank, and backlink counts
61
+ email: blatyo@gmail.com
62
+ executables: []
63
+
64
+ extensions: []
65
+
66
+ extra_rdoc_files:
67
+ - LICENSE.md
68
+ - README.md
69
+ files:
70
+ - .document
71
+ - .gitignore
72
+ - LICENSE.md
73
+ - README.md
74
+ - Rakefile
75
+ - VERSION
76
+ - lib/page_rankr.rb
77
+ - lib/page_rankr/alexa.rb
78
+ - lib/page_rankr/backlinks.rb
79
+ - lib/page_rankr/google.rb
80
+ - lib/page_rankr/google/checksum.rb
81
+ - spec/page_rankr_spec.rb
82
+ - spec/spec.opts
83
+ - spec/spec_helper.rb
84
+ has_rdoc: true
85
+ homepage: http://github.com/blatyo/page_rankr
86
+ licenses: []
87
+
88
+ post_install_message:
89
+ rdoc_options:
90
+ - --charset=UTF-8
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ segments:
98
+ - 0
99
+ version: "0"
100
+ required_rubygems_version: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - ">="
103
+ - !ruby/object:Gem::Version
104
+ segments:
105
+ - 0
106
+ version: "0"
107
+ requirements: []
108
+
109
+ rubyforge_project:
110
+ rubygems_version: 1.3.6
111
+ signing_key:
112
+ specification_version: 3
113
+ summary: Easy way to retrieve Google Page Rank, Alexa Rank, and backlink counts
114
+ test_files:
115
+ - spec/page_rankr_spec.rb
116
+ - spec/spec_helper.rb