PageRankr 1.2.0 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  # Change Log
2
2
 
3
+ ## Version 1.3.0
4
+
5
+ * Lots of refactoring. Should be much easier to extend and temporarily fix if needed.
6
+
3
7
  ## Version 1.2.0
4
8
 
5
9
  * Changed backlinks method with no search engines specified to use all of them
data/PageRankr.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{PageRankr}
8
- s.version = "1.2.0"
8
+ s.version = "1.3.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Allen Madsen"]
12
- s.date = %q{2010-07-04}
12
+ s.date = %q{2010-07-05}
13
13
  s.description = %q{Easy way to retrieve Google Page Rank, Alexa Rank, and backlink counts}
14
14
  s.email = %q{blatyo@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -26,10 +26,18 @@ Gem::Specification.new do |s|
26
26
  "Rakefile",
27
27
  "VERSION",
28
28
  "lib/page_rankr.rb",
29
- "lib/page_rankr/alexa.rb",
30
29
  "lib/page_rankr/backlinks.rb",
31
- "lib/page_rankr/google.rb",
32
- "lib/page_rankr/google/checksum.rb"
30
+ "lib/page_rankr/backlinks/alexa.rb",
31
+ "lib/page_rankr/backlinks/alltheweb.rb",
32
+ "lib/page_rankr/backlinks/altavista.rb",
33
+ "lib/page_rankr/backlinks/backlink.rb",
34
+ "lib/page_rankr/backlinks/bing.rb",
35
+ "lib/page_rankr/backlinks/google.rb",
36
+ "lib/page_rankr/backlinks/yahoo.rb",
37
+ "lib/page_rankr/ranks.rb",
38
+ "lib/page_rankr/ranks/alexa.rb",
39
+ "lib/page_rankr/ranks/google.rb",
40
+ "lib/page_rankr/ranks/google/checksum.rb"
33
41
  ]
34
42
  s.homepage = %q{http://github.com/blatyo/page_rankr}
35
43
  s.rdoc_options = ["--charset=UTF-8"]
data/README.md CHANGED
@@ -42,6 +42,44 @@ There are two valid rank trackers supported: `:alexa, :google`.
42
42
  Alexa ranks are descending where 1 is the most popular. If a site has an alexa rank of 0 then the site is unranked.
43
43
  Google page ranks are in the range 0-10 where 10 is the most popular. If a site is unindexed then the rank will be -1.
44
44
 
45
+ ## Fix it!
46
+
47
+ If you ever find something is broken, it should now be much easier to fix with version >= 1.3.0. For example, if the xpath used to look up a backlink is broken, just override the method for that class to provide the correct xpath.
48
+
49
+ module PageRankr
50
+ class Backlinks
51
+ class Google < Backlink
52
+ def xpath
53
+ "my new awesome xpath"
54
+ end
55
+ end
56
+ end
57
+ end
58
+
59
+ ## Extend it!
60
+
61
+ If you ever come across a site that provides a rank or backlinks, you can hook that class up to automatically be used with PageRankr.
62
+
63
+ module PageRankr
64
+ class Backlinks
65
+ class Foo < Backlink
66
+ def url(site)
67
+ "http://example.com/?q=#{site}"
68
+ end
69
+
70
+ def xpath
71
+ "//backlinks/text()"
72
+ end
73
+
74
+ def clean(backlink_count)
75
+ #do some of my own cleaning
76
+ super(backlink_count) # strips letters, commas, and a few other nasty things and converts it to an integer
77
+ end
78
+ end
79
+ end
80
+ end
81
+
82
+ Then, just make sure you require the class and PageRankr, and whenever you call PageRankr.backlinks it'll be able to use your class.
45
83
 
46
84
  ## Note on Patches/Pull Requests
47
85
 
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.2.0
1
+ 1.3.0
data/lib/page_rankr.rb CHANGED
@@ -1,28 +1,15 @@
1
- path = File.expand_path(File.dirname(__FILE__)) + '/page_rankr/'
2
- require path + 'backlinks'
3
- require path + 'alexa'
4
- require path + 'google'
1
+ require File.join("page_rankr", "backlinks")
2
+ require File.join("page_rankr", "ranks")
5
3
 
6
4
  module PageRankr
7
5
  class << self
8
6
  def backlinks(site, *search_engines)
9
- Backlinks.lookup site, *search_engines
7
+ Backlinks.new.lookup site, *search_engines
10
8
  end
11
9
  alias_method :backlink, :backlinks
12
10
 
13
11
  def ranks(site, *rank_trackers)
14
- rank_trackers = [:google, :alexa] if rank_trackers.empty?
15
-
16
- ranks = {}
17
- rank_trackers.each do |tracker|
18
- case tracker
19
- when :google
20
- ranks[tracker] = Google.lookup(site)
21
- when :alexa
22
- ranks[tracker] = Alexa.lookup(site)
23
- end
24
- end
25
- ranks
12
+ Ranks.new.lookup site, *rank_trackers
26
13
  end
27
14
  alias_method :rank, :ranks
28
15
  end
@@ -1,39 +1,30 @@
1
- require 'open-uri'
2
- require 'cgi'
3
- require 'nokogiri'
1
+ require File.join("page_rankr", "backlinks", "backlink")
2
+ require File.join("page_rankr", "backlinks", "alexa")
3
+ require File.join("page_rankr", "backlinks", "alltheweb")
4
+ require File.join("page_rankr", "backlinks", "altavista")
5
+ require File.join("page_rankr", "backlinks", "bing")
6
+ require File.join("page_rankr", "backlinks", "google")
7
+ require File.join("page_rankr", "backlinks", "yahoo")
4
8
 
5
9
  module PageRankr
6
10
  class Backlinks
7
- SEARCH_ENGINES = [:google, :bing, :yahoo, :altavista, :alltheweb, :alexa]
11
+ attr_accessor :search_engines
8
12
 
9
- SEARCH_ENGINE_URLS = {
10
- :google => "http://www.google.com/search?q=link%3A",
11
- :bing => "http://www.bing.com/search?q=link%3A",
12
- :yahoo => "http://siteexplorer.search.yahoo.com/search?p=",
13
- :altavista => "http://www.altavista.com/web/results?q=link%3A",
14
- :alltheweb => "http://www.alltheweb.com/search?q=link%3A",
15
- :alexa => "http://data.alexa.com/data?cli=10&dat=snbamz&url="
16
- }
17
-
18
- SEARCH_EGNINE_PATHS = {
19
- :google => "//div[@id='resultStats']/text()",
20
- :bing => "//span[@class='sb_count']/text()",
21
- :yahoo => "//ol[@id='results-tab']/li[2]/a/text()",
22
- :altavista => "//a[@class='lbl']/text()",
23
- :alltheweb => "//span[@class='ofSoMany']/text()",
24
- :alexa => "//linksin/@num"
25
- }
13
+ def initialize
14
+ @search_engines = self.class.constants
15
+ @search_engines.delete(:Backlink)
16
+ end
26
17
 
27
- def self.lookup(site, *search_engines)
28
- search_engines = SEARCH_ENGINES if search_engines.empty?
18
+ def lookup(site, *engines)
19
+ engines = search_engines if engines.empty?
29
20
 
30
21
  backlinks = {}
31
- search_engines.each do |engine|
32
- next unless SEARCH_ENGINE_URLS[engine]
33
- doc = Nokogiri::HTML(open(SEARCH_ENGINE_URLS[engine] + CGI.escape(site)))
34
- count = doc.at(SEARCH_EGNINE_PATHS[engine]).to_s
35
- count = count.gsub('1-10', '').gsub(/[a-zA-Z,\s\(\)]/, '')
36
- backlinks[engine] = count.to_i
22
+ engines.each do |engine|
23
+ name, klass = engine.to_s.capitalize, self.class
24
+
25
+ next unless klass.const_defined? name
26
+
27
+ backlinks[engine.to_s.downcase.to_sym] = klass.const_get(name).new(site).backlinks
37
28
  end
38
29
  backlinks
39
30
  end
@@ -0,0 +1,15 @@
1
+ require 'cgi'
2
+
3
+ module PageRankr
4
+ class Backlinks
5
+ class Alexa < Backlink
6
+ def url(site)
7
+ "http://data.alexa.com/data?cli=10&dat=snbamz&url=#{CGI.escape(site)}"
8
+ end
9
+
10
+ def xpath
11
+ "//linksin/@num"
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require 'cgi'
2
+
3
+ module PageRankr
4
+ class Backlinks
5
+ class Alltheweb < Backlink
6
+ def url(site)
7
+ "http://www.alltheweb.com/search?q=link%3A#{CGI.escape(site)}"
8
+ end
9
+
10
+ def xpath
11
+ "//span[@class='ofSoMany']/text()"
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require 'cgi'
2
+
3
+ module PageRankr
4
+ class Backlinks
5
+ class Altavista < Backlink
6
+ def url(site)
7
+ "http://www.altavista.com/web/results?q=link%3A#{CGI.escape(site)}"
8
+ end
9
+
10
+ def xpath
11
+ "//a[@class='lbl']/text()"
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,18 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
+ module PageRankr
5
+ class Backlinks
6
+ class Backlink
7
+ attr_reader :backlinks
8
+
9
+ def initialize(site)
10
+ @backlinks = clean Nokogiri::HTML(open url(site)).at(xpath).to_s
11
+ end
12
+
13
+ def clean(backlink_count)
14
+ backlink_count.gsub(/[a-zA-Z,\s\(\)]/, '').to_i
15
+ end
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,19 @@
1
+ require 'cgi'
2
+
3
+ module PageRankr
4
+ class Backlinks
5
+ class Bing < Backlink
6
+ def url(site)
7
+ "http://www.bing.com/search?q=link%3A#{CGI.escape(site)}"
8
+ end
9
+
10
+ def xpath
11
+ "//span[@class='sb_count']/text()"
12
+ end
13
+
14
+ def clean(backlink_count)
15
+ super(backlink_count.gsub('1-10', ''))
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,15 @@
1
+ require 'cgi'
2
+
3
+ module PageRankr
4
+ class Backlinks
5
+ class Google < Backlink
6
+ def url(site)
7
+ "http://www.google.com/search?q=link%3A#{CGI.escape(site)}"
8
+ end
9
+
10
+ def xpath
11
+ "//div[@id='resultStats']/text()"
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,15 @@
1
+ require 'cgi'
2
+
3
+ module PageRankr
4
+ class Backlinks
5
+ class Yahoo < Backlink
6
+ def url(site)
7
+ "http://siteexplorer.search.yahoo.com/search?p=#{CGI.escape(site)}"
8
+ end
9
+
10
+ def xpath
11
+ "//ol[@id='results-tab']/li[2]/a/text()"
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,26 @@
1
+ require File.join("page_rankr", "ranks", "alexa")
2
+ require File.join("page_rankr", "ranks", "google")
3
+
4
+ module PageRankr
5
+ class Ranks
6
+ attr_reader :rank_trackers
7
+
8
+ def initialize
9
+ @rank_trackers = self.class.constants
10
+ end
11
+
12
+ def lookup(site, *trackers)
13
+ trackers = rank_trackers if trackers.empty?
14
+
15
+ ranks = {}
16
+ trackers.each do |tracker|
17
+ name, klass = tracker.to_s.capitalize, self.class
18
+
19
+ next unless klass.const_defined? name
20
+
21
+ ranks[tracker.to_s.downcase.to_sym] = klass.const_get(name).new(site).rank
22
+ end
23
+ ranks
24
+ end
25
+ end
26
+ end
@@ -0,0 +1,23 @@
1
+ require 'open-uri'
2
+ require 'cgi'
3
+ require 'nokogiri'
4
+
5
+ module PageRankr
6
+ class Ranks
7
+ class Alexa
8
+ attr_reader :rank
9
+
10
+ def initialize(site)
11
+ @rank = Nokogiri::HTML(open(url(site))).search(xpath).to_s.to_i
12
+ end
13
+
14
+ def xpath
15
+ "//reach/@rank"
16
+ end
17
+
18
+ def url(site)
19
+ "http://data.alexa.com/data?cli=10&dat=snbamz&url=#{CGI.escape(site)}"
20
+ end
21
+ end
22
+ end
23
+ end
@@ -1,20 +1,23 @@
1
- path = File.expand_path(File.dirname(__FILE__)) + '/google/'
2
1
  require "open-uri"
3
- require path + 'checksum'
2
+ require File.join("page_rankr", "ranks", "google", "checksum")
4
3
 
5
4
  module PageRankr
6
- module Google
7
- class << self
8
- def lookup(site)
5
+ class Ranks
6
+ class Google
7
+ attr_reader :rank
8
+
9
+ def initialize(site)
9
10
  checksum = Checksum.generate(site)
10
11
  begin
11
- open(url(site, checksum)) {|io| io.read.scan(/Rank_\d+:\d+:(\d+)/)[0][0].to_i}
12
+ @rank = open(url(site, checksum)) {|io| io.read.scan(regex)[0][0].to_i}
12
13
  rescue
13
14
  -1
14
15
  end
15
16
  end
16
-
17
- private
17
+
18
+ def regex
19
+ /Rank_\d+:\d+:(\d+)/
20
+ end
18
21
 
19
22
  def url(site, checksum)
20
23
  "http://toolbarqueries.google.com/search?client=navclient-auto&ch=#{checksum}&features=Rank&q=info:#{site}"
@@ -0,0 +1,76 @@
1
+ module PageRankr
2
+ class Ranks
3
+ class Google
4
+ class Checksum
5
+ class << self
6
+ def generate(site)
7
+ bytes = byte_array('info:' + site)
8
+ length = bytes.length
9
+ a = b = 0x9E3779B9
10
+ c = 0xE6359A60
11
+ k, len = 0, length
12
+ while(len >= 12)
13
+ a = m(a + bytes[k + 0] + (bytes[k + 1] << 8) + (bytes[k + 2] << 16) + (bytes[k + 3] << 24))
14
+ b = m(b + bytes[k + 4] + (bytes[k + 5] << 8) + (bytes[k + 6] << 16) + (bytes[k + 7] << 24))
15
+ c = m(c + bytes[k + 8] + (bytes[k + 9] << 8) + (bytes[k + 10] << 16) + (bytes[k + 11] << 24))
16
+
17
+ a, b, c = mix(a, b, c)
18
+ k += 12
19
+ len -= 12
20
+ end
21
+
22
+ c = c + length
23
+
24
+ c = mix(*toss(a, b, c, bytes, len, k))[2]
25
+ "6" + c.to_s
26
+ end
27
+
28
+ private
29
+
30
+ def byte_array(site)
31
+ bytes = []
32
+ site.each_byte {|b| bytes << b}
33
+ bytes
34
+ end
35
+
36
+ # Need to keep numbers in the unsigned int 32 range
37
+ def m(v)
38
+ v % 0x100000000
39
+ end
40
+
41
+ def mix(a, b, c)
42
+ a, b, c = m(a), m(b), m(c)
43
+
44
+ a = m(a-b-c) ^ m(c >> 13)
45
+ b = m(b-c-a) ^ m(a << 8)
46
+ c = m(c-a-b) ^ m(b >> 13)
47
+
48
+ a = m(a-b-c) ^ m(c >> 12)
49
+ b = m(b-c-a) ^ m(a << 16)
50
+ c = m(c-a-b) ^ m(b >> 5)
51
+
52
+ a = m(a-b-c) ^ m(c >> 3)
53
+ b = m(b-c-a) ^ m(a << 10)
54
+ c = m(c-a-b) ^ m(b >> 15)
55
+
56
+ [a, b, c]
57
+ end
58
+
59
+ def toss(a, b, c, bytes, len, k)
60
+ case len
61
+ when 9..11
62
+ c = c + (bytes[k+len-1] << ((len % 8) * 8))
63
+ when 5..8
64
+ b = b + (bytes[k+len-1] << ((len % 5) * 8))
65
+ when 1..4
66
+ a = a + (bytes[k+len-1] << ((len - 1) * 8))
67
+ else
68
+ return [a, b, c]
69
+ end
70
+ toss(a, b, c, bytes, len-1, k)
71
+ end
72
+ end
73
+ end
74
+ end
75
+ end
76
+ end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 2
7
+ - 3
8
8
  - 0
9
- version: 1.2.0
9
+ version: 1.3.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Allen Madsen
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-07-04 00:00:00 -04:00
17
+ date: 2010-07-05 00:00:00 -04:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -64,10 +64,18 @@ files:
64
64
  - Rakefile
65
65
  - VERSION
66
66
  - lib/page_rankr.rb
67
- - lib/page_rankr/alexa.rb
68
67
  - lib/page_rankr/backlinks.rb
69
- - lib/page_rankr/google.rb
70
- - lib/page_rankr/google/checksum.rb
68
+ - lib/page_rankr/backlinks/alexa.rb
69
+ - lib/page_rankr/backlinks/alltheweb.rb
70
+ - lib/page_rankr/backlinks/altavista.rb
71
+ - lib/page_rankr/backlinks/backlink.rb
72
+ - lib/page_rankr/backlinks/bing.rb
73
+ - lib/page_rankr/backlinks/google.rb
74
+ - lib/page_rankr/backlinks/yahoo.rb
75
+ - lib/page_rankr/ranks.rb
76
+ - lib/page_rankr/ranks/alexa.rb
77
+ - lib/page_rankr/ranks/google.rb
78
+ - lib/page_rankr/ranks/google/checksum.rb
71
79
  has_rdoc: true
72
80
  homepage: http://github.com/blatyo/page_rankr
73
81
  licenses: []
@@ -1,14 +0,0 @@
1
- require 'open-uri'
2
- require 'cgi'
3
- require 'nokogiri'
4
-
5
- module PageRankr
6
- class Alexa
7
- class << self
8
- def lookup(site)
9
- url = "http://data.alexa.com/data?cli=10&dat=snbamz&url="
10
- Nokogiri::HTML(open(url + CGI.escape(site))).search("//reach/@rank").to_s.to_i
11
- end
12
- end
13
- end
14
- end
@@ -1,74 +0,0 @@
1
- module PageRankr
2
- module Google
3
- class Checksum
4
- class << self
5
- def generate(site)
6
- bytes = byte_array('info:' + site)
7
- length = bytes.length
8
- a = b = 0x9E3779B9
9
- c = 0xE6359A60
10
- k, len = 0, length
11
- while(len >= 12)
12
- a = m(a + bytes[k + 0] + (bytes[k + 1] << 8) + (bytes[k + 2] << 16) + (bytes[k + 3] << 24))
13
- b = m(b + bytes[k + 4] + (bytes[k + 5] << 8) + (bytes[k + 6] << 16) + (bytes[k + 7] << 24))
14
- c = m(c + bytes[k + 8] + (bytes[k + 9] << 8) + (bytes[k + 10] << 16) + (bytes[k + 11] << 24))
15
-
16
- a, b, c = mix(a, b, c)
17
- k += 12
18
- len -= 12
19
- end
20
-
21
- c = c + length
22
-
23
- c = mix(*toss(a, b, c, bytes, len, k))[2]
24
- "6" + c.to_s
25
- end
26
-
27
- private
28
-
29
- def byte_array(site)
30
- bytes = []
31
- site.each_byte {|b| bytes << b}
32
- bytes
33
- end
34
-
35
- # Need to keep numbers in the unsigned int 32 range
36
- def m(v)
37
- v % 0x100000000
38
- end
39
-
40
- def mix(a, b, c)
41
- a, b, c = m(a), m(b), m(c)
42
-
43
- a = m(a-b-c) ^ m(c >> 13)
44
- b = m(b-c-a) ^ m(a << 8)
45
- c = m(c-a-b) ^ m(b >> 13)
46
-
47
- a = m(a-b-c) ^ m(c >> 12)
48
- b = m(b-c-a) ^ m(a << 16)
49
- c = m(c-a-b) ^ m(b >> 5)
50
-
51
- a = m(a-b-c) ^ m(c >> 3)
52
- b = m(b-c-a) ^ m(a << 10)
53
- c = m(c-a-b) ^ m(b >> 15)
54
-
55
- [a, b, c]
56
- end
57
-
58
- def toss(a, b, c, bytes, len, k)
59
- case len
60
- when 9..11
61
- c = c + (bytes[k+len-1] << ((len % 8) * 8))
62
- when 5..8
63
- b = b + (bytes[k+len-1] << ((len % 5) * 8))
64
- when 1..4
65
- a = a + (bytes[k+len-1] << ((len - 1) * 8))
66
- else
67
- return [a, b, c]
68
- end
69
- toss(a, b, c, bytes, len-1, k)
70
- end
71
- end
72
- end
73
- end
74
- end