PageRankr 1.7.1 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +7 -1
- data/PageRankr.gemspec +4 -2
- data/README.md +38 -26
- data/lib/page_rankr.rb +10 -9
- data/lib/page_rankr/backlink.rb +8 -2
- data/lib/page_rankr/backlinks.rb +3 -3
- data/lib/page_rankr/backlinks/alexa.rb +8 -5
- data/lib/page_rankr/backlinks/bing.rb +8 -5
- data/lib/page_rankr/backlinks/google.rb +12 -6
- data/lib/page_rankr/backlinks/yahoo.rb +8 -5
- data/lib/page_rankr/index.rb +7 -2
- data/lib/page_rankr/indexes.rb +3 -1
- data/lib/page_rankr/indexes/bing.rb +8 -5
- data/lib/page_rankr/indexes/google.rb +14 -6
- data/lib/page_rankr/rank.rb +15 -1
- data/lib/page_rankr/ranks.rb +5 -2
- data/lib/page_rankr/ranks/alexa_global.rb +19 -0
- data/lib/page_rankr/ranks/alexa_us.rb +19 -0
- data/lib/page_rankr/ranks/compete.rb +10 -10
- data/lib/page_rankr/ranks/google.rb +17 -10
- data/lib/page_rankr/ranks/google/checksum.rb +2 -2
- data/lib/page_rankr/site.rb +21 -0
- data/lib/page_rankr/tracker.rb +29 -4
- data/lib/page_rankr/version.rb +1 -1
- data/spec/page_rankr_spec.rb +24 -34
- metadata +38 -8
- data/lib/page_rankr/backlinks/alltheweb.rb +0 -15
- data/lib/page_rankr/backlinks/altavista.rb +0 -15
- data/lib/page_rankr/ranks/alexa.rb +0 -26
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,13 @@
|
|
1
1
|
# Change Log
|
2
|
+
|
3
|
+
## Version 2.0.0
|
4
|
+
* URL validation
|
5
|
+
* Parallel requests = way faster!
|
6
|
+
* Not tracked returns nil
|
7
|
+
* Alexa US and Alexa Global are now treated as separate trackers, and the returned results are a single-level hash.
|
8
|
+
* Removed Altavista and AllTheWeb because they now direct to yahoo.
|
9
|
+
* Changed some classes to modules so that it is no longer necessary to specify a superclass when reopening a tracker class.
|
10
|
+
|
2
11
|
## Version 1.7.1
|
3
12
|
* Catches exception thrown when doing compete rank lookup with url not in the form "google.com".
|
4
13
|
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
PageRankr (1.7.
|
4
|
+
PageRankr (1.7.1)
|
5
5
|
json (>= 1.4.6)
|
6
6
|
nokogiri (>= 1.4.1)
|
7
|
+
public_suffix_service (~> 0.8.1)
|
8
|
+
typhoeus (~> 0.2.1)
|
7
9
|
|
8
10
|
GEM
|
9
11
|
remote: http://rubygems.org/
|
@@ -14,7 +16,9 @@ GEM
|
|
14
16
|
rspec-instafail (~> 0.1.4)
|
15
17
|
ruby-progressbar (~> 0.0.9)
|
16
18
|
json (1.5.1)
|
19
|
+
mime-types (1.16)
|
17
20
|
nokogiri (1.4.4)
|
21
|
+
public_suffix_service (0.8.1)
|
18
22
|
rspec (2.1.0)
|
19
23
|
rspec-core (~> 2.1.0)
|
20
24
|
rspec-expectations (~> 2.1.0)
|
@@ -25,6 +29,8 @@ GEM
|
|
25
29
|
rspec-instafail (0.1.5)
|
26
30
|
rspec-mocks (2.1.0)
|
27
31
|
ruby-progressbar (0.0.9)
|
32
|
+
typhoeus (0.2.1)
|
33
|
+
mime-types
|
28
34
|
|
29
35
|
PLATFORMS
|
30
36
|
ruby
|
data/PageRankr.gemspec
CHANGED
@@ -15,8 +15,10 @@ Gem::Specification.new do |s|
|
|
15
15
|
s.add_development_dependency "bundler", ">= 1.0.0"
|
16
16
|
s.add_development_dependency "fuubar", ">= 0.0.1"
|
17
17
|
|
18
|
-
s.add_runtime_dependency "nokogiri",
|
19
|
-
s.add_runtime_dependency "json",
|
18
|
+
s.add_runtime_dependency "nokogiri", ">= 1.4.1"
|
19
|
+
s.add_runtime_dependency "json", ">= 1.4.6"
|
20
|
+
s.add_runtime_dependency "public_suffix_service", "~> 0.8.1"
|
21
|
+
s.add_runtime_dependency "typhoeus", "~> 0.2.1"
|
20
22
|
|
21
23
|
s.files = `git ls-files`.split("\n")
|
22
24
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/README.md
CHANGED
@@ -17,7 +17,7 @@ Check out a little [web app][1] I wrote up that uses it or look at the [source][
|
|
17
17
|
|
18
18
|
### Backlinks
|
19
19
|
|
20
|
-
Backlinks are the result of doing a search with a query like "link:www.google.com". The number of returned results indicates how many sites point to that url.
|
20
|
+
Backlinks are the result of doing a search with a query like "link:www.google.com". The number of returned results indicates how many sites point to that url. If a site is not tracked then `nil` is returned.
|
21
21
|
|
22
22
|
PageRankr.backlinks('www.google.com', :google, :bing) #=> {:google=>161000, :bing=>208000000}
|
23
23
|
PageRankr.backlinks('www.google.com', :yahoo) #=> {:yahoo=>256300062}
|
@@ -26,20 +26,21 @@ If you don't specify a search engine, then all of them are used.
|
|
26
26
|
|
27
27
|
# this
|
28
28
|
PageRankr.backlinks('www.google.com')
|
29
|
-
#=> {:google=>23000, :bing=>215000000, :yahoo=>250522337, :
|
29
|
+
#=> {:google=>23000, :bing=>215000000, :yahoo=>250522337, :alexa=>727036}
|
30
30
|
|
31
31
|
# is equivalent to
|
32
|
-
PageRankr.backlinks('www.google.com', :google, :bing, :yahoo, :
|
33
|
-
#=> {:google=>23000, :bing=>215000000, :yahoo=>250522337, :
|
32
|
+
PageRankr.backlinks('www.google.com', :google, :bing, :yahoo, :alexa)
|
33
|
+
#=> {:google=>23000, :bing=>215000000, :yahoo=>250522337, :alexa=>727036}
|
34
34
|
|
35
35
|
You can also use the alias `backlink` instead of `backlinks`.
|
36
|
-
Valid search engines are: `:google, :bing, :yahoo, :altavista, :alltheweb, :alexa`. To get this list you can do:
|
37
36
|
|
38
|
-
|
37
|
+
Valid search engines are: `:google, :bing, :yahoo, :alexa` (altavista and alltheweb now redirect to yahoo). To get this list you can do:
|
38
|
+
|
39
|
+
PageRankr.backlink_trackers #=> [:alexa, :bing, :google, :yahoo]
|
39
40
|
|
40
41
|
### Indexes
|
41
42
|
|
42
|
-
Indexes are the result of doing a search with a query like "site:www.google.com". The number of returned results indicates how many pages of a domain are indexed by a particular search engine.
|
43
|
+
Indexes are the result of doing a search with a query like "site:www.google.com". The number of returned results indicates how many pages of a domain are indexed by a particular search engine. If the site is not indexed `nil` is returned.
|
43
44
|
|
44
45
|
PageRankr.indexes('www.google.com', :google) #=> {:google=>4860000}
|
45
46
|
PageRankr.indexes('www.google.com', :bing) #=> {:bing=>2120000}
|
@@ -55,36 +56,43 @@ If you don't specify a search engine, then all of them are used.
|
|
55
56
|
#=> {:bing=>2120000, :google=>4860000}
|
56
57
|
|
57
58
|
You can also use the alias `index` instead of `indexes`.
|
59
|
+
|
58
60
|
Valid search engines are: `:google, :bing`. To get this list you can do:
|
59
61
|
|
60
|
-
PageRankr.index_trackers #=> [:
|
62
|
+
PageRankr.index_trackers #=> [:bing, :google]
|
61
63
|
|
62
64
|
### Ranks
|
63
65
|
|
64
|
-
|
66
|
+
Ranks are ratings assigned to specify how popular a site is. The most famous example of this is the google page rank.
|
67
|
+
|
68
|
+
PageRankr.ranks('www.google.com', :google) #=> {:google=>10}
|
65
69
|
|
66
|
-
|
70
|
+
If you don't specify a rank provider, then all of them are used.
|
71
|
+
|
72
|
+
PageRankr.ranks('www.google.com', :alexa_us, :alexa_global, :compete, :google)
|
73
|
+
#=> {:alexa_us=>1, :alexa_global=>1, :google=>10, :compete=>1}
|
67
74
|
|
68
75
|
# this also gives the same result
|
69
|
-
PageRankr.ranks('google.com')
|
76
|
+
PageRankr.ranks('www.google.com')
|
77
|
+
#=> {:alexa_us=>1, :alexa_global=>1, :google=>10, :compete=>1}
|
70
78
|
|
71
79
|
You can also use the alias `rank` instead of `ranks`.
|
72
|
-
There are three valid rank trackers supported: `:alexa, :google, :compete`. To get this you can do:
|
73
80
|
|
74
|
-
|
81
|
+
Valid rank trackers are: `:alexa_us, :alexa_global, :compete, :google`. To get this list you can do:
|
82
|
+
|
83
|
+
PageRankr.rank_trackers #=> [:alexa_global, :alexa_us, :compete, :google]
|
75
84
|
|
76
|
-
Alexa ranks are descending where 1 is the most popular.
|
77
|
-
Google page ranks are in the range 0-10 where 10 is the most popular. If a site is unindexed then the rank will be -1.
|
85
|
+
Alexa and Compete ranks are descending where 1 is the most popular. Google page ranks are in the range 0-10 where 10 is the most popular. If a site is unindexed then the rank will be nil.
|
78
86
|
|
79
87
|
## Fix it!
|
80
88
|
|
81
89
|
If you ever find something is broken it should now be much easier to fix it with version >= 1.3.0. For example, if the xpath used to lookup a backlink is broken, just override the method for that class to provide the correct xpath.
|
82
90
|
|
83
91
|
module PageRankr
|
84
|
-
class Backlinks
|
85
|
-
class
|
92
|
+
class Backlinks
|
93
|
+
class Bing
|
86
94
|
def xpath
|
87
|
-
"my
|
95
|
+
"//my/new/awesome/@xpath"
|
88
96
|
end
|
89
97
|
end
|
90
98
|
end
|
@@ -92,13 +100,16 @@ If you ever find something is broken it should now be much easier to fix it with
|
|
92
100
|
|
93
101
|
## Extend it!
|
94
102
|
|
95
|
-
If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr.
|
103
|
+
If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr. PageRankr does this by looking up all the classes namespaced under Backlinks, Indexes, and Ranks.
|
96
104
|
|
97
105
|
module PageRankr
|
98
|
-
class Backlinks
|
99
|
-
class Foo
|
100
|
-
|
101
|
-
|
106
|
+
class Backlinks
|
107
|
+
class Foo
|
108
|
+
include Backlink
|
109
|
+
|
110
|
+
def request
|
111
|
+
@request ||= Typhoeus::Request.new("http://example.com/",
|
112
|
+
:params => {:q => @site.to_s})
|
102
113
|
end
|
103
114
|
|
104
115
|
def xpath
|
@@ -125,15 +136,16 @@ Then, just make sure you require the class and PageRankr and whenever you call P
|
|
125
136
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
126
137
|
* Send me a pull request. Bonus points for topic branches.
|
127
138
|
|
128
|
-
## TODO Version
|
139
|
+
## TODO Version 3
|
129
140
|
* Use APIs where possible
|
130
|
-
* Use [Typhoeus](https://github.com/pauldix/typhoeus) to improve speed when requesting multiple ranks and/or backlinks
|
131
141
|
* Configuration
|
132
142
|
* Optionally use API keys
|
143
|
+
* Maybe allow API key cycling to get around query limits
|
144
|
+
* Google search API is deprecated
|
133
145
|
|
134
146
|
## Contributors
|
135
147
|
* [Druwerd](https://github.com/Druwerd) - Use Google Search API instead of scraping.
|
136
|
-
* [Iteration Labs](https://github.com/iterationlabs) - Compete rank tracker and domain indexes.
|
148
|
+
* [Iteration Labs, LLC](https://github.com/iterationlabs) - Compete rank tracker and domain indexes.
|
137
149
|
|
138
150
|
## Shout Out
|
139
151
|
Gotta give credit where credit's due!
|
data/lib/page_rankr.rb
CHANGED
@@ -2,31 +2,32 @@ require File.join(File.dirname(__FILE__), "page_rankr", "tracker")
|
|
2
2
|
require File.join(File.dirname(__FILE__), "page_rankr", "backlinks")
|
3
3
|
require File.join(File.dirname(__FILE__), "page_rankr", "ranks")
|
4
4
|
require File.join(File.dirname(__FILE__), "page_rankr", "indexes")
|
5
|
+
require File.join(File.dirname(__FILE__), "page_rankr", "site")
|
5
6
|
|
6
7
|
module PageRankr
|
7
8
|
class << self
|
8
9
|
def backlinks(site, *search_engines)
|
9
|
-
Backlinks.new.lookup
|
10
|
+
Backlinks.new.lookup(Site.new(site), *search_engines)
|
10
11
|
end
|
11
12
|
alias_method :backlink, :backlinks
|
13
|
+
|
14
|
+
def backlink_trackers
|
15
|
+
Backlinks.new.backlink_trackers
|
16
|
+
end
|
12
17
|
|
13
18
|
def ranks(site, *rank_trackers)
|
14
|
-
Ranks.new.lookup
|
19
|
+
Ranks.new.lookup(Site.new(site), *rank_trackers)
|
15
20
|
end
|
16
21
|
alias_method :rank, :ranks
|
17
22
|
|
18
|
-
def indexes(site, *index_trackers)
|
19
|
-
Indexes.new.lookup site, *index_trackers
|
20
|
-
end
|
21
|
-
alias_method :index, :indexes
|
22
|
-
|
23
23
|
def rank_trackers
|
24
24
|
Ranks.new.rank_trackers
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
|
27
|
+
def indexes(site, *index_trackers)
|
28
|
+
Indexes.new.lookup(Site.new(site), *index_trackers)
|
29
29
|
end
|
30
|
+
alias_method :index, :indexes
|
30
31
|
|
31
32
|
def index_trackers
|
32
33
|
Indexes.new.index_trackers
|
data/lib/page_rankr/backlink.rb
CHANGED
@@ -3,12 +3,18 @@ require 'nokogiri'
|
|
3
3
|
require 'open-uri'
|
4
4
|
|
5
5
|
module PageRankr
|
6
|
-
|
6
|
+
module Backlink
|
7
7
|
attr_reader :backlinks
|
8
8
|
alias_method :tracked, :backlinks
|
9
9
|
|
10
10
|
def initialize(site)
|
11
|
-
@
|
11
|
+
@site = site
|
12
|
+
|
13
|
+
request.on_complete do |response|
|
14
|
+
html = Nokogiri::HTML(response.body)
|
15
|
+
@backlinks = clean(html.at(xpath).to_s)
|
16
|
+
@backlinks = nil if @backlinks.zero?
|
17
|
+
end
|
12
18
|
end
|
13
19
|
|
14
20
|
def clean(backlink_count)
|
data/lib/page_rankr/backlinks.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), "backlink")
|
2
2
|
require File.join(File.dirname(__FILE__), "backlinks", "alexa")
|
3
|
-
require File.join(File.dirname(__FILE__), "backlinks", "alltheweb")
|
4
|
-
require File.join(File.dirname(__FILE__), "backlinks", "altavista")
|
5
3
|
require File.join(File.dirname(__FILE__), "backlinks", "bing")
|
6
4
|
require File.join(File.dirname(__FILE__), "backlinks", "google")
|
7
5
|
require File.join(File.dirname(__FILE__), "backlinks", "yahoo")
|
8
6
|
|
9
7
|
module PageRankr
|
10
|
-
class Backlinks
|
8
|
+
class Backlinks
|
9
|
+
include Tracker
|
10
|
+
|
11
11
|
alias_method :backlink_trackers, :site_trackers
|
12
12
|
end
|
13
13
|
end
|
@@ -1,10 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module PageRankr
|
4
|
-
class Backlinks
|
5
|
-
class Alexa
|
6
|
-
|
7
|
-
|
4
|
+
class Backlinks
|
5
|
+
class Alexa
|
6
|
+
include Backlink
|
7
|
+
|
8
|
+
def request
|
9
|
+
@request ||= Typhoeus::Request.new("http://data.alexa.com/data",
|
10
|
+
:params => {:cli => 10, :dat => "snbamz", :url => @site.to_s}, :method => :get)
|
8
11
|
end
|
9
12
|
|
10
13
|
def xpath
|
@@ -1,10 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module PageRankr
|
4
|
-
class Backlinks
|
5
|
-
class Bing
|
6
|
-
|
7
|
-
|
4
|
+
class Backlinks
|
5
|
+
class Bing
|
6
|
+
include Backlink
|
7
|
+
|
8
|
+
def request
|
9
|
+
@request ||= Typhoeus::Request.new("http://www.bing.com/search",
|
10
|
+
:params => {:q => "link:#{@site.to_s}"}, :method => :get)
|
8
11
|
end
|
9
12
|
|
10
13
|
def xpath
|
@@ -1,18 +1,24 @@
|
|
1
|
-
require 'cgi'
|
2
1
|
require 'json'
|
3
2
|
|
4
3
|
module PageRankr
|
5
|
-
class Backlinks
|
6
|
-
class Google
|
4
|
+
class Backlinks
|
5
|
+
class Google
|
6
|
+
include Backlink
|
7
7
|
|
8
8
|
# overloaded to use Google's AJAX search API
|
9
9
|
# http://code.google.com/apis/ajaxsearch/documentation/
|
10
10
|
def initialize(site)
|
11
|
-
@
|
11
|
+
@site = site
|
12
|
+
request.on_complete do |response|
|
13
|
+
json = JSON.parse(response.body)
|
14
|
+
@backlinks = clean(json["responseData"]["cursor"]["estimatedResultCount"].to_s)
|
15
|
+
@backlinks = nil if @backlinks.zero?
|
16
|
+
end
|
12
17
|
end
|
13
18
|
|
14
|
-
def
|
15
|
-
"http://ajax.googleapis.com/ajax/services/search/web
|
19
|
+
def request
|
20
|
+
@request ||= Typhoeus::Request.new("http://ajax.googleapis.com/ajax/services/search/web",
|
21
|
+
:params => {:v => "1.0", :rsz => 1, :q => "link:#{@site.to_s}"}, :method => :get)
|
16
22
|
end
|
17
23
|
end
|
18
24
|
end
|
@@ -1,10 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module PageRankr
|
4
|
-
class Backlinks
|
5
|
-
class Yahoo
|
6
|
-
|
7
|
-
|
4
|
+
class Backlinks
|
5
|
+
class Yahoo
|
6
|
+
include Backlink
|
7
|
+
|
8
|
+
def request
|
9
|
+
@request ||= Typhoeus::Request.new("http://siteexplorer.search.yahoo.com/search",
|
10
|
+
:params => {:p => "#{@site.to_s}"}, :method => :get)
|
8
11
|
end
|
9
12
|
|
10
13
|
def xpath
|
data/lib/page_rankr/index.rb
CHANGED
@@ -2,12 +2,17 @@ require 'nokogiri'
|
|
2
2
|
require 'open-uri'
|
3
3
|
|
4
4
|
module PageRankr
|
5
|
-
|
5
|
+
module Index
|
6
6
|
attr_reader :indexes
|
7
7
|
alias_method :tracked, :indexes
|
8
8
|
|
9
9
|
def initialize(site)
|
10
|
-
@
|
10
|
+
@site = site
|
11
|
+
request.on_complete do |response|
|
12
|
+
html = Nokogiri::HTML(response.body)
|
13
|
+
@indexes = clean(html.at(xpath).to_s)
|
14
|
+
@indexes = nil if @indexes.zero?
|
15
|
+
end
|
11
16
|
end
|
12
17
|
|
13
18
|
def clean(backlink_count)
|
data/lib/page_rankr/indexes.rb
CHANGED
@@ -3,7 +3,9 @@ require File.join(File.dirname(__FILE__), "indexes", "bing")
|
|
3
3
|
require File.join(File.dirname(__FILE__), "indexes", "google")
|
4
4
|
|
5
5
|
module PageRankr
|
6
|
-
class Indexes
|
6
|
+
class Indexes
|
7
|
+
include Tracker
|
8
|
+
|
7
9
|
alias_method :index_trackers, :site_trackers
|
8
10
|
end
|
9
11
|
end
|
@@ -1,10 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module PageRankr
|
4
|
-
class Indexes
|
5
|
-
class Bing
|
6
|
-
|
7
|
-
|
4
|
+
class Indexes
|
5
|
+
class Bing
|
6
|
+
include Index
|
7
|
+
|
8
|
+
def request
|
9
|
+
@request ||= Typhoeus::Request.new("http://www.bing.com/search",
|
10
|
+
:params => {:q => "site:#{@site.to_s}"})
|
8
11
|
end
|
9
12
|
|
10
13
|
def xpath
|
@@ -1,18 +1,26 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
require 'json'
|
3
3
|
|
4
4
|
module PageRankr
|
5
|
-
class Indexes
|
6
|
-
class Google
|
5
|
+
class Indexes
|
6
|
+
class Google
|
7
|
+
include Index
|
7
8
|
|
8
9
|
# overloaded to use Google's AJAX search API
|
9
10
|
# http://code.google.com/apis/ajaxsearch/documentation/
|
10
11
|
def initialize(site)
|
11
|
-
@
|
12
|
+
@site = site
|
13
|
+
|
14
|
+
request.on_complete do |response|
|
15
|
+
json = JSON.parse(response.body)
|
16
|
+
@indexes = clean(json["responseData"]["cursor"]["estimatedResultCount"].to_s)
|
17
|
+
@indexes = nil if @indexes.zero?
|
18
|
+
end
|
12
19
|
end
|
13
20
|
|
14
|
-
def
|
15
|
-
"http://ajax.googleapis.com/ajax/services/search/web
|
21
|
+
def request
|
22
|
+
@request ||= Typhoeus::Request.new("http://ajax.googleapis.com/ajax/services/search/web",
|
23
|
+
:params => {:v => "1.0", :rsz => 1, :q => "site:#{@site.to_s}"}, :method => :get)
|
16
24
|
end
|
17
25
|
end
|
18
26
|
end
|
data/lib/page_rankr/rank.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
module PageRankr
|
2
|
-
|
2
|
+
module Rank
|
3
3
|
attr_reader :rank
|
4
4
|
alias_method :tracked, :rank
|
5
|
+
|
6
|
+
def initialize(site)
|
7
|
+
@site = site
|
8
|
+
|
9
|
+
request.on_complete do |response|
|
10
|
+
html = Nokogiri::HTML(response.body)
|
11
|
+
@rank = clean(html.search(xpath))
|
12
|
+
@rank = nil if @rank.zero?
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def clean(rank)
|
17
|
+
rank.to_s.to_i
|
18
|
+
end
|
5
19
|
end
|
6
20
|
end
|
data/lib/page_rankr/ranks.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), "rank")
|
2
|
-
require File.join(File.dirname(__FILE__), "ranks", "
|
2
|
+
require File.join(File.dirname(__FILE__), "ranks", "alexa_us")
|
3
|
+
require File.join(File.dirname(__FILE__), "ranks", "alexa_global")
|
3
4
|
require File.join(File.dirname(__FILE__), "ranks", "google")
|
4
5
|
require File.join(File.dirname(__FILE__), "ranks", "compete")
|
5
6
|
|
6
7
|
module PageRankr
|
7
|
-
class Ranks
|
8
|
+
class Ranks
|
9
|
+
include Tracker
|
10
|
+
|
8
11
|
alias_method :rank_trackers, :site_trackers
|
9
12
|
end
|
10
13
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module PageRankr
|
5
|
+
class Ranks
|
6
|
+
class AlexaGlobal
|
7
|
+
include Rank
|
8
|
+
|
9
|
+
def xpath
|
10
|
+
"//popularity/@text"
|
11
|
+
end
|
12
|
+
|
13
|
+
def request
|
14
|
+
@request ||= Typhoeus::Request.new("http://data.alexa.com/data",
|
15
|
+
:params => {:cli => 10, :dat => "snbamz", :url => @site.to_s})
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module PageRankr
|
5
|
+
class Ranks
|
6
|
+
class AlexaUs
|
7
|
+
include Rank
|
8
|
+
|
9
|
+
def xpath
|
10
|
+
"//reach/@rank"
|
11
|
+
end
|
12
|
+
|
13
|
+
def request
|
14
|
+
@request ||= Typhoeus::Request.new("http://data.alexa.com/data",
|
15
|
+
:params => {:cli => 10, :dat => "snbamz", :url => @site.to_s})
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -1,22 +1,22 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
require 'cgi'
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
5
|
module PageRankr
|
6
|
-
class Ranks
|
7
|
-
class Compete
|
8
|
-
|
9
|
-
@rank = Nokogiri::HTML(open(url(site))).search(xpath).to_s.gsub(',', '').to_i
|
10
|
-
rescue
|
11
|
-
@rank = 0 #compete only accepts urls without http:// and www, will be fixed in 2.0
|
12
|
-
end
|
6
|
+
class Ranks
|
7
|
+
class Compete
|
8
|
+
include Rank
|
13
9
|
|
14
10
|
def xpath
|
15
11
|
"//div[@id='rank']/div[@class='number value']/text()"
|
16
12
|
end
|
17
13
|
|
18
|
-
def
|
19
|
-
"http://siteanalytics.compete.com/#{CGI.escape(site)}/"
|
14
|
+
def request
|
15
|
+
@request ||= Typhoeus::Request.new("http://siteanalytics.compete.com/#{CGI.escape(@site.domain)}/")
|
16
|
+
end
|
17
|
+
|
18
|
+
def clean(rank)
|
19
|
+
rank.to_s.gsub(',', '').to_i
|
20
20
|
end
|
21
21
|
end
|
22
22
|
end
|
@@ -1,15 +1,21 @@
|
|
1
|
-
require
|
1
|
+
require 'typhoeus'
|
2
2
|
require File.join(File.dirname(__FILE__), "google", "checksum")
|
3
3
|
|
4
4
|
module PageRankr
|
5
|
-
class Ranks
|
6
|
-
class Google
|
5
|
+
class Ranks
|
6
|
+
class Google
|
7
|
+
include Rank
|
8
|
+
|
7
9
|
def initialize(site)
|
8
|
-
|
9
|
-
@
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
@site = site
|
11
|
+
@checksum = Checksum.generate(@site.to_s)
|
12
|
+
|
13
|
+
request.on_complete do |response|
|
14
|
+
@rank = if response.body =~ regex
|
15
|
+
clean($1)
|
16
|
+
else
|
17
|
+
nil
|
18
|
+
end
|
13
19
|
end
|
14
20
|
end
|
15
21
|
|
@@ -17,8 +23,9 @@ module PageRankr
|
|
17
23
|
/Rank_\d+:\d+:(\d+)/
|
18
24
|
end
|
19
25
|
|
20
|
-
def
|
21
|
-
"http://toolbarqueries.google.com/search
|
26
|
+
def request
|
27
|
+
@request ||= Typhoeus::Request.new("http://toolbarqueries.google.com/search",
|
28
|
+
:params => {:client => "navclient-auto", :ch => @checksum, :features => "Rank", :q => "info:#{@site.to_s}"})
|
22
29
|
end
|
23
30
|
end
|
24
31
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'public_suffix_service'
|
2
|
+
require 'delegate'
|
3
|
+
|
4
|
+
module PageRankr
|
5
|
+
class DomainInvalid < StandardError; end
|
6
|
+
|
7
|
+
class Site < DelegateClass(PublicSuffixService::Domain)
|
8
|
+
def initialize(site)
|
9
|
+
super(PublicSuffixService.parse(clean(site)))
|
10
|
+
valid? or raise DomainInvalid, "The domain provided is invalid."
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def clean(site)
|
16
|
+
site = site.split("://").last # remove protocol
|
17
|
+
site = site.split("/").first # remove path
|
18
|
+
site.split("?").first # remove params
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/page_rankr/tracker.rb
CHANGED
@@ -1,23 +1,48 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
|
1
3
|
module PageRankr
|
2
|
-
|
4
|
+
module Tracker
|
3
5
|
attr_accessor :site_trackers
|
4
6
|
|
5
7
|
def initialize
|
6
|
-
@site_trackers = self.class.constants.collect{|tracker| tracker
|
8
|
+
@site_trackers = self.class.constants.collect{|tracker| symbol_for(tracker)}
|
7
9
|
end
|
8
10
|
|
9
11
|
def lookup(site, *trackers)
|
10
12
|
trackers = site_trackers if trackers.empty?
|
11
13
|
|
12
14
|
tracked = {}
|
15
|
+
hydra = Typhoeus::Hydra.new
|
13
16
|
trackers.each do |tracker|
|
14
|
-
name, klass = tracker
|
17
|
+
name, klass = constant_name(tracker), self.class
|
15
18
|
|
16
19
|
next unless klass.const_defined? name
|
17
20
|
|
18
|
-
tracked[tracker] = klass.const_get(name).new(site)
|
21
|
+
tracked[tracker] = klass.const_get(name).new(site)
|
22
|
+
hydra.queue tracked[tracker].request
|
23
|
+
end
|
24
|
+
hydra.run
|
25
|
+
|
26
|
+
tracked.keys.each do |tracker|
|
27
|
+
tracked[tracker] = tracked[tracker].tracked
|
19
28
|
end
|
29
|
+
|
20
30
|
tracked
|
21
31
|
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def symbol_for(klass)
|
36
|
+
word = klass.to_s.dup
|
37
|
+
word.gsub!(/([A-Z]+)([A-Z][a-z])/){|match| "#{$1}_#{$2}" }
|
38
|
+
word.gsub!(/([a-z\d])([A-Z])/){|match| "#{$1}_#{$2}" }
|
39
|
+
word.tr!("-", "_")
|
40
|
+
word.downcase!
|
41
|
+
word.to_sym
|
42
|
+
end
|
43
|
+
|
44
|
+
def constant_name(sym)
|
45
|
+
sym.to_s.split('_').collect{|str| str.capitalize}.join
|
46
|
+
end
|
22
47
|
end
|
23
48
|
end
|
data/lib/page_rankr/version.rb
CHANGED
data/spec/page_rankr_spec.rb
CHANGED
@@ -14,47 +14,39 @@ describe PageRankr do
|
|
14
14
|
describe "#rank_trackers" do
|
15
15
|
subject{ PageRankr.rank_trackers }
|
16
16
|
|
17
|
-
it{ should include(:
|
17
|
+
it{ should include(:alexa_us) }
|
18
|
+
it{ should include(:alexa_global) }
|
19
|
+
it{ should include(:compete) }
|
18
20
|
it{ should include(:google) }
|
19
21
|
end
|
20
22
|
|
21
23
|
describe "#ranks" do
|
22
24
|
describe "success" do
|
23
|
-
subject{ PageRankr.ranks("google.com") }
|
25
|
+
subject{ PageRankr.ranks("www.google.com") }
|
24
26
|
|
25
|
-
it{ should have_key(:
|
27
|
+
it{ should have_key(:alexa_us) }
|
28
|
+
it{ should have_key(:alexa_global) }
|
26
29
|
it{ should have_key(:google) }
|
30
|
+
it{ should have_key(:compete) }
|
27
31
|
|
28
|
-
|
29
|
-
it{
|
30
|
-
it{
|
31
|
-
|
32
|
-
let(:alexa_us_rank) { alexa_rank[:us] }
|
33
|
-
it{ alexa_us_rank.should >= 1 }
|
34
|
-
let(:alexa_global_rank) { alexa_rank[:global] }
|
35
|
-
it{ alexa_global_rank.should >= 1 }
|
36
|
-
|
37
|
-
let(:google_rank) { subject[:google] }
|
38
|
-
it{ google_rank.should be_in(0..10)}
|
32
|
+
it{ subject[:alexa_us].should >= 1 }
|
33
|
+
it{ subject[:alexa_global].should >= 1 }
|
34
|
+
it{ subject[:compete].should >= 1 }
|
35
|
+
it{ subject[:google].should be_in(0..10) }
|
39
36
|
end
|
40
37
|
|
41
38
|
describe "failure" do
|
42
39
|
subject{ PageRankr.ranks("please-dont-register-a-site-that-breaks-this-test.com") }
|
43
40
|
|
44
|
-
it{ should have_key(:
|
41
|
+
it{ should have_key(:alexa_us) }
|
42
|
+
it{ should have_key(:alexa_global) }
|
45
43
|
it{ should have_key(:google) }
|
46
|
-
|
47
|
-
let(:alexa_rank) { subject[:alexa] }
|
48
|
-
it{ alexa_rank.should have_key(:us) }
|
49
|
-
it{ alexa_rank.should have_key(:global) }
|
50
|
-
|
51
|
-
let(:alexa_us_rank) { alexa_rank[:us] }
|
52
|
-
it{ alexa_us_rank.should == 0 }
|
53
|
-
let(:alexa_global_rank) { alexa_rank[:global] }
|
54
|
-
it{ alexa_global_rank.should == 0 }
|
44
|
+
it{ should have_key(:compete) }
|
55
45
|
|
56
|
-
|
57
|
-
it{
|
46
|
+
it{ subject[:alexa_us].should be_nil }
|
47
|
+
it{ subject[:alexa_global].should be_nil }
|
48
|
+
it{ subject[:compete].should be_nil }
|
49
|
+
it{ subject[:google].should be_nil }
|
58
50
|
end
|
59
51
|
end
|
60
52
|
|
@@ -62,8 +54,6 @@ describe PageRankr do
|
|
62
54
|
subject{ PageRankr.backlink_trackers }
|
63
55
|
|
64
56
|
it{ should include(:alexa) }
|
65
|
-
it{ should include(:alltheweb) }
|
66
|
-
it{ should include(:altavista) }
|
67
57
|
it{ should include(:bing) }
|
68
58
|
it{ should include(:google) }
|
69
59
|
it{ should include(:yahoo) }
|
@@ -71,11 +61,11 @@ describe PageRankr do
|
|
71
61
|
|
72
62
|
describe "#backlinks" do
|
73
63
|
describe "success" do
|
74
|
-
subject{ PageRankr.backlinks("google.com") }
|
64
|
+
subject{ PageRankr.backlinks("www.google.com") }
|
75
65
|
|
76
66
|
PageRankr.backlink_trackers.each do |tracker|
|
77
67
|
it{ should have_key(tracker) }
|
78
|
-
it{ subject[tracker].should
|
68
|
+
it{ subject[tracker].should > 0 }
|
79
69
|
end
|
80
70
|
end
|
81
71
|
|
@@ -84,7 +74,7 @@ describe PageRankr do
|
|
84
74
|
|
85
75
|
PageRankr.backlink_trackers.each do |tracker|
|
86
76
|
it{ should have_key(tracker) }
|
87
|
-
it{ subject[tracker].should
|
77
|
+
it{ subject[tracker].should be_nil }
|
88
78
|
end
|
89
79
|
end
|
90
80
|
end
|
@@ -98,11 +88,11 @@ describe PageRankr do
|
|
98
88
|
|
99
89
|
describe "#indexes" do
|
100
90
|
describe "success" do
|
101
|
-
subject{ PageRankr.indexes("google.com") }
|
91
|
+
subject{ PageRankr.indexes("www.google.com") }
|
102
92
|
|
103
93
|
PageRankr.index_trackers.each do |tracker|
|
104
94
|
it{ should have_key(tracker) }
|
105
|
-
it{ subject[tracker].should
|
95
|
+
it{ subject[tracker].should > 0 }
|
106
96
|
end
|
107
97
|
end
|
108
98
|
|
@@ -111,7 +101,7 @@ describe PageRankr do
|
|
111
101
|
|
112
102
|
PageRankr.index_trackers.each do |tracker|
|
113
103
|
it{ should have_key(tracker) }
|
114
|
-
it{ subject[tracker].should
|
104
|
+
it{ subject[tracker].should be_nil }
|
115
105
|
end
|
116
106
|
end
|
117
107
|
end
|
metadata
CHANGED
@@ -3,10 +3,10 @@ name: PageRankr
|
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
|
-
-
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version:
|
6
|
+
- 2
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 2.0.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Allen Madsen
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-02 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -92,6 +92,36 @@ dependencies:
|
|
92
92
|
version: 1.4.6
|
93
93
|
type: :runtime
|
94
94
|
version_requirements: *id005
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
name: public_suffix_service
|
97
|
+
prerelease: false
|
98
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
segments:
|
104
|
+
- 0
|
105
|
+
- 8
|
106
|
+
- 1
|
107
|
+
version: 0.8.1
|
108
|
+
type: :runtime
|
109
|
+
version_requirements: *id006
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: typhoeus
|
112
|
+
prerelease: false
|
113
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ~>
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
segments:
|
119
|
+
- 0
|
120
|
+
- 2
|
121
|
+
- 1
|
122
|
+
version: 0.2.1
|
123
|
+
type: :runtime
|
124
|
+
version_requirements: *id007
|
95
125
|
description: Easy way to retrieve Google Page Rank, Alexa Rank, and backlink counts
|
96
126
|
email:
|
97
127
|
- blatyo@gmail.com
|
@@ -115,8 +145,6 @@ files:
|
|
115
145
|
- lib/page_rankr/backlink.rb
|
116
146
|
- lib/page_rankr/backlinks.rb
|
117
147
|
- lib/page_rankr/backlinks/alexa.rb
|
118
|
-
- lib/page_rankr/backlinks/alltheweb.rb
|
119
|
-
- lib/page_rankr/backlinks/altavista.rb
|
120
148
|
- lib/page_rankr/backlinks/bing.rb
|
121
149
|
- lib/page_rankr/backlinks/google.rb
|
122
150
|
- lib/page_rankr/backlinks/yahoo.rb
|
@@ -126,10 +154,12 @@ files:
|
|
126
154
|
- lib/page_rankr/indexes/google.rb
|
127
155
|
- lib/page_rankr/rank.rb
|
128
156
|
- lib/page_rankr/ranks.rb
|
129
|
-
- lib/page_rankr/ranks/
|
157
|
+
- lib/page_rankr/ranks/alexa_global.rb
|
158
|
+
- lib/page_rankr/ranks/alexa_us.rb
|
130
159
|
- lib/page_rankr/ranks/compete.rb
|
131
160
|
- lib/page_rankr/ranks/google.rb
|
132
161
|
- lib/page_rankr/ranks/google/checksum.rb
|
162
|
+
- lib/page_rankr/site.rb
|
133
163
|
- lib/page_rankr/tracker.rb
|
134
164
|
- lib/page_rankr/version.rb
|
135
165
|
- spec/page_rankr_spec.rb
|
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'cgi'
|
2
|
-
|
3
|
-
module PageRankr
|
4
|
-
class Backlinks < Tracker
|
5
|
-
class Alltheweb < Backlink
|
6
|
-
def url(site)
|
7
|
-
"http://www.alltheweb.com/search?q=link%3A#{CGI.escape(site)}"
|
8
|
-
end
|
9
|
-
|
10
|
-
def xpath
|
11
|
-
"//span[@class='ofSoMany']/text()"
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'cgi'
|
2
|
-
|
3
|
-
module PageRankr
|
4
|
-
class Backlinks < Tracker
|
5
|
-
class Altavista < Backlink
|
6
|
-
def url(site)
|
7
|
-
"http://www.altavista.com/web/results?q=link%3A#{CGI.escape(site)}"
|
8
|
-
end
|
9
|
-
|
10
|
-
def xpath
|
11
|
-
"//a[@class='lbl']/text()"
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require 'open-uri'
|
2
|
-
require 'cgi'
|
3
|
-
require 'nokogiri'
|
4
|
-
|
5
|
-
module PageRankr
|
6
|
-
class Ranks < Tracker
|
7
|
-
class Alexa < Rank
|
8
|
-
def initialize(site)
|
9
|
-
@rank = {}
|
10
|
-
xpath.each_key do |key|
|
11
|
-
@rank[key] = Nokogiri::HTML(open(url(site))).search(xpath[key]).to_s.to_i
|
12
|
-
end
|
13
|
-
@rank
|
14
|
-
end
|
15
|
-
|
16
|
-
def xpath
|
17
|
-
{ :us => "//reach/@rank",
|
18
|
-
:global => "//popularity/@text" }
|
19
|
-
end
|
20
|
-
|
21
|
-
def url(site)
|
22
|
-
"http://data.alexa.com/data?cli=10&dat=snbamz&url=#{CGI.escape(site)}"
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|