PageRankr 1.7.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +9 -0
- data/Gemfile.lock +7 -1
- data/PageRankr.gemspec +4 -2
- data/README.md +38 -26
- data/lib/page_rankr.rb +10 -9
- data/lib/page_rankr/backlink.rb +8 -2
- data/lib/page_rankr/backlinks.rb +3 -3
- data/lib/page_rankr/backlinks/alexa.rb +8 -5
- data/lib/page_rankr/backlinks/bing.rb +8 -5
- data/lib/page_rankr/backlinks/google.rb +12 -6
- data/lib/page_rankr/backlinks/yahoo.rb +8 -5
- data/lib/page_rankr/index.rb +7 -2
- data/lib/page_rankr/indexes.rb +3 -1
- data/lib/page_rankr/indexes/bing.rb +8 -5
- data/lib/page_rankr/indexes/google.rb +14 -6
- data/lib/page_rankr/rank.rb +15 -1
- data/lib/page_rankr/ranks.rb +5 -2
- data/lib/page_rankr/ranks/alexa_global.rb +19 -0
- data/lib/page_rankr/ranks/alexa_us.rb +19 -0
- data/lib/page_rankr/ranks/compete.rb +10 -10
- data/lib/page_rankr/ranks/google.rb +17 -10
- data/lib/page_rankr/ranks/google/checksum.rb +2 -2
- data/lib/page_rankr/site.rb +21 -0
- data/lib/page_rankr/tracker.rb +29 -4
- data/lib/page_rankr/version.rb +1 -1
- data/spec/page_rankr_spec.rb +24 -34
- metadata +38 -8
- data/lib/page_rankr/backlinks/alltheweb.rb +0 -15
- data/lib/page_rankr/backlinks/altavista.rb +0 -15
- data/lib/page_rankr/ranks/alexa.rb +0 -26
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,13 @@
|
|
1
1
|
# Change Log
|
2
|
+
|
3
|
+
## Version 2.0.0
|
4
|
+
* URL validation
|
5
|
+
* Parallel requests = way faster!
|
6
|
+
* Not tracked returns nil
|
7
|
+
* Alexa US and Global are treated as separate trackers and returned results are a single level hash.
|
8
|
+
* Removed Altavista and AllTheWeb because they now direct to yahoo.
|
9
|
+
* Changed some classes to modules so that it wasn't necessary to specify them when opening the class.
|
10
|
+
|
2
11
|
## Version 1.7.1
|
3
12
|
* Catches exception thrown when doing compete rank lookup with url not in the form "google.com".
|
4
13
|
|
data/Gemfile.lock
CHANGED
@@ -1,9 +1,11 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
PageRankr (1.7.
|
4
|
+
PageRankr (1.7.1)
|
5
5
|
json (>= 1.4.6)
|
6
6
|
nokogiri (>= 1.4.1)
|
7
|
+
public_suffix_service (~> 0.8.1)
|
8
|
+
typhoeus (~> 0.2.1)
|
7
9
|
|
8
10
|
GEM
|
9
11
|
remote: http://rubygems.org/
|
@@ -14,7 +16,9 @@ GEM
|
|
14
16
|
rspec-instafail (~> 0.1.4)
|
15
17
|
ruby-progressbar (~> 0.0.9)
|
16
18
|
json (1.5.1)
|
19
|
+
mime-types (1.16)
|
17
20
|
nokogiri (1.4.4)
|
21
|
+
public_suffix_service (0.8.1)
|
18
22
|
rspec (2.1.0)
|
19
23
|
rspec-core (~> 2.1.0)
|
20
24
|
rspec-expectations (~> 2.1.0)
|
@@ -25,6 +29,8 @@ GEM
|
|
25
29
|
rspec-instafail (0.1.5)
|
26
30
|
rspec-mocks (2.1.0)
|
27
31
|
ruby-progressbar (0.0.9)
|
32
|
+
typhoeus (0.2.1)
|
33
|
+
mime-types
|
28
34
|
|
29
35
|
PLATFORMS
|
30
36
|
ruby
|
data/PageRankr.gemspec
CHANGED
@@ -15,8 +15,10 @@ Gem::Specification.new do |s|
|
|
15
15
|
s.add_development_dependency "bundler", ">= 1.0.0"
|
16
16
|
s.add_development_dependency "fuubar", ">= 0.0.1"
|
17
17
|
|
18
|
-
s.add_runtime_dependency "nokogiri",
|
19
|
-
s.add_runtime_dependency "json",
|
18
|
+
s.add_runtime_dependency "nokogiri", ">= 1.4.1"
|
19
|
+
s.add_runtime_dependency "json", ">= 1.4.6"
|
20
|
+
s.add_runtime_dependency "public_suffix_service", "~> 0.8.1"
|
21
|
+
s.add_runtime_dependency "typhoeus", "~> 0.2.1"
|
20
22
|
|
21
23
|
s.files = `git ls-files`.split("\n")
|
22
24
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/README.md
CHANGED
@@ -17,7 +17,7 @@ Check out a little [web app][1] I wrote up that uses it or look at the [source][
|
|
17
17
|
|
18
18
|
### Backlinks
|
19
19
|
|
20
|
-
Backlinks are the result of doing a search with a query like "link:www.google.com". The number of returned results indicates how many sites point to that url.
|
20
|
+
Backlinks are the result of doing a search with a query like "link:www.google.com". The number of returned results indicates how many sites point to that url. If a site is not tracked then `nil` is returned.
|
21
21
|
|
22
22
|
PageRankr.backlinks('www.google.com', :google, :bing) #=> {:google=>161000, :bing=>208000000}
|
23
23
|
PageRankr.backlinks('www.google.com', :yahoo) #=> {:yahoo=>256300062}
|
@@ -26,20 +26,21 @@ If you don't specify a search engine, then all of them are used.
|
|
26
26
|
|
27
27
|
# this
|
28
28
|
PageRankr.backlinks('www.google.com')
|
29
|
-
#=> {:google=>23000, :bing=>215000000, :yahoo=>250522337, :
|
29
|
+
#=> {:google=>23000, :bing=>215000000, :yahoo=>250522337, :alexa=>727036}
|
30
30
|
|
31
31
|
# is equivalent to
|
32
|
-
PageRankr.backlinks('www.google.com', :google, :bing, :yahoo, :
|
33
|
-
#=> {:google=>23000, :bing=>215000000, :yahoo=>250522337, :
|
32
|
+
PageRankr.backlinks('www.google.com', :google, :bing, :yahoo, :alexa)
|
33
|
+
#=> {:google=>23000, :bing=>215000000, :yahoo=>250522337, :alexa=>727036}
|
34
34
|
|
35
35
|
You can also use the alias `backlink` instead of `backlinks`.
|
36
|
-
Valid search engines are: `:google, :bing, :yahoo, :altavista, :alltheweb, :alexa`. To get this list you can do:
|
37
36
|
|
38
|
-
|
37
|
+
Valid search engines are: `:google, :bing, :yahoo, :alexa` (altavista and alltheweb now redirect to yahoo). To get this list you can do:
|
38
|
+
|
39
|
+
PageRankr.backlink_trackers #=> [:alexa, :bing, :google, :yahoo]
|
39
40
|
|
40
41
|
### Indexes
|
41
42
|
|
42
|
-
Indexes are the result of doing a search with a query like "site:www.google.com". The number of returned results indicates how many pages of a domain are indexed by a particular search engine.
|
43
|
+
Indexes are the result of doing a search with a query like "site:www.google.com". The number of returned results indicates how many pages of a domain are indexed by a particular search engine. If the site is not indexed, `nil` is returned.
|
43
44
|
|
44
45
|
PageRankr.indexes('www.google.com', :google) #=> {:google=>4860000}
|
45
46
|
PageRankr.indexes('www.google.com', :bing) #=> {:bing=>2120000}
|
@@ -55,36 +56,43 @@ If you don't specify a search engine, then all of them are used.
|
|
55
56
|
#=> {:bing=>2120000, :google=>4860000}
|
56
57
|
|
57
58
|
You can also use the alias `index` instead of `indexes`.
|
59
|
+
|
58
60
|
Valid search engines are: `:google, :bing`. To get this list you can do:
|
59
61
|
|
60
|
-
PageRankr.index_trackers #=> [:
|
62
|
+
PageRankr.index_trackers #=> [:bing, :google]
|
61
63
|
|
62
64
|
### Ranks
|
63
65
|
|
64
|
-
|
66
|
+
Ranks are ratings assigned to specify how popular a site is. The most famous example of this is the google page rank.
|
67
|
+
|
68
|
+
PageRankr.ranks('www.google.com', :google) #=> {:google=>10}
|
65
69
|
|
66
|
-
|
70
|
+
If you don't specify a rank provider, then all of them are used.
|
71
|
+
|
72
|
+
PageRankr.ranks('www.google.com', :alexa_us, :alexa_global, :compete, :google)
|
73
|
+
#=> {:alexa_us=>1, :alexa_global=>1, :google=>10, :compete=>1}
|
67
74
|
|
68
75
|
# this also gives the same result
|
69
|
-
PageRankr.ranks('google.com')
|
76
|
+
PageRankr.ranks('www.google.com')
|
77
|
+
#=> {:alexa_us=>1, :alexa_global=>1, :google=>10, :compete=>1}
|
70
78
|
|
71
79
|
You can also use the alias `rank` instead of `ranks`.
|
72
|
-
There are three valid rank trackers supported: `:alexa, :google, :compete`. To get this you can do:
|
73
80
|
|
74
|
-
|
81
|
+
Valid rank trackers are: `:alexa_us, :alexa_global, :compete, :google`. To get this list you can do:
|
82
|
+
|
83
|
+
PageRankr.rank_trackers #=> [:alexa_global, :alexa_us, :compete, :google]
|
75
84
|
|
76
|
-
Alexa ranks are descending where 1 is the most popular.
|
77
|
-
Google page ranks are in the range 0-10 where 10 is the most popular. If a site is unindexed then the rank will be -1.
|
85
|
+
Alexa and Compete ranks are descending where 1 is the most popular. Google page ranks are in the range 0-10 where 10 is the most popular. If a site is unindexed then the rank will be nil.
|
78
86
|
|
79
87
|
## Fix it!
|
80
88
|
|
81
89
|
If you ever find something is broken it should now be much easier to fix it with version >= 1.3.0. For example, if the xpath used to lookup a backlink is broken, just override the method for that class to provide the correct xpath.
|
82
90
|
|
83
91
|
module PageRankr
|
84
|
-
class Backlinks
|
85
|
-
class
|
92
|
+
class Backlinks
|
93
|
+
class Bing
|
86
94
|
def xpath
|
87
|
-
"my
|
95
|
+
"//my/new/awesome/@xpath"
|
88
96
|
end
|
89
97
|
end
|
90
98
|
end
|
@@ -92,13 +100,16 @@ If you ever find something is broken it should now be much easier to fix it with
|
|
92
100
|
|
93
101
|
## Extend it!
|
94
102
|
|
95
|
-
If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr.
|
103
|
+
If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr. PageRankr does this by looking up all the classes namespaced under Backlinks, Indexes, and Ranks.
|
96
104
|
|
97
105
|
module PageRankr
|
98
|
-
class Backlinks
|
99
|
-
class Foo
|
100
|
-
|
101
|
-
|
106
|
+
class Backlinks
|
107
|
+
class Foo
|
108
|
+
include Backlink
|
109
|
+
|
110
|
+
def request
|
111
|
+
@request ||= Typhoeus::Request.new("http://example.com/",
|
112
|
+
:params => {:q => @site.to_s})
|
102
113
|
end
|
103
114
|
|
104
115
|
def xpath
|
@@ -125,15 +136,16 @@ Then, just make sure you require the class and PageRankr and whenever you call P
|
|
125
136
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
126
137
|
* Send me a pull request. Bonus points for topic branches.
|
127
138
|
|
128
|
-
## TODO Version
|
139
|
+
## TODO Version 3
|
129
140
|
* Use APIs where possible
|
130
|
-
* Use [Typhoeus](https://github.com/pauldix/typhoeus) to improve speed when requesting multiple ranks and/or backlinks
|
131
141
|
* Configuration
|
132
142
|
* Optionally use API keys
|
143
|
+
* Maybe allow API key cycling to get around query limits
|
144
|
+
* Google search API is deprecated
|
133
145
|
|
134
146
|
## Contributors
|
135
147
|
* [Druwerd](https://github.com/Druwerd) - Use Google Search API instead of scraping.
|
136
|
-
* [Iteration Labs](https://github.com/iterationlabs) - Compete rank tracker and domain indexes.
|
148
|
+
* [Iteration Labs, LLC](https://github.com/iterationlabs) - Compete rank tracker and domain indexes.
|
137
149
|
|
138
150
|
## Shout Out
|
139
151
|
Gotta give credit where credit's due!
|
data/lib/page_rankr.rb
CHANGED
@@ -2,31 +2,32 @@ require File.join(File.dirname(__FILE__), "page_rankr", "tracker")
|
|
2
2
|
require File.join(File.dirname(__FILE__), "page_rankr", "backlinks")
|
3
3
|
require File.join(File.dirname(__FILE__), "page_rankr", "ranks")
|
4
4
|
require File.join(File.dirname(__FILE__), "page_rankr", "indexes")
|
5
|
+
require File.join(File.dirname(__FILE__), "page_rankr", "site")
|
5
6
|
|
6
7
|
module PageRankr
|
7
8
|
class << self
|
8
9
|
def backlinks(site, *search_engines)
|
9
|
-
Backlinks.new.lookup
|
10
|
+
Backlinks.new.lookup(Site.new(site), *search_engines)
|
10
11
|
end
|
11
12
|
alias_method :backlink, :backlinks
|
13
|
+
|
14
|
+
def backlink_trackers
|
15
|
+
Backlinks.new.backlink_trackers
|
16
|
+
end
|
12
17
|
|
13
18
|
def ranks(site, *rank_trackers)
|
14
|
-
Ranks.new.lookup
|
19
|
+
Ranks.new.lookup(Site.new(site), *rank_trackers)
|
15
20
|
end
|
16
21
|
alias_method :rank, :ranks
|
17
22
|
|
18
|
-
def indexes(site, *index_trackers)
|
19
|
-
Indexes.new.lookup site, *index_trackers
|
20
|
-
end
|
21
|
-
alias_method :index, :indexes
|
22
|
-
|
23
23
|
def rank_trackers
|
24
24
|
Ranks.new.rank_trackers
|
25
25
|
end
|
26
26
|
|
27
|
-
def
|
28
|
-
|
27
|
+
def indexes(site, *index_trackers)
|
28
|
+
Indexes.new.lookup(Site.new(site), *index_trackers)
|
29
29
|
end
|
30
|
+
alias_method :index, :indexes
|
30
31
|
|
31
32
|
def index_trackers
|
32
33
|
Indexes.new.index_trackers
|
data/lib/page_rankr/backlink.rb
CHANGED
@@ -3,12 +3,18 @@ require 'nokogiri'
|
|
3
3
|
require 'open-uri'
|
4
4
|
|
5
5
|
module PageRankr
|
6
|
-
|
6
|
+
module Backlink
|
7
7
|
attr_reader :backlinks
|
8
8
|
alias_method :tracked, :backlinks
|
9
9
|
|
10
10
|
def initialize(site)
|
11
|
-
@
|
11
|
+
@site = site
|
12
|
+
|
13
|
+
request.on_complete do |response|
|
14
|
+
html = Nokogiri::HTML(response.body)
|
15
|
+
@backlinks = clean(html.at(xpath).to_s)
|
16
|
+
@backlinks = nil if @backlinks.zero?
|
17
|
+
end
|
12
18
|
end
|
13
19
|
|
14
20
|
def clean(backlink_count)
|
data/lib/page_rankr/backlinks.rb
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), "backlink")
|
2
2
|
require File.join(File.dirname(__FILE__), "backlinks", "alexa")
|
3
|
-
require File.join(File.dirname(__FILE__), "backlinks", "alltheweb")
|
4
|
-
require File.join(File.dirname(__FILE__), "backlinks", "altavista")
|
5
3
|
require File.join(File.dirname(__FILE__), "backlinks", "bing")
|
6
4
|
require File.join(File.dirname(__FILE__), "backlinks", "google")
|
7
5
|
require File.join(File.dirname(__FILE__), "backlinks", "yahoo")
|
8
6
|
|
9
7
|
module PageRankr
|
10
|
-
class Backlinks
|
8
|
+
class Backlinks
|
9
|
+
include Tracker
|
10
|
+
|
11
11
|
alias_method :backlink_trackers, :site_trackers
|
12
12
|
end
|
13
13
|
end
|
@@ -1,10 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module PageRankr
|
4
|
-
class Backlinks
|
5
|
-
class Alexa
|
6
|
-
|
7
|
-
|
4
|
+
class Backlinks
|
5
|
+
class Alexa
|
6
|
+
include Backlink
|
7
|
+
|
8
|
+
def request
|
9
|
+
@request ||= Typhoeus::Request.new("http://data.alexa.com/data",
|
10
|
+
:params => {:cli => 10, :dat => "snbamz", :url => @site.to_s}, :method => :get)
|
8
11
|
end
|
9
12
|
|
10
13
|
def xpath
|
@@ -1,10 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module PageRankr
|
4
|
-
class Backlinks
|
5
|
-
class Bing
|
6
|
-
|
7
|
-
|
4
|
+
class Backlinks
|
5
|
+
class Bing
|
6
|
+
include Backlink
|
7
|
+
|
8
|
+
def request
|
9
|
+
@request ||= Typhoeus::Request.new("http://www.bing.com/search",
|
10
|
+
:params => {:q => "link:#{@site.to_s}"}, :method => :get)
|
8
11
|
end
|
9
12
|
|
10
13
|
def xpath
|
@@ -1,18 +1,24 @@
|
|
1
|
-
require 'cgi'
|
2
1
|
require 'json'
|
3
2
|
|
4
3
|
module PageRankr
|
5
|
-
class Backlinks
|
6
|
-
class Google
|
4
|
+
class Backlinks
|
5
|
+
class Google
|
6
|
+
include Backlink
|
7
7
|
|
8
8
|
# overloaded to use Google's AJAX search API
|
9
9
|
# http://code.google.com/apis/ajaxsearch/documentation/
|
10
10
|
def initialize(site)
|
11
|
-
@
|
11
|
+
@site = site
|
12
|
+
request.on_complete do |response|
|
13
|
+
json = JSON.parse(response.body)
|
14
|
+
@backlinks = clean(json["responseData"]["cursor"]["estimatedResultCount"].to_s)
|
15
|
+
@backlinks = nil if @backlinks.zero?
|
16
|
+
end
|
12
17
|
end
|
13
18
|
|
14
|
-
def
|
15
|
-
"http://ajax.googleapis.com/ajax/services/search/web
|
19
|
+
def request
|
20
|
+
@request ||= Typhoeus::Request.new("http://ajax.googleapis.com/ajax/services/search/web",
|
21
|
+
:params => {:v => "1.0", :rsz => 1, :q => "link:#{@site.to_s}"}, :method => :get)
|
16
22
|
end
|
17
23
|
end
|
18
24
|
end
|
@@ -1,10 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module PageRankr
|
4
|
-
class Backlinks
|
5
|
-
class Yahoo
|
6
|
-
|
7
|
-
|
4
|
+
class Backlinks
|
5
|
+
class Yahoo
|
6
|
+
include Backlink
|
7
|
+
|
8
|
+
def request
|
9
|
+
@request ||= Typhoeus::Request.new("http://siteexplorer.search.yahoo.com/search",
|
10
|
+
:params => {:p => "#{@site.to_s}"}, :method => :get)
|
8
11
|
end
|
9
12
|
|
10
13
|
def xpath
|
data/lib/page_rankr/index.rb
CHANGED
@@ -2,12 +2,17 @@ require 'nokogiri'
|
|
2
2
|
require 'open-uri'
|
3
3
|
|
4
4
|
module PageRankr
|
5
|
-
|
5
|
+
module Index
|
6
6
|
attr_reader :indexes
|
7
7
|
alias_method :tracked, :indexes
|
8
8
|
|
9
9
|
def initialize(site)
|
10
|
-
@
|
10
|
+
@site = site
|
11
|
+
request.on_complete do |response|
|
12
|
+
html = Nokogiri::HTML(response.body)
|
13
|
+
@indexes = clean(html.at(xpath).to_s)
|
14
|
+
@indexes = nil if @indexes.zero?
|
15
|
+
end
|
11
16
|
end
|
12
17
|
|
13
18
|
def clean(backlink_count)
|
data/lib/page_rankr/indexes.rb
CHANGED
@@ -3,7 +3,9 @@ require File.join(File.dirname(__FILE__), "indexes", "bing")
|
|
3
3
|
require File.join(File.dirname(__FILE__), "indexes", "google")
|
4
4
|
|
5
5
|
module PageRankr
|
6
|
-
class Indexes
|
6
|
+
class Indexes
|
7
|
+
include Tracker
|
8
|
+
|
7
9
|
alias_method :index_trackers, :site_trackers
|
8
10
|
end
|
9
11
|
end
|
@@ -1,10 +1,13 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
|
3
3
|
module PageRankr
|
4
|
-
class Indexes
|
5
|
-
class Bing
|
6
|
-
|
7
|
-
|
4
|
+
class Indexes
|
5
|
+
class Bing
|
6
|
+
include Index
|
7
|
+
|
8
|
+
def request
|
9
|
+
@request ||= Typhoeus::Request.new("http://www.bing.com/search",
|
10
|
+
:params => {:q => "site:#{@site.to_s}"})
|
8
11
|
end
|
9
12
|
|
10
13
|
def xpath
|
@@ -1,18 +1,26 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
require 'json'
|
3
3
|
|
4
4
|
module PageRankr
|
5
|
-
class Indexes
|
6
|
-
class Google
|
5
|
+
class Indexes
|
6
|
+
class Google
|
7
|
+
include Index
|
7
8
|
|
8
9
|
# overloaded to use Google's AJAX search API
|
9
10
|
# http://code.google.com/apis/ajaxsearch/documentation/
|
10
11
|
def initialize(site)
|
11
|
-
@
|
12
|
+
@site = site
|
13
|
+
|
14
|
+
request.on_complete do |response|
|
15
|
+
json = JSON.parse(response.body)
|
16
|
+
@indexes = clean(json["responseData"]["cursor"]["estimatedResultCount"].to_s)
|
17
|
+
@indexes = nil if @indexes.zero?
|
18
|
+
end
|
12
19
|
end
|
13
20
|
|
14
|
-
def
|
15
|
-
"http://ajax.googleapis.com/ajax/services/search/web
|
21
|
+
def request
|
22
|
+
@request ||= Typhoeus::Request.new("http://ajax.googleapis.com/ajax/services/search/web",
|
23
|
+
:params => {:v => "1.0", :rsz => 1, :q => "site:#{@site.to_s}"}, :method => :get)
|
16
24
|
end
|
17
25
|
end
|
18
26
|
end
|
data/lib/page_rankr/rank.rb
CHANGED
@@ -1,6 +1,20 @@
|
|
1
1
|
module PageRankr
|
2
|
-
|
2
|
+
module Rank
|
3
3
|
attr_reader :rank
|
4
4
|
alias_method :tracked, :rank
|
5
|
+
|
6
|
+
def initialize(site)
|
7
|
+
@site = site
|
8
|
+
|
9
|
+
request.on_complete do |response|
|
10
|
+
html = Nokogiri::HTML(response.body)
|
11
|
+
@rank = clean(html.search(xpath))
|
12
|
+
@rank = nil if @rank.zero?
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
def clean(rank)
|
17
|
+
rank.to_s.to_i
|
18
|
+
end
|
5
19
|
end
|
6
20
|
end
|
data/lib/page_rankr/ranks.rb
CHANGED
@@ -1,10 +1,13 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), "rank")
|
2
|
-
require File.join(File.dirname(__FILE__), "ranks", "
|
2
|
+
require File.join(File.dirname(__FILE__), "ranks", "alexa_us")
|
3
|
+
require File.join(File.dirname(__FILE__), "ranks", "alexa_global")
|
3
4
|
require File.join(File.dirname(__FILE__), "ranks", "google")
|
4
5
|
require File.join(File.dirname(__FILE__), "ranks", "compete")
|
5
6
|
|
6
7
|
module PageRankr
|
7
|
-
class Ranks
|
8
|
+
class Ranks
|
9
|
+
include Tracker
|
10
|
+
|
8
11
|
alias_method :rank_trackers, :site_trackers
|
9
12
|
end
|
10
13
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module PageRankr
|
5
|
+
class Ranks
|
6
|
+
class AlexaGlobal
|
7
|
+
include Rank
|
8
|
+
|
9
|
+
def xpath
|
10
|
+
"//popularity/@text"
|
11
|
+
end
|
12
|
+
|
13
|
+
def request
|
14
|
+
@request ||= Typhoeus::Request.new("http://data.alexa.com/data",
|
15
|
+
:params => {:cli => 10, :dat => "snbamz", :url => @site.to_s})
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
module PageRankr
|
5
|
+
class Ranks
|
6
|
+
class AlexaUs
|
7
|
+
include Rank
|
8
|
+
|
9
|
+
def xpath
|
10
|
+
"//reach/@rank"
|
11
|
+
end
|
12
|
+
|
13
|
+
def request
|
14
|
+
@request ||= Typhoeus::Request.new("http://data.alexa.com/data",
|
15
|
+
:params => {:cli => 10, :dat => "snbamz", :url => @site.to_s})
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -1,22 +1,22 @@
|
|
1
|
-
require '
|
1
|
+
require 'typhoeus'
|
2
2
|
require 'cgi'
|
3
3
|
require 'nokogiri'
|
4
4
|
|
5
5
|
module PageRankr
|
6
|
-
class Ranks
|
7
|
-
class Compete
|
8
|
-
|
9
|
-
@rank = Nokogiri::HTML(open(url(site))).search(xpath).to_s.gsub(',', '').to_i
|
10
|
-
rescue
|
11
|
-
@rank = 0 #compete only accepts urls without http:// and www, will be fixed in 2.0
|
12
|
-
end
|
6
|
+
class Ranks
|
7
|
+
class Compete
|
8
|
+
include Rank
|
13
9
|
|
14
10
|
def xpath
|
15
11
|
"//div[@id='rank']/div[@class='number value']/text()"
|
16
12
|
end
|
17
13
|
|
18
|
-
def
|
19
|
-
"http://siteanalytics.compete.com/#{CGI.escape(site)}/"
|
14
|
+
def request
|
15
|
+
@request ||= Typhoeus::Request.new("http://siteanalytics.compete.com/#{CGI.escape(@site.domain)}/")
|
16
|
+
end
|
17
|
+
|
18
|
+
def clean(rank)
|
19
|
+
rank.to_s.gsub(',', '').to_i
|
20
20
|
end
|
21
21
|
end
|
22
22
|
end
|
@@ -1,15 +1,21 @@
|
|
1
|
-
require
|
1
|
+
require 'typhoeus'
|
2
2
|
require File.join(File.dirname(__FILE__), "google", "checksum")
|
3
3
|
|
4
4
|
module PageRankr
|
5
|
-
class Ranks
|
6
|
-
class Google
|
5
|
+
class Ranks
|
6
|
+
class Google
|
7
|
+
include Rank
|
8
|
+
|
7
9
|
def initialize(site)
|
8
|
-
|
9
|
-
@
|
10
|
-
|
11
|
-
|
12
|
-
|
10
|
+
@site = site
|
11
|
+
@checksum = Checksum.generate(@site.to_s)
|
12
|
+
|
13
|
+
request.on_complete do |response|
|
14
|
+
@rank = if response.body =~ regex
|
15
|
+
clean($1)
|
16
|
+
else
|
17
|
+
nil
|
18
|
+
end
|
13
19
|
end
|
14
20
|
end
|
15
21
|
|
@@ -17,8 +23,9 @@ module PageRankr
|
|
17
23
|
/Rank_\d+:\d+:(\d+)/
|
18
24
|
end
|
19
25
|
|
20
|
-
def
|
21
|
-
"http://toolbarqueries.google.com/search
|
26
|
+
def request
|
27
|
+
@request ||= Typhoeus::Request.new("http://toolbarqueries.google.com/search",
|
28
|
+
:params => {:client => "navclient-auto", :ch => @checksum, :features => "Rank", :q => "info:#{@site.to_s}"})
|
22
29
|
end
|
23
30
|
end
|
24
31
|
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'public_suffix_service'
|
2
|
+
require 'delegate'
|
3
|
+
|
4
|
+
module PageRankr
|
5
|
+
class DomainInvalid < StandardError; end
|
6
|
+
|
7
|
+
class Site < DelegateClass(PublicSuffixService::Domain)
|
8
|
+
def initialize(site)
|
9
|
+
super(PublicSuffixService.parse(clean(site)))
|
10
|
+
valid? or raise DomainInvalid, "The domain provided is invalid."
|
11
|
+
end
|
12
|
+
|
13
|
+
private
|
14
|
+
|
15
|
+
def clean(site)
|
16
|
+
site = site.split("://").last # remove protocol
|
17
|
+
site = site.split("/").first # remove path
|
18
|
+
site.split("?").first # remove params
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/page_rankr/tracker.rb
CHANGED
@@ -1,23 +1,48 @@
|
|
1
|
+
require 'typhoeus'
|
2
|
+
|
1
3
|
module PageRankr
|
2
|
-
|
4
|
+
module Tracker
|
3
5
|
attr_accessor :site_trackers
|
4
6
|
|
5
7
|
def initialize
|
6
|
-
@site_trackers = self.class.constants.collect{|tracker| tracker
|
8
|
+
@site_trackers = self.class.constants.collect{|tracker| symbol_for(tracker)}
|
7
9
|
end
|
8
10
|
|
9
11
|
def lookup(site, *trackers)
|
10
12
|
trackers = site_trackers if trackers.empty?
|
11
13
|
|
12
14
|
tracked = {}
|
15
|
+
hydra = Typhoeus::Hydra.new
|
13
16
|
trackers.each do |tracker|
|
14
|
-
name, klass = tracker
|
17
|
+
name, klass = constant_name(tracker), self.class
|
15
18
|
|
16
19
|
next unless klass.const_defined? name
|
17
20
|
|
18
|
-
tracked[tracker] = klass.const_get(name).new(site)
|
21
|
+
tracked[tracker] = klass.const_get(name).new(site)
|
22
|
+
hydra.queue tracked[tracker].request
|
23
|
+
end
|
24
|
+
hydra.run
|
25
|
+
|
26
|
+
tracked.keys.each do |tracker|
|
27
|
+
tracked[tracker] = tracked[tracker].tracked
|
19
28
|
end
|
29
|
+
|
20
30
|
tracked
|
21
31
|
end
|
32
|
+
|
33
|
+
private
|
34
|
+
|
35
|
+
def symbol_for(klass)
|
36
|
+
word = klass.to_s.dup
|
37
|
+
word.gsub!(/([A-Z]+)([A-Z][a-z])/){|match| "#{$1}_#{$2}" }
|
38
|
+
word.gsub!(/([a-z\d])([A-Z])/){|match| "#{$1}_#{$2}" }
|
39
|
+
word.tr!("-", "_")
|
40
|
+
word.downcase!
|
41
|
+
word.to_sym
|
42
|
+
end
|
43
|
+
|
44
|
+
def constant_name(sym)
|
45
|
+
sym.to_s.split('_').collect{|str| str.capitalize}.join
|
46
|
+
end
|
22
47
|
end
|
23
48
|
end
|
data/lib/page_rankr/version.rb
CHANGED
data/spec/page_rankr_spec.rb
CHANGED
@@ -14,47 +14,39 @@ describe PageRankr do
|
|
14
14
|
describe "#rank_trackers" do
|
15
15
|
subject{ PageRankr.rank_trackers }
|
16
16
|
|
17
|
-
it{ should include(:
|
17
|
+
it{ should include(:alexa_us) }
|
18
|
+
it{ should include(:alexa_global) }
|
19
|
+
it{ should include(:compete) }
|
18
20
|
it{ should include(:google) }
|
19
21
|
end
|
20
22
|
|
21
23
|
describe "#ranks" do
|
22
24
|
describe "success" do
|
23
|
-
subject{ PageRankr.ranks("google.com") }
|
25
|
+
subject{ PageRankr.ranks("www.google.com") }
|
24
26
|
|
25
|
-
it{ should have_key(:
|
27
|
+
it{ should have_key(:alexa_us) }
|
28
|
+
it{ should have_key(:alexa_global) }
|
26
29
|
it{ should have_key(:google) }
|
30
|
+
it{ should have_key(:compete) }
|
27
31
|
|
28
|
-
|
29
|
-
it{
|
30
|
-
it{
|
31
|
-
|
32
|
-
let(:alexa_us_rank) { alexa_rank[:us] }
|
33
|
-
it{ alexa_us_rank.should >= 1 }
|
34
|
-
let(:alexa_global_rank) { alexa_rank[:global] }
|
35
|
-
it{ alexa_global_rank.should >= 1 }
|
36
|
-
|
37
|
-
let(:google_rank) { subject[:google] }
|
38
|
-
it{ google_rank.should be_in(0..10)}
|
32
|
+
it{ subject[:alexa_us].should >= 1 }
|
33
|
+
it{ subject[:alexa_global].should >= 1 }
|
34
|
+
it{ subject[:compete].should >= 1 }
|
35
|
+
it{ subject[:google].should be_in(0..10) }
|
39
36
|
end
|
40
37
|
|
41
38
|
describe "failure" do
|
42
39
|
subject{ PageRankr.ranks("please-dont-register-a-site-that-breaks-this-test.com") }
|
43
40
|
|
44
|
-
it{ should have_key(:
|
41
|
+
it{ should have_key(:alexa_us) }
|
42
|
+
it{ should have_key(:alexa_global) }
|
45
43
|
it{ should have_key(:google) }
|
46
|
-
|
47
|
-
let(:alexa_rank) { subject[:alexa] }
|
48
|
-
it{ alexa_rank.should have_key(:us) }
|
49
|
-
it{ alexa_rank.should have_key(:global) }
|
50
|
-
|
51
|
-
let(:alexa_us_rank) { alexa_rank[:us] }
|
52
|
-
it{ alexa_us_rank.should == 0 }
|
53
|
-
let(:alexa_global_rank) { alexa_rank[:global] }
|
54
|
-
it{ alexa_global_rank.should == 0 }
|
44
|
+
it{ should have_key(:compete) }
|
55
45
|
|
56
|
-
|
57
|
-
it{
|
46
|
+
it{ subject[:alexa_us].should be_nil }
|
47
|
+
it{ subject[:alexa_global].should be_nil }
|
48
|
+
it{ subject[:compete].should be_nil }
|
49
|
+
it{ subject[:google].should be_nil }
|
58
50
|
end
|
59
51
|
end
|
60
52
|
|
@@ -62,8 +54,6 @@ describe PageRankr do
|
|
62
54
|
subject{ PageRankr.backlink_trackers }
|
63
55
|
|
64
56
|
it{ should include(:alexa) }
|
65
|
-
it{ should include(:alltheweb) }
|
66
|
-
it{ should include(:altavista) }
|
67
57
|
it{ should include(:bing) }
|
68
58
|
it{ should include(:google) }
|
69
59
|
it{ should include(:yahoo) }
|
@@ -71,11 +61,11 @@ describe PageRankr do
|
|
71
61
|
|
72
62
|
describe "#backlinks" do
|
73
63
|
describe "success" do
|
74
|
-
subject{ PageRankr.backlinks("google.com") }
|
64
|
+
subject{ PageRankr.backlinks("www.google.com") }
|
75
65
|
|
76
66
|
PageRankr.backlink_trackers.each do |tracker|
|
77
67
|
it{ should have_key(tracker) }
|
78
|
-
it{ subject[tracker].should
|
68
|
+
it{ subject[tracker].should > 0 }
|
79
69
|
end
|
80
70
|
end
|
81
71
|
|
@@ -84,7 +74,7 @@ describe PageRankr do
|
|
84
74
|
|
85
75
|
PageRankr.backlink_trackers.each do |tracker|
|
86
76
|
it{ should have_key(tracker) }
|
87
|
-
it{ subject[tracker].should
|
77
|
+
it{ subject[tracker].should be_nil }
|
88
78
|
end
|
89
79
|
end
|
90
80
|
end
|
@@ -98,11 +88,11 @@ describe PageRankr do
|
|
98
88
|
|
99
89
|
describe "#indexes" do
|
100
90
|
describe "success" do
|
101
|
-
subject{ PageRankr.indexes("google.com") }
|
91
|
+
subject{ PageRankr.indexes("www.google.com") }
|
102
92
|
|
103
93
|
PageRankr.index_trackers.each do |tracker|
|
104
94
|
it{ should have_key(tracker) }
|
105
|
-
it{ subject[tracker].should
|
95
|
+
it{ subject[tracker].should > 0 }
|
106
96
|
end
|
107
97
|
end
|
108
98
|
|
@@ -111,7 +101,7 @@ describe PageRankr do
|
|
111
101
|
|
112
102
|
PageRankr.index_trackers.each do |tracker|
|
113
103
|
it{ should have_key(tracker) }
|
114
|
-
it{ subject[tracker].should
|
104
|
+
it{ subject[tracker].should be_nil }
|
115
105
|
end
|
116
106
|
end
|
117
107
|
end
|
metadata
CHANGED
@@ -3,10 +3,10 @@ name: PageRankr
|
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
|
-
-
|
7
|
-
-
|
8
|
-
-
|
9
|
-
version:
|
6
|
+
- 2
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
version: 2.0.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Allen Madsen
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-02 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -92,6 +92,36 @@ dependencies:
|
|
92
92
|
version: 1.4.6
|
93
93
|
type: :runtime
|
94
94
|
version_requirements: *id005
|
95
|
+
- !ruby/object:Gem::Dependency
|
96
|
+
name: public_suffix_service
|
97
|
+
prerelease: false
|
98
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
99
|
+
none: false
|
100
|
+
requirements:
|
101
|
+
- - ~>
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
segments:
|
104
|
+
- 0
|
105
|
+
- 8
|
106
|
+
- 1
|
107
|
+
version: 0.8.1
|
108
|
+
type: :runtime
|
109
|
+
version_requirements: *id006
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: typhoeus
|
112
|
+
prerelease: false
|
113
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
114
|
+
none: false
|
115
|
+
requirements:
|
116
|
+
- - ~>
|
117
|
+
- !ruby/object:Gem::Version
|
118
|
+
segments:
|
119
|
+
- 0
|
120
|
+
- 2
|
121
|
+
- 1
|
122
|
+
version: 0.2.1
|
123
|
+
type: :runtime
|
124
|
+
version_requirements: *id007
|
95
125
|
description: Easy way to retrieve Google Page Rank, Alexa Rank, and backlink counts
|
96
126
|
email:
|
97
127
|
- blatyo@gmail.com
|
@@ -115,8 +145,6 @@ files:
|
|
115
145
|
- lib/page_rankr/backlink.rb
|
116
146
|
- lib/page_rankr/backlinks.rb
|
117
147
|
- lib/page_rankr/backlinks/alexa.rb
|
118
|
-
- lib/page_rankr/backlinks/alltheweb.rb
|
119
|
-
- lib/page_rankr/backlinks/altavista.rb
|
120
148
|
- lib/page_rankr/backlinks/bing.rb
|
121
149
|
- lib/page_rankr/backlinks/google.rb
|
122
150
|
- lib/page_rankr/backlinks/yahoo.rb
|
@@ -126,10 +154,12 @@ files:
|
|
126
154
|
- lib/page_rankr/indexes/google.rb
|
127
155
|
- lib/page_rankr/rank.rb
|
128
156
|
- lib/page_rankr/ranks.rb
|
129
|
-
- lib/page_rankr/ranks/
|
157
|
+
- lib/page_rankr/ranks/alexa_global.rb
|
158
|
+
- lib/page_rankr/ranks/alexa_us.rb
|
130
159
|
- lib/page_rankr/ranks/compete.rb
|
131
160
|
- lib/page_rankr/ranks/google.rb
|
132
161
|
- lib/page_rankr/ranks/google/checksum.rb
|
162
|
+
- lib/page_rankr/site.rb
|
133
163
|
- lib/page_rankr/tracker.rb
|
134
164
|
- lib/page_rankr/version.rb
|
135
165
|
- spec/page_rankr_spec.rb
|
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'cgi'
|
2
|
-
|
3
|
-
module PageRankr
|
4
|
-
class Backlinks < Tracker
|
5
|
-
class Alltheweb < Backlink
|
6
|
-
def url(site)
|
7
|
-
"http://www.alltheweb.com/search?q=link%3A#{CGI.escape(site)}"
|
8
|
-
end
|
9
|
-
|
10
|
-
def xpath
|
11
|
-
"//span[@class='ofSoMany']/text()"
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
require 'cgi'
|
2
|
-
|
3
|
-
module PageRankr
|
4
|
-
class Backlinks < Tracker
|
5
|
-
class Altavista < Backlink
|
6
|
-
def url(site)
|
7
|
-
"http://www.altavista.com/web/results?q=link%3A#{CGI.escape(site)}"
|
8
|
-
end
|
9
|
-
|
10
|
-
def xpath
|
11
|
-
"//a[@class='lbl']/text()"
|
12
|
-
end
|
13
|
-
end
|
14
|
-
end
|
15
|
-
end
|
@@ -1,26 +0,0 @@
|
|
1
|
-
require 'open-uri'
|
2
|
-
require 'cgi'
|
3
|
-
require 'nokogiri'
|
4
|
-
|
5
|
-
module PageRankr
|
6
|
-
class Ranks < Tracker
|
7
|
-
class Alexa < Rank
|
8
|
-
def initialize(site)
|
9
|
-
@rank = {}
|
10
|
-
xpath.each_key do |key|
|
11
|
-
@rank[key] = Nokogiri::HTML(open(url(site))).search(xpath[key]).to_s.to_i
|
12
|
-
end
|
13
|
-
@rank
|
14
|
-
end
|
15
|
-
|
16
|
-
def xpath
|
17
|
-
{ :us => "//reach/@rank",
|
18
|
-
:global => "//popularity/@text" }
|
19
|
-
end
|
20
|
-
|
21
|
-
def url(site)
|
22
|
-
"http://data.alexa.com/data?cli=10&dat=snbamz&url=#{CGI.escape(site)}"
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|