PageRankr 2.0.4 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +10 -0
- data/Gemfile +3 -1
- data/PageRankr.gemspec +1 -0
- data/README.md +48 -12
- data/lib/page_rankr.rb +6 -5
- data/lib/page_rankr/backlink.rb +4 -19
- data/lib/page_rankr/backlinks.rb +6 -6
- data/lib/page_rankr/backlinks/alexa.rb +7 -4
- data/lib/page_rankr/backlinks/bing.rb +9 -6
- data/lib/page_rankr/backlinks/google.rb +11 -15
- data/lib/page_rankr/backlinks/yahoo.rb +6 -4
- data/lib/page_rankr/index.rb +4 -17
- data/lib/page_rankr/indexes.rb +4 -4
- data/lib/page_rankr/indexes/bing.rb +7 -4
- data/lib/page_rankr/indexes/google.rb +10 -16
- data/lib/page_rankr/rank.rb +5 -16
- data/lib/page_rankr/ranks.rb +6 -6
- data/lib/page_rankr/ranks/alexa_global.rb +9 -7
- data/lib/page_rankr/ranks/alexa_us.rb +9 -7
- data/lib/page_rankr/ranks/compete.rb +5 -10
- data/lib/page_rankr/ranks/google.rb +12 -16
- data/lib/page_rankr/site.rb +6 -2
- data/lib/page_rankr/tracker.rb +54 -33
- data/lib/page_rankr/trackers.rb +48 -0
- data/lib/page_rankr/version.rb +1 -1
- data/spec/fixtures/vcr_cassettes/alexa_ranks_edge_case_1.yml +6 -6
- data/spec/fixtures/vcr_cassettes/failure_backlinks.yml +87 -77
- data/spec/fixtures/vcr_cassettes/failure_indexes.yml +58 -48
- data/spec/fixtures/vcr_cassettes/failure_ranks.yml +20 -20
- data/spec/fixtures/vcr_cassettes/success_backlinks.yml +70 -62
- data/spec/fixtures/vcr_cassettes/success_indexes.yml +73 -67
- data/spec/fixtures/vcr_cassettes/success_ranks.yml +94 -73
- data/spec/page_rankr_spec.rb +8 -2
- data/spec/spec_helper.rb +1 -1
- metadata +32 -20
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,15 @@
|
|
1
1
|
# Change Log
|
2
2
|
|
3
|
+
## Version 3.0.0
|
4
|
+
* Refactor
|
5
|
+
* Move the logic for the typhoeus request out of the individual trackers and into Tracker to hide the complexity.
|
6
|
+
* Create Tracker to encapsulate the common logic in Backlink, Index, and Rank.
|
7
|
+
* Have each file declare its dependencies, so that it is simple to use a la carte.
|
8
|
+
* Fix google backlink and index. The search API is deprecated and the new API is annoying to set up, so web scraping has been brought back.
|
9
|
+
* Make requires consistent.
|
10
|
+
* API Breakages
|
11
|
+
* Tracker was renamed to Trackers and a new Tracker file was created that represents a different object.
|
12
|
+
|
3
13
|
## Version 2.0.4
|
4
14
|
* Fix google page rank url
|
5
15
|
|
data/Gemfile
CHANGED
data/PageRankr.gemspec
CHANGED
@@ -21,6 +21,7 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_runtime_dependency "json", ">= 1.4.6"
|
22
22
|
s.add_runtime_dependency "public_suffix_service", "~> 0.9.0"
|
23
23
|
s.add_runtime_dependency "typhoeus", "~> 0.2.1"
|
24
|
+
s.add_runtime_dependency "jsonpath", "~> 0.4.2"
|
24
25
|
|
25
26
|
s.files = `git ls-files`.split("\n")
|
26
27
|
s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
data/README.md
CHANGED
@@ -9,11 +9,15 @@ Check out a little [web app][1] I wrote up that uses it or look at the [source][
|
|
9
9
|
|
10
10
|
## Get it!
|
11
11
|
|
12
|
+
``` bash
|
12
13
|
gem install PageRankr
|
14
|
+
```
|
13
15
|
|
14
16
|
## Use it!
|
15
17
|
|
18
|
+
``` ruby
|
16
19
|
require 'page_rankr'
|
20
|
+
```
|
17
21
|
|
18
22
|
### Backlinks
|
19
23
|
|
@@ -102,6 +106,30 @@ Valid rank trackers are: `:alexa_us, :alexa_global, :compete, :google`. To get t
|
|
102
106
|
|
103
107
|
Alexa and Compete ranks are descending where 1 is the most popular. Google page ranks are in the range 0-10 where 10 is the most popular. If a site is unindexed then the rank will be nil.
|
104
108
|
|
109
|
+
## Use it a la carte!
|
110
|
+
|
111
|
+
From versions >= 3, everything should be usable in a much more a la carte manner. If all you care about is google page rank (which I speculate is common) you can get that all by itself:
|
112
|
+
|
113
|
+
``` ruby
|
114
|
+
require 'page_rankr/ranks/google'
|
115
|
+
|
116
|
+
tracker = PageRankr::Ranks::Google.new("myawesomesite.com")
|
117
|
+
tracker.run #=> 2
|
118
|
+
```
|
119
|
+
|
120
|
+
Also, once a tracker has run, three values will be accessible from it:
|
121
|
+
|
122
|
+
``` ruby
|
123
|
+
# The value extracted. Tracked is aliased to rank for PageRankr::Ranks, backlink for PageRankr::Backlinks, and index for PageRankr::Indexes.
|
124
|
+
tracker.tracked #=> 2
|
125
|
+
|
126
|
+
# The value extracted with the jsonpath, xpath, or regex before being cleaned.
|
127
|
+
tracker.raw #=> "2"
|
128
|
+
|
129
|
+
# The body of the response
|
130
|
+
tracker.body #=> "<html><head>..."
|
131
|
+
```
|
132
|
+
|
105
133
|
## Fix it!
|
106
134
|
|
107
135
|
If you ever find something is broken it should now be much easier to fix it with version >= 1.3.0. For example, if the xpath used to lookup a backlink is broken, just override the method for that class to provide the correct xpath.
|
@@ -123,27 +151,39 @@ If you ever find something is broken it should now be much easier to fix it with
|
|
123
151
|
If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr. PageRankr does this by looking up all the classes namespaced under Backlinks, Indexes, and Ranks.
|
124
152
|
|
125
153
|
``` ruby
|
154
|
+
require 'page_rankr/backlink'
|
155
|
+
|
126
156
|
module PageRankr
|
127
157
|
class Backlinks
|
128
158
|
class Foo
|
129
159
|
include Backlink
|
130
160
|
|
131
|
-
|
132
|
-
|
133
|
-
|
161
|
+
# This method is required
|
162
|
+
def url
|
163
|
+
"http://example.com/"
|
164
|
+
end
|
165
|
+
|
166
|
+
# This method specifies the parameters for the url. It is optional, but likely required for the class to be useful.
|
167
|
+
def params
|
168
|
+
{:q => @site.to_s}
|
134
169
|
end
|
135
170
|
|
171
|
+
# You can use a method named either xpath, jsonpath, or regex with the appropriate query type
|
136
172
|
def xpath
|
137
173
|
"//backlinks/text()"
|
138
174
|
end
|
139
175
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
176
|
+
# Optionally, you could override the clean method if the current implementation isn't sufficient
|
177
|
+
# def clean(backlink_count)
|
178
|
+
# #do some of my own cleaning
|
179
|
+
# super(backlink_count) # strips non-digits and converts it to an integer or nil
|
180
|
+
# end
|
144
181
|
end
|
145
182
|
end
|
146
183
|
end
|
184
|
+
|
185
|
+
PageRankr::Backlinks::Foo.new("myawesomesite.com").run #=> 3
|
186
|
+
PageRankr.backlinks("myawesomesite.com", :foo)[:foo] #=> 3
|
147
187
|
```
|
148
188
|
|
149
189
|
Then, just make sure you require the class and PageRankr and whenever you call PageRankr.backlinks it'll be able to use your class.
|
@@ -158,12 +198,8 @@ Then, just make sure you require the class and PageRankr and whenever you call P
|
|
158
198
|
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
159
199
|
* Send me a pull request. Bonus points for topic branches.
|
160
200
|
|
161
|
-
## TODO Version 3
|
201
|
+
## TODO Version 3-4
|
162
202
|
* Use APIs where possible
|
163
|
-
* Configuration
|
164
|
-
* Optionally use API keys
|
165
|
-
* Maybe allow API key cycling to get around query limits
|
166
|
-
* Google search API is deprecated
|
167
203
|
* New Compete API
|
168
204
|
* Some search engines throttle the amount of queries. It would be nice to know when this happens. Probably throw an exception.
|
169
205
|
|
data/lib/page_rankr.rb
CHANGED
@@ -1,10 +1,11 @@
|
|
1
|
-
require File.
|
2
|
-
require File.
|
3
|
-
require File.
|
4
|
-
require File.join(File.dirname(__FILE__), "page_rankr", "indexes")
|
5
|
-
require File.join(File.dirname(__FILE__), "page_rankr", "site")
|
1
|
+
require File.expand_path("../page_rankr/backlinks", __FILE__)
|
2
|
+
require File.expand_path("../page_rankr/ranks", __FILE__)
|
3
|
+
require File.expand_path("../page_rankr/indexes", __FILE__)
|
6
4
|
|
7
5
|
module PageRankr
|
6
|
+
class MethodRequired < StandardError; end
|
7
|
+
class DomainInvalid < StandardError; end
|
8
|
+
|
8
9
|
class << self
|
9
10
|
def backlinks(site, *search_engines)
|
10
11
|
Backlinks.new.lookup(Site.new(site), *search_engines)
|
data/lib/page_rankr/backlink.rb
CHANGED
@@ -1,24 +1,9 @@
|
|
1
|
-
require '
|
2
|
-
require 'nokogiri'
|
3
|
-
require 'open-uri'
|
1
|
+
require File.expand_path('../tracker', __FILE__)
|
4
2
|
|
5
3
|
module PageRankr
|
6
4
|
module Backlink
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
def initialize(site)
|
11
|
-
@site = site
|
12
|
-
|
13
|
-
request.on_complete do |response|
|
14
|
-
html = Nokogiri::HTML(response.body)
|
15
|
-
@backlinks = clean(html.at(xpath).to_s)
|
16
|
-
@backlinks = nil if @backlinks.zero?
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
def clean(backlink_count)
|
21
|
-
backlink_count.gsub(/[a-zA-Z,\s\(\)]/, '').to_i
|
22
|
-
end
|
5
|
+
include Tracker
|
6
|
+
|
7
|
+
alias_method :backlink, :tracked
|
23
8
|
end
|
24
9
|
end
|
data/lib/page_rankr/backlinks.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
require File.
|
2
|
-
require File.
|
3
|
-
require File.
|
4
|
-
require File.
|
5
|
-
require File.
|
1
|
+
require File.expand_path("../trackers", __FILE__)
|
2
|
+
require File.expand_path("../backlinks/alexa", __FILE__)
|
3
|
+
require File.expand_path("../backlinks/bing", __FILE__)
|
4
|
+
require File.expand_path("../backlinks/google", __FILE__)
|
5
|
+
require File.expand_path("../backlinks/yahoo", __FILE__)
|
6
6
|
|
7
7
|
module PageRankr
|
8
8
|
class Backlinks
|
9
|
-
include
|
9
|
+
include Trackers
|
10
10
|
|
11
11
|
alias_method :backlink_trackers, :site_trackers
|
12
12
|
end
|
@@ -1,13 +1,16 @@
|
|
1
|
-
require '
|
1
|
+
require File.expand_path('../../backlink', __FILE__)
|
2
2
|
|
3
3
|
module PageRankr
|
4
4
|
class Backlinks
|
5
5
|
class Alexa
|
6
6
|
include Backlink
|
7
7
|
|
8
|
-
def
|
9
|
-
|
10
|
-
|
8
|
+
def url
|
9
|
+
"http://data.alexa.com/data"
|
10
|
+
end
|
11
|
+
|
12
|
+
def params
|
13
|
+
{:cli => 10, :dat => "snbamz", :url => @site.to_s}
|
11
14
|
end
|
12
15
|
|
13
16
|
def xpath
|
@@ -1,15 +1,18 @@
|
|
1
|
-
require '
|
1
|
+
require File.expand_path('../../backlink', __FILE__)
|
2
2
|
|
3
3
|
module PageRankr
|
4
4
|
class Backlinks
|
5
5
|
class Bing
|
6
6
|
include Backlink
|
7
|
-
|
8
|
-
def
|
9
|
-
|
10
|
-
:params => {:q => "link:#{@site.to_s}"}, :method => :get)
|
7
|
+
|
8
|
+
def url
|
9
|
+
"http://www.bing.com/search"
|
11
10
|
end
|
12
|
-
|
11
|
+
|
12
|
+
def params
|
13
|
+
{:q => "link:#{@site.to_s}"}
|
14
|
+
end
|
15
|
+
|
13
16
|
def xpath
|
14
17
|
"//span[@class='sb_count']/text()"
|
15
18
|
end
|
@@ -1,24 +1,20 @@
|
|
1
|
-
require '
|
1
|
+
require File.expand_path('../../backlink', __FILE__)
|
2
2
|
|
3
3
|
module PageRankr
|
4
4
|
class Backlinks
|
5
5
|
class Google
|
6
6
|
include Backlink
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
@backlinks = clean(json["responseData"]["cursor"]["estimatedResultCount"].to_s)
|
15
|
-
@backlinks = nil if @backlinks.zero?
|
16
|
-
end
|
7
|
+
|
8
|
+
def url
|
9
|
+
"http://www.google.com/search"
|
10
|
+
end
|
11
|
+
|
12
|
+
def params
|
13
|
+
{:q => "link:#{@site.to_s}"}
|
17
14
|
end
|
18
|
-
|
19
|
-
def
|
20
|
-
@
|
21
|
-
:params => {:v => "1.0", :rsz => 1, :q => "link:#{@site.to_s}"}, :method => :get)
|
15
|
+
|
16
|
+
def xpath
|
17
|
+
"//div[@id='resultStats']/text()"
|
22
18
|
end
|
23
19
|
end
|
24
20
|
end
|
@@ -1,13 +1,15 @@
|
|
1
|
-
require '
|
1
|
+
require File.expand_path('../../backlink', __FILE__)
|
2
2
|
|
3
3
|
module PageRankr
|
4
4
|
class Backlinks
|
5
5
|
class Yahoo
|
6
6
|
include Backlink
|
7
7
|
|
8
|
-
def
|
9
|
-
|
10
|
-
|
8
|
+
def url
|
9
|
+
"http://siteexplorer.search.yahoo.com/search"
|
10
|
+
end
|
11
|
+
def params
|
12
|
+
{:p => "#{@site.to_s}"}
|
11
13
|
end
|
12
14
|
|
13
15
|
def xpath
|
data/lib/page_rankr/index.rb
CHANGED
@@ -1,22 +1,9 @@
|
|
1
|
-
require '
|
2
|
-
require 'open-uri'
|
1
|
+
require File.expand_path('../tracker', __FILE__)
|
3
2
|
|
4
3
|
module PageRankr
|
5
4
|
module Index
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
def initialize(site)
|
10
|
-
@site = site
|
11
|
-
request.on_complete do |response|
|
12
|
-
html = Nokogiri::HTML(response.body)
|
13
|
-
@indexes = clean(html.at(xpath).to_s)
|
14
|
-
@indexes = nil if @indexes.zero?
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
def clean(backlink_count)
|
19
|
-
backlink_count.gsub(/[a-zA-Z,\s\(\)]/, '').to_i
|
20
|
-
end
|
5
|
+
include Tracker
|
6
|
+
|
7
|
+
alias_method :index, :tracked
|
21
8
|
end
|
22
9
|
end
|
data/lib/page_rankr/indexes.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
require File.
|
2
|
-
require File.
|
3
|
-
require File.
|
1
|
+
require File.expand_path("../trackers", __FILE__)
|
2
|
+
require File.expand_path("../indexes/bing", __FILE__)
|
3
|
+
require File.expand_path("../indexes/google", __FILE__)
|
4
4
|
|
5
5
|
module PageRankr
|
6
6
|
class Indexes
|
7
|
-
include
|
7
|
+
include Trackers
|
8
8
|
|
9
9
|
alias_method :index_trackers, :site_trackers
|
10
10
|
end
|
@@ -1,13 +1,16 @@
|
|
1
|
-
require '
|
1
|
+
require File.expand_path('../../index', __FILE__)
|
2
2
|
|
3
3
|
module PageRankr
|
4
4
|
class Indexes
|
5
5
|
class Bing
|
6
6
|
include Index
|
7
7
|
|
8
|
-
def
|
9
|
-
|
10
|
-
|
8
|
+
def url
|
9
|
+
"http://www.bing.com/search"
|
10
|
+
end
|
11
|
+
|
12
|
+
def params
|
13
|
+
{:q => "site:#{@site.to_s}"}
|
11
14
|
end
|
12
15
|
|
13
16
|
def xpath
|
@@ -1,26 +1,20 @@
|
|
1
|
-
require '
|
2
|
-
require 'json'
|
1
|
+
require File.expand_path('../../index', __FILE__)
|
3
2
|
|
4
3
|
module PageRankr
|
5
4
|
class Indexes
|
6
5
|
class Google
|
7
6
|
include Index
|
8
7
|
|
9
|
-
|
10
|
-
|
11
|
-
def initialize(site)
|
12
|
-
@site = site
|
13
|
-
|
14
|
-
request.on_complete do |response|
|
15
|
-
json = JSON.parse(response.body)
|
16
|
-
@indexes = clean(json["responseData"]["cursor"]["estimatedResultCount"].to_s)
|
17
|
-
@indexes = nil if @indexes.zero?
|
18
|
-
end
|
8
|
+
def url
|
9
|
+
"http://www.google.com/search"
|
19
10
|
end
|
20
|
-
|
21
|
-
def
|
22
|
-
|
23
|
-
|
11
|
+
|
12
|
+
def params
|
13
|
+
{:q => "site:#{@site.to_s}"}
|
14
|
+
end
|
15
|
+
|
16
|
+
def xpath
|
17
|
+
"//div[@id='resultStats']/text()"
|
24
18
|
end
|
25
19
|
end
|
26
20
|
end
|
data/lib/page_rankr/rank.rb
CHANGED
@@ -1,20 +1,9 @@
|
|
1
|
+
require File.expand_path('../tracker', __FILE__)
|
2
|
+
|
1
3
|
module PageRankr
|
2
4
|
module Rank
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
def initialize(site)
|
7
|
-
@site = site
|
8
|
-
|
9
|
-
request.on_complete do |response|
|
10
|
-
html = Nokogiri::HTML(response.body)
|
11
|
-
@rank = clean(html.search(xpath))
|
12
|
-
@rank = nil if @rank.zero?
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def clean(rank)
|
17
|
-
rank.to_s.to_i
|
18
|
-
end
|
5
|
+
include Tracker
|
6
|
+
|
7
|
+
alias_method :rank, :tracked
|
19
8
|
end
|
20
9
|
end
|
data/lib/page_rankr/ranks.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
|
-
require File.
|
2
|
-
require File.
|
3
|
-
require File.
|
4
|
-
require File.
|
5
|
-
require File.
|
1
|
+
require File.expand_path("../trackers", __FILE__)
|
2
|
+
require File.expand_path("../ranks/alexa_us", __FILE__)
|
3
|
+
require File.expand_path("../ranks/alexa_global", __FILE__)
|
4
|
+
require File.expand_path("../ranks/google", __FILE__)
|
5
|
+
require File.expand_path("../ranks/compete", __FILE__)
|
6
6
|
|
7
7
|
module PageRankr
|
8
8
|
class Ranks
|
9
|
-
include
|
9
|
+
include Trackers
|
10
10
|
|
11
11
|
alias_method :rank_trackers, :site_trackers
|
12
12
|
end
|