PageRankr 1.6.0 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.md +4 -1
- data/Gemfile.lock +3 -5
- data/README.md +25 -5
- data/Rakefile +1 -1
- data/lib/page_rankr.rb +10 -0
- data/lib/page_rankr/backlink.rb +1 -0
- data/lib/page_rankr/index.rb +17 -0
- data/lib/page_rankr/indexes.rb +9 -0
- data/lib/page_rankr/indexes/bing.rb +19 -0
- data/lib/page_rankr/indexes/google.rb +19 -0
- data/lib/page_rankr/ranks.rb +1 -0
- data/lib/page_rankr/ranks/compete.rb +21 -0
- data/lib/page_rankr/tracker.rb +1 -1
- data/lib/page_rankr/version.rb +1 -1
- data/spec/page_rankr_spec.rb +27 -0
- metadata +8 -3
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
PageRankr (1.6.0)
|
4
|
+
PageRankr (1.7.0)
|
5
5
|
json (>= 1.4.6)
|
6
6
|
nokogiri (>= 1.4.1)
|
7
7
|
|
@@ -13,8 +13,8 @@ GEM
|
|
13
13
|
rspec (~> 2.0)
|
14
14
|
rspec-instafail (~> 0.1.4)
|
15
15
|
ruby-progressbar (~> 0.0.9)
|
16
|
-
json (1.
|
17
|
-
nokogiri (1.4.
|
16
|
+
json (1.5.1)
|
17
|
+
nokogiri (1.4.4)
|
18
18
|
rspec (2.1.0)
|
19
19
|
rspec-core (~> 2.1.0)
|
20
20
|
rspec-expectations (~> 2.1.0)
|
@@ -33,6 +33,4 @@ DEPENDENCIES
|
|
33
33
|
PageRankr!
|
34
34
|
bundler (>= 1.0.0)
|
35
35
|
fuubar (>= 0.0.1)
|
36
|
-
json (>= 1.4.6)
|
37
|
-
nokogiri (>= 1.4.1)
|
38
36
|
rspec (~> 2.1.0)
|
data/README.md
CHANGED
@@ -37,6 +37,28 @@ Valid search engines are: `:google, :bing, :yahoo, :altavista, :alltheweb, :alex
|
|
37
37
|
|
38
38
|
PageRankr.backlink_trackers #=> [:alexa, :alltheweb, :altavista, :bing, :google, :yahoo]
|
39
39
|
|
40
|
+
### Indexes
|
41
|
+
|
42
|
+
Indexes are the result of doing a search with a query like "site:www.google.com". The number of returned results indicates how many pages of a domain are indexed by a particular search engine.
|
43
|
+
|
44
|
+
PageRankr.indexes('www.google.com', :google) #=> {:google=>4860000}
|
45
|
+
PageRankr.indexes('www.google.com', :bing) #=> {:bing=>2120000}
|
46
|
+
|
47
|
+
If you don't specify a search engine, then all of them are used.
|
48
|
+
|
49
|
+
# this
|
50
|
+
PageRankr.indexes('www.google.com')
|
51
|
+
#=> {:bing=>2120000, :google=>4860000}
|
52
|
+
|
53
|
+
# is equivalent to
|
54
|
+
PageRankr.indexes('www.google.com', :google, :bing)
|
55
|
+
#=> {:bing=>2120000, :google=>4860000}
|
56
|
+
|
57
|
+
You can also use the alias `index` instead of `indexes`.
|
58
|
+
Valid search engines are: `:google, :bing`. To get this list you can do:
|
59
|
+
|
60
|
+
PageRankr.index_trackers #=> [:bing, :google]
|
61
|
+
|
40
62
|
### Ranks
|
41
63
|
|
42
64
|
PageRankr.ranks('www.google.com', :alexa, :google) #=> {:alexa=>{:us=>1, :global=>1}, :google=>10}
|
@@ -57,7 +79,7 @@ Google page ranks are in the range 0-10 where 10 is the most popular. If a site
|
|
57
79
|
If you ever find something is broken it should now be much easier to fix it with version >= 1.3.0. For example, if the xpath used to lookup a backlink is broken, just override the method for that class to provide the correct xpath.
|
58
80
|
|
59
81
|
module PageRankr
|
60
|
-
class Backlinks
|
82
|
+
class Backlinks < Tracker
|
61
83
|
class Google < Backlink
|
62
84
|
def xpath
|
63
85
|
"my new awesome xpath"
|
@@ -71,7 +93,7 @@ If you ever find something is broken it should now be much easier to fix it with
|
|
71
93
|
If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr.
|
72
94
|
|
73
95
|
module PageRankr
|
74
|
-
class Backlinks
|
96
|
+
class Backlinks < Tracker
|
75
97
|
class Foo < Backlink
|
76
98
|
def url(site)
|
77
99
|
"http://example.com/?q=#{site}"
|
@@ -105,9 +127,7 @@ Then, just make sure you require the class and PageRankr and whenever you call P
|
|
105
127
|
* Use APIs where possible
|
106
128
|
* Use [Typhoeus](https://github.com/pauldix/typhoeus) to improve speed when requesting multiple ranks and/or backlinks
|
107
129
|
* Configuration
|
108
|
-
* API keys
|
109
|
-
* Alexa rank options
|
110
|
-
* Add compete rank tracker
|
130
|
+
* Optionally use API keys
|
111
131
|
|
112
132
|
## Contributors
|
113
133
|
* [Druwerd](http://github.com/Druwerd) - Use Google Search API instead of scraping.
|
data/Rakefile
CHANGED
data/lib/page_rankr.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), "page_rankr", "tracker")
|
2
2
|
require File.join(File.dirname(__FILE__), "page_rankr", "backlinks")
|
3
3
|
require File.join(File.dirname(__FILE__), "page_rankr", "ranks")
|
4
|
+
require File.join(File.dirname(__FILE__), "page_rankr", "indexes")
|
4
5
|
|
5
6
|
module PageRankr
|
6
7
|
class << self
|
@@ -14,6 +15,11 @@ module PageRankr
|
|
14
15
|
end
|
15
16
|
alias_method :rank, :ranks
|
16
17
|
|
18
|
+
def indexes(site, *index_trackers)
|
19
|
+
Indexes.new.lookup site, *index_trackers
|
20
|
+
end
|
21
|
+
alias_method :index, :indexes
|
22
|
+
|
17
23
|
def rank_trackers
|
18
24
|
Ranks.new.rank_trackers
|
19
25
|
end
|
@@ -21,5 +27,9 @@ module PageRankr
|
|
21
27
|
def backlink_trackers
|
22
28
|
Backlinks.new.backlink_trackers
|
23
29
|
end
|
30
|
+
|
31
|
+
def index_trackers
|
32
|
+
Indexes.new.index_trackers
|
33
|
+
end
|
24
34
|
end
|
25
35
|
end
|
data/lib/page_rankr/backlink.rb
CHANGED
data/lib/page_rankr/index.rb
@@ -0,0 +1,17 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
4
|
+
module PageRankr
|
5
|
+
class Index
|
6
|
+
attr_reader :indexes
|
7
|
+
alias_method :tracked, :indexes
|
8
|
+
|
9
|
+
def initialize(site)
|
10
|
+
@indexes = clean Nokogiri::HTML(open url(site)).at(xpath).to_s
|
11
|
+
end
|
12
|
+
|
13
|
+
def clean(backlink_count)
|
14
|
+
backlink_count.gsub(/[a-zA-Z,\s\(\)]/, '').to_i
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
data/lib/page_rankr/indexes.rb
@@ -0,0 +1,9 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), "index")
|
2
|
+
require File.join(File.dirname(__FILE__), "indexes", "bing")
|
3
|
+
require File.join(File.dirname(__FILE__), "indexes", "google")
|
4
|
+
|
5
|
+
module PageRankr
|
6
|
+
class Indexes < Tracker
|
7
|
+
alias_method :index_trackers, :site_trackers
|
8
|
+
end
|
9
|
+
end
|
data/lib/page_rankr/indexes/bing.rb
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
|
3
|
+
module PageRankr
|
4
|
+
class Indexes < Tracker
|
5
|
+
class Bing < Index
|
6
|
+
def url(site)
|
7
|
+
"http://www.bing.com/search?q=site%3A#{CGI.escape(site)}"
|
8
|
+
end
|
9
|
+
|
10
|
+
def xpath
|
11
|
+
"//span[@class='sb_count']/text()"
|
12
|
+
end
|
13
|
+
|
14
|
+
def clean(backlink_count)
|
15
|
+
super(backlink_count.gsub('1-10', ''))
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/page_rankr/indexes/google.rb
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'json'
|
3
|
+
|
4
|
+
module PageRankr
|
5
|
+
class Indexes < Tracker
|
6
|
+
class Google < Index
|
7
|
+
|
8
|
+
# overloaded to use Google's AJAX search API
|
9
|
+
# http://code.google.com/apis/ajaxsearch/documentation/
|
10
|
+
def initialize(site)
|
11
|
+
@indexes = clean JSON.parse( open( url(site)).read )["responseData"]["cursor"]["estimatedResultCount"].to_s
|
12
|
+
end
|
13
|
+
|
14
|
+
def url(site)
|
15
|
+
"http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=1&q=site%3A#{CGI.escape(site)}"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
data/lib/page_rankr/ranks.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), "rank")
|
2
2
|
require File.join(File.dirname(__FILE__), "ranks", "alexa")
|
3
3
|
require File.join(File.dirname(__FILE__), "ranks", "google")
|
4
|
+
require File.join(File.dirname(__FILE__), "ranks", "compete")
|
4
5
|
|
5
6
|
module PageRankr
|
6
7
|
class Ranks < Tracker
|
data/lib/page_rankr/ranks/compete.rb
@@ -0,0 +1,21 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
require 'cgi'
|
3
|
+
require 'nokogiri'
|
4
|
+
|
5
|
+
module PageRankr
|
6
|
+
class Ranks < Tracker
|
7
|
+
class Compete < Rank
|
8
|
+
def initialize(site)
|
9
|
+
@rank = Nokogiri::HTML(open(url(site))).search(xpath).to_s.gsub(',', '').to_i
|
10
|
+
end
|
11
|
+
|
12
|
+
def xpath
|
13
|
+
"//div[@id='rank']/div[@class='number value']/text()"
|
14
|
+
end
|
15
|
+
|
16
|
+
def url(site)
|
17
|
+
"http://siteanalytics.compete.com/#{CGI.escape(site)}/"
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
data/lib/page_rankr/tracker.rb
CHANGED
data/lib/page_rankr/version.rb
CHANGED
data/spec/page_rankr_spec.rb
CHANGED
@@ -88,4 +88,31 @@ describe PageRankr do
|
|
88
88
|
end
|
89
89
|
end
|
90
90
|
end
|
91
|
+
|
92
|
+
describe "#index_trackers" do
|
93
|
+
subject{ PageRankr.index_trackers }
|
94
|
+
|
95
|
+
it{ should include(:google) }
|
96
|
+
it{ should include(:bing) }
|
97
|
+
end
|
98
|
+
|
99
|
+
describe "#indexes" do
|
100
|
+
describe "success" do
|
101
|
+
subject{ PageRankr.indexes("google.com") }
|
102
|
+
|
103
|
+
PageRankr.index_trackers.each do |tracker|
|
104
|
+
it{ should have_key(tracker) }
|
105
|
+
it{ subject[tracker].should >= 0 }
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|
109
|
+
describe "failure" do
|
110
|
+
subject{ PageRankr.indexes("please-dont-register-a-site-that-breaks-this-test.com") }
|
111
|
+
|
112
|
+
PageRankr.index_trackers.each do |tracker|
|
113
|
+
it{ should have_key(tracker) }
|
114
|
+
it{ subject[tracker].should == 0 }
|
115
|
+
end
|
116
|
+
end
|
117
|
+
end
|
91
118
|
end
|
metadata
CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
|
|
4
4
|
prerelease: false
|
5
5
|
segments:
|
6
6
|
- 1
|
7
|
-
- 6
|
7
|
+
- 7
|
8
8
|
- 0
|
9
|
-
version: 1.6.0
|
9
|
+
version: 1.7.0
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Allen Madsen
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date:
|
17
|
+
date: 2011-02-01 00:00:00 -05:00
|
18
18
|
default_executable:
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
@@ -120,9 +120,14 @@ files:
|
|
120
120
|
- lib/page_rankr/backlinks/bing.rb
|
121
121
|
- lib/page_rankr/backlinks/google.rb
|
122
122
|
- lib/page_rankr/backlinks/yahoo.rb
|
123
|
+
- lib/page_rankr/index.rb
|
124
|
+
- lib/page_rankr/indexes.rb
|
125
|
+
- lib/page_rankr/indexes/bing.rb
|
126
|
+
- lib/page_rankr/indexes/google.rb
|
123
127
|
- lib/page_rankr/rank.rb
|
124
128
|
- lib/page_rankr/ranks.rb
|
125
129
|
- lib/page_rankr/ranks/alexa.rb
|
130
|
+
- lib/page_rankr/ranks/compete.rb
|
126
131
|
- lib/page_rankr/ranks/google.rb
|
127
132
|
- lib/page_rankr/ranks/google/checksum.rb
|
128
133
|
- lib/page_rankr/tracker.rb
|