PageRankr 1.6.0 → 1.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,8 @@
1
1
  # Change Log
2
- ## Version 1.6
2
+ ## Version 1.7.0
3
+ * Merged in additions from iteration labs to add compete rank tracker and domain indexes.
4
+
5
+ ## Version 1.6.0
3
6
 
4
7
  * Added ability to get global alexa rank instead of just us alexa rank.
5
8
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- PageRankr (1.6.0)
4
+ PageRankr (1.7.0)
5
5
  json (>= 1.4.6)
6
6
  nokogiri (>= 1.4.1)
7
7
 
@@ -13,8 +13,8 @@ GEM
13
13
  rspec (~> 2.0)
14
14
  rspec-instafail (~> 0.1.4)
15
15
  ruby-progressbar (~> 0.0.9)
16
- json (1.4.6)
17
- nokogiri (1.4.3.1)
16
+ json (1.5.1)
17
+ nokogiri (1.4.4)
18
18
  rspec (2.1.0)
19
19
  rspec-core (~> 2.1.0)
20
20
  rspec-expectations (~> 2.1.0)
@@ -33,6 +33,4 @@ DEPENDENCIES
33
33
  PageRankr!
34
34
  bundler (>= 1.0.0)
35
35
  fuubar (>= 0.0.1)
36
- json (>= 1.4.6)
37
- nokogiri (>= 1.4.1)
38
36
  rspec (~> 2.1.0)
data/README.md CHANGED
@@ -37,6 +37,28 @@ Valid search engines are: `:google, :bing, :yahoo, :altavista, :alltheweb, :alex
37
37
 
38
38
  PageRankr.backlink_trackers #=> [:alexa, :alltheweb, :altavista, :bing, :google, :yahoo]
39
39
 
40
+ ### Indexes
41
+
42
+ Indexes are the result of doing a search with a query like "site:www.google.com". The number of returned results indicates how many pages of a domain are indexed by a particular search engine.
43
+
44
+ PageRankr.indexes('www.google.com', :google) #=> {:google=>4860000}
45
+ PageRankr.indexes('www.google.com', :bing) #=> {:bing=>2120000}
46
+
47
+ If you don't specify a search engine, then all of them are used.
48
+
49
+ # this
50
+ PageRankr.indexes('www.google.com')
51
+ #=> {:bing=>2120000, :google=>4860000}
52
+
53
+ # is equivalent to
54
+ PageRankr.indexes('www.google.com', :google, :bing)
55
+ #=> {:bing=>2120000, :google=>4860000}
56
+
57
+ You can also use the alias `index` instead of `indexes`.
58
+ Valid search engines are: `:google, :bing`. To get this list you can do:
59
+
60
+ PageRankr.index_trackers #=> [:bing, :google]
61
+
40
62
  ### Ranks
41
63
 
42
64
  PageRankr.ranks('www.google.com', :alexa, :google) #=> {:alexa=>{:us=>1, :global=>1}, :google=>10}
@@ -57,7 +79,7 @@ Google page ranks are in the range 0-10 where 10 is the most popular. If a site
57
79
  If you ever find something is broken it should now be much easier to fix it with version >= 1.3.0. For example, if the xpath used to lookup a backlink is broken, just override the method for that class to provide the correct xpath.
58
80
 
59
81
  module PageRankr
60
- class Backlinks
82
+ class Backlinks < Tracker
61
83
  class Google < Backlink
62
84
  def xpath
63
85
  "my new awesome xpath"
@@ -71,7 +93,7 @@ If you ever find something is broken it should now be much easier to fix it with
71
93
  If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr.
72
94
 
73
95
  module PageRankr
74
- class Backlinks
96
+ class Backlinks < Tracker
75
97
  class Foo < Backlink
76
98
  def url(site)
77
99
  "http://example.com/?q=#{site}"
@@ -105,9 +127,7 @@ Then, just make sure you require the class and PageRankr and whenever you call P
105
127
  * Use APIs where possible
106
128
  * Use [Typhoeus](https://github.com/pauldix/typhoeus) to improve speed when requesting multiple ranks and/or backlinks
107
129
  * Configuration
108
- * API keys
109
- * Alexa rank options
110
- * Add compete rank tracker
130
+ * Optionally use API keys
111
131
 
112
132
  ## Contributors
113
133
  * [Druwerd](http://github.com/Druwerd) - Use Google Search API instead of scraping.
data/Rakefile CHANGED
@@ -2,4 +2,4 @@ require 'bundler'
2
2
  Bundler::GemHelper.install_tasks
3
3
 
4
4
  require 'rspec/core/rake_task'
5
- RSpec::Core::RakeTask.new(:spec)
5
+ RSpec::Core::RakeTask.new(:spec)
@@ -1,6 +1,7 @@
1
1
  require File.join(File.dirname(__FILE__), "page_rankr", "tracker")
2
2
  require File.join(File.dirname(__FILE__), "page_rankr", "backlinks")
3
3
  require File.join(File.dirname(__FILE__), "page_rankr", "ranks")
4
+ require File.join(File.dirname(__FILE__), "page_rankr", "indexes")
4
5
 
5
6
  module PageRankr
6
7
  class << self
@@ -14,6 +15,11 @@ module PageRankr
14
15
  end
15
16
  alias_method :rank, :ranks
16
17
 
18
+ def indexes(site, *index_trackers)
19
+ Indexes.new.lookup site, *index_trackers
20
+ end
21
+ alias_method :index, :indexes
22
+
17
23
  def rank_trackers
18
24
  Ranks.new.rank_trackers
19
25
  end
@@ -21,5 +27,9 @@ module PageRankr
21
27
  def backlink_trackers
22
28
  Backlinks.new.backlink_trackers
23
29
  end
30
+
31
+ def index_trackers
32
+ Indexes.new.index_trackers
33
+ end
24
34
  end
25
35
  end
@@ -1,3 +1,4 @@
1
+ require 'rubygems'
1
2
  require 'nokogiri'
2
3
  require 'open-uri'
3
4
 
@@ -0,0 +1,17 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
+ module PageRankr
5
+ class Index
6
+ attr_reader :indexes
7
+ alias_method :tracked, :indexes
8
+
9
+ def initialize(site)
10
+ @indexes = clean Nokogiri::HTML(open url(site)).at(xpath).to_s
11
+ end
12
+
13
+ def clean(backlink_count)
14
+ backlink_count.gsub(/[a-zA-Z,\s\(\)]/, '').to_i
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,9 @@
1
+ require File.join(File.dirname(__FILE__), "index")
2
+ require File.join(File.dirname(__FILE__), "indexes", "bing")
3
+ require File.join(File.dirname(__FILE__), "indexes", "google")
4
+
5
+ module PageRankr
6
+ class Indexes < Tracker
7
+ alias_method :index_trackers, :site_trackers
8
+ end
9
+ end
@@ -0,0 +1,19 @@
1
+ require 'cgi'
2
+
3
+ module PageRankr
4
+ class Indexes < Tracker
5
+ class Bing < Index
6
+ def url(site)
7
+ "http://www.bing.com/search?q=site%3A#{CGI.escape(site)}"
8
+ end
9
+
10
+ def xpath
11
+ "//span[@class='sb_count']/text()"
12
+ end
13
+
14
+ def clean(backlink_count)
15
+ super(backlink_count.gsub('1-10', ''))
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,19 @@
1
+ require 'cgi'
2
+ require 'json'
3
+
4
+ module PageRankr
5
+ class Indexes < Tracker
6
+ class Google < Index
7
+
8
+ # overloaded to use Google's AJAX search API
9
+ # http://code.google.com/apis/ajaxsearch/documentation/
10
+ def initialize(site)
11
+ @indexes = clean JSON.parse( open( url(site)).read )["responseData"]["cursor"]["estimatedResultCount"].to_s
12
+ end
13
+
14
+ def url(site)
15
+ "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=1&q=site%3A#{CGI.escape(site)}"
16
+ end
17
+ end
18
+ end
19
+ end
@@ -1,6 +1,7 @@
1
1
  require File.join(File.dirname(__FILE__), "rank")
2
2
  require File.join(File.dirname(__FILE__), "ranks", "alexa")
3
3
  require File.join(File.dirname(__FILE__), "ranks", "google")
4
+ require File.join(File.dirname(__FILE__), "ranks", "compete")
4
5
 
5
6
  module PageRankr
6
7
  class Ranks < Tracker
@@ -0,0 +1,21 @@
1
+ require 'open-uri'
2
+ require 'cgi'
3
+ require 'nokogiri'
4
+
5
+ module PageRankr
6
+ class Ranks < Tracker
7
+ class Compete < Rank
8
+ def initialize(site)
9
+ @rank = Nokogiri::HTML(open(url(site))).search(xpath).to_s.gsub(',', '').to_i
10
+ end
11
+
12
+ def xpath
13
+ "//div[@id='rank']/div[@class='number value']/text()"
14
+ end
15
+
16
+ def url(site)
17
+ "http://siteanalytics.compete.com/#{CGI.escape(site)}/"
18
+ end
19
+ end
20
+ end
21
+ end
@@ -14,7 +14,7 @@ module PageRankr
14
14
  name, klass = tracker.to_s.capitalize, self.class
15
15
 
16
16
  next unless klass.const_defined? name
17
-
17
+
18
18
  tracked[tracker] = klass.const_get(name).new(site).tracked
19
19
  end
20
20
  tracked
@@ -1,3 +1,3 @@
1
1
  module PageRankr
2
- VERSION = "1.6.0"
2
+ VERSION = "1.7.0"
3
3
  end
@@ -88,4 +88,31 @@ describe PageRankr do
88
88
  end
89
89
  end
90
90
  end
91
+
92
+ describe "#index_trackers" do
93
+ subject{ PageRankr.index_trackers }
94
+
95
+ it{ should include(:google) }
96
+ it{ should include(:bing) }
97
+ end
98
+
99
+ describe "#indexes" do
100
+ describe "success" do
101
+ subject{ PageRankr.indexes("google.com") }
102
+
103
+ PageRankr.index_trackers.each do |tracker|
104
+ it{ should have_key(tracker) }
105
+ it{ subject[tracker].should >= 0 }
106
+ end
107
+ end
108
+
109
+ describe "failure" do
110
+ subject{ PageRankr.indexes("please-dont-register-a-site-that-breaks-this-test.com") }
111
+
112
+ PageRankr.index_trackers.each do |tracker|
113
+ it{ should have_key(tracker) }
114
+ it{ subject[tracker].should == 0 }
115
+ end
116
+ end
117
+ end
91
118
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 6
7
+ - 7
8
8
  - 0
9
- version: 1.6.0
9
+ version: 1.7.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Allen Madsen
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-04 00:00:00 -05:00
17
+ date: 2011-02-01 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -120,9 +120,14 @@ files:
120
120
  - lib/page_rankr/backlinks/bing.rb
121
121
  - lib/page_rankr/backlinks/google.rb
122
122
  - lib/page_rankr/backlinks/yahoo.rb
123
+ - lib/page_rankr/index.rb
124
+ - lib/page_rankr/indexes.rb
125
+ - lib/page_rankr/indexes/bing.rb
126
+ - lib/page_rankr/indexes/google.rb
123
127
  - lib/page_rankr/rank.rb
124
128
  - lib/page_rankr/ranks.rb
125
129
  - lib/page_rankr/ranks/alexa.rb
130
+ - lib/page_rankr/ranks/compete.rb
126
131
  - lib/page_rankr/ranks/google.rb
127
132
  - lib/page_rankr/ranks/google/checksum.rb
128
133
  - lib/page_rankr/tracker.rb