PageRankr 1.6.0 → 1.7.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,8 @@
1
1
  # Change Log
2
- ## Version 1.6
2
+ ## Version 1.7.0
3
+ * Merged in additions from Iteration Labs to add a Compete rank tracker and domain indexes.
4
+
5
+ ## Version 1.6.0
3
6
 
4
7
  * Added ability to get the global Alexa rank instead of just the US Alexa rank.
5
8
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- PageRankr (1.6.0)
4
+ PageRankr (1.7.0)
5
5
  json (>= 1.4.6)
6
6
  nokogiri (>= 1.4.1)
7
7
 
@@ -13,8 +13,8 @@ GEM
13
13
  rspec (~> 2.0)
14
14
  rspec-instafail (~> 0.1.4)
15
15
  ruby-progressbar (~> 0.0.9)
16
- json (1.4.6)
17
- nokogiri (1.4.3.1)
16
+ json (1.5.1)
17
+ nokogiri (1.4.4)
18
18
  rspec (2.1.0)
19
19
  rspec-core (~> 2.1.0)
20
20
  rspec-expectations (~> 2.1.0)
@@ -33,6 +33,4 @@ DEPENDENCIES
33
33
  PageRankr!
34
34
  bundler (>= 1.0.0)
35
35
  fuubar (>= 0.0.1)
36
- json (>= 1.4.6)
37
- nokogiri (>= 1.4.1)
38
36
  rspec (~> 2.1.0)
data/README.md CHANGED
@@ -37,6 +37,28 @@ Valid search engines are: `:google, :bing, :yahoo, :altavista, :alltheweb, :alex
37
37
 
38
38
  PageRankr.backlink_trackers #=> [:alexa, :alltheweb, :altavista, :bing, :google, :yahoo]
39
39
 
40
+ ### Indexes
41
+
42
+ Indexes are the result of doing a search with a query like "site:www.google.com". The number of returned results indicates how many pages of a domain are indexed by a particular search engine.
43
+
44
+ PageRankr.indexes('www.google.com', :google) #=> {:google=>4860000}
45
+ PageRankr.indexes('www.google.com', :bing) #=> {:bing=>2120000}
46
+
47
+ If you don't specify a search engine, then all of them are used.
48
+
49
+ # this
50
+ PageRankr.indexes('www.google.com')
51
+ #=> {:bing=>2120000, :google=>4860000}
52
+
53
+ # is equivalent to
54
+ PageRankr.indexes('www.google.com', :google, :bing)
55
+ #=> {:bing=>2120000, :google=>4860000}
56
+
57
+ You can also use the alias `index` instead of `indexes`.
58
+ Valid search engines are: `:google, :bing`. To get this list you can do:
59
+
60
+ PageRankr.index_trackers #=> [:bing, :google]
61
+
40
62
  ### Ranks
41
63
 
42
64
  PageRankr.ranks('www.google.com', :alexa, :google) #=> {:alexa=>{:us=>1, :global=>1}, :google=>10}
@@ -57,7 +79,7 @@ Google page ranks are in the range 0-10 where 10 is the most popular. If a site
57
79
  If you ever find something is broken it should now be much easier to fix it with version >= 1.3.0. For example, if the xpath used to lookup a backlink is broken, just override the method for that class to provide the correct xpath.
58
80
 
59
81
  module PageRankr
60
- class Backlinks
82
+ class Backlinks < Tracker
61
83
  class Google < Backlink
62
84
  def xpath
63
85
  "my new awesome xpath"
@@ -71,7 +93,7 @@ If you ever find something is broken it should now be much easier to fix it with
71
93
  If you ever come across a site that provides a rank or backlinks you can hook that class up to automatically be used with PageRankr.
72
94
 
73
95
  module PageRankr
74
- class Backlinks
96
+ class Backlinks < Tracker
75
97
  class Foo < Backlink
76
98
  def url(site)
77
99
  "http://example.com/?q=#{site}"
@@ -105,9 +127,7 @@ Then, just make sure you require the class and PageRankr and whenever you call P
105
127
  * Use APIs where possible
106
128
  * Use [Typhoeus](https://github.com/pauldix/typhoeus) to improve speed when requesting multiple ranks and/or backlinks
107
129
  * Configuration
108
- * API keys
109
- * Alexa rank options
110
- * Add compete rank tracker
130
+ * Optionally use API keys
111
131
 
112
132
  ## Contributors
113
133
  * [Druwerd](http://github.com/Druwerd) - Use Google Search API instead of scraping.
data/Rakefile CHANGED
@@ -2,4 +2,4 @@ require 'bundler'
2
2
  Bundler::GemHelper.install_tasks
3
3
 
4
4
  require 'rspec/core/rake_task'
5
- RSpec::Core::RakeTask.new(:spec)
5
+ RSpec::Core::RakeTask.new(:spec)
@@ -1,6 +1,7 @@
1
1
  require File.join(File.dirname(__FILE__), "page_rankr", "tracker")
2
2
  require File.join(File.dirname(__FILE__), "page_rankr", "backlinks")
3
3
  require File.join(File.dirname(__FILE__), "page_rankr", "ranks")
4
+ require File.join(File.dirname(__FILE__), "page_rankr", "indexes")
4
5
 
5
6
  module PageRankr
6
7
  class << self
@@ -14,6 +15,11 @@ module PageRankr
14
15
  end
15
16
  alias_method :rank, :ranks
16
17
 
18
+ def indexes(site, *index_trackers)
19
+ Indexes.new.lookup site, *index_trackers
20
+ end
21
+ alias_method :index, :indexes
22
+
17
23
  def rank_trackers
18
24
  Ranks.new.rank_trackers
19
25
  end
@@ -21,5 +27,9 @@ module PageRankr
21
27
  def backlink_trackers
22
28
  Backlinks.new.backlink_trackers
23
29
  end
30
+
31
+ def index_trackers
32
+ Indexes.new.index_trackers
33
+ end
24
34
  end
25
35
  end
@@ -1,3 +1,4 @@
1
+ require 'rubygems'
1
2
  require 'nokogiri'
2
3
  require 'open-uri'
3
4
 
@@ -0,0 +1,17 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
4
+ module PageRankr
5
+ class Index
6
+ attr_reader :indexes
7
+ alias_method :tracked, :indexes
8
+
9
+ def initialize(site)
10
+ @indexes = clean Nokogiri::HTML(open url(site)).at(xpath).to_s
11
+ end
12
+
13
+ def clean(backlink_count)
14
+ backlink_count.gsub(/[a-zA-Z,\s\(\)]/, '').to_i
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,9 @@
1
+ require File.join(File.dirname(__FILE__), "index")
2
+ require File.join(File.dirname(__FILE__), "indexes", "bing")
3
+ require File.join(File.dirname(__FILE__), "indexes", "google")
4
+
5
+ module PageRankr
6
+ class Indexes < Tracker
7
+ alias_method :index_trackers, :site_trackers
8
+ end
9
+ end
@@ -0,0 +1,19 @@
1
+ require 'cgi'
2
+
3
+ module PageRankr
4
+ class Indexes < Tracker
5
+ class Bing < Index
6
+ def url(site)
7
+ "http://www.bing.com/search?q=site%3A#{CGI.escape(site)}"
8
+ end
9
+
10
+ def xpath
11
+ "//span[@class='sb_count']/text()"
12
+ end
13
+
14
+ def clean(backlink_count)
15
+ super(backlink_count.gsub('1-10', ''))
16
+ end
17
+ end
18
+ end
19
+ end
@@ -0,0 +1,19 @@
1
+ require 'cgi'
2
+ require 'json'
3
+
4
+ module PageRankr
5
+ class Indexes < Tracker
6
+ class Google < Index
7
+
8
+ # overridden to use Google's AJAX search API
9
+ # http://code.google.com/apis/ajaxsearch/documentation/
10
+ def initialize(site)
11
+ @indexes = clean JSON.parse( open( url(site)).read )["responseData"]["cursor"]["estimatedResultCount"].to_s
12
+ end
13
+
14
+ def url(site)
15
+ "http://ajax.googleapis.com/ajax/services/search/web?v=1.0&rsz=1&q=site%3A#{CGI.escape(site)}"
16
+ end
17
+ end
18
+ end
19
+ end
@@ -1,6 +1,7 @@
1
1
  require File.join(File.dirname(__FILE__), "rank")
2
2
  require File.join(File.dirname(__FILE__), "ranks", "alexa")
3
3
  require File.join(File.dirname(__FILE__), "ranks", "google")
4
+ require File.join(File.dirname(__FILE__), "ranks", "compete")
4
5
 
5
6
  module PageRankr
6
7
  class Ranks < Tracker
@@ -0,0 +1,21 @@
1
+ require 'open-uri'
2
+ require 'cgi'
3
+ require 'nokogiri'
4
+
5
+ module PageRankr
6
+ class Ranks < Tracker
7
+ class Compete < Rank
8
+ def initialize(site)
9
+ @rank = Nokogiri::HTML(open(url(site))).search(xpath).to_s.gsub(',', '').to_i
10
+ end
11
+
12
+ def xpath
13
+ "//div[@id='rank']/div[@class='number value']/text()"
14
+ end
15
+
16
+ def url(site)
17
+ "http://siteanalytics.compete.com/#{CGI.escape(site)}/"
18
+ end
19
+ end
20
+ end
21
+ end
@@ -14,7 +14,7 @@ module PageRankr
14
14
  name, klass = tracker.to_s.capitalize, self.class
15
15
 
16
16
  next unless klass.const_defined? name
17
-
17
+
18
18
  tracked[tracker] = klass.const_get(name).new(site).tracked
19
19
  end
20
20
  tracked
@@ -1,3 +1,3 @@
1
1
  module PageRankr
2
- VERSION = "1.6.0"
2
+ VERSION = "1.7.0"
3
3
  end
@@ -88,4 +88,31 @@ describe PageRankr do
88
88
  end
89
89
  end
90
90
  end
91
+
92
+ describe "#index_trackers" do
93
+ subject{ PageRankr.index_trackers }
94
+
95
+ it{ should include(:google) }
96
+ it{ should include(:bing) }
97
+ end
98
+
99
+ describe "#indexes" do
100
+ describe "success" do
101
+ subject{ PageRankr.indexes("google.com") }
102
+
103
+ PageRankr.index_trackers.each do |tracker|
104
+ it{ should have_key(tracker) }
105
+ it{ subject[tracker].should >= 0 }
106
+ end
107
+ end
108
+
109
+ describe "failure" do
110
+ subject{ PageRankr.indexes("please-dont-register-a-site-that-breaks-this-test.com") }
111
+
112
+ PageRankr.index_trackers.each do |tracker|
113
+ it{ should have_key(tracker) }
114
+ it{ subject[tracker].should == 0 }
115
+ end
116
+ end
117
+ end
91
118
  end
metadata CHANGED
@@ -4,9 +4,9 @@ version: !ruby/object:Gem::Version
4
4
  prerelease: false
5
5
  segments:
6
6
  - 1
7
- - 6
7
+ - 7
8
8
  - 0
9
- version: 1.6.0
9
+ version: 1.7.0
10
10
  platform: ruby
11
11
  authors:
12
12
  - Allen Madsen
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2010-12-04 00:00:00 -05:00
17
+ date: 2011-02-01 00:00:00 -05:00
18
18
  default_executable:
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
@@ -120,9 +120,14 @@ files:
120
120
  - lib/page_rankr/backlinks/bing.rb
121
121
  - lib/page_rankr/backlinks/google.rb
122
122
  - lib/page_rankr/backlinks/yahoo.rb
123
+ - lib/page_rankr/index.rb
124
+ - lib/page_rankr/indexes.rb
125
+ - lib/page_rankr/indexes/bing.rb
126
+ - lib/page_rankr/indexes/google.rb
123
127
  - lib/page_rankr/rank.rb
124
128
  - lib/page_rankr/ranks.rb
125
129
  - lib/page_rankr/ranks/alexa.rb
130
+ - lib/page_rankr/ranks/compete.rb
126
131
  - lib/page_rankr/ranks/google.rb
127
132
  - lib/page_rankr/ranks/google/checksum.rb
128
133
  - lib/page_rankr/tracker.rb