metainspector 1.9.3 → 1.9.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -32,15 +32,15 @@ Then you can see the scraped data like this:
32
32
 
33
33
  page.url # URL of the page
34
34
  page.scheme # Scheme of the page (http, https)
35
+ page.host # Hostname of the page (like, w3clove.com, without the scheme)
36
+ page.root_url # Root url (scheme + host, like http://w3clove.com/)
35
37
  page.title # title of the page, as string
36
- page.links # array of strings, with every link found on the page
37
- page.absolute_links # array of all the links converted to absolute urls
38
+ page.links # array of strings, with every link found on the page as an absolute URL
38
39
  page.meta_description # meta description, as string
39
40
  page.description # returns the meta description, or the first long paragraph if no meta description is found
40
41
  page.meta_keywords # meta keywords, as string
41
42
  page.image # Most relevant image, if defined with og:image
42
- page.images # array of strings, with every img found on the page
43
- page.absolute_images # array of all the images converted to absolute urls
43
+ page.images # array of strings, with every img found on the page as an absolute URL
44
44
  page.feed # Get rss or atom links in meta data fields as array
45
45
  page.meta_og_title # opengraph title
46
46
  page.meta_og_image # opengraph image
@@ -9,13 +9,15 @@ require 'timeout'
9
9
  # MetaInspector provides an easy way to scrape web pages and get its elements
10
10
  module MetaInspector
11
11
  class Scraper
12
- attr_reader :url, :scheme, :errors
12
+ attr_reader :url, :scheme, :host, :root_url, :errors
13
13
  # Initializes a new instance of MetaInspector, setting the URL to the one given
14
14
  # If no scheme given, set it to http:// by default
15
15
 
16
16
  def initialize(url, timeout = 20)
17
17
  @url = URI.parse(url).scheme.nil? ? 'http://' + url : url
18
18
  @scheme = URI.parse(url).scheme || 'http'
19
+ @host = URI.parse(url).host
20
+ @root_url = "#{@scheme}://#{@host}/"
19
21
  @timeout = timeout
20
22
  @data = Hashie::Rash.new('url' => @url)
21
23
  @errors = []
@@ -33,26 +35,24 @@ module MetaInspector
33
35
  meta_description.nil? ? secondary_description : meta_description
34
36
  end
35
37
 
36
- # Returns the parsed document links
38
+ # Links found on the page, as absolute URLs
37
39
  def links
38
- @data.links ||= parsed_document.search("//a") \
39
- .map {|link| link.attributes["href"] \
40
- .to_s.strip}.uniq rescue nil
40
+ @data.links ||= parsed_links.map { |l| absolutify_url(unrelativize_url(l)) }
41
41
  end
42
42
 
43
- def images
44
- @data.images ||= parsed_document.search('//img') \
45
- .reject{|i| i.attributes['src'].blank? } \
46
- .map{ |i| i.attributes['src'].value }.uniq
43
+ def absolute_links
44
+ warn "absolute_links is deprecated since 1.9.4 and will be removed, use links instead"
45
+ links
47
46
  end
48
47
 
49
- # Returns the links converted to absolute urls
50
- def absolute_links
51
- @data.absolute_links ||= links.map { |l| absolutify_url(unrelativize_url(l)) }
48
+ # Images found on the page, as absolute URLs
49
+ def images
50
+ @data.images ||= parsed_images.map{ |i| absolutify_url(i) }
52
51
  end
53
52
 
54
53
  def absolute_images
55
- @data.absolute_images ||= images.map{ |i| absolutify_url(i) }
54
+ warn "absolute_images is deprecated since 1.9.4 and will be removed, use images instead"
55
+ images
56
56
  end
57
57
 
58
58
  # Returns the parsed document meta rss links
@@ -81,7 +81,7 @@ module MetaInspector
81
81
  # Returns all parsed data as a nested Hash
82
82
  def to_hash
83
83
  # TODO: find a better option to populate the data to the Hash
84
- image;feed;links;charset;absolute_links;title;meta_keywords
84
+ image;images;feed;links;charset;title;meta_keywords
85
85
  @data.to_hash
86
86
  end
87
87
 
@@ -146,6 +146,18 @@ module MetaInspector
146
146
 
147
147
  private
148
148
 
149
+ def parsed_links
150
+ @parsed_links ||= parsed_document.search("//a") \
151
+ .map {|link| link.attributes["href"] \
152
+ .to_s.strip}.uniq rescue nil
153
+ end
154
+
155
+ def parsed_images
156
+ @parsed_images ||= parsed_document.search('//img') \
157
+ .reject{|i| i.attributes['src'].blank? } \
158
+ .map{ |i| i.attributes['src'].value }.uniq
159
+ end
160
+
149
161
  # Stores the error for later inspection
150
162
  def add_fatal_error(error)
151
163
  @errors << error
@@ -154,7 +166,15 @@ module MetaInspector
154
166
  # Convert a relative url like "/users" to an absolute one like "http://example.com/users"
155
167
  # Respecting already absolute URLs like the ones starting with http:, ftp:, telnet:, mailto:, javascript: ...
156
168
  def absolutify_url(url)
157
- url =~ /^\w*\:/i ? url : File.join(@url,url)
169
+ if url =~ /^\w*\:/i
170
+ url
171
+ else
172
+ if url[0] == "/"
173
+ File.join(@root_url, url)
174
+ else
175
+ File.join(@url, url)
176
+ end
177
+ end
158
178
  end
159
179
 
160
180
  # Convert a protocol-relative url to its full form, depending on the scheme of the page that contains it
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.9.3"
4
+ VERSION = "1.9.4"
5
5
  end
@@ -0,0 +1,266 @@
1
+ HTTP/1.1 200 OK
2
+ Server: nginx
3
+ Date: Mon, 23 Jul 2012 08:44:12 GMT
4
+ Content-Type: text/html; charset=utf-8
5
+ Connection: keep-alive
6
+ Status: 200 OK
7
+ X-Ua-Compatible: IE=Edge,chrome=1
8
+ Etag: "c4f3d4aaf12acce6a909714618e08934"
9
+ Cache-Control: max-age=0, private, must-revalidate
10
+ Set-Cookie: _w3clovesite_session=BAh7B0kiD3Nlc3Npb25faWQGOgZFRkkiJTJiMWU0NzVkNjJjNDliMDRlZGI3MjI5OTVlN2U4MjU5BjsAVEkiEF9jc3JmX3Rva2VuBjsARkkiMTlWUmVSMEVlTWNuV0t4cTFuNHUvQVozZCttMjhxRTEvWFhYYW5hOXRFdUk9BjsARg%3D%3D--7e9d3e900c9531363297f469f8baa3e3ed31336a; path=/; HttpOnly
11
+ X-Request-Id: 33ca78a4044d244e673d273a59fa4ebc
12
+ X-Runtime: 0.017688
13
+ X-Rack-Cache: miss
14
+ Content-Length: 12923
15
+ X-Varnish: 647613022
16
+ Age: 0
17
+ Via: 1.1 varnish
18
+
19
+ <!DOCTYPE html>
20
+ <html>
21
+
22
+ <head><script type="text/javascript">var NREUMQ=NREUMQ||[];NREUMQ.push(["mark","firstbyte",new Date().getTime()]);</script>
23
+ <meta charset="UTF-8" />
24
+ <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1">
25
+ <title>Whole site HTML validator | W3CLove</title>
26
+ <link rel="shortcut icon" type="image/x-icon" href="/favicon.ico" />
27
+ <meta name="description" content="Site-wide markup validation tool. Validate the markup of your whole site with just one click." />
28
+ <meta name="keywords" content="html, markup, validation, validator, tool, w3c, development, standards, free" />
29
+ <link href="http://cdn-images.mailchimp.com/embedcode/slim-081711.css" rel="stylesheet" type="text/css">
30
+ <link href="http://fonts.googleapis.com/css?family=Terminal+Dosis:400,600" rel="stylesheet" type="text/css" />
31
+ <link href="/assets/application-9da2f67bc1bc6e19a801cb7685a0b497.css" media="screen" rel="stylesheet" type="text/css" />
32
+ <meta content="authenticity_token" name="csrf-param" />
33
+ <meta content="9VReR0EeMcnWKxq1n4u/AZ3d+m28qE1/XXXana9tEuI=" name="csrf-token" />
34
+ <script src="/assets/application-4e8aa1a929a0aeab6bdf339edecbeaa6.js" type="text/javascript"></script>
35
+ <script src="/assets/pages-7270767b2a9e9fff880aa5de378ca791.js" type="text/javascript"></script>
36
+ <script src="https://apis.google.com/js/plusone.js" type="text/javascript"></script>
37
+
38
+ <script type="text/javascript">
39
+
40
+ var _gaq = _gaq || [];
41
+ _gaq.push(['_setAccount', 'UA-122379-37']);
42
+ _gaq.push(['_trackPageview']);
43
+
44
+ (function() {
45
+ var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
46
+ ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
47
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
48
+ })();
49
+
50
+ </script>
51
+ </head>
52
+
53
+ <body>
54
+
55
+ <div id="flash_message"><div><span id="flash_message_text"></span><a class="close-message" href="#" onclick="closeMsgBar();">X</a></div></div>
56
+
57
+
58
+
59
+ <div class="row top-bar">
60
+ <div class="content">
61
+ <header>
62
+ <div class="header">
63
+ <h1><a href="/" title="W3CLove">W3CLove</a></h1>
64
+ <nav>
65
+ <a href="#" class="nav_button"></a>
66
+ <ul class="nav">
67
+ <li><a href="/faqs" class='active'>FAQs</a></li>
68
+ <li><a href="/plans-and-pricing" >Plans and pricing</a></li>
69
+ <li><a href="/contact" >Contact</a></li>
70
+ <li><a href="/charts/errors" >Top 100 Errors</a></li>
71
+ </ul>
72
+ </nav>
73
+
74
+
75
+ <div id="sign_in">
76
+ <a href="/credits" class="label warning" id="credits_count">10 Credits</a>
77
+
78
+ <a href="/signin" class="label success">Sign in</a>
79
+ </div>
80
+ </div>
81
+ </header>
82
+ </div>
83
+ </div>
84
+
85
+ <div id="faqs" class="row hero">
86
+
87
+ <div class="content">
88
+
89
+ <h2>FAQs</h2>
90
+ <h3>Frequently Asked Questions</h3>
91
+
92
+ </div>
93
+
94
+ </div>
95
+
96
+
97
+ <div class="row description">
98
+
99
+ <div class="content">
100
+
101
+ <h4 id="what_is_w3clove">What is W3CLove?</h4>
102
+ <p>
103
+ W3CLove is a site-wide markup validation tool. It allows you to check the validity of the markup of several pages
104
+ from your website, and gives you a summary of the most common errors and warnings, with a single click.
105
+ </p>
106
+
107
+ <h4 id="why_not_just_use_the_official_w3c_validator">Why not just use the official W3C validator?</h4>
108
+ <p>
109
+ The <a href="http://validator.w3.org">official W3C validator</a> does not yet provide a way to submit several URLs at once. So, if you want to check your whole
110
+ website, you need to submit each of your URLs individually, which is a slow process. W3CLove provides a simpler, faster way to
111
+ submit several pages at once.
112
+ </p>
113
+
114
+ <h4 id="how_can_i_submit_my_site_for_validation">How can I submit a site for validation?</h4>
115
+ <p>
116
+ To submit a site, just enter its URL on the <a href="/">front page</a>, and click the "Validate" button. The W3CLove spider will crawl the site
117
+ in search for internal links, validate each of them, and then compile all errors and warnings in one summary.
118
+ </p>
119
+
120
+ <h4 id="how_can_i_specify_the_exact_urls_i_want_to_validate">How can I specify the exact URLs I want to validate?</h4>
121
+ <p>
122
+ The W3CLove spider will crawl the provided URL in search for internal links, but you can also provide an <a href="http://www.sitemaps.org/">XML sitemap</a> with the exact URLs
123
+ you need to validate.
124
+ </p>
125
+
126
+ <h4 id="is_there_a_limit_on_the_number_of_urls">Is there a limit on the number of URLs to validate?</h4>
127
+ <p>
128
+ Yes, there is a limit of 250 URLs per each sitemap submitted. This should be enough for most sites to get a good idea of the
129
+ validation status of the site, and saves processing time for both W3CLove and the W3C validator.
130
+ </p>
131
+
132
+ <h4 id="how_can_i_resubmit_a_site">How can I resubmit a site?</h4>
133
+ <p>
134
+ Just click on the "Re-check" buttons. You can recheck the whole sitemap or individual pages.
135
+ </p>
136
+
137
+ <h4 id="can_i_store_my_sitemaps_list">Can I store my sitemaps list?</h4>
138
+ <p>
139
+ Yes, W3CLove lets you store for free a list of the sitemaps you're interested in validating. Just sign in with your Twitter, Facebook or Google account and every sitemap you validate will appear on your sitemaps list.
140
+ </p>
141
+
142
+ <h4 id="how_do_credits_work">How do credits work?</h4>
143
+ <p>
144
+ When you sign up for the first time at W3CLove, you're given 100 initial credits so you can try the service for free.
145
+ </p>
146
+ <p>
147
+ For every single web page validation that you make using our service, you're charged 1 credit. So, for example, if you start with 100 credits and you validate a site that has 30 web pages, you end up with 70 credits.
148
+ </p>
149
+ <p>
150
+ Once you spend all your credits, you can't make more validations until you recharge them.
151
+ </p>
152
+
153
+ <h4 id="how_can_i_recharge_my_credits">How can I recharge my credits?</h4>
154
+ <p>
155
+ The easiest way to recharge your credits is through a monthly subscription.
156
+ </p>
157
+ <p>
158
+ This way, your credits will be recharged every month up to the monthly limit of your chosen plan.
159
+ </p>
160
+ <p>
161
+ Check out the <a href="/plans-and-pricing">Plans and pricing</a> page to see what plan is best for you. If you're not sure about how many validations you need, you can buy packs of validations.
162
+ </p>
163
+
164
+ <h4 id="how_can_i_sign_in_with_another_account">I've signed in from one account, how can I sign in with a different one?</h4>
165
+ <p>
166
+ For your convenience, you're first shown 3 ways to sign in: Twitter, Facebook and Google. When you use one of those, W3CLove will remember your preference and offer just this one.
167
+ </p>
168
+ <p>
169
+ If you'd like to change this preference, just sign in again with your preferred account:<br/>
170
+ <a href='#' onclick='window.location="/auth/twitter"; return false;' style='color:white;'><span class='label success'>twitter</span></a>, <a href='#' onclick='window.location="/auth/facebook"; return false;' style='color:white;'><span class='label success'>facebook</span></a> or <a href='#' onclick='window.location="/auth/google_oauth2"; return false;' style='color:white;'><span class='label success'>google</span></a>.
171
+ </p>
172
+
173
+ <h4 id="who_is_behind_all_this">Who is behind all this?</h4>
174
+ <p>
175
+ W3CLove is a personal project maintained by <a href="http://jaimeiniesta.com/">Jaime Iniesta</a>, an independent web developer who loves working with Ruby on Rails. That's me. :)
176
+ </p>
177
+
178
+ <h4 id="how_did_this_project_start">How did this project start?</h4>
179
+ <p>
180
+ During March 2011 I took the Ruby Core Skills course at the <a href="http://mendicantuniversity.org/">Mendicant University</a>, an intense three week course that takes you through several important topic areas every Ruby developer should be comfortable on. You can <a href="http://jaimeiniesta.posterous.com/rbmu-a-better-way-to-learn-ruby">read more</a> about it at my blog.
181
+ </p>
182
+ <p>
183
+ With the help of <a href="http://majesticseacreature.com/">Gregory Brown</a> and the rest of the <a href="http://school.mendicantuniversity.org/alumni/2011">Mendicant University Alumni</a>, I built the <a href="https://github.com/jaimeiniesta/w3clove">w3clove</a> gem that allows you to do site-wide markup validation from the command line.
184
+ </p>
185
+ <p>
186
+ Afterwards, I built this <a href="http://w3clove.com">W3CLove.com</a> site to make it easier for everyone to do site-wide markup validation, with a nicer HTML interface, storing the results for later, rechecking, etc.
187
+ </p>
188
+ <p>
189
+ I want to express my gratitude to all the Mendicant University community, all of them are still helping me making W3CLove a better tool for everyone. Thank you!
190
+ </p>
191
+
192
+ <h4 id="is_this_free">Is this free?</h4>
193
+ <p>
194
+ No, this is a paid service, but you can try it for free.
195
+ </p>
196
+
197
+ <h4 id="is_there_an_open_source_version">Is there an open source version?</h4>
198
+ <p>
199
+ Yes! There's a free, standalone version that you can install on your computer. It's packed as a Ruby gem and it's open source, so you can examine the code and contribute to it if you wish.
200
+ </p>
201
+ <p>
202
+ You can find the <a href="https://github.com/jaimeiniesta/w3clove">w3clove gem at Github</a>.
203
+ </p>
204
+
205
+ <h4 id="is_there_an_api">Is there an API?</h4>
206
+ <p>
207
+ Yes! I've started building an API. It's not finished yet, but you can already validate sitemaps and pages with it. Read more about it at the <a href="/api_v1_reference">API V1 Reference</a> page.
208
+ </p>
209
+
210
+ </div>
211
+ </div>
212
+
213
+
214
+ <div class="row footer">
215
+ <div class="content">
216
+ <footer>
217
+ <p>
218
+ <!-- Begin MailChimp Signup Form -->
219
+ <div id="mc_embed_signup">
220
+ <form action="http://w3clove.us4.list-manage.com/subscribe/post?u=6af3ab69c286561d0f0f25671&amp;id=04a0dab609" method="post" id="mc-embedded-subscribe-form" name="mc-embedded-subscribe-form" class="validate shadowins">
221
+ <label for="mce-EMAIL">Subscribe to our newsletter:</label>
222
+ <input type="email" value="your email" name="EMAIL" class="email" id="mce-EMAIL" placeholder="your email" onfocus="this.value='';" required>
223
+ <div class="clear"><input type="submit" value="♥" name="subscribe" id="mc-embedded-subscribe" class="button btn"></div>
224
+ </form>
225
+ </div>
226
+ <!--End mc_embed_signup-->
227
+
228
+ <ul class="social_share">
229
+ <li class="twitter_follow"><a href="https://twitter.com/w3clove" class="twitter-follow-button" data-button="grey" data-text-color="#FFFFFF" data-link-color="#999999" data-show-count="false">Follow</a></li>
230
+ <li class="tweets_count"><a href="http://twitter.com/share" style="display:block;" class="twitter-share-button" data-count="horizontal" data-via="w3clove" data-lang="en">Tweet</a><script type="text/javascript" src="http://platform.twitter.com/widgets.js"></script></li>
231
+ <li class="gplus_count"><div class="g-plusone" data-size="medium" data-count="true"></div></li>
232
+ </ul>
233
+ </p>
234
+ <p class="clearb"><strong>W3CLove</strong> lets you <strong>validate entire sites</strong> with one click. This is an independent project, not associated with the W3C. By making use of this website you agree to the <a href="/terms_of_service">Terms of service</a>.<br /><br />Follow us on <a href="http://twitter.com/W3CLove">Twitter</a> and <a href="http://us4.campaign-archive1.com/home/?u=6af3ab69c286561d0f0f25671&id=04a0dab609">subscribe to our monthly newsletter</a>.</p>
235
+ </footer>
236
+ </div>
237
+ </div>
238
+
239
+ <script type="text/javascript">
240
+ var uvOptions = {};
241
+ (function() {
242
+ var uv = document.createElement('script'); uv.type = 'text/javascript'; uv.async = true;
243
+ uv.src = ('https:' == document.location.protocol ? 'https://' : 'http://') + 'widget.uservoice.com/nhy6YD24GjgADgFX3h5z4w.js';
244
+ var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(uv, s);
245
+ })();
246
+ </script>
247
+ <script type="text/javascript">
248
+ //<![CDATA[
249
+
250
+ var menuVisible = false;
251
+ $('.nav_button').on('click', showHideMenu);
252
+
253
+ //]]>
254
+ </script>
255
+
256
+ <script type="text/javascript">if (!NREUMQ.f) { NREUMQ.f=function() {
257
+ NREUMQ.push(["load",new Date().getTime()]);
258
+ var e=document.createElement("script");
259
+ e.type="text/javascript";e.async=true;e.src="https://d1ros97qkrwjf5.cloudfront.net/39/eum/rum.js";
260
+ document.body.appendChild(e);
261
+ if(NREUMQ.a)NREUMQ.a();
262
+ };
263
+ NREUMQ.a=window.onload;window.onload=NREUMQ.f;
264
+ };
265
+ NREUMQ.push(["nrfj","beacon-1.newrelic.com","96fc3f1db6",415027,"c1hbQUcNWlhQQhsNWVdfakNaDkJVUlUbFVFXUkYaRgpZQw==",0.0,14,new Date().getTime(),"","","","",""])</script></body>
266
+ </html>
@@ -14,6 +14,7 @@ describe MetaInspector do
14
14
  FakeWeb.register_uri(:get, "https://protocol-relative.com", :response => fixture_file("protocol_relative.response"))
15
15
  FakeWeb.register_uri(:get, "http://example.com/nonhttp", :response => fixture_file("nonhttp.response"))
16
16
  FakeWeb.register_uri(:get, "http://www.youtube.com/watch?v=iaGSSrp49uc", :response => fixture_file("youtube.response"))
17
+ FakeWeb.register_uri(:get, "http://w3clove.com/faqs", :response => fixture_file("w3clove_faqs.response"))
17
18
 
18
19
  describe 'Initialization' do
19
20
  it 'should accept an URL with a scheme' do
@@ -30,6 +31,16 @@ describe MetaInspector do
30
31
  MetaInspector.new('http://pagerankalert.com').scheme.should == 'http'
31
32
  MetaInspector.new('https://pagerankalert.com').scheme.should == 'https'
32
33
  end
34
+
35
+ it "should store the host" do
36
+ MetaInspector.new('http://pagerankalert.com').host.should == 'pagerankalert.com'
37
+ MetaInspector.new('https://pagerankalert.com').host.should == 'pagerankalert.com'
38
+ end
39
+
40
+ it "should store the root url" do
41
+ MetaInspector.new('http://pagerankalert.com').root_url.should == 'http://pagerankalert.com/'
42
+ MetaInspector.new('https://pagerankalert.com').root_url.should == 'https://pagerankalert.com/'
43
+ end
33
44
  end
34
45
 
35
46
  describe 'Doing a basic scrape' do
@@ -54,8 +65,7 @@ describe MetaInspector do
54
65
  end
55
66
 
56
67
  it "should find all page images" do
57
- @m.absolute_images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
58
- @m.images == ["/images/pagerank_alert.png?1309512337"]
68
+ @m.images == ["http://pagerankalert.com/images/pagerank_alert.png?1309512337"]
59
69
  end
60
70
 
61
71
  it "should ignore malformed image tags" do
@@ -97,29 +107,41 @@ describe MetaInspector do
97
107
  end
98
108
 
99
109
  it "should get the links" do
100
- @m.links.should == [
101
- "/",
102
- "/es?language=es",
103
- "/users/sign_up",
104
- "/users/sign_in",
105
- "mailto:pagerankalert@gmail.com",
106
- "http://pagerankalert.posterous.com",
107
- "http://twitter.com/pagerankalert",
108
- "http://twitter.com/share"
109
- ]
110
- end
111
-
112
- it "should convert links to absolute urls" do
113
- @m.absolute_links.should == [
114
- "http://pagerankalert.com/",
115
- "http://pagerankalert.com/es?language=es",
116
- "http://pagerankalert.com/users/sign_up",
117
- "http://pagerankalert.com/users/sign_in",
118
- "mailto:pagerankalert@gmail.com",
119
- "http://pagerankalert.posterous.com",
120
- "http://twitter.com/pagerankalert",
121
- "http://twitter.com/share"
122
- ]
110
+ @m.links.should == [ "http://pagerankalert.com/",
111
+ "http://pagerankalert.com/es?language=es",
112
+ "http://pagerankalert.com/users/sign_up",
113
+ "http://pagerankalert.com/users/sign_in",
114
+ "mailto:pagerankalert@gmail.com",
115
+ "http://pagerankalert.posterous.com",
116
+ "http://twitter.com/pagerankalert",
117
+ "http://twitter.com/share" ]
118
+ end
119
+
120
+ it "should get correct absolute links for internal pages" do
121
+ m = MetaInspector.new('http://w3clove.com/faqs')
122
+ m.links.should == [ "http://w3clove.com/faqs/#",
123
+ "http://w3clove.com/",
124
+ "http://w3clove.com/faqs",
125
+ "http://w3clove.com/plans-and-pricing",
126
+ "http://w3clove.com/contact",
127
+ "http://w3clove.com/charts/errors",
128
+ "http://w3clove.com/credits",
129
+ "http://w3clove.com/signin",
130
+ "http://validator.w3.org",
131
+ "http://www.sitemaps.org/",
132
+ "http://jaimeiniesta.com/",
133
+ "http://mendicantuniversity.org/",
134
+ "http://jaimeiniesta.posterous.com/rbmu-a-better-way-to-learn-ruby",
135
+ "http://majesticseacreature.com/",
136
+ "http://school.mendicantuniversity.org/alumni/2011",
137
+ "https://github.com/jaimeiniesta/w3clove",
138
+ "http://w3clove.com",
139
+ "http://w3clove.com/api_v1_reference",
140
+ "https://twitter.com/w3clove",
141
+ "http://twitter.com/share",
142
+ "http://w3clove.com/terms_of_service",
143
+ "http://twitter.com/W3CLove",
144
+ "http://us4.campaign-archive1.com/home/?u=6af3ab69c286561d0f0f25671&id=04a0dab609" ]
123
145
  end
124
146
  end
125
147
 
@@ -138,10 +160,6 @@ describe MetaInspector do
138
160
  "telnet://telnet.cdrom.com"
139
161
  ]
140
162
  end
141
-
142
- it "should return the same links as absolute links do" do
143
- @m.absolute_links.should == @m.links
144
- end
145
163
  end
146
164
 
147
165
  describe 'Protocol-relative URLs' do
@@ -151,13 +169,13 @@ describe MetaInspector do
151
169
  end
152
170
 
153
171
  it "should convert protocol-relative links to http" do
154
- @m_http.absolute_links.should include('http://protocol-relative.com/contact')
155
- @m_http.absolute_links.should include('http://yahoo.com')
172
+ @m_http.links.should include('http://protocol-relative.com/contact')
173
+ @m_http.links.should include('http://yahoo.com')
156
174
  end
157
175
 
158
176
  it "should convert protocol-relative links to https" do
159
- @m_https.absolute_links.should include('https://protocol-relative.com/contact')
160
- @m_https.absolute_links.should include('https://yahoo.com')
177
+ @m_https.links.should include('https://protocol-relative.com/contact')
178
+ @m_https.links.should include('https://yahoo.com')
161
179
  end
162
180
  end
163
181
 
@@ -227,7 +245,7 @@ describe MetaInspector do
227
245
  describe 'to_hash' do
228
246
  it "should return a hash with all the values set" do
229
247
  @m = MetaInspector.new('http://pagerankalert.com')
230
- @m.to_hash.should == {"title"=>"PageRankAlert.com :: Track your PageRank changes", "url"=>"http://pagerankalert.com", "meta"=>{"name"=>{"robots"=>"all,follow", "csrf_param"=>"authenticity_token", "description"=>"Track your PageRank(TM) changes and receive alerts by email", "keywords"=>"pagerank, seo, optimization, google", "csrf_token"=>"iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="}, "property"=>{}}, "links"=>["/", "/es?language=es", "/users/sign_up", "/users/sign_in", "mailto:pagerankalert@gmail.com", "http://pagerankalert.posterous.com", "http://twitter.com/pagerankalert", "http://twitter.com/share"], "charset"=>"utf-8", "feed"=>"http://feeds.feedburner.com/PageRankAlert", "absolute_links"=>["http://pagerankalert.com/", "http://pagerankalert.com/es?language=es", "http://pagerankalert.com/users/sign_up", "http://pagerankalert.com/users/sign_in", "mailto:pagerankalert@gmail.com", "http://pagerankalert.posterous.com", "http://twitter.com/pagerankalert", "http://twitter.com/share"]}
248
+ @m.to_hash.should == {"title"=>"PageRankAlert.com :: Track your PageRank changes", "url"=>"http://pagerankalert.com", "meta"=>{"name"=>{"robots"=>"all,follow", "csrf_param"=>"authenticity_token", "description"=>"Track your PageRank(TM) changes and receive alerts by email", "keywords"=>"pagerank, seo, optimization, google", "csrf_token"=>"iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="}, "property"=>{}}, "images"=>["http://pagerankalert.com/images/pagerank_alert.png?1305794559"], "charset"=>"utf-8", "feed"=>"http://feeds.feedburner.com/PageRankAlert", "links"=>["http://pagerankalert.com/", "http://pagerankalert.com/es?language=es", "http://pagerankalert.com/users/sign_up", "http://pagerankalert.com/users/sign_in", "mailto:pagerankalert@gmail.com", "http://pagerankalert.posterous.com", "http://twitter.com/pagerankalert", "http://twitter.com/share"]}
231
249
  end
232
250
  end
233
251
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 53
4
+ hash: 59
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
8
  - 9
9
- - 3
10
- version: 1.9.3
9
+ - 4
10
+ version: 1.9.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-07-22 00:00:00 Z
18
+ date: 2012-07-23 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri
@@ -162,6 +162,7 @@ files:
162
162
  - spec/fixtures/tea-tron.com.response
163
163
  - spec/fixtures/theonion-no-description.com.response
164
164
  - spec/fixtures/theonion.com.response
165
+ - spec/fixtures/w3clove_faqs.response
165
166
  - spec/fixtures/youtube.response
166
167
  - spec/metainspector_spec.rb
167
168
  - spec/spec_helper.rb