RubyGems - metainspector - Versions diffs - 3.3.0 → 4.0.0.rc1 - Mend

metainspector 3.3.0 → 4.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

checksums.yaml +4 -4
data/Guardfile +5 -0
data/README.md +26 -8
data/lib/meta_inspector/document.rb +4 -8
data/lib/meta_inspector/exception_log.rb +0 -2
data/lib/meta_inspector/exceptionable.rb +0 -2
data/lib/meta_inspector/parser.rb +17 -162
data/lib/meta_inspector/parsers/base.rb +30 -0
data/lib/meta_inspector/parsers/images.rb +45 -0
data/lib/meta_inspector/parsers/links.rb +69 -0
data/lib/meta_inspector/parsers/meta_tags.rb +72 -0
data/lib/meta_inspector/parsers/texts.rb +27 -0
data/lib/meta_inspector/request.rb +0 -2
data/lib/meta_inspector/url.rb +0 -2
data/lib/meta_inspector/version.rb +1 -3
data/lib/meta_inspector.rb +5 -2
data/lib/metainspector.rb +0 -2
data/meta_inspector.gemspec +2 -1
data/spec/document_spec.rb +16 -26
data/spec/exception_log_spec.rb +1 -3
data/spec/fixtures/example.response +17 -0
data/spec/meta_inspector/images_spec.rb +111 -0
data/spec/meta_inspector/links_spec.rb +203 -0
data/spec/{meta_inspector_spec.rb → meta_inspector/meta_inspector_spec.rb} +1 -3
data/spec/meta_inspector/meta_tags_spec.rb +108 -0
data/spec/meta_inspector/redirections_spec.rb +48 -0
data/spec/meta_inspector/texts_spec.rb +22 -0
data/spec/parser_spec.rb +7 -393
data/spec/request_spec.rb +1 -3
data/spec/spec_helper.rb +0 -2
data/spec/url_spec.rb +1 -3
metadata +44 -6
data/spec/redirections_spec.rb +0 -47

data/spec/document_spec.rb CHANGED

@@ -1,19 +1,15 @@
-# -*- encoding: utf-8 -*-
-require File.join(File.dirname(__FILE__), "/spec_helper")
+require 'spec_helper'
 describe MetaInspector::Document do
   describe 'passing the contents of the document as html' do
-    before(:each) do
-      @m = MetaInspector::Document.new('http://cnn.com/', :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>")
-    end
+    let(:doc) { MetaInspector::Document.new('http://cnn.com/', :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>") }
     it "should get correct links when the url html is passed as an option" do
-      @m.links.should == ["http://cnn.com/hello"]
+      doc.links.internal.should == ["http://cnn.com/hello"]
     end
     it "should get the title" do
-      @m.title.should == "Hello From Passed Html"
+      doc.title.should == "Hello From Passed Html"
     end
   end
@@ -22,27 +18,21 @@ describe MetaInspector::Document do
   end
   it "should return a Hash with all the values set" do
-    @m = MetaInspector::Document.new('http://pagerankalert.com')
-    @m.to_hash.should == {
+    doc = MetaInspector::Document.new('http://pagerankalert.com')
+    doc.to_hash.should == {
                             "url"             => "http://pagerankalert.com/",
                             "title"           => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
                             "favicon"         => "http://pagerankalert.com/src/favicon.ico",
-                            "links"           => ["http://pagerankalert.com/",
-                                                  "http://pagerankalert.com/es?language=es",
-                                                  "http://pagerankalert.com/users/sign_up",
-                                                  "http://pagerankalert.com/users/sign_in",
-                                                  "mailto:pagerankalert@gmail.com",
-                                                  "http://pagerankalert.posterous.com/",
-                                                  "http://twitter.com/pagerankalert",
-                                                  "http://twitter.com/share"],
-                            "internal_links"  => ["http://pagerankalert.com/",
-                                                  "http://pagerankalert.com/es?language=es",
-                                                  "http://pagerankalert.com/users/sign_up",
-                                                  "http://pagerankalert.com/users/sign_in"],
-                            "external_links"  => ["mailto:pagerankalert@gmail.com",
-                                                  "http://pagerankalert.posterous.com/",
-                                                  "http://twitter.com/pagerankalert",
-                                                  "http://twitter.com/share"],
+                            "links"           => {
+                                                    'internal' => ["http://pagerankalert.com/",
+                                                                   "http://pagerankalert.com/es?language=es",
+                                                                   "http://pagerankalert.com/users/sign_up",
+                                                                   "http://pagerankalert.com/users/sign_in"],
+                                                    'external' => ["http://pagerankalert.posterous.com/",
+                                                                   "http://twitter.com/pagerankalert",
+                                                                   "http://twitter.com/share"],
+                                                    'non_http' => ["mailto:pagerankalert@gmail.com"]
+                                                  },
                             "images"          => ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"],
                             "charset"         => "utf-8",
                             "feed"            => "http://feeds.feedburner.com/PageRankAlert",

data/spec/exception_log_spec.rb CHANGED

@@ -1,6 +1,4 @@
-# -*- encoding: utf-8 -*-
-require File.join(File.dirname(__FILE__), "/spec_helper")
+require 'spec_helper'
 describe MetaInspector::ExceptionLog do

data/spec/fixtures/example.response CHANGED

@@ -22,5 +22,22 @@ Via: 1.1 varnish
         <rect x="10" y="10" width="200" height="50" style="fill:none; stroke:blue; stroke-width:1px"/>
       </g>
     </svg>
+    <!-- Internal relative links -->
+    <a href="/">Root</a>
+    <a href="/faqs">FAQs</a>
+    <a href="contact">Contact</a>
+    <!-- Internal absolute links -->
+    <a href="http://example.com/team.html">Team</a>
+    <!-- External links -->
+    <a href="https://twitter.com">Twitter</a>
+    <a href="https://github.com">Github</a>
+    <!-- Non-HTTP links -->
+    <a href="mailto:hello@example.com">email</a>
+    <a href="javascript:alert('hi');">hello</a>
+    <a href="ftp://ftp.example.com">FTP</a>
   </body>
 </html>

data/spec/meta_inspector/images_spec.rb ADDED

@@ -0,0 +1,111 @@
+require 'spec_helper'
+describe MetaInspector do
+  describe "#images" do
+    describe "returns an Enumerable" do
+      let(:page) { MetaInspector.new('https://twitter.com/markupvalidator') }
+      it "has a length" do
+        page.images.length.should == 6
+      end
+      it "has a size" do
+        page.images.size.should == 6
+      end
+      it "can be iterated" do
+        c = []
+        page.images.each {|i| c << i}
+        c.length.should == 6
+      end
+      it "can be sorted" do
+        page.images.sort
+          .should == ["https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png",
+                      "https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif",
+                      "https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg",
+                      "https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png",
+                      "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png",
+                      "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png"]
+      end
+    end
+    it "should find all page images" do
+      page = MetaInspector.new('http://pagerankalert.com')
+      page.images.to_a.should == ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"]
+    end
+    it "should find images on twitter" do
+      page = MetaInspector.new('https://twitter.com/markupvalidator')
+      page.images.length.should == 6
+      page.images.to_a.should == ["https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png",
+                             "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png",
+                             "https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png",
+                             "https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg",
+                             "https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png",
+                             "https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif"]
+    end
+    it "should ignore malformed image tags" do
+      # There is an image tag without a source. The scraper should not fatal.
+      page = MetaInspector.new("http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups")
+      page.images.size.should == 11
+    end
+  end
+  describe "#image" do
+    it "should find the og image" do
+      page = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
+      page.images.best.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
+    end
+    it "should find image on youtube" do
+      page = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
+      page.images.best.should == "http://i2.ytimg.com/vi/iaGSSrp49uc/mqdefault.jpg"
+    end
+    it "should find image when og:image and twitter:image metatags are missing" do
+      page = MetaInspector.new('http://www.alazan.com')
+      page.images.best.should == "http://www.alazan.com/imagenes/logo.jpg"
+    end
+  end
+  describe '#favicon' do
+    it "should get favicon link when marked as icon" do
+      page = MetaInspector.new('http://pagerankalert.com/')
+      page.images.favicon.should == 'http://pagerankalert.com/src/favicon.ico'
+    end
+    it "should get favicon link when marked as shortcut" do
+      page = MetaInspector.new('http://pagerankalert-shortcut.com/')
+      page.images.favicon.should == 'http://pagerankalert-shortcut.com/src/favicon.ico'
+    end
+    it "should get favicon link when marked as shorcut and icon" do
+      page = MetaInspector.new('http://pagerankalert-shortcut-and-icon.com/')
+      page.images.favicon.should == 'http://pagerankalert-shortcut-and-icon.com/src/favicon.ico'
+    end
+    it "should get favicon link when there is also a touch icon" do
+      page = MetaInspector.new('http://pagerankalert-touch-icon.com/')
+      page.images.favicon.should == 'http://pagerankalert-touch-icon.com/src/favicon.ico'
+    end
+    it "should get favicon link of nil" do
+      page = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
+      page.images.favicon.should == nil
+    end
+  end
+end

data/spec/meta_inspector/links_spec.rb ADDED

@@ -0,0 +1,203 @@
+require 'spec_helper'
+describe MetaInspector do
+  let(:page)   { MetaInspector.new('http://example.com') }
+  describe '#links' do
+    it 'returns the internal links' do
+      page.links.internal.should == [ "http://example.com/",
+                                        "http://example.com/faqs",
+                                        "http://example.com/contact",
+                                        "http://example.com/team.html" ]
+    end
+    it 'returns the external links' do
+      page.links.external.should == [ "https://twitter.com/",
+                                        "https://github.com/" ]
+    end
+    it 'returns the non-HTTP links' do
+      page.links.non_http.should == [ "mailto:hello@example.com",
+                                        "javascript:alert('hi');",
+                                        "ftp://ftp.example.com/" ]
+    end
+  end
+  describe 'Links' do
+    before(:each) do
+      @m = MetaInspector.new('http://pagerankalert.com')
+    end
+    it "should get correct absolute links for internal pages" do
+      @m.links.internal.should == [ "http://pagerankalert.com/",
+                                      "http://pagerankalert.com/es?language=es",
+                                      "http://pagerankalert.com/users/sign_up",
+                                      "http://pagerankalert.com/users/sign_in" ]
+    end
+    it "should get correct absolute links for external pages" do
+      @m.links.external.should == [ "http://pagerankalert.posterous.com/",
+                                      "http://twitter.com/pagerankalert",
+                                      "http://twitter.com/share" ]
+    end
+    it "should get correct absolute links, correcting relative links from URL not ending with slash" do
+      m = MetaInspector.new('http://alazan.com/websolution.asp')
+      m.links.internal.should == [ "http://alazan.com/index.asp",
+                                     "http://alazan.com/faqs.asp" ]
+    end
+    describe "links with international characters" do
+      it "should get correct absolute links, encoding the URLs as needed" do
+        m = MetaInspector.new('http://international.com')
+        m.links.internal.should == [ "http://international.com/espa%C3%B1a.asp",
+                                       "http://international.com/roman%C3%A9e",
+                                       "http://international.com/faqs#cami%C3%B3n",
+                                       "http://international.com/search?q=cami%C3%B3n",
+                                       "http://international.com/search?q=espa%C3%B1a#top",
+                                       "http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"]
+        m.links.external.should == [ "http://example.com/espa%C3%B1a.asp",
+                                       "http://example.com/roman%C3%A9e",
+                                       "http://example.com/faqs#cami%C3%B3n",
+                                       "http://example.com/search?q=cami%C3%B3n",
+                                       "http://example.com/search?q=espa%C3%B1a#top"]
+      end
+      describe "internal links" do
+        it "should get correct internal links, encoding the URLs as needed but respecting # and ?" do
+          m = MetaInspector.new('http://international.com')
+          m.links.internal.should == [ "http://international.com/espa%C3%B1a.asp",
+                                       "http://international.com/roman%C3%A9e",
+                                       "http://international.com/faqs#cami%C3%B3n",
+                                       "http://international.com/search?q=cami%C3%B3n",
+                                       "http://international.com/search?q=espa%C3%B1a#top",
+                                       "http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"]
+        end
+        it "should not crash when processing malformed hrefs" do
+          m = MetaInspector.new('http://example.com/malformed_href')
+          m.links.internal.should == [ "http://example.com/faqs" ]
+        end
+      end
+      describe "external links" do
+        it "should get correct external links, encoding the URLs as needed but respecting # and ?" do
+          m = MetaInspector.new('http://international.com')
+          m.links.external.should == [ "http://example.com/espa%C3%B1a.asp",
+                                       "http://example.com/roman%C3%A9e",
+                                       "http://example.com/faqs#cami%C3%B3n",
+                                       "http://example.com/search?q=cami%C3%B3n",
+                                       "http://example.com/search?q=espa%C3%B1a#top"]
+        end
+        it "should not crash when processing malformed hrefs" do
+          m = MetaInspector.new('http://example.com/malformed_href')
+          m.links.non_http.should == ["skype:joeuser?call", "telnet://telnet.cdrom.com", "javascript:alert('ok');",
+                                        "javascript://", "mailto:email(at)example.com"]
+        end
+      end
+    end
+    it "should not crash with links that have weird href values" do
+      m = MetaInspector.new('http://example.com/invalid_href')
+      m.links.non_http.should == ["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"]
+    end
+  end
+  describe 'Relative links' do
+    describe 'From a root URL' do
+      before(:each) do
+        @m = MetaInspector.new('http://relative.com/')
+      end
+      it 'should get the relative links' do
+        @m.links.internal.should == ['http://relative.com/about', 'http://relative.com/sitemap']
+      end
+    end
+    describe 'From a document' do
+      before(:each) do
+        @m = MetaInspector.new('http://relative.com/company')
+      end
+      it 'should get the relative links' do
+        @m.links.internal.should == ['http://relative.com/about', 'http://relative.com/sitemap']
+      end
+    end
+    describe 'From a directory' do
+      before(:each) do
+        @m = MetaInspector.new('http://relative.com/company/')
+      end
+      it 'should get the relative links' do
+        @m.links.internal.should == ['http://relative.com/company/about', 'http://relative.com/sitemap']
+      end
+    end
+  end
+  describe 'Relative links with base' do
+    it 'should get the relative links from a document' do
+      m = MetaInspector.new('http://relativewithbase.com/company/page2')
+      m.links.internal.should == ['http://relativewithbase.com/about', 'http://relativewithbase.com/sitemap']
+    end
+    it 'should get the relative links from a directory' do
+      m = MetaInspector.new('http://relativewithbase.com/company/page2/')
+      m.links.internal.should == ['http://relativewithbase.com/about', 'http://relativewithbase.com/sitemap']
+    end
+  end
+  describe 'Non-HTTP links' do
+    before(:each) do
+      @m = MetaInspector.new('http://example.com/nonhttp')
+    end
+    it "should get the links" do
+      @m.links.non_http.sort.should == [
+                                "ftp://ftp.cdrom.com/",
+                                "javascript:alert('hey');",
+                                "mailto:user@example.com",
+                                "skype:joeuser?call",
+                                "telnet://telnet.cdrom.com"
+                              ]
+    end
+  end
+  describe 'Protocol-relative URLs' do
+    before(:each) do
+      @m_http   = MetaInspector.new('http://protocol-relative.com')
+      @m_https  = MetaInspector.new('https://protocol-relative.com')
+    end
+    it "should convert protocol-relative links to http" do
+      @m_http.links.internal.should include('http://protocol-relative.com/contact')
+      @m_http.links.external.should include('http://yahoo.com/')
+    end
+    it "should convert protocol-relative links to https" do
+      @m_https.links.internal.should include('https://protocol-relative.com/contact')
+      @m_https.links.external.should include('https://yahoo.com/')
+    end
+  end
+  describe "Feed" do
+    it "should get rss feed" do
+      @m = MetaInspector.new('http://www.iteh.at')
+      @m.feed.should == 'http://www.iteh.at/de/rss/'
+    end
+    it "should get atom feed" do
+      @m = MetaInspector.new('http://www.tea-tron.com/jbravo/blog/')
+      @m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
+    end
+    it "should return nil if no feed found" do
+      @m = MetaInspector.new('http://www.alazan.com')
+      @m.feed.should == nil
+    end
+  end
+end

data/spec/{meta_inspector_spec.rb → meta_inspector/meta_inspector_spec.rb} RENAMED

@@ -1,6 +1,4 @@
-# -*- encoding: utf-8 -*-
-require File.join(File.dirname(__FILE__), "/spec_helper")
+require 'spec_helper'
 describe MetaInspector do
   it "returns a Document" do

data/spec/meta_inspector/meta_tags_spec.rb ADDED

@@ -0,0 +1,108 @@
+require 'spec_helper'
+describe MetaInspector do
+  describe "meta tags" do
+    let(:page) { MetaInspector.new('http://example.com/meta-tags') }
+    it "#meta_tags" do
+      page.meta_tags.should == {
+                                  'name' => {
+                                              'keywords'       => ['one, two, three'],
+                                              'description'    => ['the description'],
+                                              'author'         => ['Joe Sample'],
+                                              'robots'         => ['index,follow'],
+                                              'revisit'        => ['15 days'],
+                                              'dc.date.issued' => ['2011-09-15']
+                                             },
+                                  'http-equiv' => {
+                                                    'content-type'        => ['text/html; charset=UTF-8'],
+                                                    'content-style-type'  => ['text/css']
+                                                  },
+                                  'property' => {
+                                                  'og:title'        => ['An OG title'],
+                                                  'og:type'         => ['website'],
+                                                  'og:url'          => ['http://example.com/meta-tags'],
+                                                  'og:image'        => ['http://example.com/rock.jpg',
+                                                                        'http://example.com/rock2.jpg',
+                                                                        'http://example.com/rock3.jpg'],
+                                                  'og:image:width'  => ['300'],
+                                                  'og:image:height' => ['300', '1000']
+                                                },
+                                  'charset' => ['UTF-8']
+                                }
+    end
+    it "#meta_tag" do
+      page.meta_tag.should == {
+                                  'name' => {
+                                              'keywords'       => 'one, two, three',
+                                              'description'    => 'the description',
+                                              'author'         => 'Joe Sample',
+                                              'robots'         => 'index,follow',
+                                              'revisit'        => '15 days',
+                                              'dc.date.issued' => '2011-09-15'
+                                             },
+                                  'http-equiv' => {
+                                                    'content-type'        => 'text/html; charset=UTF-8',
+                                                    'content-style-type'  => 'text/css'
+                                                  },
+                                  'property' => {
+                                                  'og:title'        => 'An OG title',
+                                                  'og:type'         => 'website',
+                                                  'og:url'          => 'http://example.com/meta-tags',
+                                                  'og:image'        => 'http://example.com/rock.jpg',
+                                                  'og:image:width'  => '300',
+                                                  'og:image:height' => '300'
+                                                },
+                                  'charset' => 'UTF-8'
+                                }
+    end
+    it "#meta" do
+      page.meta.should == {
+                            'keywords'            => 'one, two, three',
+                            'description'         => 'the description',
+                            'author'              => 'Joe Sample',
+                            'robots'              => 'index,follow',
+                            'revisit'             => '15 days',
+                            'dc.date.issued'      => '2011-09-15',
+                            'content-type'        => 'text/html; charset=UTF-8',
+                            'content-style-type'  => 'text/css',
+                            'og:title'            => 'An OG title',
+                            'og:type'             => 'website',
+                            'og:url'              => 'http://example.com/meta-tags',
+                            'og:image'            => 'http://example.com/rock.jpg',
+                            'og:image:width'      => '300',
+                            'og:image:height'     => '300',
+                            'charset'             => 'UTF-8'
+                          }
+    end
+  end
+  describe 'Charset detection' do
+    it "should get the charset from <meta charset />" do
+      page = MetaInspector.new('http://charset001.com')
+      page.charset.should == "utf-8"
+    end
+    it "should get the charset from meta content type" do
+      page = MetaInspector.new('http://charset002.com')
+      page.charset.should == "windows-1252"
+    end
+    it "should get nil if no declared charset is found" do
+      page = MetaInspector.new('http://charset000.com')
+      page.charset.should == nil
+    end
+  end
+end

data/spec/meta_inspector/redirections_spec.rb ADDED

@@ -0,0 +1,48 @@
+require 'spec_helper'
+describe MetaInspector do
+  describe "redirections" do
+    let(:logger) { MetaInspector::ExceptionLog.new }
+    context "when redirections are turned off" do
+      it "disallows redirections" do
+        page = MetaInspector.new("http://facebook.com", :allow_redirections => false)
+        page.url.should == "http://facebook.com/"
+      end
+    end
+    context "when redirections are on (default)" do
+      it "allows follows redirections" do
+        logger.should_not receive(:<<)
+        page = MetaInspector.new("http://facebook.com", exception_log: logger)
+        page.url.should == "https://www.facebook.com/"
+      end
+    end
+    context "when there are cookies required for proper redirection" do
+      before(:all) { WebMock.enable! }
+      after(:all)  { WebMock.disable! }
+      it "allows follows redirections while sending the cookies" do
+        stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/")
+          .to_return(:status => 302,
+                     :headers => {
+                                   "Location" => "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1",
+                                   "Set-Cookie" => "EMETA_COOKIE_CHECK=1; path=/; domain=clarionledger.com"
+                                 })
+        stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
+          .with(:headers => {"Cookie" => "EMETA_COOKIE_CHECK=1"})
+        logger.should_not receive(:<<)
+        page = MetaInspector.new("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/", exception_log: logger)
+        page.url.should == "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1"
+      end
+    end
+  end
+end

data/spec/meta_inspector/texts_spec.rb ADDED

@@ -0,0 +1,22 @@
+require 'spec_helper'
+describe MetaInspector do
+  it "should get the title from the head section" do
+    page = MetaInspector.new('http://example.com')
+    page.title.should == 'An example page'
+  end
+  describe '#description' do
+    it "should find description from meta description" do
+      page = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
+      page.description.should == "This is Youtube"
+    end
+    it "should find a secondary description if no meta description" do
+      page = MetaInspector.new('http://theonion-no-description.com')
+      page.description.should == "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday, an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."
+    end
+  end
+end