metainspector 3.3.0 → 4.0.0.rc1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,19 +1,15 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require File.join(File.dirname(__FILE__), "/spec_helper")
1
+ require 'spec_helper'
4
2
 
5
3
  describe MetaInspector::Document do
6
4
  describe 'passing the contents of the document as html' do
7
- before(:each) do
8
- @m = MetaInspector::Document.new('http://cnn.com/', :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>")
9
- end
5
+ let(:doc) { MetaInspector::Document.new('http://cnn.com/', :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>") }
10
6
 
11
7
  it "should get correct links when the url html is passed as an option" do
12
- @m.links.should == ["http://cnn.com/hello"]
8
+ doc.links.internal.should == ["http://cnn.com/hello"]
13
9
  end
14
10
 
15
11
  it "should get the title" do
16
- @m.title.should == "Hello From Passed Html"
12
+ doc.title.should == "Hello From Passed Html"
17
13
  end
18
14
  end
19
15
 
@@ -22,27 +18,21 @@ describe MetaInspector::Document do
22
18
  end
23
19
 
24
20
  it "should return a Hash with all the values set" do
25
- @m = MetaInspector::Document.new('http://pagerankalert.com')
26
- @m.to_hash.should == {
21
+ doc = MetaInspector::Document.new('http://pagerankalert.com')
22
+ doc.to_hash.should == {
27
23
  "url" => "http://pagerankalert.com/",
28
24
  "title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
29
25
  "favicon" => "http://pagerankalert.com/src/favicon.ico",
30
- "links" => ["http://pagerankalert.com/",
31
- "http://pagerankalert.com/es?language=es",
32
- "http://pagerankalert.com/users/sign_up",
33
- "http://pagerankalert.com/users/sign_in",
34
- "mailto:pagerankalert@gmail.com",
35
- "http://pagerankalert.posterous.com/",
36
- "http://twitter.com/pagerankalert",
37
- "http://twitter.com/share"],
38
- "internal_links" => ["http://pagerankalert.com/",
39
- "http://pagerankalert.com/es?language=es",
40
- "http://pagerankalert.com/users/sign_up",
41
- "http://pagerankalert.com/users/sign_in"],
42
- "external_links" => ["mailto:pagerankalert@gmail.com",
43
- "http://pagerankalert.posterous.com/",
44
- "http://twitter.com/pagerankalert",
45
- "http://twitter.com/share"],
26
+ "links" => {
27
+ 'internal' => ["http://pagerankalert.com/",
28
+ "http://pagerankalert.com/es?language=es",
29
+ "http://pagerankalert.com/users/sign_up",
30
+ "http://pagerankalert.com/users/sign_in"],
31
+ 'external' => ["http://pagerankalert.posterous.com/",
32
+ "http://twitter.com/pagerankalert",
33
+ "http://twitter.com/share"],
34
+ 'non_http' => ["mailto:pagerankalert@gmail.com"]
35
+ },
46
36
  "images" => ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"],
47
37
  "charset" => "utf-8",
48
38
  "feed" => "http://feeds.feedburner.com/PageRankAlert",
@@ -1,6 +1,4 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require File.join(File.dirname(__FILE__), "/spec_helper")
1
+ require 'spec_helper'
4
2
 
5
3
  describe MetaInspector::ExceptionLog do
6
4
 
@@ -22,5 +22,22 @@ Via: 1.1 varnish
22
22
  <rect x="10" y="10" width="200" height="50" style="fill:none; stroke:blue; stroke-width:1px"/>
23
23
  </g>
24
24
  </svg>
25
+
26
+ <!-- Internal relative links -->
27
+ <a href="/">Root</a>
28
+ <a href="/faqs">FAQs</a>
29
+ <a href="contact">Contact</a>
30
+
31
+ <!-- Internal absolute links -->
32
+ <a href="http://example.com/team.html">Team</a>
33
+
34
+ <!-- External links -->
35
+ <a href="https://twitter.com">Twitter</a>
36
+ <a href="https://github.com">Github</a>
37
+
38
+ <!-- Non-HTTP links -->
39
+ <a href="mailto:hello@example.com">email</a>
40
+ <a href="javascript:alert('hi');">hello</a>
41
+ <a href="ftp://ftp.example.com">FTP</a>
25
42
  </body>
26
43
  </html>
@@ -0,0 +1,111 @@
1
+ require 'spec_helper'
2
+
3
+ describe MetaInspector do
4
+
5
+ describe "#images" do
6
+ describe "returns an Enumerable" do
7
+ let(:page) { MetaInspector.new('https://twitter.com/markupvalidator') }
8
+
9
+ it "has a length" do
10
+ page.images.length.should == 6
11
+ end
12
+
13
+ it "has a size" do
14
+ page.images.size.should == 6
15
+ end
16
+
17
+ it "can be iterated" do
18
+ c = []
19
+ page.images.each {|i| c << i}
20
+ c.length.should == 6
21
+ end
22
+
23
+ it "can be sorted" do
24
+ page.images.sort
25
+ .should == ["https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png",
26
+ "https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif",
27
+ "https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg",
28
+ "https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png",
29
+ "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png",
30
+ "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png"]
31
+ end
32
+ end
33
+
34
+ it "should find all page images" do
35
+ page = MetaInspector.new('http://pagerankalert.com')
36
+
37
+ page.images.to_a.should == ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"]
38
+ end
39
+
40
+ it "should find images on twitter" do
41
+ page = MetaInspector.new('https://twitter.com/markupvalidator')
42
+
43
+ page.images.length.should == 6
44
+ page.images.to_a.should == ["https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png",
45
+ "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png",
46
+ "https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png",
47
+ "https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg",
48
+ "https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png",
49
+ "https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif"]
50
+ end
51
+
52
+ it "should ignore malformed image tags" do
53
+ # There is an image tag without a source. The scraper should not fail with a fatal error.
54
+ page = MetaInspector.new("http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups")
55
+
56
+ page.images.size.should == 11
57
+ end
58
+ end
59
+
60
+ describe "#image" do
61
+ it "should find the og image" do
62
+ page = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
63
+
64
+ page.images.best.should == "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
65
+ end
66
+
67
+ it "should find image on youtube" do
68
+ page = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
69
+
70
+ page.images.best.should == "http://i2.ytimg.com/vi/iaGSSrp49uc/mqdefault.jpg"
71
+ end
72
+
73
+ it "should find image when og:image and twitter:image metatags are missing" do
74
+ page = MetaInspector.new('http://www.alazan.com')
75
+
76
+ page.images.best.should == "http://www.alazan.com/imagenes/logo.jpg"
77
+ end
78
+ end
79
+
80
+ describe '#favicon' do
81
+ it "should get favicon link when marked as icon" do
82
+ page = MetaInspector.new('http://pagerankalert.com/')
83
+
84
+ page.images.favicon.should == 'http://pagerankalert.com/src/favicon.ico'
85
+ end
86
+
87
+ it "should get favicon link when marked as shortcut" do
88
+ page = MetaInspector.new('http://pagerankalert-shortcut.com/')
89
+
90
+ page.images.favicon.should == 'http://pagerankalert-shortcut.com/src/favicon.ico'
91
+ end
92
+
93
+ it "should get favicon link when marked as shorcut and icon" do
94
+ page = MetaInspector.new('http://pagerankalert-shortcut-and-icon.com/')
95
+
96
+ page.images.favicon.should == 'http://pagerankalert-shortcut-and-icon.com/src/favicon.ico'
97
+ end
98
+
99
+ it "should get favicon link when there is also a touch icon" do
100
+ page = MetaInspector.new('http://pagerankalert-touch-icon.com/')
101
+
102
+ page.images.favicon.should == 'http://pagerankalert-touch-icon.com/src/favicon.ico'
103
+ end
104
+
105
+ it "should get favicon link of nil" do
106
+ page = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
107
+
108
+ page.images.favicon.should == nil
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,203 @@
1
+ require 'spec_helper'
2
+
3
+ describe MetaInspector do
4
+ let(:page) { MetaInspector.new('http://example.com') }
5
+
6
+ describe '#links' do
7
+ it 'returns the internal links' do
8
+ page.links.internal.should == [ "http://example.com/",
9
+ "http://example.com/faqs",
10
+ "http://example.com/contact",
11
+ "http://example.com/team.html" ]
12
+ end
13
+
14
+ it 'returns the external links' do
15
+ page.links.external.should == [ "https://twitter.com/",
16
+ "https://github.com/" ]
17
+ end
18
+
19
+ it 'returns the non-HTTP links' do
20
+ page.links.non_http.should == [ "mailto:hello@example.com",
21
+ "javascript:alert('hi');",
22
+ "ftp://ftp.example.com/" ]
23
+ end
24
+ end
25
+
26
+ describe 'Links' do
27
+ before(:each) do
28
+ @m = MetaInspector.new('http://pagerankalert.com')
29
+ end
30
+
31
+ it "should get correct absolute links for internal pages" do
32
+ @m.links.internal.should == [ "http://pagerankalert.com/",
33
+ "http://pagerankalert.com/es?language=es",
34
+ "http://pagerankalert.com/users/sign_up",
35
+ "http://pagerankalert.com/users/sign_in" ]
36
+ end
37
+
38
+ it "should get correct absolute links for external pages" do
39
+ @m.links.external.should == [ "http://pagerankalert.posterous.com/",
40
+ "http://twitter.com/pagerankalert",
41
+ "http://twitter.com/share" ]
42
+ end
43
+
44
+ it "should get correct absolute links, correcting relative links from URL not ending with slash" do
45
+ m = MetaInspector.new('http://alazan.com/websolution.asp')
46
+
47
+ m.links.internal.should == [ "http://alazan.com/index.asp",
48
+ "http://alazan.com/faqs.asp" ]
49
+ end
50
+
51
+ describe "links with international characters" do
52
+ it "should get correct absolute links, encoding the URLs as needed" do
53
+ m = MetaInspector.new('http://international.com')
54
+
55
+ m.links.internal.should == [ "http://international.com/espa%C3%B1a.asp",
56
+ "http://international.com/roman%C3%A9e",
57
+ "http://international.com/faqs#cami%C3%B3n",
58
+ "http://international.com/search?q=cami%C3%B3n",
59
+ "http://international.com/search?q=espa%C3%B1a#top",
60
+ "http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"]
61
+
62
+ m.links.external.should == [ "http://example.com/espa%C3%B1a.asp",
63
+ "http://example.com/roman%C3%A9e",
64
+ "http://example.com/faqs#cami%C3%B3n",
65
+ "http://example.com/search?q=cami%C3%B3n",
66
+ "http://example.com/search?q=espa%C3%B1a#top"]
67
+ end
68
+
69
+ describe "internal links" do
70
+ it "should get correct internal links, encoding the URLs as needed but respecting # and ?" do
71
+ m = MetaInspector.new('http://international.com')
72
+ m.links.internal.should == [ "http://international.com/espa%C3%B1a.asp",
73
+ "http://international.com/roman%C3%A9e",
74
+ "http://international.com/faqs#cami%C3%B3n",
75
+ "http://international.com/search?q=cami%C3%B3n",
76
+ "http://international.com/search?q=espa%C3%B1a#top",
77
+ "http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"]
78
+ end
79
+
80
+ it "should not crash when processing malformed hrefs" do
81
+ m = MetaInspector.new('http://example.com/malformed_href')
82
+ m.links.internal.should == [ "http://example.com/faqs" ]
83
+ end
84
+ end
85
+
86
+ describe "external links" do
87
+ it "should get correct external links, encoding the URLs as needed but respecting # and ?" do
88
+ m = MetaInspector.new('http://international.com')
89
+ m.links.external.should == [ "http://example.com/espa%C3%B1a.asp",
90
+ "http://example.com/roman%C3%A9e",
91
+ "http://example.com/faqs#cami%C3%B3n",
92
+ "http://example.com/search?q=cami%C3%B3n",
93
+ "http://example.com/search?q=espa%C3%B1a#top"]
94
+ end
95
+
96
+ it "should not crash when processing malformed hrefs" do
97
+ m = MetaInspector.new('http://example.com/malformed_href')
98
+ m.links.non_http.should == ["skype:joeuser?call", "telnet://telnet.cdrom.com", "javascript:alert('ok');",
99
+ "javascript://", "mailto:email(at)example.com"]
100
+ end
101
+ end
102
+ end
103
+
104
+ it "should not crash with links that have weird href values" do
105
+ m = MetaInspector.new('http://example.com/invalid_href')
106
+ m.links.non_http.should == ["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"]
107
+ end
108
+ end
109
+
110
+ describe 'Relative links' do
111
+ describe 'From a root URL' do
112
+ before(:each) do
113
+ @m = MetaInspector.new('http://relative.com/')
114
+ end
115
+
116
+ it 'should get the relative links' do
117
+ @m.links.internal.should == ['http://relative.com/about', 'http://relative.com/sitemap']
118
+ end
119
+ end
120
+
121
+ describe 'From a document' do
122
+ before(:each) do
123
+ @m = MetaInspector.new('http://relative.com/company')
124
+ end
125
+
126
+ it 'should get the relative links' do
127
+ @m.links.internal.should == ['http://relative.com/about', 'http://relative.com/sitemap']
128
+ end
129
+ end
130
+
131
+ describe 'From a directory' do
132
+ before(:each) do
133
+ @m = MetaInspector.new('http://relative.com/company/')
134
+ end
135
+
136
+ it 'should get the relative links' do
137
+ @m.links.internal.should == ['http://relative.com/company/about', 'http://relative.com/sitemap']
138
+ end
139
+ end
140
+ end
141
+
142
+ describe 'Relative links with base' do
143
+ it 'should get the relative links from a document' do
144
+ m = MetaInspector.new('http://relativewithbase.com/company/page2')
145
+ m.links.internal.should == ['http://relativewithbase.com/about', 'http://relativewithbase.com/sitemap']
146
+ end
147
+
148
+ it 'should get the relative links from a directory' do
149
+ m = MetaInspector.new('http://relativewithbase.com/company/page2/')
150
+ m.links.internal.should == ['http://relativewithbase.com/about', 'http://relativewithbase.com/sitemap']
151
+ end
152
+ end
153
+
154
+ describe 'Non-HTTP links' do
155
+ before(:each) do
156
+ @m = MetaInspector.new('http://example.com/nonhttp')
157
+ end
158
+
159
+ it "should get the links" do
160
+ @m.links.non_http.sort.should == [
161
+ "ftp://ftp.cdrom.com/",
162
+ "javascript:alert('hey');",
163
+ "mailto:user@example.com",
164
+ "skype:joeuser?call",
165
+ "telnet://telnet.cdrom.com"
166
+ ]
167
+ end
168
+ end
169
+
170
+ describe 'Protocol-relative URLs' do
171
+ before(:each) do
172
+ @m_http = MetaInspector.new('http://protocol-relative.com')
173
+ @m_https = MetaInspector.new('https://protocol-relative.com')
174
+ end
175
+
176
+ it "should convert protocol-relative links to http" do
177
+ @m_http.links.internal.should include('http://protocol-relative.com/contact')
178
+ @m_http.links.external.should include('http://yahoo.com/')
179
+ end
180
+
181
+ it "should convert protocol-relative links to https" do
182
+ @m_https.links.internal.should include('https://protocol-relative.com/contact')
183
+ @m_https.links.external.should include('https://yahoo.com/')
184
+ end
185
+ end
186
+
187
+ describe "Feed" do
188
+ it "should get rss feed" do
189
+ @m = MetaInspector.new('http://www.iteh.at')
190
+ @m.feed.should == 'http://www.iteh.at/de/rss/'
191
+ end
192
+
193
+ it "should get atom feed" do
194
+ @m = MetaInspector.new('http://www.tea-tron.com/jbravo/blog/')
195
+ @m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
196
+ end
197
+
198
+ it "should return nil if no feed found" do
199
+ @m = MetaInspector.new('http://www.alazan.com')
200
+ @m.feed.should == nil
201
+ end
202
+ end
203
+ end
@@ -1,6 +1,4 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- require File.join(File.dirname(__FILE__), "/spec_helper")
1
+ require 'spec_helper'
4
2
 
5
3
  describe MetaInspector do
6
4
  it "returns a Document" do
@@ -0,0 +1,108 @@
1
+ require 'spec_helper'
2
+
3
+ describe MetaInspector do
4
+
5
+ describe "meta tags" do
6
+ let(:page) { MetaInspector.new('http://example.com/meta-tags') }
7
+
8
+ it "#meta_tags" do
9
+ page.meta_tags.should == {
10
+ 'name' => {
11
+ 'keywords' => ['one, two, three'],
12
+ 'description' => ['the description'],
13
+ 'author' => ['Joe Sample'],
14
+ 'robots' => ['index,follow'],
15
+ 'revisit' => ['15 days'],
16
+ 'dc.date.issued' => ['2011-09-15']
17
+ },
18
+
19
+ 'http-equiv' => {
20
+ 'content-type' => ['text/html; charset=UTF-8'],
21
+ 'content-style-type' => ['text/css']
22
+ },
23
+
24
+ 'property' => {
25
+ 'og:title' => ['An OG title'],
26
+ 'og:type' => ['website'],
27
+ 'og:url' => ['http://example.com/meta-tags'],
28
+ 'og:image' => ['http://example.com/rock.jpg',
29
+ 'http://example.com/rock2.jpg',
30
+ 'http://example.com/rock3.jpg'],
31
+ 'og:image:width' => ['300'],
32
+ 'og:image:height' => ['300', '1000']
33
+ },
34
+
35
+ 'charset' => ['UTF-8']
36
+ }
37
+ end
38
+
39
+ it "#meta_tag" do
40
+ page.meta_tag.should == {
41
+ 'name' => {
42
+ 'keywords' => 'one, two, three',
43
+ 'description' => 'the description',
44
+ 'author' => 'Joe Sample',
45
+ 'robots' => 'index,follow',
46
+ 'revisit' => '15 days',
47
+ 'dc.date.issued' => '2011-09-15'
48
+ },
49
+
50
+ 'http-equiv' => {
51
+ 'content-type' => 'text/html; charset=UTF-8',
52
+ 'content-style-type' => 'text/css'
53
+ },
54
+
55
+ 'property' => {
56
+ 'og:title' => 'An OG title',
57
+ 'og:type' => 'website',
58
+ 'og:url' => 'http://example.com/meta-tags',
59
+ 'og:image' => 'http://example.com/rock.jpg',
60
+ 'og:image:width' => '300',
61
+ 'og:image:height' => '300'
62
+ },
63
+
64
+ 'charset' => 'UTF-8'
65
+ }
66
+ end
67
+
68
+ it "#meta" do
69
+ page.meta.should == {
70
+ 'keywords' => 'one, two, three',
71
+ 'description' => 'the description',
72
+ 'author' => 'Joe Sample',
73
+ 'robots' => 'index,follow',
74
+ 'revisit' => '15 days',
75
+ 'dc.date.issued' => '2011-09-15',
76
+ 'content-type' => 'text/html; charset=UTF-8',
77
+ 'content-style-type' => 'text/css',
78
+ 'og:title' => 'An OG title',
79
+ 'og:type' => 'website',
80
+ 'og:url' => 'http://example.com/meta-tags',
81
+ 'og:image' => 'http://example.com/rock.jpg',
82
+ 'og:image:width' => '300',
83
+ 'og:image:height' => '300',
84
+ 'charset' => 'UTF-8'
85
+ }
86
+ end
87
+ end
88
+
89
+ describe 'Charset detection' do
90
+ it "should get the charset from <meta charset />" do
91
+ page = MetaInspector.new('http://charset001.com')
92
+
93
+ page.charset.should == "utf-8"
94
+ end
95
+
96
+ it "should get the charset from meta content type" do
97
+ page = MetaInspector.new('http://charset002.com')
98
+
99
+ page.charset.should == "windows-1252"
100
+ end
101
+
102
+ it "should get nil if no declared charset is found" do
103
+ page = MetaInspector.new('http://charset000.com')
104
+
105
+ page.charset.should == nil
106
+ end
107
+ end
108
+ end
@@ -0,0 +1,48 @@
1
+ require 'spec_helper'
2
+
3
+ describe MetaInspector do
4
+ describe "redirections" do
5
+ let(:logger) { MetaInspector::ExceptionLog.new }
6
+
7
+ context "when redirections are turned off" do
8
+ it "disallows redirections" do
9
+ page = MetaInspector.new("http://facebook.com", :allow_redirections => false)
10
+
11
+ page.url.should == "http://facebook.com/"
12
+ end
13
+ end
14
+
15
+ context "when redirections are on (default)" do
16
+ it "allows follows redirections" do
17
+ logger.should_not receive(:<<)
18
+
19
+ page = MetaInspector.new("http://facebook.com", exception_log: logger)
20
+
21
+ page.url.should == "https://www.facebook.com/"
22
+ end
23
+ end
24
+
25
+ context "when there are cookies required for proper redirection" do
26
+ before(:all) { WebMock.enable! }
27
+ after(:all) { WebMock.disable! }
28
+
29
+ it "allows follows redirections while sending the cookies" do
30
+ stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/")
31
+ .to_return(:status => 302,
32
+ :headers => {
33
+ "Location" => "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1",
34
+ "Set-Cookie" => "EMETA_COOKIE_CHECK=1; path=/; domain=clarionledger.com"
35
+ })
36
+
37
+ stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
38
+ .with(:headers => {"Cookie" => "EMETA_COOKIE_CHECK=1"})
39
+
40
+ logger.should_not receive(:<<)
41
+
42
+ page = MetaInspector.new("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/", exception_log: logger)
43
+
44
+ page.url.should == "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1"
45
+ end
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,22 @@
1
+ require 'spec_helper'
2
+
3
+ describe MetaInspector do
4
+ it "should get the title from the head section" do
5
+ page = MetaInspector.new('http://example.com')
6
+
7
+ page.title.should == 'An example page'
8
+ end
9
+
10
+ describe '#description' do
11
+ it "should find description from meta description" do
12
+ page = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
13
+
14
+ page.description.should == "This is Youtube"
15
+ end
16
+
17
+ it "should find a secondary description if no meta description" do
18
+ page = MetaInspector.new('http://theonion-no-description.com')
19
+ page.description.should == "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday, an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."
20
+ end
21
+ end
22
+ end