metainspector 1.16.1 → 1.17.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,76 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require 'addressable/uri'
4
+
5
module MetaInspector
  # Wraps a URL string: adds a default scheme when none is present, normalizes
  # it, and exposes its scheme, host and root URL.
  #
  # Errors raised by URI.parse inside instance methods are appended to the
  # exception log given at construction time and the affected method returns nil.
  class URL
    attr_reader :url

    include MetaInspector::Exceptionable

    # initial_url - String with the URL to wrap.
    # options     - Hash of options. :exception_log is the object that receives
    #               parsing errors through <<; it defaults to a new
    #               MetaInspector::ExceptionLog.
    def initialize(initial_url, options = {})
      options        = defaults.merge(options)
      @exception_log = options[:exception_log]

      self.url = initial_url
    end

    # Returns the scheme of the URL, or nil when the URL cannot be parsed.
    def scheme
      uri = parsed(url) # parse once; a failed parse has already been logged
      uri ? uri.scheme : nil
    end

    # Returns the host of the URL, or nil when the URL cannot be parsed.
    def host
      uri = parsed(url)
      uri ? uri.host : nil
    end

    # Returns "<scheme>://<host>/" built from the current URL.
    def root_url
      "#{scheme}://#{host}/"
    end

    # Stores new_url after adding a default scheme (if missing) and normalizing it.
    def url=(new_url)
      @url = normalized(with_default_scheme(new_url))
    end

    # Converts a protocol-relative url to its full form, depending on the scheme of the page that contains it
    #
    # Anchored with \A so only a url that really *starts* with "//" is rewritten
    # (^ would also match after an embedded newline).
    def self.unrelativize(url, scheme)
      url =~ %r{\A//} ? "#{scheme}://#{url[2..-1]}" : url
    end

    # Convert a relative url like "/users" to an absolute one like "http://example.com/users"
    # Respecting already absolute URLs like the ones starting with http:, ftp:, telnet:, mailto:, javascript: ...
    #
    # url           - String with the (possibly relative) url.
    # base_url      - String with the url that relative urls are resolved against.
    # exception_log - optional object receiving errors through <<. A class method
    #                 has no per-instance log, so callers that want failures
    #                 recorded must pass one; without it the error is only
    #                 reflected in the nil return value.
    #
    # Returns the absolute url as a String, or nil when the url is invalid.
    def self.absolutify(url, base_url, exception_log = nil)
      if url =~ /^\w*\:/i
        # Only forward the log when one was given, so URL#initialize keeps its default.
        options = exception_log ? { exception_log: exception_log } : {}
        MetaInspector::URL.new(url, options).url
      else
        Addressable::URI.join(base_url, url).normalize.to_s
      end
    rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
      # Previously this appended to @exception_log, which is nil in class-method
      # context and turned every invalid url into a NoMethodError.
      exception_log << e if exception_log
      nil
    end

    private

    # Default options for #initialize.
    def defaults
      { exception_log: MetaInspector::ExceptionLog.new }
    end

    # Adds 'http' as default scheme, if there is none
    # An unparseable url is returned unchanged.
    def with_default_scheme(url)
      uri = parsed(url)
      uri && uri.scheme.nil? ? "http://#{url}" : url
    end

    # Normalize url to deal with characters that should be encoded, add trailing slash, convert to downcase...
    def normalized(url)
      Addressable::URI.parse(url).normalize.to_s
    end

    # Parses url with the standard library URI.
    # Returns the parsed URI, or nil after logging the error when url is invalid.
    def parsed(url)
      URI.parse(url)
    rescue URI::InvalidURIError, URI::InvalidComponentError => e
      @exception_log << e
      nil
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.16.1"
4
+ VERSION = "1.17.0"
5
5
  end
@@ -0,0 +1,97 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.join(File.dirname(__FILE__), "/spec_helper")
4
+
5
# Specs for MetaInspector::Document: building a document from passed-in HTML,
# string/hash conversion, and how non-HTML content is handled depending on the
# :html_content_only option.
describe MetaInspector::Document do
  describe 'passing the contents of the document as html' do
    before(:each) do
      @m = MetaInspector::Document.new('http://cnn.com/', :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>")
    end

    it "should get correct links when the url html is passed as an option" do
      @m.links.should == ["http://cnn.com/hello"]
    end

    it "should get the title" do
      @m.title.should == "Hello From Passed Html"
    end
  end

  it "should return a String as to_s" do
    MetaInspector::Document.new('http://pagerankalert.com').to_s.class.should == String
  end

  it "should return a Hash with all the values set" do
    @m = MetaInspector::Document.new('http://pagerankalert.com')
    @m.to_hash.should == {
                            "url"             =>"http://pagerankalert.com/",
                            "title"           =>"PageRankAlert.com :: Track your PageRank changes & receive alerts",
                            "links"           => ["http://pagerankalert.com/",
                                                  "http://pagerankalert.com/es?language=es",
                                                  "http://pagerankalert.com/users/sign_up",
                                                  "http://pagerankalert.com/users/sign_in",
                                                  "mailto:pagerankalert@gmail.com",
                                                  "http://pagerankalert.posterous.com/",
                                                  "http://twitter.com/pagerankalert",
                                                  "http://twitter.com/share"],
                            "internal_links"  => ["http://pagerankalert.com/",
                                                  "http://pagerankalert.com/es?language=es",
                                                  "http://pagerankalert.com/users/sign_up",
                                                  "http://pagerankalert.com/users/sign_in"],
                            "external_links"  => ["mailto:pagerankalert@gmail.com",
                                                  "http://pagerankalert.posterous.com/",
                                                  "http://twitter.com/pagerankalert",
                                                  "http://twitter.com/share"],
                            "images"          => ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"],
                            "charset"         => "utf-8",
                            "feed"            => "http://feeds.feedburner.com/PageRankAlert",
                            "content_type"    =>"text/html",
                            "meta"            => {
                                                    "name" => {
                                                                "description"=> "Track your PageRank(TM) changes and receive alerts by email",
                                                                "keywords"   => "pagerank, seo, optimization, google",
                                                                "robots"     => "all,follow",
                                                                "csrf_param" => "authenticity_token",
                                                                "csrf_token" => "iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="
                                                              },
                                                    "property"=>{}
                                                  }
                          }
  end

  describe 'exception handling' do
    # The descriptions below name the option exactly as it is passed
    # (:html_content_only) and the content type the fixtures actually serve.

    it "should parse images when html_content_only is not specified" do
      image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png')
      image_url.description # called only to trigger parsing; the value is irrelevant

      image_url.should be_ok
    end

    it "should parse images when html_content_only is false" do
      image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', :html_content_only => false)
      image_url.description # called only to trigger parsing; the value is irrelevant

      image_url.should be_ok
    end

    it "should handle errors when content is image/png and html_content_only is true" do
      image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', :html_content_only => true)

      expect {
        image_url.title
      }.to change { image_url.exceptions.size }

      image_url.exceptions.first.message.should == "The url provided contains image/png content instead of text/html content"
    end

    it "should handle errors when content is not text/html and html_content_only is true" do
      tar_url = MetaInspector::Document.new('http://pagerankalert.com/file.tar.gz', :html_content_only => true)

      expect {
        tar_url.title
      }.to change { tar_url.exceptions.size }

      tar_url.exceptions.first.message.should == "The url provided contains application/x-gzip content instead of text/html content"
    end
  end
end
@@ -0,0 +1,59 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.join(File.dirname(__FILE__), "/spec_helper")
4
+
5
# Specs for MetaInspector::ExceptionLog: storing exceptions, the ok? predicate,
# and the behaviour selected through the warn_level option.
describe MetaInspector::ExceptionLog do
  let(:log) { MetaInspector::ExceptionLog.new }

  describe 'storing exceptions' do
    it 'should store exceptions' do
      expect do
        log << StandardError.new("an error message")
      end.to change { log.exceptions.length }.from(0).to(1)
    end

    it 'should return stored exceptions' do
      earlier = StandardError.new("first message")
      later   = StandardError.new("second message")

      # Appended in order, and expected back in the same order.
      [earlier, later].each { |error| log << error }

      log.exceptions.should == [earlier, later]
    end
  end

  describe 'ok?' do
    it 'should be true if no exceptions stored' do
      log.should be_ok
    end

    it 'should be false if some exception stored' do
      log << StandardError.new("some message")
      log.should_not be_ok
    end
  end

  describe 'warn_level' do
    it 'should be quiet by default' do
      MetaInspector::ExceptionLog.new.warn_level.should be_nil
    end

    it 'should warn about the error if warn_level is :warn' do
      warning_log = MetaInspector::ExceptionLog.new(warn_level: :warn)
      error       = StandardError.new("an error message")

      warning_log.should_receive(:warn).with(error)
      warning_log << error
    end

    it 'should raise exceptions when warn_level is :raise' do
      raising_log = MetaInspector::ExceptionLog.new(warn_level: :raise)
      error       = StandardError.new("this should be raised")

      expect do
        raising_log << error
      end.to raise_exception(StandardError, "this should be raised")
    end
  end
end
@@ -0,0 +1,9 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.join(File.dirname(__FILE__), "/spec_helper")
4
+
5
# Spec for the top-level MetaInspector.new entry point.
describe MetaInspector do
  it 'returns a Document' do
    page = MetaInspector.new('http://example.com')

    page.class.should == MetaInspector::Document
  end
end
@@ -0,0 +1,374 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ require File.join(File.dirname(__FILE__), "/spec_helper")
4
+
5
+ describe MetaInspector::Parser do
6
# Basic scraping examples: title, single image, image list, feed and
# description lookups. @m defaults to the parser for pagerankalert.com; some
# examples replace it with a parser for another page.
describe 'Doing a basic scrape' do

  before do
    @m = MetaInspector::Parser.new(doc('http://pagerankalert.com'))
  end

  it 'should get the title' do
    @m.title.should == 'PageRankAlert.com :: Track your PageRank changes & receive alerts'
  end

  it 'should not find an image' do
    @m.image.should == nil
  end

  describe 'get image' do
    it 'should find the og image' do
      @m = MetaInspector::Parser.new(doc('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/'))

      # Both accessors are expected to report the same image.
      og_image = "http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg"
      @m.image.should == og_image
      @m.meta_og_image.should == og_image
    end

    it 'should find image on youtube' do
      youtube = MetaInspector::Parser.new(doc('http://www.youtube.com/watch?v=iaGSSrp49uc'))
      youtube.image.should == "http://i2.ytimg.com/vi/iaGSSrp49uc/mqdefault.jpg"
    end
  end

  describe 'get images' do
    it 'should find all page images' do
      @m.images.should == ["http://pagerankalert.com/images/pagerank_alert.png?1305794559"]
    end

    it 'should find images on twitter' do
      twitter = MetaInspector::Parser.new(doc('https://twitter.com/markupvalidator'))
      twitter.images.length.should == 6
      twitter.images.join("; ").should == "https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png; https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png; https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png; https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg; https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png; https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif"
    end
  end

  it 'should ignore malformed image tags' do
    # There is an image tag without a source. The scraper should not fatal.
    @m = MetaInspector::Parser.new(doc("http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups"))
    @m.images.size.should == 11
  end

  it 'should have a Nokogiri::HTML::Document as parsed' do
    @m.parsed.class.should == Nokogiri::HTML::Document
  end

  it 'should return the document as a string' do
    @m.to_s.class.should == String
  end

  describe 'Feed' do
    it 'should get rss feed' do
      @m = MetaInspector::Parser.new(doc('http://www.iteh.at'))
      @m.feed.should == 'http://www.iteh.at/de/rss/'
    end

    it 'should get atom feed' do
      @m = MetaInspector::Parser.new(doc('http://www.tea-tron.com/jbravo/blog/'))
      @m.feed.should == 'http://www.tea-tron.com/jbravo/blog/feed/'
    end

    it 'should return nil if no feed found' do
      @m = MetaInspector::Parser.new(doc('http://www.alazan.com'))
      @m.feed.should == nil
    end
  end

  describe 'get description' do
    it 'should find description on youtube' do
      youtube = MetaInspector::Parser.new(doc('http://www.youtube.com/watch?v=iaGSSrp49uc'))
      youtube.description.should == ""
    end
  end
end
81
+
82
# When the page has no meta description, the parser is expected to fall back
# to text taken from the page itself.
describe 'Page with missing meta description' do
  it 'should find a secondary description' do
    @m = MetaInspector::Parser.new(doc('http://theonion-no-description.com'))

    fallback_text = "SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday, an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description."
    @m.description.should == fallback_text
  end
end
88
+
89
# Link extraction: all links, internal vs. external links, relative-link
# resolution, URL encoding of international characters and tolerance of
# malformed href values.
describe 'Links' do
  before do
    @m = MetaInspector::Parser.new(doc('http://pagerankalert.com'))
  end

  it 'should get the links' do
    @m.links.should == %w[
      http://pagerankalert.com/
      http://pagerankalert.com/es?language=es
      http://pagerankalert.com/users/sign_up
      http://pagerankalert.com/users/sign_in
      mailto:pagerankalert@gmail.com
      http://pagerankalert.posterous.com/
      http://twitter.com/pagerankalert
      http://twitter.com/share
    ]
  end

  it 'should get correct absolute links for internal pages' do
    @m.internal_links.should == %w[
      http://pagerankalert.com/
      http://pagerankalert.com/es?language=es
      http://pagerankalert.com/users/sign_up
      http://pagerankalert.com/users/sign_in
    ]
  end

  it 'should get correct absolute links for external pages' do
    @m.external_links.should == %w[
      mailto:pagerankalert@gmail.com
      http://pagerankalert.posterous.com/
      http://twitter.com/pagerankalert
      http://twitter.com/share
    ]
  end

  it 'should get correct absolute links, correcting relative links from URL not ending with slash' do
    parser = MetaInspector::Parser.new(doc('http://alazan.com/websolution.asp'))
    parser.links.should == %w[
      http://alazan.com/index.asp
      http://alazan.com/faqs.asp
    ]
  end

  it 'should return empty array if no links found' do
    parser = MetaInspector::Parser.new(doc('http://example.com/empty'))
    parser.links.should == []
  end

  describe 'links with international characters' do
    it 'should get correct absolute links, encoding the URLs as needed' do
      parser = MetaInspector::Parser.new(doc('http://international.com'))
      parser.links.should == %w[
        http://international.com/espa%C3%B1a.asp
        http://international.com/roman%C3%A9e
        http://international.com/faqs#cami%C3%B3n
        http://international.com/search?q=cami%C3%B3n
        http://international.com/search?q=espa%C3%B1a#top
        http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21
        http://example.com/espa%C3%B1a.asp
        http://example.com/roman%C3%A9e
        http://example.com/faqs#cami%C3%B3n
        http://example.com/search?q=cami%C3%B3n
        http://example.com/search?q=espa%C3%B1a#top
      ]
    end

    describe 'internal links' do
      it 'should get correct internal links, encoding the URLs as needed but respecting # and ?' do
        parser = MetaInspector::Parser.new(doc('http://international.com'))
        parser.internal_links.should == %w[
          http://international.com/espa%C3%B1a.asp
          http://international.com/roman%C3%A9e
          http://international.com/faqs#cami%C3%B3n
          http://international.com/search?q=cami%C3%B3n
          http://international.com/search?q=espa%C3%B1a#top
          http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21
        ]
      end

      it 'should not crash when processing malformed hrefs' do
        parser = MetaInspector::Parser.new(doc('http://example.com/malformed_href'))
        expect do
          parser.internal_links.should == ["http://example.com/faqs"]
          parser.should be_ok
        end.to_not raise_error
      end
    end

    describe 'external links' do
      it 'should get correct external links, encoding the URLs as needed but respecting # and ?' do
        parser = MetaInspector::Parser.new(doc('http://international.com'))
        parser.external_links.should == %w[
          http://example.com/espa%C3%B1a.asp
          http://example.com/roman%C3%A9e
          http://example.com/faqs#cami%C3%B3n
          http://example.com/search?q=cami%C3%B3n
          http://example.com/search?q=espa%C3%B1a#top
        ]
      end

      it 'should not crash when processing malformed hrefs' do
        parser = MetaInspector::Parser.new(doc('http://example.com/malformed_href'))
        expect do
          parser.external_links.should == [
            "skype:joeuser?call",
            "telnet://telnet.cdrom.com",
            "javascript:alert('ok');",
            "javascript://",
            "mailto:email(at)example.com"
          ]
          parser.should be_ok
        end.to_not raise_error
      end
    end
  end

  it 'should not crash with links that have weird href values' do
    parser = MetaInspector::Parser.new(doc('http://example.com/invalid_href'))
    parser.links.should == ["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"]
  end
end
192
+
193
# Relative hrefs are resolved against the page URL; the expected result differs
# depending on whether that URL is a root, a document or a directory.
describe 'Relative links' do
  describe 'From a root URL' do
    before do
      @m = MetaInspector::Parser.new(doc('http://relative.com/'))
    end

    it 'should get the relative links' do
      @m.internal_links.should == %w[http://relative.com/about http://relative.com/sitemap]
    end
  end

  describe 'From a document' do
    before do
      @m = MetaInspector::Parser.new(doc('http://relative.com/company'))
    end

    it 'should get the relative links' do
      @m.internal_links.should == %w[http://relative.com/about http://relative.com/sitemap]
    end
  end

  describe 'From a directory' do
    before do
      @m = MetaInspector::Parser.new(doc('http://relative.com/company/'))
    end

    it 'should get the relative links' do
      @m.internal_links.should == %w[http://relative.com/company/about http://relative.com/sitemap]
    end
  end
end
224
+
225
# Pages on relativewithbase.com: the same absolute links are expected whether
# the page URL is a document or a directory.
describe 'Relative links with base' do
  it 'should get the relative links from a document' do
    parser = MetaInspector::Parser.new(doc('http://relativewithbase.com/company/page2'))
    parser.internal_links.should == %w[http://relativewithbase.com/about http://relativewithbase.com/sitemap]
  end

  it 'should get the relative links from a directory' do
    parser = MetaInspector::Parser.new(doc('http://relativewithbase.com/company/page2/'))
    parser.internal_links.should == %w[http://relativewithbase.com/about http://relativewithbase.com/sitemap]
  end
end
236
+
237
# Links using schemes other than http(s) must be reported as they are.
describe 'Non-HTTP links' do
  before do
    @m = MetaInspector::Parser.new(doc('http://example.com/nonhttp'))
  end

  it 'should get the links' do
    # Compared after sorting, so the order in the page does not matter.
    sorted_links = @m.links.sort
    sorted_links.should == %w[
      ftp://ftp.cdrom.com/
      javascript:alert('hey');
      mailto:user@example.com
      skype:joeuser?call
      telnet://telnet.cdrom.com
    ]
  end
end
252
+
253
# Protocol-relative hrefs ("//host/path") take the scheme of the page that
# contains them, so the same page is loaded over http and over https.
describe 'Protocol-relative URLs' do
  before do
    @m_http  = MetaInspector::Parser.new(doc('http://protocol-relative.com'))
    @m_https = MetaInspector::Parser.new(doc('https://protocol-relative.com'))
  end

  it 'should convert protocol-relative links to http' do
    http_links = @m_http.links
    http_links.should include('http://protocol-relative.com/contact')

    http_links = @m_http.links
    http_links.should include('http://yahoo.com/')
  end

  it 'should convert protocol-relative links to https' do
    https_links = @m_https.links
    https_links.should include('https://protocol-relative.com/contact')

    https_links = @m_https.links
    https_links.should include('https://yahoo.com/')
  end
end
269
+
270
# Meta tags are read through dynamically resolved ("ghost") methods such as
# meta_robots or meta_og_title. @m defaults to the parser for pagerankalert.com;
# examples that need another page replace it.
describe 'Getting meta tags by ghost methods' do
  before(:each) do
    @m = MetaInspector::Parser.new(doc 'http://pagerankalert.com')
  end

  it "should get the robots meta tag" do
    @m.meta_robots.should == 'all,follow'
  end

  # Was a second example also named "should get the robots meta tag"; renamed so
  # the two can be told apart in the spec output. This one covers the mixed-case
  # method name.
  it "should get the robots meta tag regardless of the method name case" do
    @m.meta_RoBoTs.should == 'all,follow'
  end

  it "should get the description meta tag" do
    @m.meta_description.should == 'Track your PageRank(TM) changes and receive alerts by email'
  end

  it "should get the keywords meta tag" do
    @m.meta_keywords.should == "pagerank, seo, optimization, google"
  end

  it "should get the content-language meta tag" do
    pending "mocks"
    @m.meta_content_language.should == "en"
  end

  it "should get the Csrf_pAram meta tag" do
    @m.meta_Csrf_pAram.should == "authenticity_token"
  end

  it "should return nil for nonfound meta_tags" do
    @m.meta_lollypop.should == nil
  end

  it "should get the generator meta tag" do
    @m = MetaInspector::Parser.new(doc 'http://www.inkthemes.com/')
    @m.meta_generator.should == 'WordPress 3.4.2'
  end

  it "should find a meta_og_title" do
    @m = MetaInspector::Parser.new(doc 'http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
    @m.meta_og_title.should == "Apple Claims New iPhone Only Visible To Most Loyal Of Customers"
  end

  it "should not find a meta_og_something" do
    @m = MetaInspector::Parser.new(doc 'http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
    @m.meta_og_something.should == nil
  end

  it "should find a meta_twitter_site" do
    @m = MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc')
    @m.meta_twitter_site.should == "@youtube"
  end

  it "should find a meta_twitter_player_width" do
    @m = MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc')
    @m.meta_twitter_player_width.should == "1920"
  end

  it "should not find a meta_twitter_dummy" do
    @m = MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc')
    @m.meta_twitter_dummy.should == nil
  end

  it "should find a meta_og_video_width" do
    @m = MetaInspector::Parser.new(doc 'http://www.youtube.com/watch?v=iaGSSrp49uc')
    @m.meta_og_video_width.should == "1920"
  end
end
339
+
340
# The charset is read from the markup: <meta charset>, then the content-type
# meta tag; nil when the page declares none.
describe 'Charset detection' do
  it 'should get the charset from <meta charset />' do
    @m = MetaInspector::Parser.new(doc('http://charset001.com'))
    @m.charset.should == "utf-8"
  end

  it 'should get the charset from meta content type' do
    @m = MetaInspector::Parser.new(doc('http://charset002.com'))
    @m.charset.should == "windows-1252"
  end

  it 'should get nil if no declared charset is found' do
    @m = MetaInspector::Parser.new(doc('http://charset000.com'))
    @m.charset.should == nil
  end
end
356
+
357
# Parser#to_hash is expected to expose the meta tags grouped under "name" and
# "property".
describe 'to_hash' do
  it 'should return a hash with all the values set' do
    @m = MetaInspector::Parser.new(doc('http://pagerankalert.com'))

    named_meta_tags = {
      "description" => "Track your PageRank(TM) changes and receive alerts by email",
      "keywords"    => "pagerank, seo, optimization, google",
      "robots"      => "all,follow",
      "csrf_param"  => "authenticity_token",
      "csrf_token"  => "iW1/w+R8zrtDkhOlivkLZ793BN04Kr3X/pS+ixObHsE="
    }

    @m.to_hash.should == { "meta" => { "name" => named_meta_tags, "property" => {} } }
  end
end
368
+
369
+ private
370
+
371
# Builds the MetaInspector::Document that each parser under test is created from.
#
# url     - the URL of the page.
# options - Hash passed through to MetaInspector::Document.new unchanged.
def doc(url, options = {})
  document_class = MetaInspector::Document
  document_class.new(url, options)
end
374
+ end