metainspector 4.3.0 → 4.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -3
- data/lib/meta_inspector/parsers/texts.rb +1 -0
- data/lib/meta_inspector/request.rb +1 -0
- data/lib/meta_inspector/version.rb +1 -1
- data/meta_inspector.gemspec +1 -1
- data/spec/document_spec.rb +18 -18
- data/spec/exception_log_spec.rb +7 -7
- data/spec/fixtures/no-content-type.response +14 -0
- data/spec/fixtures/title_not_present.response +21 -0
- data/spec/meta_inspector/images_spec.rb +28 -28
- data/spec/meta_inspector/links_spec.rb +38 -38
- data/spec/meta_inspector/meta_inspector_spec.rb +1 -1
- data/spec/meta_inspector/meta_tags_spec.rb +9 -9
- data/spec/meta_inspector/redirections_spec.rb +5 -5
- data/spec/meta_inspector/texts_spec.rb +14 -9
- data/spec/parser_spec.rb +2 -2
- data/spec/request_spec.rb +15 -9
- data/spec/spec_helper.rb +4 -1
- data/spec/url_spec.rb +16 -16
- metadata +8 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 521f4a333c421caaad3bcd737aecd9899d4be801
|
4
|
+
data.tar.gz: c97c25d293d3fc8f49a1e39c85d93ac9c0a65ba7
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0f0c5914c5fe9c9320814c9d77506ccc33a29270fc3c0e2347d6102ad4a4d79f98421a4ca94105ac7cfb3d26c92ef023d4160ffb27eb0ab84bc7f1711e9f648
|
7
|
+
data.tar.gz: ae59d9a9a6c2d142164c744f47f9b2b9f900e46582fd2748bf3cd536ee4502294a384700a1d1d650b24167f4cbb4439c4e24eb395b8fbce3e9fdd8d402f93e1b
|
data/README.md
CHANGED
@@ -406,15 +406,21 @@ $ irb
|
|
406
406
|
=> "/plans-and-pricing"
|
407
407
|
```
|
408
408
|
|
409
|
-
##
|
409
|
+
## Contributing guidelines
|
410
410
|
|
411
|
-
You're welcome to fork this project and send pull requests. Just remember to
|
411
|
+
You're more than welcome to fork this project and send pull requests. Just remember to:
|
412
|
+
|
413
|
+
* Create a topic branch for your changes.
|
414
|
+
* Add specs.
|
415
|
+
* Keep your fake responses as small as possible. For each change in `spec/fixtures`, a comment should be included explaining why it's needed.
|
416
|
+
* Update `version.rb`, following the [semantic versioning convention](http://semver.org/).
|
417
|
+
* Update `README.md` if needed (for example, when you're adding or changing a feature).
|
412
418
|
|
413
419
|
Thanks to all the contributors:
|
414
420
|
|
415
421
|
[https://github.com/jaimeiniesta/metainspector/graphs/contributors](https://github.com/jaimeiniesta/metainspector/graphs/contributors)
|
416
422
|
|
417
|
-
You
|
423
|
+
You can also come to chat with us on our [Gitter room](https://gitter.im/jaimeiniesta/metainspector) and [Google group](https://groups.google.com/forum/#!forum/metainspector).
|
418
424
|
|
419
425
|
## Related projects
|
420
426
|
|
data/lib/meta_inspector/parsers/texts.rb
CHANGED
@@ -20,6 +20,7 @@ module MetaInspector
|
|
20
20
|
candidates.flatten!
|
21
21
|
candidates.map! { |c| (c.respond_to? :inner_text) ? c.inner_text : c }
|
22
22
|
candidates.compact!
|
23
|
+
return nil if candidates.empty?
|
23
24
|
candidates.map! { |c| c.gsub(/\s+/, ' ') }
|
24
25
|
candidates.uniq!
|
25
26
|
candidates.sort_by! { |t| -t.length }
|
data/meta_inspector.gemspec
CHANGED
@@ -21,7 +21,7 @@ Gem::Specification.new do |gem|
|
|
21
21
|
gem.add_dependency 'addressable', '~> 2.3.5'
|
22
22
|
gem.add_dependency 'fastimage'
|
23
23
|
|
24
|
-
gem.add_development_dependency 'rspec', '
|
24
|
+
gem.add_development_dependency 'rspec', '~> 3.0'
|
25
25
|
gem.add_development_dependency 'fakeweb', '1.3.0'
|
26
26
|
gem.add_development_dependency 'webmock'
|
27
27
|
gem.add_development_dependency 'awesome_print'
|
data/spec/document_spec.rb
CHANGED
@@ -5,21 +5,21 @@ describe MetaInspector::Document do
|
|
5
5
|
let(:doc) { MetaInspector::Document.new('http://cnn.com/', :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>") }
|
6
6
|
|
7
7
|
it "should get correct links when the url html is passed as an option" do
|
8
|
-
doc.links.internal.
|
8
|
+
expect(doc.links.internal).to eq(["http://cnn.com/hello"])
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should get the title" do
|
12
|
-
doc.title.
|
12
|
+
expect(doc.title).to eq("Hello From Passed Html")
|
13
13
|
end
|
14
14
|
end
|
15
15
|
|
16
16
|
it "should return a String as to_s" do
|
17
|
-
MetaInspector::Document.new('http://pagerankalert.com').to_s.class.
|
17
|
+
expect(MetaInspector::Document.new('http://pagerankalert.com').to_s.class).to eq(String)
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should return a Hash with all the values set" do
|
21
21
|
doc = MetaInspector::Document.new('http://pagerankalert.com')
|
22
|
-
doc.to_hash.
|
22
|
+
expect(doc.to_hash).to eq({
|
23
23
|
"url" => "http://pagerankalert.com/",
|
24
24
|
"scheme" => "http",
|
25
25
|
"host" => "pagerankalert.com",
|
@@ -71,28 +71,28 @@ describe MetaInspector::Document do
|
|
71
71
|
"via" => "1.1 varnish"
|
72
72
|
}
|
73
73
|
}
|
74
|
-
}
|
74
|
+
})
|
75
75
|
end
|
76
76
|
|
77
77
|
describe 'exception handling' do
|
78
78
|
let(:logger) { MetaInspector::ExceptionLog.new }
|
79
79
|
|
80
80
|
it "should parse images when parse_html_content_type_only is not specified" do
|
81
|
-
logger.
|
81
|
+
expect(logger).not_to receive(:<<)
|
82
82
|
|
83
83
|
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', exception_log: logger)
|
84
84
|
image_url.title
|
85
85
|
end
|
86
86
|
|
87
87
|
it "should parse images when parse_html_content_type_only is false" do
|
88
|
-
logger.
|
88
|
+
expect(logger).not_to receive(:<<)
|
89
89
|
|
90
90
|
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: false, exception_log: logger)
|
91
91
|
image_url.title
|
92
92
|
end
|
93
93
|
|
94
94
|
it "should handle errors when content is image/jpeg and html_content_type_only is true" do
|
95
|
-
logger.
|
95
|
+
expect(logger).to receive(:<<).with(an_instance_of(RuntimeError))
|
96
96
|
|
97
97
|
image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true, exception_log: logger)
|
98
98
|
|
@@ -100,7 +100,7 @@ describe MetaInspector::Document do
|
|
100
100
|
end
|
101
101
|
|
102
102
|
it "should handle errors when content is not text/html and html_content_type_only is true" do
|
103
|
-
logger.
|
103
|
+
expect(logger).to receive(:<<).with(an_instance_of(RuntimeError))
|
104
104
|
|
105
105
|
tar_url = MetaInspector::Document.new('http://pagerankalert.com/file.tar.gz', html_content_only: true, exception_log: logger)
|
106
106
|
|
@@ -114,11 +114,11 @@ describe MetaInspector::Document do
|
|
114
114
|
end
|
115
115
|
|
116
116
|
it 'stores the exceptions' do
|
117
|
-
@bad_request.exceptions.
|
117
|
+
expect(@bad_request.exceptions).not_to be_empty
|
118
118
|
end
|
119
119
|
|
120
120
|
it 'makes ok? to return false' do
|
121
|
-
@bad_request.
|
121
|
+
expect(@bad_request).not_to be_ok
|
122
122
|
end
|
123
123
|
end
|
124
124
|
|
@@ -136,7 +136,7 @@ describe MetaInspector::Document do
|
|
136
136
|
bad_request.title
|
137
137
|
|
138
138
|
$stderr.rewind
|
139
|
-
$stderr.string.chomp.
|
139
|
+
expect($stderr.string.chomp).to eq("The url provided contains image/png content instead of text/html content")
|
140
140
|
end
|
141
141
|
|
142
142
|
it 'does not raise an exception' do
|
@@ -161,8 +161,8 @@ describe MetaInspector::Document do
|
|
161
161
|
expected_headers = {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
|
162
162
|
|
163
163
|
headers = {}
|
164
|
-
headers.
|
165
|
-
Faraday::Connection.
|
164
|
+
expect(headers).to receive(:merge!).with(expected_headers)
|
165
|
+
allow_any_instance_of(Faraday::Connection).to receive(:headers){headers}
|
166
166
|
MetaInspector::Document.new(url)
|
167
167
|
end
|
168
168
|
|
@@ -171,19 +171,19 @@ describe MetaInspector::Document do
|
|
171
171
|
headers = {'User-Agent' => 'Mozilla', 'Referer' => 'https://github.com/'}
|
172
172
|
|
173
173
|
headers = {}
|
174
|
-
headers.
|
175
|
-
Faraday::Connection.
|
174
|
+
expect(headers).to receive(:merge!).with(headers)
|
175
|
+
allow_any_instance_of(Faraday::Connection).to receive(:headers){headers}
|
176
176
|
MetaInspector::Document.new(url, headers: headers)
|
177
177
|
end
|
178
178
|
end
|
179
179
|
|
180
180
|
describe 'url normalization' do
|
181
181
|
it 'should normalize by default' do
|
182
|
-
MetaInspector.new('http://example.com/%EF%BD%9E').url.
|
182
|
+
expect(MetaInspector.new('http://example.com/%EF%BD%9E').url).to eq('http://example.com/~')
|
183
183
|
end
|
184
184
|
|
185
185
|
it 'should not normalize if the normalize_url option is false' do
|
186
|
-
MetaInspector.new('http://example.com/%EF%BD%9E', normalize_url: false).url.
|
186
|
+
expect(MetaInspector.new('http://example.com/%EF%BD%9E', normalize_url: false).url).to eq('http://example.com/%EF%BD%9E')
|
187
187
|
end
|
188
188
|
end
|
189
189
|
end
|
data/spec/exception_log_spec.rb
CHANGED
@@ -4,7 +4,7 @@ describe MetaInspector::ExceptionLog do
|
|
4
4
|
|
5
5
|
describe "warn_level" do
|
6
6
|
it "should be :raise by default" do
|
7
|
-
MetaInspector::ExceptionLog.new.warn_level.
|
7
|
+
expect(MetaInspector::ExceptionLog.new.warn_level).to eq(:raise)
|
8
8
|
end
|
9
9
|
|
10
10
|
it "should raise exceptions when warn_level is :raise" do
|
@@ -20,7 +20,7 @@ describe MetaInspector::ExceptionLog do
|
|
20
20
|
logger = MetaInspector::ExceptionLog.new(warn_level: :warn)
|
21
21
|
exception = StandardError.new("an error message")
|
22
22
|
|
23
|
-
logger.
|
23
|
+
expect(logger).to receive(:warn).with(exception)
|
24
24
|
logger << exception
|
25
25
|
end
|
26
26
|
|
@@ -49,28 +49,28 @@ describe MetaInspector::ExceptionLog do
|
|
49
49
|
logger << first
|
50
50
|
logger << second
|
51
51
|
|
52
|
-
logger.exceptions.
|
52
|
+
expect(logger.exceptions).to eq([first, second])
|
53
53
|
end
|
54
54
|
|
55
55
|
describe "ok?" do
|
56
56
|
it "should be true if no exceptions stored" do
|
57
|
-
logger.
|
57
|
+
expect(logger).to be_ok
|
58
58
|
end
|
59
59
|
|
60
60
|
it "should be false if some exception stored" do
|
61
61
|
logger << StandardError.new("some message")
|
62
|
-
logger.
|
62
|
+
expect(logger).not_to be_ok
|
63
63
|
end
|
64
64
|
|
65
65
|
it "should warn about misuse if warn_level is :raise" do
|
66
66
|
logger = MetaInspector::ExceptionLog.new(warn_level: :raise)
|
67
|
-
logger.
|
67
|
+
expect(logger).to receive(:warn).with("ExceptionLog#ok? should only be used when warn_level is :store")
|
68
68
|
logger.ok?
|
69
69
|
end
|
70
70
|
|
71
71
|
it "should warn about misuse if warn_level is :warn" do
|
72
72
|
logger = MetaInspector::ExceptionLog.new(warn_level: :warn)
|
73
|
-
logger.
|
73
|
+
expect(logger).to receive(:warn).with("ExceptionLog#ok? should only be used when warn_level is :store")
|
74
74
|
logger.ok?
|
75
75
|
end
|
76
76
|
end
|
data/spec/fixtures/no-content-type.response
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Server: nginx/0.7.67
|
3
|
+
Date: Fri, 18 Nov 2011 21:46:46 GMT
|
4
|
+
Connection: keep-alive
|
5
|
+
Last-Modified: Mon, 14 Nov 2011 16:53:18 GMT
|
6
|
+
Content-Length: 101
|
7
|
+
|
8
|
+
<html>
|
9
|
+
<head>
|
10
|
+
<title>Thsi page has no content-type</title>
|
11
|
+
</head>
|
12
|
+
<body>
|
13
|
+
</body>
|
14
|
+
</html>
|
data/spec/fixtures/title_not_present.response
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 202
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head></head>
|
18
|
+
<body>
|
19
|
+
<p>A sample page with many types of meta tags</p>
|
20
|
+
</body>
|
21
|
+
</html>
|
data/spec/meta_inspector/images_spec.rb
CHANGED
@@ -7,39 +7,39 @@ describe MetaInspector do
|
|
7
7
|
let(:page) { MetaInspector.new('https://twitter.com/markupvalidator') }
|
8
8
|
|
9
9
|
it "responds to #length" do
|
10
|
-
page.images.length.
|
10
|
+
expect(page.images.length).to eq(6)
|
11
11
|
end
|
12
12
|
|
13
13
|
it "responds to #size" do
|
14
|
-
page.images.size.
|
14
|
+
expect(page.images.size).to eq(6)
|
15
15
|
end
|
16
16
|
|
17
17
|
it "responds to #each" do
|
18
18
|
c = []
|
19
19
|
page.images.each {|i| c << i}
|
20
|
-
c.length.
|
20
|
+
expect(c.length).to eq(6)
|
21
21
|
end
|
22
22
|
|
23
23
|
it "responds to #sort" do
|
24
|
-
page.images.sort
|
25
|
-
.
|
24
|
+
expect(page.images.sort)
|
25
|
+
.to eq(["https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png",
|
26
26
|
"https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif",
|
27
27
|
"https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg",
|
28
28
|
"https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png",
|
29
29
|
"https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png",
|
30
|
-
"https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png"]
|
30
|
+
"https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png"])
|
31
31
|
end
|
32
32
|
|
33
33
|
it "responds to #first" do
|
34
|
-
page.images.first.
|
34
|
+
expect(page.images.first).to eq("https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png")
|
35
35
|
end
|
36
36
|
|
37
37
|
it "responds to #last" do
|
38
|
-
page.images.last.
|
38
|
+
expect(page.images.last).to eq("https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif")
|
39
39
|
end
|
40
40
|
|
41
41
|
it "responds to #[]" do
|
42
|
-
page.images[0].
|
42
|
+
expect(page.images[0]).to eq("https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png")
|
43
43
|
end
|
44
44
|
|
45
45
|
end
|
@@ -47,26 +47,26 @@ describe MetaInspector do
|
|
47
47
|
it "should find all page images" do
|
48
48
|
page = MetaInspector.new('http://pagerankalert.com')
|
49
49
|
|
50
|
-
page.images.to_a.
|
50
|
+
expect(page.images.to_a).to eq(["http://pagerankalert.com/images/pagerank_alert.png?1305794559"])
|
51
51
|
end
|
52
52
|
|
53
53
|
it "should find images on twitter" do
|
54
54
|
page = MetaInspector.new('https://twitter.com/markupvalidator')
|
55
55
|
|
56
|
-
page.images.length.
|
57
|
-
page.images.to_a.
|
56
|
+
expect(page.images.length).to eq(6)
|
57
|
+
expect(page.images.to_a).to eq(["https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_reasonably_small.png",
|
58
58
|
"https://twimg0-a.akamaihd.net/profile_images/2380086215/fcu46ozay5f5al9kdfvq_normal.png",
|
59
59
|
"https://twimg0-a.akamaihd.net/profile_images/2293774732/v0pgo4xpdd9rou2xq5h0_normal.png",
|
60
60
|
"https://twimg0-a.akamaihd.net/profile_images/1538528659/jaime_nov_08_normal.jpg",
|
61
61
|
"https://si0.twimg.com/sticky/default_profile_images/default_profile_6_mini.png",
|
62
|
-
"https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif"]
|
62
|
+
"https://twimg0-a.akamaihd.net/a/1342841381/images/bigger_spinner.gif"])
|
63
63
|
end
|
64
64
|
|
65
65
|
it "should ignore malformed image tags" do
|
66
66
|
# There is an image tag without a source. The scraper should not fatal.
|
67
67
|
page = MetaInspector.new("http://www.guardian.co.uk/media/pda/2011/sep/15/techcrunch-arrington-startups")
|
68
68
|
|
69
|
-
page.images.size.
|
69
|
+
expect(page.images.size).to eq(11)
|
70
70
|
end
|
71
71
|
end
|
72
72
|
|
@@ -74,19 +74,19 @@ describe MetaInspector do
|
|
74
74
|
it "should find the og image" do
|
75
75
|
page = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
|
76
76
|
|
77
|
-
page.images.best.
|
77
|
+
expect(page.images.best).to eq("http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg")
|
78
78
|
end
|
79
79
|
|
80
80
|
it "should find image on youtube" do
|
81
81
|
page = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
|
82
82
|
|
83
|
-
page.images.best.
|
83
|
+
expect(page.images.best).to eq("http://i2.ytimg.com/vi/iaGSSrp49uc/mqdefault.jpg")
|
84
84
|
end
|
85
85
|
|
86
86
|
it "should find image when og:image and twitter:image metatags are missing" do
|
87
87
|
page = MetaInspector.new('http://example.com/largest_image_using_image_size')
|
88
88
|
|
89
|
-
page.images.best.
|
89
|
+
expect(page.images.best).to eq("http://example.com/100x100")
|
90
90
|
end
|
91
91
|
end
|
92
92
|
|
@@ -94,19 +94,19 @@ describe MetaInspector do
|
|
94
94
|
it "should find the og image" do
|
95
95
|
page = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
|
96
96
|
|
97
|
-
page.images.owner_suggested.
|
97
|
+
expect(page.images.owner_suggested).to eq("http://o.onionstatic.com/images/articles/article/2772/Apple-Claims-600w-R_jpg_130x110_q85.jpg")
|
98
98
|
end
|
99
99
|
|
100
100
|
it "should find image on youtube" do
|
101
101
|
page = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
|
102
102
|
|
103
|
-
page.images.owner_suggested.
|
103
|
+
expect(page.images.owner_suggested).to eq("http://i2.ytimg.com/vi/iaGSSrp49uc/mqdefault.jpg")
|
104
104
|
end
|
105
105
|
|
106
106
|
it "should return nil when og:image and twitter:image metatags are missing" do
|
107
107
|
page = MetaInspector.new('http://example.com/largest_image_using_image_size')
|
108
108
|
|
109
|
-
page.images.owner_suggested.
|
109
|
+
expect(page.images.owner_suggested).to be nil
|
110
110
|
end
|
111
111
|
end
|
112
112
|
|
@@ -114,19 +114,19 @@ describe MetaInspector do
|
|
114
114
|
it "should find the largest image on the page using html sizes" do
|
115
115
|
page = MetaInspector.new('http://example.com/largest_image_in_html')
|
116
116
|
|
117
|
-
page.images.largest.
|
117
|
+
expect(page.images.largest).to eq("http://example.com/largest")
|
118
118
|
end
|
119
119
|
|
120
120
|
it "should find the largest image on the page using actual image sizes" do
|
121
121
|
page = MetaInspector.new('http://example.com/largest_image_using_image_size')
|
122
122
|
|
123
|
-
page.images.largest.
|
123
|
+
expect(page.images.largest).to eq("http://example.com/100x100")
|
124
124
|
end
|
125
125
|
|
126
126
|
it "should find the largest image without downloading images" do
|
127
127
|
page = MetaInspector.new('http://example.com/largest_image_using_image_size', download_images: false)
|
128
128
|
|
129
|
-
page.images.largest.
|
129
|
+
expect(page.images.largest).to eq("http://example.com/1x1")
|
130
130
|
end
|
131
131
|
end
|
132
132
|
|
@@ -134,31 +134,31 @@ describe MetaInspector do
|
|
134
134
|
it "should get favicon link when marked as icon" do
|
135
135
|
page = MetaInspector.new('http://pagerankalert.com/')
|
136
136
|
|
137
|
-
page.images.favicon.
|
137
|
+
expect(page.images.favicon).to eq('http://pagerankalert.com/src/favicon.ico')
|
138
138
|
end
|
139
139
|
|
140
140
|
it "should get favicon link when marked as shortcut" do
|
141
141
|
page = MetaInspector.new('http://pagerankalert-shortcut.com/')
|
142
142
|
|
143
|
-
page.images.favicon.
|
143
|
+
expect(page.images.favicon).to eq('http://pagerankalert-shortcut.com/src/favicon.ico')
|
144
144
|
end
|
145
145
|
|
146
146
|
it "should get favicon link when marked as shorcut and icon" do
|
147
147
|
page = MetaInspector.new('http://pagerankalert-shortcut-and-icon.com/')
|
148
148
|
|
149
|
-
page.images.favicon.
|
149
|
+
expect(page.images.favicon).to eq('http://pagerankalert-shortcut-and-icon.com/src/favicon.ico')
|
150
150
|
end
|
151
151
|
|
152
152
|
it "should get favicon link when there is also a touch icon" do
|
153
153
|
page = MetaInspector.new('http://pagerankalert-touch-icon.com/')
|
154
154
|
|
155
|
-
page.images.favicon.
|
155
|
+
expect(page.images.favicon).to eq('http://pagerankalert-touch-icon.com/src/favicon.ico')
|
156
156
|
end
|
157
157
|
|
158
158
|
it "should get favicon link of nil" do
|
159
159
|
page = MetaInspector.new('http://www.theonion.com/articles/apple-claims-new-iphone-only-visible-to-most-loyal,2772/')
|
160
160
|
|
161
|
-
page.images.favicon.
|
161
|
+
expect(page.images.favicon).to eq(nil)
|
162
162
|
end
|
163
163
|
end
|
164
164
|
end
|
data/spec/meta_inspector/links_spec.rb
CHANGED
@@ -5,21 +5,21 @@ describe MetaInspector do
|
|
5
5
|
|
6
6
|
describe '#links' do
|
7
7
|
it 'returns the internal links' do
|
8
|
-
page.links.internal.
|
8
|
+
expect(page.links.internal).to eq([ "http://example.com/",
|
9
9
|
"http://example.com/faqs",
|
10
10
|
"http://example.com/contact",
|
11
|
-
"http://example.com/team.html" ]
|
11
|
+
"http://example.com/team.html" ])
|
12
12
|
end
|
13
13
|
|
14
14
|
it 'returns the external links' do
|
15
|
-
page.links.external.
|
16
|
-
"https://github.com/" ]
|
15
|
+
expect(page.links.external).to eq([ "https://twitter.com/",
|
16
|
+
"https://github.com/" ])
|
17
17
|
end
|
18
18
|
|
19
19
|
it 'returns the non-HTTP links' do
|
20
|
-
page.links.non_http.
|
20
|
+
expect(page.links.non_http).to eq([ "mailto:hello@example.com",
|
21
21
|
"javascript:alert('hi');",
|
22
|
-
"ftp://ftp.example.com/" ]
|
22
|
+
"ftp://ftp.example.com/" ])
|
23
23
|
end
|
24
24
|
end
|
25
25
|
|
@@ -29,81 +29,81 @@ describe MetaInspector do
|
|
29
29
|
end
|
30
30
|
|
31
31
|
it "should get correct absolute links for internal pages" do
|
32
|
-
@m.links.internal.
|
32
|
+
expect(@m.links.internal).to eq([ "http://pagerankalert.com/",
|
33
33
|
"http://pagerankalert.com/es?language=es",
|
34
34
|
"http://pagerankalert.com/users/sign_up",
|
35
|
-
"http://pagerankalert.com/users/sign_in" ]
|
35
|
+
"http://pagerankalert.com/users/sign_in" ])
|
36
36
|
end
|
37
37
|
|
38
38
|
it "should get correct absolute links for external pages" do
|
39
|
-
@m.links.external.
|
39
|
+
expect(@m.links.external).to eq([ "http://pagerankalert.posterous.com/",
|
40
40
|
"http://twitter.com/pagerankalert",
|
41
|
-
"http://twitter.com/share" ]
|
41
|
+
"http://twitter.com/share" ])
|
42
42
|
end
|
43
43
|
|
44
44
|
it "should get correct absolute links, correcting relative links from URL not ending with slash" do
|
45
45
|
m = MetaInspector.new('http://alazan.com/websolution.asp')
|
46
46
|
|
47
|
-
m.links.internal.
|
48
|
-
"http://alazan.com/faqs.asp" ]
|
47
|
+
expect(m.links.internal).to eq([ "http://alazan.com/index.asp",
|
48
|
+
"http://alazan.com/faqs.asp" ])
|
49
49
|
end
|
50
50
|
|
51
51
|
describe "links with international characters" do
|
52
52
|
it "should get correct absolute links, encoding the URLs as needed" do
|
53
53
|
m = MetaInspector.new('http://international.com')
|
54
54
|
|
55
|
-
m.links.internal.
|
55
|
+
expect(m.links.internal).to eq([ "http://international.com/espa%C3%B1a.asp",
|
56
56
|
"http://international.com/roman%C3%A9e",
|
57
57
|
"http://international.com/faqs#cami%C3%B3n",
|
58
58
|
"http://international.com/search?q=cami%C3%B3n",
|
59
59
|
"http://international.com/search?q=espa%C3%B1a#top",
|
60
|
-
"http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"]
|
60
|
+
"http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"])
|
61
61
|
|
62
|
-
m.links.external.
|
62
|
+
expect(m.links.external).to eq([ "http://example.com/espa%C3%B1a.asp",
|
63
63
|
"http://example.com/roman%C3%A9e",
|
64
64
|
"http://example.com/faqs#cami%C3%B3n",
|
65
65
|
"http://example.com/search?q=cami%C3%B3n",
|
66
|
-
"http://example.com/search?q=espa%C3%B1a#top"]
|
66
|
+
"http://example.com/search?q=espa%C3%B1a#top"])
|
67
67
|
end
|
68
68
|
|
69
69
|
describe "internal links" do
|
70
70
|
it "should get correct internal links, encoding the URLs as needed but respecting # and ?" do
|
71
71
|
m = MetaInspector.new('http://international.com')
|
72
|
-
m.links.internal.
|
72
|
+
expect(m.links.internal).to eq([ "http://international.com/espa%C3%B1a.asp",
|
73
73
|
"http://international.com/roman%C3%A9e",
|
74
74
|
"http://international.com/faqs#cami%C3%B3n",
|
75
75
|
"http://international.com/search?q=cami%C3%B3n",
|
76
76
|
"http://international.com/search?q=espa%C3%B1a#top",
|
77
|
-
"http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"]
|
77
|
+
"http://international.com/index.php?q=espa%C3%B1a&url=aHR0zZQ==&cntnt01pageid=21"])
|
78
78
|
end
|
79
79
|
|
80
80
|
it "should not crash when processing malformed hrefs" do
|
81
81
|
m = MetaInspector.new('http://example.com/malformed_href')
|
82
|
-
m.links.internal.
|
82
|
+
expect(m.links.internal).to eq([ "http://example.com/faqs" ])
|
83
83
|
end
|
84
84
|
end
|
85
85
|
|
86
86
|
describe "external links" do
|
87
87
|
it "should get correct external links, encoding the URLs as needed but respecting # and ?" do
|
88
88
|
m = MetaInspector.new('http://international.com')
|
89
|
-
m.links.external.
|
89
|
+
expect(m.links.external).to eq([ "http://example.com/espa%C3%B1a.asp",
|
90
90
|
"http://example.com/roman%C3%A9e",
|
91
91
|
"http://example.com/faqs#cami%C3%B3n",
|
92
92
|
"http://example.com/search?q=cami%C3%B3n",
|
93
|
-
"http://example.com/search?q=espa%C3%B1a#top"]
|
93
|
+
"http://example.com/search?q=espa%C3%B1a#top"])
|
94
94
|
end
|
95
95
|
|
96
96
|
it "should not crash when processing malformed hrefs" do
|
97
97
|
m = MetaInspector.new('http://example.com/malformed_href')
|
98
|
-
m.links.non_http.
|
99
|
-
"javascript://", "mailto:email(at)example.com"]
|
98
|
+
expect(m.links.non_http).to eq(["skype:joeuser?call", "telnet://telnet.cdrom.com", "javascript:alert('ok');",
|
99
|
+
"javascript://", "mailto:email(at)example.com"])
|
100
100
|
end
|
101
101
|
end
|
102
102
|
end
|
103
103
|
|
104
104
|
it "should not crash with links that have weird href values" do
|
105
105
|
m = MetaInspector.new('http://example.com/invalid_href')
|
106
|
-
m.links.non_http.
|
106
|
+
expect(m.links.non_http).to eq(["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"])
|
107
107
|
end
|
108
108
|
end
|
109
109
|
|
@@ -114,7 +114,7 @@ describe MetaInspector do
|
|
114
114
|
end
|
115
115
|
|
116
116
|
it 'should get the relative links' do
|
117
|
-
@m.links.internal.
|
117
|
+
expect(@m.links.internal).to eq(['http://relative.com/about', 'http://relative.com/sitemap'])
|
118
118
|
end
|
119
119
|
end
|
120
120
|
|
@@ -124,7 +124,7 @@ describe MetaInspector do
|
|
124
124
|
end
|
125
125
|
|
126
126
|
it 'should get the relative links' do
|
127
|
-
@m.links.internal.
|
127
|
+
expect(@m.links.internal).to eq(['http://relative.com/about', 'http://relative.com/sitemap'])
|
128
128
|
end
|
129
129
|
end
|
130
130
|
|
@@ -134,7 +134,7 @@ describe MetaInspector do
|
|
134
134
|
end
|
135
135
|
|
136
136
|
it 'should get the relative links' do
|
137
|
-
@m.links.internal.
|
137
|
+
expect(@m.links.internal).to eq(['http://relative.com/company/about', 'http://relative.com/sitemap'])
|
138
138
|
end
|
139
139
|
end
|
140
140
|
end
|
@@ -142,12 +142,12 @@ describe MetaInspector do
|
|
142
142
|
describe 'Relative links with base' do
|
143
143
|
it 'should get the relative links from a document' do
|
144
144
|
m = MetaInspector.new('http://relativewithbase.com/company/page2')
|
145
|
-
m.links.internal.
|
145
|
+
expect(m.links.internal).to eq(['http://relativewithbase.com/about', 'http://relativewithbase.com/sitemap'])
|
146
146
|
end
|
147
147
|
|
148
148
|
it 'should get the relative links from a directory' do
|
149
149
|
m = MetaInspector.new('http://relativewithbase.com/company/page2/')
|
150
|
-
m.links.internal.
|
150
|
+
expect(m.links.internal).to eq(['http://relativewithbase.com/about', 'http://relativewithbase.com/sitemap'])
|
151
151
|
end
|
152
152
|
end
|
153
153
|
|
@@ -157,13 +157,13 @@ describe MetaInspector do
|
|
157
157
|
end
|
158
158
|
|
159
159
|
it "should get the links" do
|
160
|
-
@m.links.non_http.sort.
|
160
|
+
expect(@m.links.non_http.sort).to eq([
|
161
161
|
"ftp://ftp.cdrom.com/",
|
162
162
|
"javascript:alert('hey');",
|
163
163
|
"mailto:user@example.com",
|
164
164
|
"skype:joeuser?call",
|
165
165
|
"telnet://telnet.cdrom.com"
|
166
|
-
]
|
166
|
+
])
|
167
167
|
end
|
168
168
|
end
|
169
169
|
|
@@ -174,30 +174,30 @@ describe MetaInspector do
|
|
174
174
|
end
|
175
175
|
|
176
176
|
it "should convert protocol-relative links to http" do
|
177
|
-
@m_http.links.internal.
|
178
|
-
@m_http.links.external.
|
177
|
+
expect(@m_http.links.internal).to include('http://protocol-relative.com/contact')
|
178
|
+
expect(@m_http.links.external).to include('http://yahoo.com/')
|
179
179
|
end
|
180
180
|
|
181
181
|
it "should convert protocol-relative links to https" do
|
182
|
-
@m_https.links.internal.
|
183
|
-
@m_https.links.external.
|
182
|
+
expect(@m_https.links.internal).to include('https://protocol-relative.com/contact')
|
183
|
+
expect(@m_https.links.external).to include('https://yahoo.com/')
|
184
184
|
end
|
185
185
|
end
|
186
186
|
|
187
187
|
describe "Feed" do
|
188
188
|
it "should get rss feed" do
|
189
189
|
@m = MetaInspector.new('http://www.iteh.at')
|
190
|
-
@m.feed.
|
190
|
+
expect(@m.feed).to eq('http://www.iteh.at/de/rss/')
|
191
191
|
end
|
192
192
|
|
193
193
|
it "should get atom feed" do
|
194
194
|
@m = MetaInspector.new('http://www.tea-tron.com/jbravo/blog/')
|
195
|
-
@m.feed.
|
195
|
+
expect(@m.feed).to eq('http://www.tea-tron.com/jbravo/blog/feed/')
|
196
196
|
end
|
197
197
|
|
198
198
|
it "should return nil if no feed found" do
|
199
199
|
@m = MetaInspector.new('http://www.alazan.com')
|
200
|
-
@m.feed.
|
200
|
+
expect(@m.feed).to eq(nil)
|
201
201
|
end
|
202
202
|
end
|
203
203
|
end
|
data/spec/meta_inspector/meta_tags_spec.rb
CHANGED
@@ -6,7 +6,7 @@ describe MetaInspector do
|
|
6
6
|
let(:page) { MetaInspector.new('http://example.com/meta-tags') }
|
7
7
|
|
8
8
|
it "#meta_tags" do
|
9
|
-
page.meta_tags.
|
9
|
+
expect(page.meta_tags).to eq({
|
10
10
|
'name' => {
|
11
11
|
'keywords' => ['one, two, three'],
|
12
12
|
'description' => ['the description'],
|
@@ -33,11 +33,11 @@ describe MetaInspector do
|
|
33
33
|
},
|
34
34
|
|
35
35
|
'charset' => ['UTF-8']
|
36
|
-
}
|
36
|
+
})
|
37
37
|
end
|
38
38
|
|
39
39
|
it "#meta_tag" do
|
40
|
-
page.meta_tag.
|
40
|
+
expect(page.meta_tag).to eq({
|
41
41
|
'name' => {
|
42
42
|
'keywords' => 'one, two, three',
|
43
43
|
'description' => 'the description',
|
@@ -62,11 +62,11 @@ describe MetaInspector do
|
|
62
62
|
},
|
63
63
|
|
64
64
|
'charset' => 'UTF-8'
|
65
|
-
}
|
65
|
+
})
|
66
66
|
end
|
67
67
|
|
68
68
|
it "#meta" do
|
69
|
-
page.meta.
|
69
|
+
expect(page.meta).to eq({
|
70
70
|
'keywords' => 'one, two, three',
|
71
71
|
'description' => 'the description',
|
72
72
|
'author' => 'Joe Sample',
|
@@ -82,7 +82,7 @@ describe MetaInspector do
|
|
82
82
|
'og:image:width' => '300',
|
83
83
|
'og:image:height' => '300',
|
84
84
|
'charset' => 'UTF-8'
|
85
|
-
}
|
85
|
+
})
|
86
86
|
end
|
87
87
|
end
|
88
88
|
|
@@ -90,19 +90,19 @@ describe MetaInspector do
|
|
90
90
|
it "should get the charset from <meta charset />" do
|
91
91
|
page = MetaInspector.new('http://charset001.com')
|
92
92
|
|
93
|
-
page.charset.
|
93
|
+
expect(page.charset).to eq("utf-8")
|
94
94
|
end
|
95
95
|
|
96
96
|
it "should get the charset from meta content type" do
|
97
97
|
page = MetaInspector.new('http://charset002.com')
|
98
98
|
|
99
|
-
page.charset.
|
99
|
+
expect(page.charset).to eq("windows-1252")
|
100
100
|
end
|
101
101
|
|
102
102
|
it "should get nil if no declared charset is found" do
|
103
103
|
page = MetaInspector.new('http://charset000.com')
|
104
104
|
|
105
|
-
page.charset.
|
105
|
+
expect(page.charset).to eq(nil)
|
106
106
|
end
|
107
107
|
end
|
108
108
|
end
|
@@ -8,17 +8,17 @@ describe MetaInspector do
|
|
8
8
|
it "disallows redirections" do
|
9
9
|
page = MetaInspector.new("http://facebook.com", :allow_redirections => false)
|
10
10
|
|
11
|
-
page.url.
|
11
|
+
expect(page.url).to eq("http://facebook.com/")
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
15
15
|
context "when redirections are on (default)" do
|
16
16
|
it "allows follows redirections" do
|
17
|
-
logger.
|
17
|
+
expect(logger).not_to receive(:<<)
|
18
18
|
|
19
19
|
page = MetaInspector.new("http://facebook.com", exception_log: logger)
|
20
20
|
|
21
|
-
page.url.
|
21
|
+
expect(page.url).to eq("https://www.facebook.com/")
|
22
22
|
end
|
23
23
|
end
|
24
24
|
|
@@ -37,11 +37,11 @@ describe MetaInspector do
|
|
37
37
|
stub_request(:get, "http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
|
38
38
|
.with(:headers => {"Cookie" => "EMETA_COOKIE_CHECK=1"})
|
39
39
|
|
40
|
-
logger.
|
40
|
+
expect(logger).not_to receive(:<<)
|
41
41
|
|
42
42
|
page = MetaInspector.new("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/", exception_log: logger)
|
43
43
|
|
44
|
-
page.url.
|
44
|
+
expect(page.url).to eq("http://blogs.clarionledger.com/dechols/2014/03/24/digital-medicine/?nclick_check=1")
|
45
45
|
end
|
46
46
|
end
|
47
47
|
end
|
@@ -3,38 +3,43 @@ require 'spec_helper'
|
|
3
3
|
describe MetaInspector do
|
4
4
|
it "should get the title from the head section" do
|
5
5
|
page = MetaInspector.new('http://example.com')
|
6
|
-
page.title.
|
6
|
+
expect(page.title).to eq('An example page')
|
7
7
|
end
|
8
8
|
|
9
9
|
describe '#best_title' do
|
10
10
|
it "should find 'head title' when that's the only thing" do
|
11
11
|
page = MetaInspector.new('http://example.com/title_in_head')
|
12
|
-
page.best_title.
|
12
|
+
expect(page.best_title).to eq('This title came from the head')
|
13
13
|
end
|
14
14
|
|
15
15
|
it "should find 'body title' when that's the only thing" do
|
16
16
|
page = MetaInspector.new('http://example.com/title_in_body')
|
17
|
-
page.best_title.
|
17
|
+
expect(page.best_title).to eq('This title came from the body, not the head')
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should find 'og:title' when that's the only thing" do
|
21
21
|
page = MetaInspector.new('http://example.com/meta-tags')
|
22
|
-
page.best_title.
|
22
|
+
expect(page.best_title).to eq('An OG title')
|
23
23
|
end
|
24
24
|
|
25
25
|
it "should find the first <h1> when that's the only thing" do
|
26
26
|
page = MetaInspector.new('http://example.com/title_in_h1')
|
27
|
-
page.best_title.
|
27
|
+
expect(page.best_title).to eq('This title came from the first h1')
|
28
28
|
end
|
29
29
|
|
30
30
|
it "should choose the longest candidate from the available options" do
|
31
31
|
page = MetaInspector.new('http://example.com/title_best_choice')
|
32
|
-
page.best_title.
|
32
|
+
expect(page.best_title).to eq('This title came from the first h1 and should be the longest of them all, so should be chosen')
|
33
33
|
end
|
34
34
|
|
35
35
|
it "should strip leading and trailing whitespace and all line breaks" do
|
36
36
|
page = MetaInspector.new('http://example.com/title_in_head_with_whitespace')
|
37
|
-
page.best_title.
|
37
|
+
expect(page.best_title).to eq('This title came from the head and has leading and trailing whitespace')
|
38
|
+
end
|
39
|
+
|
40
|
+
it "should return nil if none of the candidates are present" do
|
41
|
+
page = MetaInspector.new('http://example.com/title_not_present')
|
42
|
+
expect(page.best_title).to be(nil)
|
38
43
|
end
|
39
44
|
|
40
45
|
end
|
@@ -43,12 +48,12 @@ describe MetaInspector do
|
|
43
48
|
it "should find description from meta description" do
|
44
49
|
page = MetaInspector.new('http://www.youtube.com/watch?v=iaGSSrp49uc')
|
45
50
|
|
46
|
-
page.description.
|
51
|
+
expect(page.description).to eq("This is Youtube")
|
47
52
|
end
|
48
53
|
|
49
54
|
it "should find a secondary description if no meta description" do
|
50
55
|
page = MetaInspector.new('http://theonion-no-description.com')
|
51
|
-
page.description.
|
56
|
+
expect(page.description).to eq("SAN FRANCISCO—In a move expected to revolutionize the mobile device industry, Apple launched its fastest and most powerful iPhone to date Tuesday, an innovative new model that can only be seen by the company's hippest and most dedicated customers. This is secondary text picked up because of a missing meta description.")
|
52
57
|
end
|
53
58
|
end
|
54
59
|
end
|
data/spec/parser_spec.rb
CHANGED
@@ -5,10 +5,10 @@ describe MetaInspector::Parser do
|
|
5
5
|
let(:parser) { MetaInspector::Parser.new(doc) }
|
6
6
|
|
7
7
|
it "should have a Nokogiri::HTML::Document as parsed" do
|
8
|
-
parser.parsed.class.
|
8
|
+
expect(parser.parsed.class).to eq(Nokogiri::HTML::Document)
|
9
9
|
end
|
10
10
|
|
11
11
|
it "should return the document as a string" do
|
12
|
-
parser.to_s.class.
|
12
|
+
expect(parser.to_s.class).to eq(String)
|
13
13
|
end
|
14
14
|
end
|
data/spec/request_spec.rb
CHANGED
@@ -6,24 +6,24 @@ describe MetaInspector::Request do
|
|
6
6
|
it "should return the content of the page" do
|
7
7
|
page_request = MetaInspector::Request.new(url('http://pagerankalert.com'))
|
8
8
|
|
9
|
-
page_request.read[0..14].
|
9
|
+
expect(page_request.read[0..14]).to eq("<!DOCTYPE html>")
|
10
10
|
end
|
11
11
|
end
|
12
12
|
|
13
13
|
describe "response" do
|
14
14
|
it "contains the response status" do
|
15
15
|
page_request = MetaInspector::Request.new(url('http://example.com'))
|
16
|
-
page_request.response.status.
|
16
|
+
expect(page_request.response.status).to eq(200)
|
17
17
|
end
|
18
18
|
|
19
19
|
it "contains the response headers" do
|
20
20
|
page_request = MetaInspector::Request.new(url('http://example.com'))
|
21
|
-
page_request.response.headers
|
22
|
-
.
|
21
|
+
expect(page_request.response.headers)
|
22
|
+
.to eq({"server"=>"nginx/0.7.67", "date"=>"Fri, 18 Nov 2011 21:46:46 GMT",
|
23
23
|
"content-type"=>"text/html", "connection"=>"keep-alive",
|
24
24
|
"last-modified"=>"Mon, 14 Nov 2011 16:53:18 GMT",
|
25
25
|
"content-length"=>"4987", "x-varnish"=>"2000423390",
|
26
|
-
"age"=>"0", "via"=>"1.1 varnish"}
|
26
|
+
"age"=>"0", "via"=>"1.1 varnish"})
|
27
27
|
end
|
28
28
|
end
|
29
29
|
|
@@ -31,13 +31,19 @@ describe MetaInspector::Request do
|
|
31
31
|
it "should return the correct content type of the url for html pages" do
|
32
32
|
page_request = MetaInspector::Request.new(url('http://pagerankalert.com'))
|
33
33
|
|
34
|
-
page_request.content_type.
|
34
|
+
expect(page_request.content_type).to eq("text/html")
|
35
35
|
end
|
36
36
|
|
37
37
|
it "should return the correct content type of the url for non html pages" do
|
38
38
|
image_request = MetaInspector::Request.new(url('http://pagerankalert.com/image.png'))
|
39
39
|
|
40
|
-
image_request.content_type.
|
40
|
+
expect(image_request.content_type).to eq("image/png")
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should return nil if there is not content type present" do
|
44
|
+
request = MetaInspector::Request.new(url('http://example.com/no-content-type'))
|
45
|
+
|
46
|
+
expect(request.content_type).to be(nil)
|
41
47
|
end
|
42
48
|
end
|
43
49
|
|
@@ -53,8 +59,8 @@ describe MetaInspector::Request do
|
|
53
59
|
end
|
54
60
|
|
55
61
|
it "should handle socket errors" do
|
56
|
-
TCPSocket.
|
57
|
-
logger.
|
62
|
+
allow(TCPSocket).to receive(:open).and_raise(SocketError)
|
63
|
+
expect(logger).to receive(:<<).with(an_instance_of(Faraday::Error::ConnectionFailed))
|
58
64
|
|
59
65
|
MetaInspector::Request.new(url('http://caca232dsdsaer3sdsd-asd343.org'), exception_log: logger)
|
60
66
|
end
|
data/spec/spec_helper.rb
CHANGED
@@ -16,7 +16,6 @@ end
|
|
16
16
|
RSpec.configure do |config|
|
17
17
|
config.filter_run focus: true
|
18
18
|
config.run_all_when_everything_filtered = true
|
19
|
-
config.treat_symbols_as_metadata_keys_with_true_values = true #rspec 3 default
|
20
19
|
end
|
21
20
|
|
22
21
|
#######################
|
@@ -31,6 +30,9 @@ FakeWeb.register_uri(:get, "http://example.com/", :response => fixture_file("exa
|
|
31
30
|
# Used to test response status codes
|
32
31
|
FakeWeb.register_uri(:get, "http://example.com/404", :response => fixture_file("404.response"))
|
33
32
|
|
33
|
+
# Used to test headers
|
34
|
+
FakeWeb.register_uri(:get, "http://example.com/no-content-type", :response => fixture_file("no-content-type.response"))
|
35
|
+
|
34
36
|
# Used to test largest image in page logic
|
35
37
|
FakeWeb.register_uri(:get, "http://example.com/largest_image_in_html", :response => fixture_file("largest_image_in_html.response"))
|
36
38
|
FakeWeb.register_uri(:get, "http://example.com/largest_image_using_image_size", :response => fixture_file("largest_image_using_image_size.response"))
|
@@ -43,6 +45,7 @@ FakeWeb.register_uri(:get, "http://example.com/title_in_body", :response => fixt
|
|
43
45
|
FakeWeb.register_uri(:get, "http://example.com/title_in_h1", :response => fixture_file("title_in_h1.response"))
|
44
46
|
FakeWeb.register_uri(:get, "http://example.com/title_best_choice", :response => fixture_file("title_best_choice.response"))
|
45
47
|
FakeWeb.register_uri(:get, "http://example.com/title_in_head_with_whitespace", :response => fixture_file("title_in_head_with_whitespace.response"))
|
48
|
+
FakeWeb.register_uri(:get, "http://example.com/title_not_present", :response => fixture_file("title_not_present.response"))
|
46
49
|
|
47
50
|
# These are older fixtures
|
48
51
|
FakeWeb.register_uri(:get, "http://pagerankalert.com", :response => fixture_file("pagerankalert.com.response"))
|
data/spec/url_spec.rb
CHANGED
@@ -2,38 +2,38 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe MetaInspector::URL do
|
4
4
|
it "should normalize URLs" do
|
5
|
-
MetaInspector::URL.new('http://example.com').url.
|
5
|
+
expect(MetaInspector::URL.new('http://example.com').url).to eq('http://example.com/')
|
6
6
|
end
|
7
7
|
|
8
8
|
it 'should accept an URL with a scheme' do
|
9
|
-
MetaInspector::URL.new('http://example.com/').url.
|
9
|
+
expect(MetaInspector::URL.new('http://example.com/').url).to eq('http://example.com/')
|
10
10
|
end
|
11
11
|
|
12
12
|
it "should use http:// as a default scheme" do
|
13
|
-
MetaInspector::URL.new('example.com').url.
|
13
|
+
expect(MetaInspector::URL.new('example.com').url).to eq('http://example.com/')
|
14
14
|
end
|
15
15
|
|
16
16
|
it "should accept an URL with international characters" do
|
17
|
-
MetaInspector::URL.new('http://international.com/olé').url.
|
17
|
+
expect(MetaInspector::URL.new('http://international.com/olé').url).to eq('http://international.com/ol%C3%A9')
|
18
18
|
end
|
19
19
|
|
20
20
|
it "should return the scheme" do
|
21
|
-
MetaInspector::URL.new('http://example.com').scheme.
|
22
|
-
MetaInspector::URL.new('https://example.com').scheme.
|
23
|
-
MetaInspector::URL.new('example.com').scheme.
|
21
|
+
expect(MetaInspector::URL.new('http://example.com').scheme).to eq('http')
|
22
|
+
expect(MetaInspector::URL.new('https://example.com').scheme).to eq('https')
|
23
|
+
expect(MetaInspector::URL.new('example.com').scheme).to eq('http')
|
24
24
|
end
|
25
25
|
|
26
26
|
it "should return the host" do
|
27
|
-
MetaInspector::URL.new('http://example.com').host.
|
28
|
-
MetaInspector::URL.new('https://example.com').host.
|
29
|
-
MetaInspector::URL.new('example.com').host.
|
27
|
+
expect(MetaInspector::URL.new('http://example.com').host).to eq('example.com')
|
28
|
+
expect(MetaInspector::URL.new('https://example.com').host).to eq('example.com')
|
29
|
+
expect(MetaInspector::URL.new('example.com').host).to eq('example.com')
|
30
30
|
end
|
31
31
|
|
32
32
|
it "should return the root url" do
|
33
|
-
MetaInspector::URL.new('http://example.com').root_url.
|
34
|
-
MetaInspector::URL.new('https://example.com').root_url.
|
35
|
-
MetaInspector::URL.new('example.com').root_url.
|
36
|
-
MetaInspector::URL.new('http://example.com/faqs').root_url.
|
33
|
+
expect(MetaInspector::URL.new('http://example.com').root_url).to eq('http://example.com/')
|
34
|
+
expect(MetaInspector::URL.new('https://example.com').root_url).to eq('https://example.com/')
|
35
|
+
expect(MetaInspector::URL.new('example.com').root_url).to eq('http://example.com/')
|
36
|
+
expect(MetaInspector::URL.new('http://example.com/faqs').root_url).to eq('http://example.com/')
|
37
37
|
end
|
38
38
|
|
39
39
|
describe "url=" do
|
@@ -41,14 +41,14 @@ describe MetaInspector::URL do
|
|
41
41
|
url = MetaInspector::URL.new('http://first.com/')
|
42
42
|
|
43
43
|
url.url = 'http://second.com/'
|
44
|
-
url.url.
|
44
|
+
expect(url.url).to eq('http://second.com/')
|
45
45
|
end
|
46
46
|
|
47
47
|
it "should add the missing scheme and normalize" do
|
48
48
|
url = MetaInspector::URL.new('http://first.com/')
|
49
49
|
|
50
50
|
url.url = 'second.com'
|
51
|
-
url.url.
|
51
|
+
expect(url.url).to eq('http://second.com/')
|
52
52
|
end
|
53
53
|
end
|
54
54
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.3.0
|
4
|
+
version: 4.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -98,16 +98,16 @@ dependencies:
|
|
98
98
|
name: rspec
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
|
-
- -
|
101
|
+
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version:
|
103
|
+
version: '3.0'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
|
-
- -
|
108
|
+
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version:
|
110
|
+
version: '3.0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: fakeweb
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -278,6 +278,7 @@ files:
|
|
278
278
|
- spec/fixtures/malformed_href.response
|
279
279
|
- spec/fixtures/markupvalidator_faqs.response
|
280
280
|
- spec/fixtures/meta_tags.response
|
281
|
+
- spec/fixtures/no-content-type.response
|
281
282
|
- spec/fixtures/nonhttp.response
|
282
283
|
- spec/fixtures/pagerankalert-shortcut-and-icon.com.response
|
283
284
|
- spec/fixtures/pagerankalert-shortcut.com.response
|
@@ -294,6 +295,7 @@ files:
|
|
294
295
|
- spec/fixtures/title_in_h1.response
|
295
296
|
- spec/fixtures/title_in_head.response
|
296
297
|
- spec/fixtures/title_in_head_with_whitespace.response
|
298
|
+
- spec/fixtures/title_not_present.response
|
297
299
|
- spec/fixtures/twitter_markupvalidator.response
|
298
300
|
- spec/fixtures/unsafe_facebook.com.response
|
299
301
|
- spec/fixtures/unsafe_https.facebook.com.response
|