metainspector 5.4.3 → 5.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/meta_inspector/document.rb +3 -1
- data/lib/meta_inspector/parser.rb +6 -6
- data/lib/meta_inspector/parsers/texts.rb +33 -0
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/document_spec.rb +2 -0
- data/spec/fixtures/author_in_body.response +21 -0
- data/spec/fixtures/author_in_link.response +21 -0
- data/spec/fixtures/author_in_meta.response +23 -0
- data/spec/fixtures/author_in_twitter.response +23 -0
- data/spec/meta_inspector/texts_spec.rb +44 -0
- data/spec/spec_helper.rb +4 -0
- metadata +5 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e1034aff57cb80cddd6a4c03c7e3ef92da0c6bf
|
4
|
+
data.tar.gz: 44ff18adc7fd7a1bfb7c17034591708eb309c5a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55a2d91009a7158d15109818d945db5133e21223da78f9ccd82c22765e6c4ae018940b50a3cd389c7d57b8d0c522ac282f5c0a3d44b4d1ddbfa4636add0add28
|
7
|
+
data.tar.gz: dab9bff983779b57467dc8db0ba9a0bb0c628abb2cc8fbc2aedbdd77503fd6ca5a882bb3576e951ce847ee4000afd277a94fa21e91364c5b13a72a711d4fc0de
|
data/README.md
CHANGED
@@ -81,6 +81,8 @@ page.feed # Get rss or atom links in meta data fields as array
|
|
81
81
|
```ruby
|
82
82
|
page.title # title of the page from the head section, as string
|
83
83
|
page.best_title # best title of the page, from a selection of candidates
|
84
|
+
page.author # author of the page from the meta author tag
|
85
|
+
page.best_author # best author of the page, from a selection of candidates
|
84
86
|
page.description # returns the meta description
|
85
87
|
page.best_description # returns the first non-empty description between the following candidates: standard meta description, og:description, twitter:description, the first long paragraph
|
86
88
|
```
|
@@ -45,7 +45,7 @@ module MetaInspector
|
|
45
45
|
|
46
46
|
delegate [:content_type, :response] => :@request
|
47
47
|
|
48
|
-
delegate [:parsed, :title, :best_title,
|
48
|
+
delegate [:parsed, :title, :best_title, :author, :best_author,
|
49
49
|
:description, :best_description, :links,
|
50
50
|
:images, :feed, :charset, :meta_tags,
|
51
51
|
:meta_tag, :meta, :favicon,
|
@@ -60,6 +60,8 @@ module MetaInspector
|
|
60
60
|
'root_url' => root_url,
|
61
61
|
'title' => title,
|
62
62
|
'best_title' => best_title,
|
63
|
+
'author' => author,
|
64
|
+
'best_author' => best_author,
|
63
65
|
'description' => description,
|
64
66
|
'best_description' => best_description,
|
65
67
|
'links' => links.to_hash,
|
@@ -21,12 +21,12 @@ module MetaInspector
|
|
21
21
|
end
|
22
22
|
|
23
23
|
extend Forwardable
|
24
|
-
delegate [:url, :scheme, :host]
|
25
|
-
delegate [:meta_tags, :meta_tag, :meta, :charset]
|
26
|
-
delegate [:head_links, :stylesheets, :canonicals, :feed]
|
27
|
-
delegate [:links, :base_url]
|
28
|
-
delegate :images
|
29
|
-
delegate [:title, :best_title, :description, :best_description] => :@texts_parser
|
24
|
+
delegate [:url, :scheme, :host] => :@document
|
25
|
+
delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
|
26
|
+
delegate [:head_links, :stylesheets, :canonicals, :feed] => :@head_links_parser
|
27
|
+
delegate [:links, :base_url] => :@links_parser
|
28
|
+
delegate :images => :@images_parser
|
29
|
+
delegate [:title, :best_title, :author, :best_author, :description, :best_description] => :@texts_parser
|
30
30
|
|
31
31
|
# Returns the whole parsed document
|
32
32
|
def parsed
|
@@ -14,6 +14,21 @@ module MetaInspector
|
|
14
14
|
@best_title ||= find_best_title
|
15
15
|
end
|
16
16
|
|
17
|
+
# Returns the meta author, if present
|
18
|
+
def author
|
19
|
+
@author ||= meta['author']
|
20
|
+
end
|
21
|
+
|
22
|
+
# An author getter that returns the longest non-nil author
|
23
|
+
# from the following candidates:
|
24
|
+
# - the standard meta author tag
|
25
|
+
# - a link with the relational attribute "author"
|
26
|
+
# - address tag which may contain the author
|
27
|
+
# - the twitter:creator meta tag for the username
|
28
|
+
def best_author
|
29
|
+
@best_author ||= find_best_author
|
30
|
+
end
|
31
|
+
|
17
32
|
# Returns the meta description, if present
|
18
33
|
def description
|
19
34
|
@description ||= meta['description']
|
@@ -50,6 +65,24 @@ module MetaInspector
|
|
50
65
|
candidates.first
|
51
66
|
end
|
52
67
|
|
68
|
+
def find_best_author
|
69
|
+
candidates = [
|
70
|
+
meta['author'],
|
71
|
+
parsed.css('a[rel="author"]').first,
|
72
|
+
parsed.css('address').first,
|
73
|
+
meta['twitter:creator']
|
74
|
+
]
|
75
|
+
candidates.flatten!
|
76
|
+
candidates.compact!
|
77
|
+
candidates.map! { |c| (c.respond_to? :inner_text) ? c.inner_text : c }
|
78
|
+
candidates.map! { |c| c.strip }
|
79
|
+
return nil if candidates.empty?
|
80
|
+
candidates.map! { |c| c.gsub(/\s+/, ' ') }
|
81
|
+
candidates.uniq!
|
82
|
+
candidates.sort_by! { |t| -t.length }
|
83
|
+
candidates.first
|
84
|
+
end
|
85
|
+
|
53
86
|
def find_best_description
|
54
87
|
candidates = [
|
55
88
|
meta['description'],
|
data/spec/document_spec.rb
CHANGED
@@ -26,6 +26,8 @@ describe MetaInspector::Document do
|
|
26
26
|
"root_url" => "http://pagerankalert.com/",
|
27
27
|
"title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
|
28
28
|
"best_title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
|
29
|
+
"author" => nil,
|
30
|
+
"best_author" => nil,
|
29
31
|
"description" => "Track your PageRank(TM) changes and receive alerts by email",
|
30
32
|
"best_description"=> "Track your PageRank(TM) changes and receive alerts by email",
|
31
33
|
"favicon" => "http://pagerankalert.com/src/favicon.ico",
|
@@ -0,0 +1,21 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 265
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head></head>
|
18
|
+
<body>
|
19
|
+
<address>This author came from the address tag</address>
|
20
|
+
</body>
|
21
|
+
</html>
|
@@ -0,0 +1,21 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 265
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head></head>
|
18
|
+
<body>
|
19
|
+
<a rel="author">This author came from a link with the author relational attribute</a>
|
20
|
+
</body>
|
21
|
+
</html>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 40571
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head>
|
18
|
+
<meta name="author" content="the author" />
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
<p>A sample page with author in standard meta tag</p>
|
22
|
+
</body>
|
23
|
+
</html>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 40571
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head>
|
18
|
+
<meta property="twitter:creator" content="This author came from the twitter creator tag" />
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
<p>A sample page with author in Twitter meta tag</p>
|
22
|
+
</body>
|
23
|
+
</html>
|
@@ -49,6 +49,50 @@ describe MetaInspector do
|
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|
52
|
+
describe '#author' do
|
53
|
+
it "should find author from meta author" do
|
54
|
+
page = MetaInspector.new('http://example.com/author_in_meta')
|
55
|
+
|
56
|
+
expect(page.author).to eq("the author")
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should be nil if no meta author" do
|
60
|
+
page = MetaInspector.new('http://example.com/empty')
|
61
|
+
|
62
|
+
expect(page.author).to be(nil)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "#best_author" do
|
67
|
+
it "should return the author meta tag content if present" do
|
68
|
+
page = MetaInspector.new('http://example.com/author_in_meta')
|
69
|
+
|
70
|
+
expect(page.best_author).to eq("the author")
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should find a link with the relational attribute author if standard meta tag is not present" do
|
74
|
+
page = MetaInspector.new('http://example.com/author_in_link')
|
75
|
+
expect(page.best_author).to eq("This author came from a link with the author relational attribute")
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should find the address tag if standard meta tag and relational attribute author are not present" do
|
79
|
+
page = MetaInspector.new('http://example.com/author_in_body')
|
80
|
+
expect(page.best_author).to eq("This author came from the address tag")
|
81
|
+
end
|
82
|
+
|
83
|
+
it "should return the twitter creator if address tag not present" do
|
84
|
+
page = MetaInspector.new('http://example.com/author_in_twitter')
|
85
|
+
|
86
|
+
expect(page.best_author).to eq("This author came from the twitter creator tag")
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should return nil if no author information present" do
|
90
|
+
page = MetaInspector.new('http://example.com/empty')
|
91
|
+
|
92
|
+
expect(page.best_author).to be(nil)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
52
96
|
describe '#description' do
|
53
97
|
it "should find description from meta description" do
|
54
98
|
page = MetaInspector.new('http://example.com/desc_in_meta')
|
data/spec/spec_helper.rb
CHANGED
@@ -44,6 +44,10 @@ RSpec.configure do |config|
|
|
44
44
|
stub_request(:get, "http://example.com/title_in_head").to_return(fixture_file("title_in_head.response"))
|
45
45
|
stub_request(:get, "http://example.com/title_in_head_with_whitespace").to_return(fixture_file("title_in_head_with_whitespace.response"))
|
46
46
|
stub_request(:get, "http://example.com/title_not_present").to_return(fixture_file("title_not_present.response"))
|
47
|
+
stub_request(:get, "http://example.com/author_in_meta").to_return(fixture_file("author_in_meta.response"))
|
48
|
+
stub_request(:get, "http://example.com/author_in_body").to_return(fixture_file("author_in_body.response"))
|
49
|
+
stub_request(:get, "http://example.com/author_in_link").to_return(fixture_file("author_in_link.response"))
|
50
|
+
stub_request(:get, "http://example.com/author_in_twitter").to_return(fixture_file("author_in_twitter.response"))
|
47
51
|
stub_request(:get, "http://example.com/~").to_return(fixture_file("example.response"))
|
48
52
|
stub_request(:get, "http://facebook.com/").to_return(fixture_file("facebook.com.response"))
|
49
53
|
stub_request(:get, "http://international.com").to_return(fixture_file("international.response"))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.4.3
|
4
|
+
version: 5.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
@@ -262,6 +262,10 @@ files:
|
|
262
262
|
- spec/fixtures/404.response
|
263
263
|
- spec/fixtures/alazan.com.response
|
264
264
|
- spec/fixtures/alazan_websolution.response
|
265
|
+
- spec/fixtures/author_in_body.response
|
266
|
+
- spec/fixtures/author_in_link.response
|
267
|
+
- spec/fixtures/author_in_meta.response
|
268
|
+
- spec/fixtures/author_in_twitter.response
|
265
269
|
- spec/fixtures/broken_head_links.response
|
266
270
|
- spec/fixtures/charset_000.response
|
267
271
|
- spec/fixtures/charset_001.response
|