metainspector 5.4.3 → 5.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -0
- data/lib/meta_inspector/document.rb +3 -1
- data/lib/meta_inspector/parser.rb +6 -6
- data/lib/meta_inspector/parsers/texts.rb +33 -0
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/document_spec.rb +2 -0
- data/spec/fixtures/author_in_body.response +21 -0
- data/spec/fixtures/author_in_link.response +21 -0
- data/spec/fixtures/author_in_meta.response +23 -0
- data/spec/fixtures/author_in_twitter.response +23 -0
- data/spec/meta_inspector/texts_spec.rb +44 -0
- data/spec/spec_helper.rb +4 -0
- metadata +5 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2e1034aff57cb80cddd6a4c03c7e3ef92da0c6bf
|
4
|
+
data.tar.gz: 44ff18adc7fd7a1bfb7c17034591708eb309c5a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 55a2d91009a7158d15109818d945db5133e21223da78f9ccd82c22765e6c4ae018940b50a3cd389c7d57b8d0c522ac282f5c0a3d44b4d1ddbfa4636add0add28
|
7
|
+
data.tar.gz: dab9bff983779b57467dc8db0ba9a0bb0c628abb2cc8fbc2aedbdd77503fd6ca5a882bb3576e951ce847ee4000afd277a94fa21e91364c5b13a72a711d4fc0de
|
data/README.md
CHANGED
@@ -81,6 +81,8 @@ page.feed # Get rss or atom links in meta data fields as array
|
|
81
81
|
```ruby
|
82
82
|
page.title # title of the page from the head section, as string
|
83
83
|
page.best_title # best title of the page, from a selection of candidates
|
84
|
+
page.author # author of the page from the meta author tag
|
85
|
+
page.best_author # best author of the page, from a selection of candidates
|
84
86
|
page.description # returns the meta description
|
85
87
|
page.best_description # returns the first non-empty description between the following candidates: standard meta description, og:description, twitter:description, the first long paragraph
|
86
88
|
```
|
data/lib/meta_inspector/document.rb
CHANGED
@@ -45,7 +45,7 @@ module MetaInspector
|
|
45
45
|
|
46
46
|
delegate [:content_type, :response] => :@request
|
47
47
|
|
48
|
-
delegate [:parsed, :title, :best_title,
|
48
|
+
delegate [:parsed, :title, :best_title, :author, :best_author,
|
49
49
|
:description, :best_description, :links,
|
50
50
|
:images, :feed, :charset, :meta_tags,
|
51
51
|
:meta_tag, :meta, :favicon,
|
@@ -60,6 +60,8 @@ module MetaInspector
|
|
60
60
|
'root_url' => root_url,
|
61
61
|
'title' => title,
|
62
62
|
'best_title' => best_title,
|
63
|
+
'author' => author,
|
64
|
+
'best_author' => best_author,
|
63
65
|
'description' => description,
|
64
66
|
'best_description' => best_description,
|
65
67
|
'links' => links.to_hash,
|
data/lib/meta_inspector/parser.rb
CHANGED
@@ -21,12 +21,12 @@ module MetaInspector
|
|
21
21
|
end
|
22
22
|
|
23
23
|
extend Forwardable
|
24
|
-
delegate [:url, :scheme, :host]
|
25
|
-
delegate [:meta_tags, :meta_tag, :meta, :charset]
|
26
|
-
delegate [:head_links, :stylesheets, :canonicals, :feed]
|
27
|
-
delegate [:links, :base_url]
|
28
|
-
delegate :images
|
29
|
-
delegate [:title, :best_title, :description, :best_description] => :@texts_parser
|
24
|
+
delegate [:url, :scheme, :host] => :@document
|
25
|
+
delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
|
26
|
+
delegate [:head_links, :stylesheets, :canonicals, :feed] => :@head_links_parser
|
27
|
+
delegate [:links, :base_url] => :@links_parser
|
28
|
+
delegate :images => :@images_parser
|
29
|
+
delegate [:title, :best_title, :author, :best_author, :description, :best_description] => :@texts_parser
|
30
30
|
|
31
31
|
# Returns the whole parsed document
|
32
32
|
def parsed
|
data/lib/meta_inspector/parsers/texts.rb
CHANGED
@@ -14,6 +14,21 @@ module MetaInspector
|
|
14
14
|
@best_title ||= find_best_title
|
15
15
|
end
|
16
16
|
|
17
|
+
# Returns the meta author, if present
|
18
|
+
def author
|
19
|
+
@author ||= meta['author']
|
20
|
+
end
|
21
|
+
|
22
|
+
# An author getter that returns the longest author string
|
23
|
+
# from the following candidates:
|
24
|
+
# - the standard meta author tag
|
25
|
+
# - a link with the relational attribute "author"
|
26
|
+
# - address tag which may contain the author
|
27
|
+
# - the twitter:creator meta tag for the username
|
28
|
+
def best_author
|
29
|
+
@best_author ||= find_best_author
|
30
|
+
end
|
31
|
+
|
17
32
|
# Returns the meta description, if present
|
18
33
|
def description
|
19
34
|
@description ||= meta['description']
|
@@ -50,6 +65,24 @@ module MetaInspector
|
|
50
65
|
candidates.first
|
51
66
|
end
|
52
67
|
|
68
|
+
def find_best_author
|
69
|
+
candidates = [
|
70
|
+
meta['author'],
|
71
|
+
parsed.css('a[rel="author"]').first,
|
72
|
+
parsed.css('address').first,
|
73
|
+
meta['twitter:creator']
|
74
|
+
]
|
75
|
+
candidates.flatten!
|
76
|
+
candidates.compact!
|
77
|
+
candidates.map! { |c| (c.respond_to? :inner_text) ? c.inner_text : c }
|
78
|
+
candidates.map! { |c| c.strip }
|
79
|
+
return nil if candidates.empty?
|
80
|
+
candidates.map! { |c| c.gsub(/\s+/, ' ') }
|
81
|
+
candidates.uniq!
|
82
|
+
candidates.sort_by! { |t| -t.length }
|
83
|
+
candidates.first
|
84
|
+
end
|
85
|
+
|
53
86
|
def find_best_description
|
54
87
|
candidates = [
|
55
88
|
meta['description'],
|
data/spec/document_spec.rb
CHANGED
@@ -26,6 +26,8 @@ describe MetaInspector::Document do
|
|
26
26
|
"root_url" => "http://pagerankalert.com/",
|
27
27
|
"title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
|
28
28
|
"best_title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
|
29
|
+
"author" => nil,
|
30
|
+
"best_author" => nil,
|
29
31
|
"description" => "Track your PageRank(TM) changes and receive alerts by email",
|
30
32
|
"best_description"=> "Track your PageRank(TM) changes and receive alerts by email",
|
31
33
|
"favicon" => "http://pagerankalert.com/src/favicon.ico",
|
data/spec/fixtures/author_in_body.response
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 265
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head></head>
|
18
|
+
<body>
|
19
|
+
<address>This author came from the address tag</address>
|
20
|
+
</body>
|
21
|
+
</html>
|
data/spec/fixtures/author_in_link.response
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 265
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head></head>
|
18
|
+
<body>
|
19
|
+
<a rel="author">This author came from a link with the author relational attribute</a>
|
20
|
+
</body>
|
21
|
+
</html>
|
data/spec/fixtures/author_in_meta.response
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 40571
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head>
|
18
|
+
<meta name="author" content="the author" />
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
<p>A sample page with author in standard meta tag</p>
|
22
|
+
</body>
|
23
|
+
</html>
|
data/spec/fixtures/author_in_twitter.response
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Age: 13
|
3
|
+
Cache-Control: max-age=120
|
4
|
+
Content-Type: text/html
|
5
|
+
Date: Mon, 06 Jan 2014 12:47:42 GMT
|
6
|
+
Expires: Mon, 06 Jan 2014 12:49:28 GMT
|
7
|
+
Server: Apache/2.2.14 (Ubuntu)
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Via: 1.1 varnish
|
10
|
+
X-Powered-By: PHP/5.3.2-1ubuntu4.22
|
11
|
+
X-Varnish: 1188792404 1188790413
|
12
|
+
Content-Length: 40571
|
13
|
+
Connection: keep-alive
|
14
|
+
|
15
|
+
<!DOCTYPE html>
|
16
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
|
17
|
+
<head>
|
18
|
+
<meta property="twitter:creator" content="This author came from the twitter creator tag" />
|
19
|
+
</head>
|
20
|
+
<body>
|
21
|
+
<p>A sample page with author in Twitter meta tag</p>
|
22
|
+
</body>
|
23
|
+
</html>
|
data/spec/meta_inspector/texts_spec.rb
CHANGED
@@ -49,6 +49,50 @@ describe MetaInspector do
|
|
49
49
|
end
|
50
50
|
end
|
51
51
|
|
52
|
+
describe '#author' do
|
53
|
+
it "should find author from meta author" do
|
54
|
+
page = MetaInspector.new('http://example.com/author_in_meta')
|
55
|
+
|
56
|
+
expect(page.author).to eq("the author")
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should be nil if no meta author" do
|
60
|
+
page = MetaInspector.new('http://example.com/empty')
|
61
|
+
|
62
|
+
expect(page.author).to be(nil)
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "#best_author" do
|
67
|
+
it "should return the author meta tag content if present" do
|
68
|
+
page = MetaInspector.new('http://example.com/author_in_meta')
|
69
|
+
|
70
|
+
expect(page.best_author).to eq("the author")
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should find a link with the relational attribute author if standard meta tag is not present" do
|
74
|
+
page = MetaInspector.new('http://example.com/author_in_link')
|
75
|
+
expect(page.best_author).to eq("This author came from a link with the author relational attribute")
|
76
|
+
end
|
77
|
+
|
78
|
+
it "should find the address tag if standard meta tag and relational attribute author are not present" do
|
79
|
+
page = MetaInspector.new('http://example.com/author_in_body')
|
80
|
+
expect(page.best_author).to eq("This author came from the address tag")
|
81
|
+
end
|
82
|
+
|
83
|
+
it "should return the twitter creator if address tag not present" do
|
84
|
+
page = MetaInspector.new('http://example.com/author_in_twitter')
|
85
|
+
|
86
|
+
expect(page.best_author).to eq("This author came from the twitter creator tag")
|
87
|
+
end
|
88
|
+
|
89
|
+
it "should return nil if no author information present" do
|
90
|
+
page = MetaInspector.new('http://example.com/empty')
|
91
|
+
|
92
|
+
expect(page.best_author).to be(nil)
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
52
96
|
describe '#description' do
|
53
97
|
it "should find description from meta description" do
|
54
98
|
page = MetaInspector.new('http://example.com/desc_in_meta')
|
data/spec/spec_helper.rb
CHANGED
@@ -44,6 +44,10 @@ RSpec.configure do |config|
|
|
44
44
|
stub_request(:get, "http://example.com/title_in_head").to_return(fixture_file("title_in_head.response"))
|
45
45
|
stub_request(:get, "http://example.com/title_in_head_with_whitespace").to_return(fixture_file("title_in_head_with_whitespace.response"))
|
46
46
|
stub_request(:get, "http://example.com/title_not_present").to_return(fixture_file("title_not_present.response"))
|
47
|
+
stub_request(:get, "http://example.com/author_in_meta").to_return(fixture_file("author_in_meta.response"))
|
48
|
+
stub_request(:get, "http://example.com/author_in_body").to_return(fixture_file("author_in_body.response"))
|
49
|
+
stub_request(:get, "http://example.com/author_in_link").to_return(fixture_file("author_in_link.response"))
|
50
|
+
stub_request(:get, "http://example.com/author_in_twitter").to_return(fixture_file("author_in_twitter.response"))
|
47
51
|
stub_request(:get, "http://example.com/~").to_return(fixture_file("example.response"))
|
48
52
|
stub_request(:get, "http://facebook.com/").to_return(fixture_file("facebook.com.response"))
|
49
53
|
stub_request(:get, "http://international.com").to_return(fixture_file("international.response"))
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 5.4.3
|
4
|
+
version: 5.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
@@ -262,6 +262,10 @@ files:
|
|
262
262
|
- spec/fixtures/404.response
|
263
263
|
- spec/fixtures/alazan.com.response
|
264
264
|
- spec/fixtures/alazan_websolution.response
|
265
|
+
- spec/fixtures/author_in_body.response
|
266
|
+
- spec/fixtures/author_in_link.response
|
267
|
+
- spec/fixtures/author_in_meta.response
|
268
|
+
- spec/fixtures/author_in_twitter.response
|
265
269
|
- spec/fixtures/broken_head_links.response
|
266
270
|
- spec/fixtures/charset_000.response
|
267
271
|
- spec/fixtures/charset_001.response
|