metainspector 5.4.3 → 5.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6e7b6989a9d97ffd983e24718a751fb4b6bd6bf
4
- data.tar.gz: 19298091068438ac8cb97955a87e6ed00eacddbe
3
+ metadata.gz: 2e1034aff57cb80cddd6a4c03c7e3ef92da0c6bf
4
+ data.tar.gz: 44ff18adc7fd7a1bfb7c17034591708eb309c5a6
5
5
  SHA512:
6
- metadata.gz: 78bf72901f8c4bade41d3fe3bd4aa60a15477f187d983732d503838a1213e15788131371fb0f7e452e3654948abe91883b7d9f6b411477ed3308b29fac32ebf6
7
- data.tar.gz: c032e15346a3aa542a17efbadd154adab78cce03519b8d7b4919013923b1aebef90200bd2c8d6776b4cdf4918cf14d4785c5024a47982ff2e5ada10ef098d96c
6
+ metadata.gz: 55a2d91009a7158d15109818d945db5133e21223da78f9ccd82c22765e6c4ae018940b50a3cd389c7d57b8d0c522ac282f5c0a3d44b4d1ddbfa4636add0add28
7
+ data.tar.gz: dab9bff983779b57467dc8db0ba9a0bb0c628abb2cc8fbc2aedbdd77503fd6ca5a882bb3576e951ce847ee4000afd277a94fa21e91364c5b13a72a711d4fc0de
data/README.md CHANGED
@@ -81,6 +81,8 @@ page.feed # Get rss or atom links in meta data fields as array
81
81
  ```ruby
82
82
  page.title # title of the page from the head section, as string
83
83
  page.best_title # best title of the page, from a selection of candidates
84
+ page.author # author of the page from the meta author tag
85
+ page.best_author # best author of the page, from a selection of candidates
84
86
  page.description # returns the meta description
85
87
  page.best_description # returns the first non-empty description between the following candidates: standard meta description, og:description, twitter:description, the first long paragraph
86
88
  ```
@@ -45,7 +45,7 @@ module MetaInspector
45
45
 
46
46
  delegate [:content_type, :response] => :@request
47
47
 
48
- delegate [:parsed, :title, :best_title,
48
+ delegate [:parsed, :title, :best_title, :author, :best_author,
49
49
  :description, :best_description, :links,
50
50
  :images, :feed, :charset, :meta_tags,
51
51
  :meta_tag, :meta, :favicon,
@@ -60,6 +60,8 @@ module MetaInspector
60
60
  'root_url' => root_url,
61
61
  'title' => title,
62
62
  'best_title' => best_title,
63
+ 'author' => author,
64
+ 'best_author' => best_author,
63
65
  'description' => description,
64
66
  'best_description' => best_description,
65
67
  'links' => links.to_hash,
@@ -21,12 +21,12 @@ module MetaInspector
21
21
  end
22
22
 
23
23
  extend Forwardable
24
- delegate [:url, :scheme, :host] => :@document
25
- delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
26
- delegate [:head_links, :stylesheets, :canonicals, :feed] => :@head_links_parser
27
- delegate [:links, :base_url] => :@links_parser
28
- delegate :images => :@images_parser
29
- delegate [:title, :best_title, :description, :best_description] => :@texts_parser
24
+ delegate [:url, :scheme, :host] => :@document
25
+ delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
26
+ delegate [:head_links, :stylesheets, :canonicals, :feed] => :@head_links_parser
27
+ delegate [:links, :base_url] => :@links_parser
28
+ delegate :images => :@images_parser
29
+ delegate [:title, :best_title, :author, :best_author, :description, :best_description] => :@texts_parser
30
30
 
31
31
  # Returns the whole parsed document
32
32
  def parsed
@@ -14,6 +14,21 @@ module MetaInspector
14
14
  @best_title ||= find_best_title
15
15
  end
16
16
 
17
+ # Returns the meta author, if present
18
+ def author
19
+ @author ||= meta['author']
20
+ end
21
+
22
+ # An author getter that returns the longest non-nil author
23
+ # from the following candidates:
24
+ # - the standard meta author tag
25
+ # - a link with the relational attribute "author"
26
+ # - address tag which may contain the author
27
+ # - the twitter:creator meta tag for the username
28
+ def best_author
29
+ @best_author ||= find_best_author
30
+ end
31
+
17
32
  # Returns the meta description, if present
18
33
  def description
19
34
  @description ||= meta['description']
@@ -50,6 +65,24 @@ module MetaInspector
50
65
  candidates.first
51
66
  end
52
67
 
68
+ def find_best_author
69
+ candidates = [
70
+ meta['author'],
71
+ parsed.css('a[rel="author"]').first,
72
+ parsed.css('address').first,
73
+ meta['twitter:creator']
74
+ ]
75
+ candidates.flatten!
76
+ candidates.compact!
77
+ candidates.map! { |c| (c.respond_to? :inner_text) ? c.inner_text : c }
78
+ candidates.map! { |c| c.strip }
79
+ return nil if candidates.empty?
80
+ candidates.map! { |c| c.gsub(/\s+/, ' ') }
81
+ candidates.uniq!
82
+ candidates.sort_by! { |t| -t.length }
83
+ candidates.first
84
+ end
85
+
53
86
  def find_best_description
54
87
  candidates = [
55
88
  meta['description'],
@@ -1,3 +1,3 @@
1
1
  module MetaInspector
2
- VERSION = '5.4.3'
2
+ VERSION = '5.5.0'
3
3
  end
@@ -26,6 +26,8 @@ describe MetaInspector::Document do
26
26
  "root_url" => "http://pagerankalert.com/",
27
27
  "title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
28
28
  "best_title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
29
+ "author" => nil,
30
+ "best_author" => nil,
29
31
  "description" => "Track your PageRank(TM) changes and receive alerts by email",
30
32
  "best_description"=> "Track your PageRank(TM) changes and receive alerts by email",
31
33
  "favicon" => "http://pagerankalert.com/src/favicon.ico",
@@ -0,0 +1,21 @@
1
+ HTTP/1.1 200 OK
2
+ Age: 13
3
+ Cache-Control: max-age=120
4
+ Content-Type: text/html
5
+ Date: Mon, 06 Jan 2014 12:47:42 GMT
6
+ Expires: Mon, 06 Jan 2014 12:49:28 GMT
7
+ Server: Apache/2.2.14 (Ubuntu)
8
+ Vary: Accept-Encoding
9
+ Via: 1.1 varnish
10
+ X-Powered-By: PHP/5.3.2-1ubuntu4.22
11
+ X-Varnish: 1188792404 1188790413
12
+ Content-Length: 265
13
+ Connection: keep-alive
14
+
15
+ <!DOCTYPE html>
16
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
17
+ <head></head>
18
+ <body>
19
+ <address>This author came from the address tag</address>
20
+ </body>
21
+ </html>
@@ -0,0 +1,21 @@
1
+ HTTP/1.1 200 OK
2
+ Age: 13
3
+ Cache-Control: max-age=120
4
+ Content-Type: text/html
5
+ Date: Mon, 06 Jan 2014 12:47:42 GMT
6
+ Expires: Mon, 06 Jan 2014 12:49:28 GMT
7
+ Server: Apache/2.2.14 (Ubuntu)
8
+ Vary: Accept-Encoding
9
+ Via: 1.1 varnish
10
+ X-Powered-By: PHP/5.3.2-1ubuntu4.22
11
+ X-Varnish: 1188792404 1188790413
12
+ Content-Length: 265
13
+ Connection: keep-alive
14
+
15
+ <!DOCTYPE html>
16
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
17
+ <head></head>
18
+ <body>
19
+ <a rel="author">This author came from a link with the author relational attribute</a>
20
+ </body>
21
+ </html>
@@ -0,0 +1,23 @@
1
+ HTTP/1.1 200 OK
2
+ Age: 13
3
+ Cache-Control: max-age=120
4
+ Content-Type: text/html
5
+ Date: Mon, 06 Jan 2014 12:47:42 GMT
6
+ Expires: Mon, 06 Jan 2014 12:49:28 GMT
7
+ Server: Apache/2.2.14 (Ubuntu)
8
+ Vary: Accept-Encoding
9
+ Via: 1.1 varnish
10
+ X-Powered-By: PHP/5.3.2-1ubuntu4.22
11
+ X-Varnish: 1188792404 1188790413
12
+ Content-Length: 40571
13
+ Connection: keep-alive
14
+
15
+ <!DOCTYPE html>
16
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
17
+ <head>
18
+ <meta name="author" content="the author" />
19
+ </head>
20
+ <body>
21
+ <p>A sample page with author in standard meta tag</p>
22
+ </body>
23
+ </html>
@@ -0,0 +1,23 @@
1
+ HTTP/1.1 200 OK
2
+ Age: 13
3
+ Cache-Control: max-age=120
4
+ Content-Type: text/html
5
+ Date: Mon, 06 Jan 2014 12:47:42 GMT
6
+ Expires: Mon, 06 Jan 2014 12:49:28 GMT
7
+ Server: Apache/2.2.14 (Ubuntu)
8
+ Vary: Accept-Encoding
9
+ Via: 1.1 varnish
10
+ X-Powered-By: PHP/5.3.2-1ubuntu4.22
11
+ X-Varnish: 1188792404 1188790413
12
+ Content-Length: 40571
13
+ Connection: keep-alive
14
+
15
+ <!DOCTYPE html>
16
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
17
+ <head>
18
+ <meta property="twitter:creator" content="This author came from the twitter creator tag" />
19
+ </head>
20
+ <body>
21
+ <p>A sample page with author in Twitter meta tag</p>
22
+ </body>
23
+ </html>
@@ -49,6 +49,50 @@ describe MetaInspector do
49
49
  end
50
50
  end
51
51
 
52
+ describe '#author' do
53
+ it "should find author from meta author" do
54
+ page = MetaInspector.new('http://example.com/author_in_meta')
55
+
56
+ expect(page.author).to eq("the author")
57
+ end
58
+
59
+ it "should be nil if no meta author" do
60
+ page = MetaInspector.new('http://example.com/empty')
61
+
62
+ expect(page.author).to be(nil)
63
+ end
64
+ end
65
+
66
+ describe "#best_author" do
67
+ it "should return the author meta tag content if present" do
68
+ page = MetaInspector.new('http://example.com/author_in_meta')
69
+
70
+ expect(page.best_author).to eq("the author")
71
+ end
72
+
73
+ it "should find a link with the relational attribute author if standard meta tag is not present" do
74
+ page = MetaInspector.new('http://example.com/author_in_link')
75
+ expect(page.best_author).to eq("This author came from a link with the author relational attribute")
76
+ end
77
+
78
+ it "should find the address tag if standard meta tag and relational attribute author are not present" do
79
+ page = MetaInspector.new('http://example.com/author_in_body')
80
+ expect(page.best_author).to eq("This author came from the address tag")
81
+ end
82
+
83
+ it "should return the twitter creator if address tag not present" do
84
+ page = MetaInspector.new('http://example.com/author_in_twitter')
85
+
86
+ expect(page.best_author).to eq("This author came from the twitter creator tag")
87
+ end
88
+
89
+ it "should return nil if no author information present" do
90
+ page = MetaInspector.new('http://example.com/empty')
91
+
92
+ expect(page.best_author).to be(nil)
93
+ end
94
+ end
95
+
52
96
  describe '#description' do
53
97
  it "should find description from meta description" do
54
98
  page = MetaInspector.new('http://example.com/desc_in_meta')
@@ -44,6 +44,10 @@ RSpec.configure do |config|
44
44
  stub_request(:get, "http://example.com/title_in_head").to_return(fixture_file("title_in_head.response"))
45
45
  stub_request(:get, "http://example.com/title_in_head_with_whitespace").to_return(fixture_file("title_in_head_with_whitespace.response"))
46
46
  stub_request(:get, "http://example.com/title_not_present").to_return(fixture_file("title_not_present.response"))
47
+ stub_request(:get, "http://example.com/author_in_meta").to_return(fixture_file("author_in_meta.response"))
48
+ stub_request(:get, "http://example.com/author_in_body").to_return(fixture_file("author_in_body.response"))
49
+ stub_request(:get, "http://example.com/author_in_link").to_return(fixture_file("author_in_link.response"))
50
+ stub_request(:get, "http://example.com/author_in_twitter").to_return(fixture_file("author_in_twitter.response"))
47
51
  stub_request(:get, "http://example.com/~").to_return(fixture_file("example.response"))
48
52
  stub_request(:get, "http://facebook.com/").to_return(fixture_file("facebook.com.response"))
49
53
  stub_request(:get, "http://international.com").to_return(fixture_file("international.response"))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.4.3
4
+ version: 5.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
@@ -262,6 +262,10 @@ files:
262
262
  - spec/fixtures/404.response
263
263
  - spec/fixtures/alazan.com.response
264
264
  - spec/fixtures/alazan_websolution.response
265
+ - spec/fixtures/author_in_body.response
266
+ - spec/fixtures/author_in_link.response
267
+ - spec/fixtures/author_in_meta.response
268
+ - spec/fixtures/author_in_twitter.response
265
269
  - spec/fixtures/broken_head_links.response
266
270
  - spec/fixtures/charset_000.response
267
271
  - spec/fixtures/charset_001.response