metainspector 5.4.3 → 5.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f6e7b6989a9d97ffd983e24718a751fb4b6bd6bf
4
- data.tar.gz: 19298091068438ac8cb97955a87e6ed00eacddbe
3
+ metadata.gz: 2e1034aff57cb80cddd6a4c03c7e3ef92da0c6bf
4
+ data.tar.gz: 44ff18adc7fd7a1bfb7c17034591708eb309c5a6
5
5
  SHA512:
6
- metadata.gz: 78bf72901f8c4bade41d3fe3bd4aa60a15477f187d983732d503838a1213e15788131371fb0f7e452e3654948abe91883b7d9f6b411477ed3308b29fac32ebf6
7
- data.tar.gz: c032e15346a3aa542a17efbadd154adab78cce03519b8d7b4919013923b1aebef90200bd2c8d6776b4cdf4918cf14d4785c5024a47982ff2e5ada10ef098d96c
6
+ metadata.gz: 55a2d91009a7158d15109818d945db5133e21223da78f9ccd82c22765e6c4ae018940b50a3cd389c7d57b8d0c522ac282f5c0a3d44b4d1ddbfa4636add0add28
7
+ data.tar.gz: dab9bff983779b57467dc8db0ba9a0bb0c628abb2cc8fbc2aedbdd77503fd6ca5a882bb3576e951ce847ee4000afd277a94fa21e91364c5b13a72a711d4fc0de
data/README.md CHANGED
@@ -81,6 +81,8 @@ page.feed # Get rss or atom links in meta data fields as array
81
81
  ```ruby
82
82
  page.title # title of the page from the head section, as string
83
83
  page.best_title # best title of the page, from a selection of candidates
84
+ page.author # author of the page from the meta author tag
85
+ page.best_author # best author of the page, from a selection of candidates
84
86
  page.description # returns the meta description
85
87
  page.best_description # returns the first non-empty description between the following candidates: standard meta description, og:description, twitter:description, the first long paragraph
86
88
  ```
@@ -45,7 +45,7 @@ module MetaInspector
45
45
 
46
46
  delegate [:content_type, :response] => :@request
47
47
 
48
- delegate [:parsed, :title, :best_title,
48
+ delegate [:parsed, :title, :best_title, :author, :best_author,
49
49
  :description, :best_description, :links,
50
50
  :images, :feed, :charset, :meta_tags,
51
51
  :meta_tag, :meta, :favicon,
@@ -60,6 +60,8 @@ module MetaInspector
60
60
  'root_url' => root_url,
61
61
  'title' => title,
62
62
  'best_title' => best_title,
63
+ 'author' => author,
64
+ 'best_author' => best_author,
63
65
  'description' => description,
64
66
  'best_description' => best_description,
65
67
  'links' => links.to_hash,
@@ -21,12 +21,12 @@ module MetaInspector
21
21
  end
22
22
 
23
23
  extend Forwardable
24
- delegate [:url, :scheme, :host] => :@document
25
- delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
26
- delegate [:head_links, :stylesheets, :canonicals, :feed] => :@head_links_parser
27
- delegate [:links, :base_url] => :@links_parser
28
- delegate :images => :@images_parser
29
- delegate [:title, :best_title, :description, :best_description] => :@texts_parser
24
+ delegate [:url, :scheme, :host] => :@document
25
+ delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
26
+ delegate [:head_links, :stylesheets, :canonicals, :feed] => :@head_links_parser
27
+ delegate [:links, :base_url] => :@links_parser
28
+ delegate :images => :@images_parser
29
+ delegate [:title, :best_title, :author, :best_author, :description, :best_description] => :@texts_parser
30
30
 
31
31
  # Returns the whole parsed document
32
32
  def parsed
@@ -14,6 +14,21 @@ module MetaInspector
14
14
  @best_title ||= find_best_title
15
15
  end
16
16
 
17
+ # Returns the content of the author meta tag (meta['author']), or nil if absent
18
+ def author
19
+ @author ||= meta['author'] # cached once a non-nil value has been found
20
+ end
21
+
22
+ # An author getter that returns the longest non-nil author
23
+ # from the following candidates:
24
+ # - the standard meta author tag
25
+ # - the first link with the relational attribute "author"
26
+ # - the first address tag, which may contain the author
27
+ # - the twitter:creator meta tag for the username
28
+ def best_author
29
+ @best_author ||= find_best_author # cached once a non-nil value has been found
30
+ end
31
+
17
32
  # Returns the meta description, if present
18
33
  def description
19
34
  @description ||= meta['description']
@@ -50,6 +65,24 @@ module MetaInspector
50
65
  candidates.first
51
66
  end
52
67
 
68
+ def find_best_author # gathers author candidates and returns the longest one, or nil when there are none
69
+ candidates = [
70
+ meta['author'],
71
+ parsed.css('a[rel="author"]').first, # first link marked rel="author", if any
72
+ parsed.css('address').first, # first address element, if any
73
+ meta['twitter:creator']
74
+ ]
75
+ candidates.flatten!
76
+ candidates.compact! # drop the candidates that were not found (nil)
77
+ candidates.map! { |c| (c.respond_to? :inner_text) ? c.inner_text : c } # use inner_text where available; other values are kept as-is
78
+ candidates.map! { |c| c.strip }
79
+ return nil if candidates.empty? # NOTE(review): only checks the array; candidates that are blank after strip are kept, so "" can be returned
80
+ candidates.map! { |c| c.gsub(/\s+/, ' ') } # collapse each run of whitespace into a single space
81
+ candidates.uniq!
82
+ candidates.sort_by! { |t| -t.length } # longest candidate first
83
+ candidates.first
84
+ end
85
+
53
86
  def find_best_description
54
87
  candidates = [
55
88
  meta['description'],
@@ -1,3 +1,3 @@
1
1
  module MetaInspector
2
- VERSION = '5.4.3'
2
+ VERSION = '5.5.0'
3
3
  end
@@ -26,6 +26,8 @@ describe MetaInspector::Document do
26
26
  "root_url" => "http://pagerankalert.com/",
27
27
  "title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
28
28
  "best_title" => "PageRankAlert.com :: Track your PageRank changes & receive alerts",
29
+ "author" => nil,
30
+ "best_author" => nil,
29
31
  "description" => "Track your PageRank(TM) changes and receive alerts by email",
30
32
  "best_description"=> "Track your PageRank(TM) changes and receive alerts by email",
31
33
  "favicon" => "http://pagerankalert.com/src/favicon.ico",
@@ -0,0 +1,21 @@
1
+ HTTP/1.1 200 OK
2
+ Age: 13
3
+ Cache-Control: max-age=120
4
+ Content-Type: text/html
5
+ Date: Mon, 06 Jan 2014 12:47:42 GMT
6
+ Expires: Mon, 06 Jan 2014 12:49:28 GMT
7
+ Server: Apache/2.2.14 (Ubuntu)
8
+ Vary: Accept-Encoding
9
+ Via: 1.1 varnish
10
+ X-Powered-By: PHP/5.3.2-1ubuntu4.22
11
+ X-Varnish: 1188792404 1188790413
12
+ Content-Length: 265
13
+ Connection: keep-alive
14
+
15
+ <!DOCTYPE html>
16
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
17
+ <head></head>
18
+ <body>
19
+ <address>This author came from the address tag</address>
20
+ </body>
21
+ </html>
@@ -0,0 +1,21 @@
1
+ HTTP/1.1 200 OK
2
+ Age: 13
3
+ Cache-Control: max-age=120
4
+ Content-Type: text/html
5
+ Date: Mon, 06 Jan 2014 12:47:42 GMT
6
+ Expires: Mon, 06 Jan 2014 12:49:28 GMT
7
+ Server: Apache/2.2.14 (Ubuntu)
8
+ Vary: Accept-Encoding
9
+ Via: 1.1 varnish
10
+ X-Powered-By: PHP/5.3.2-1ubuntu4.22
11
+ X-Varnish: 1188792404 1188790413
12
+ Content-Length: 265
13
+ Connection: keep-alive
14
+
15
+ <!DOCTYPE html>
16
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
17
+ <head></head>
18
+ <body>
19
+ <a rel="author">This author came from a link with the author relational attribute</a>
20
+ </body>
21
+ </html>
@@ -0,0 +1,23 @@
1
+ HTTP/1.1 200 OK
2
+ Age: 13
3
+ Cache-Control: max-age=120
4
+ Content-Type: text/html
5
+ Date: Mon, 06 Jan 2014 12:47:42 GMT
6
+ Expires: Mon, 06 Jan 2014 12:49:28 GMT
7
+ Server: Apache/2.2.14 (Ubuntu)
8
+ Vary: Accept-Encoding
9
+ Via: 1.1 varnish
10
+ X-Powered-By: PHP/5.3.2-1ubuntu4.22
11
+ X-Varnish: 1188792404 1188790413
12
+ Content-Length: 40571
13
+ Connection: keep-alive
14
+
15
+ <!DOCTYPE html>
16
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
17
+ <head>
18
+ <meta name="author" content="the author" />
19
+ </head>
20
+ <body>
21
+ <p>A sample page with author in standard meta tag</p>
22
+ </body>
23
+ </html>
@@ -0,0 +1,23 @@
1
+ HTTP/1.1 200 OK
2
+ Age: 13
3
+ Cache-Control: max-age=120
4
+ Content-Type: text/html
5
+ Date: Mon, 06 Jan 2014 12:47:42 GMT
6
+ Expires: Mon, 06 Jan 2014 12:49:28 GMT
7
+ Server: Apache/2.2.14 (Ubuntu)
8
+ Vary: Accept-Encoding
9
+ Via: 1.1 varnish
10
+ X-Powered-By: PHP/5.3.2-1ubuntu4.22
11
+ X-Varnish: 1188792404 1188790413
12
+ Content-Length: 40571
13
+ Connection: keep-alive
14
+
15
+ <!DOCTYPE html>
16
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:fb="http://www.facebook.com/2008/fbml">
17
+ <head>
18
+ <meta property="twitter:creator" content="This author came from the twitter creator tag" />
19
+ </head>
20
+ <body>
21
+ <p>A sample page with author in Twitter meta tag</p>
22
+ </body>
23
+ </html>
@@ -49,6 +49,50 @@ describe MetaInspector do
49
49
  end
50
50
  end
51
51
 
52
+ describe '#author' do
53
+ it "should find author from meta author" do
54
+ page = MetaInspector.new('http://example.com/author_in_meta')
55
+
56
+ expect(page.author).to eq("the author")
57
+ end
58
+
59
+ it "should be nil if no meta author" do
60
+ page = MetaInspector.new('http://example.com/empty')
61
+
62
+ expect(page.author).to be(nil)
63
+ end
64
+ end
65
+
66
+ describe "#best_author" do
67
+ it "should return the author meta tag content if present" do
68
+ page = MetaInspector.new('http://example.com/author_in_meta')
69
+
70
+ expect(page.best_author).to eq("the author")
71
+ end
72
+
73
+ it "should find a link with the relational attribute author if standard meta tag is not present" do
74
+ page = MetaInspector.new('http://example.com/author_in_link')
75
+ expect(page.best_author).to eq("This author came from a link with the author relational attribute")
76
+ end
77
+
78
+ it "should find the address tag if standard meta tag and relational attribute author are not present" do
79
+ page = MetaInspector.new('http://example.com/author_in_body')
80
+ expect(page.best_author).to eq("This author came from the address tag")
81
+ end
82
+
83
+ it "should return the twitter creator if address tag not present" do
84
+ page = MetaInspector.new('http://example.com/author_in_twitter')
85
+
86
+ expect(page.best_author).to eq("This author came from the twitter creator tag")
87
+ end
88
+
89
+ it "should return nil if no author information present" do
90
+ page = MetaInspector.new('http://example.com/empty')
91
+
92
+ expect(page.best_author).to be(nil)
93
+ end
94
+ end
95
+
52
96
  describe '#description' do
53
97
  it "should find description from meta description" do
54
98
  page = MetaInspector.new('http://example.com/desc_in_meta')
@@ -44,6 +44,10 @@ RSpec.configure do |config|
44
44
  stub_request(:get, "http://example.com/title_in_head").to_return(fixture_file("title_in_head.response"))
45
45
  stub_request(:get, "http://example.com/title_in_head_with_whitespace").to_return(fixture_file("title_in_head_with_whitespace.response"))
46
46
  stub_request(:get, "http://example.com/title_not_present").to_return(fixture_file("title_not_present.response"))
47
+ stub_request(:get, "http://example.com/author_in_meta").to_return(fixture_file("author_in_meta.response"))
48
+ stub_request(:get, "http://example.com/author_in_body").to_return(fixture_file("author_in_body.response"))
49
+ stub_request(:get, "http://example.com/author_in_link").to_return(fixture_file("author_in_link.response"))
50
+ stub_request(:get, "http://example.com/author_in_twitter").to_return(fixture_file("author_in_twitter.response"))
47
51
  stub_request(:get, "http://example.com/~").to_return(fixture_file("example.response"))
48
52
  stub_request(:get, "http://facebook.com/").to_return(fixture_file("facebook.com.response"))
49
53
  stub_request(:get, "http://international.com").to_return(fixture_file("international.response"))
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.4.3
4
+ version: 5.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
@@ -262,6 +262,10 @@ files:
262
262
  - spec/fixtures/404.response
263
263
  - spec/fixtures/alazan.com.response
264
264
  - spec/fixtures/alazan_websolution.response
265
+ - spec/fixtures/author_in_body.response
266
+ - spec/fixtures/author_in_link.response
267
+ - spec/fixtures/author_in_meta.response
268
+ - spec/fixtures/author_in_twitter.response
265
269
  - spec/fixtures/broken_head_links.response
266
270
  - spec/fixtures/charset_000.response
267
271
  - spec/fixtures/charset_001.response