metainspector 4.0.0.rc3 → 4.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bf5c2667ff165768d1a0e0c49ebd47ea5f8de28e
4
- data.tar.gz: 15b2f4fb7a2f090a75fe06ab98959e35d5f97a3f
3
+ metadata.gz: 4fbb85a1c08f497b3c38edbdc97e0c8d96ee6c6a
4
+ data.tar.gz: 9ce2c80b81b1eb085037312e75fb82d1e46f4202
5
5
  SHA512:
6
- metadata.gz: eeb60786169e979dd8bb257832f2bf2c0270af8b2bf63056330826677a4943373aea51269a1ddfc397ae296cb786b5285997a1721b5ae412cc006214c872af18
7
- data.tar.gz: ae891af393d3746df5048a1e512e70f11718fc8357a2c8212376119afb174e8b7e0ccd180f48c252813581a4ed5671b0f01e35ca555b475efc9997238c29c952
6
+ metadata.gz: e12a19a7598d3a9c7d83d90c121336964490dcd8b334f72d9ceb64ea8efab67c3b269445eb1ebf46eb5385169ea04a81ef155533dbe92779614eb3e0a10c50b3
7
+ data.tar.gz: 555a9b35ee7f51def2c45a24e46996cc130a65d15daebda9841c7be74fda8a2c76cb0097c53a67ad763b80272db52d84f8bdb7b99ecee124929a19b3c36a6338
data/.gitignore CHANGED
@@ -7,3 +7,5 @@
7
7
  Gemfile.lock
8
8
  pkg/*
9
9
  .idea/
10
+ .rubocop_todo.yml
11
+ .rubocop.yml
@@ -0,0 +1,4 @@
1
+ # Forcing the new ruby 1.9 syntax for hashes is not a requirement,
2
+ # we still { :love => 'hashrockets' }
3
+ Style/HashSyntax:
4
+ Enabled: false
@@ -5,16 +5,18 @@ module MetaInspector
5
5
 
6
6
  include MetaInspector::Exceptionable
7
7
 
8
- # Initializes a new instance of MetaInspector::Document, setting the URL to the one given
8
+ # Initializes a new instance of MetaInspector::Document, setting the URL
9
9
  # Options:
10
- # => connection_timeout: defaults to 20 seconds
11
- # => read_timeout: defaults to 20 seconds
12
- # => retries: defaults to 3 times
13
- # => html_content_type_only: if an exception should be raised if request content-type is not text/html. Defaults to false
14
- # => allow_redirections: when true, follow HTTP redirects. Defaults to true
15
- # => document: the html of the url as a string
16
- # => warn_level: what to do when encountering exceptions. Can be :warn, :raise or nil
17
- # => headers: object containing custom headers for the request
10
+ # * connection_timeout: defaults to 20 seconds
11
+ # * read_timeout: defaults to 20 seconds
12
+ # * retries: defaults to 3 times
13
+ # * html_content_type_only: if an exception should be raised if request
14
+ # content-type is not text/html. Defaults to false.
15
+ # * allow_redirections: when true, follow HTTP redirects. Defaults to true
16
+ # * document: the html of the url as a string
17
+ # * warn_level: what to do when encountering exceptions.
18
+ # Can be :warn, :raise or nil
19
+ # * headers: object containing custom headers for the request
18
20
  def initialize(initial_url, options = {})
19
21
  options = defaults.merge(options)
20
22
  @connection_timeout = options[:connection_timeout]
@@ -37,25 +39,28 @@ module MetaInspector
37
39
  end
38
40
 
39
41
  extend Forwardable
40
- def_delegators :@url, :url, :scheme, :host, :root_url
41
- def_delegators :@request, :content_type, :response
42
- def_delegators :@parser, :parsed, :respond_to?, :title, :description, :links,
43
- :images, :image, :feed, :charset, :meta_tags, :meta_tag, :meta, :favicon
42
+ delegate [:url, :scheme, :host, :root_url] => :@url
43
+
44
+ delegate [:content_type, :response] => :@request
45
+
46
+ delegate [:parsed, :title, :description, :links,
47
+ :images, :feed, :charset, :meta_tags,
48
+ :meta_tag, :meta, :favicon] => :@parser
44
49
 
45
50
  # Returns all document data as a nested Hash
46
51
  def to_hash
47
52
  {
48
- 'url' => url,
49
- 'title' => title,
50
- 'links' => links.to_hash,
51
- 'images' => images.to_a,
52
- 'charset' => charset,
53
- 'feed' => feed,
54
- 'content_type' => content_type,
55
- 'meta_tags' => meta_tags,
56
- 'favicon' => images.favicon,
57
- 'response' => { 'status' => response.status,
58
- 'headers' => response.headers }
53
+ 'url' => url,
54
+ 'title' => title,
55
+ 'links' => links.to_hash,
56
+ 'images' => images.to_a,
57
+ 'charset' => charset,
58
+ 'feed' => feed,
59
+ 'content_type' => content_type,
60
+ 'meta_tags' => meta_tags,
61
+ 'favicon' => images.favicon,
62
+ 'response' => { 'status' => response.status,
63
+ 'headers' => response.headers }
59
64
  }
60
65
  end
61
66
 
@@ -67,18 +72,21 @@ module MetaInspector
67
72
  private
68
73
 
69
74
  def defaults
70
- { :timeout => 20,
71
- :retries => 3,
72
- :html_content_only => false,
73
- :warn_level => :raise,
74
- :headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"},
75
- :allow_redirections => true
76
- }
75
+ { :timeout => 20,
76
+ :retries => 3,
77
+ :html_content_only => false,
78
+ :warn_level => :raise,
79
+ :headers => { 'User-Agent' => default_user_agent },
80
+ :allow_redirections => true }
81
+ end
82
+
83
+ def default_user_agent
84
+ "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"
77
85
  end
78
86
 
79
87
  def document
80
- @document ||= if html_content_only && content_type != "text/html"
81
- raise "The url provided contains #{content_type} content instead of text/html content" and nil
88
+ @document ||= if html_content_only && content_type != 'text/html'
89
+ fail "The url provided contains #{content_type} content instead of text/html content"
82
90
  else
83
91
  @request.read
84
92
  end
@@ -12,7 +12,7 @@ module MetaInspector
12
12
  def <<(exception)
13
13
  case warn_level
14
14
  when :raise
15
- raise exception
15
+ fail exception
16
16
  when :warn
17
17
  warn exception
18
18
  when :store
@@ -24,7 +24,7 @@ module MetaInspector
24
24
  if warn_level == :store
25
25
  exceptions.empty?
26
26
  else
27
- warn "ExceptionLog#ok? should only be used when warn_level is :store"
27
+ warn 'ExceptionLog#ok? should only be used when warn_level is :store'
28
28
  end
29
29
  end
30
30
  end
@@ -4,6 +4,6 @@ module MetaInspector
4
4
  #
5
5
  module Exceptionable
6
6
  extend Forwardable
7
- def_delegators :@exception_log, :exceptions, :ok?
7
+ delegate [:exceptions, :ok?] => :@exception_log
8
8
  end
9
9
  end
@@ -20,11 +20,11 @@ module MetaInspector
20
20
  end
21
21
 
22
22
  extend Forwardable
23
- def_delegators :@document, :url, :scheme, :host
24
- def_delegators :@meta_tag_parser, :meta_tags, :meta_tag, :meta, :charset
25
- def_delegators :@links_parser, :links, :feed, :base_url
26
- def_delegators :@images_parser, :images
27
- def_delegators :@texts_parser, :title, :description
23
+ delegate [:url, :scheme, :host] => :@document
24
+ delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
25
+ delegate [:links, :feed, :base_url] => :@links_parser
26
+ delegate :images => :@images_parser
27
+ delegate [:title, :description] => :@texts_parser
28
28
 
29
29
  # Returns the whole parsed document
30
30
  def parsed
@@ -23,7 +23,7 @@ module MetaInspector
23
23
 
24
24
  # Cleans up nokogiri search results
25
25
  def cleanup(results)
26
- results.map { |_| _.value.strip }.reject { |_| _.empty? }.uniq
26
+ results.map { |r| r.value.strip }.reject(&:empty?).uniq
27
27
  end
28
28
  end
29
29
  end
@@ -1,8 +1,8 @@
1
1
  module MetaInspector
2
2
  module Parsers
3
3
  class ImagesParser < Base
4
- def_delegators :@main_parser, :parsed, :meta, :base_url
5
- def_delegators :images_collection, :each, :length, :size, :last, :[]
4
+ delegate [:parsed, :meta, :base_url] => :@main_parser
5
+ delegate [:each, :length, :size, :[], :last] => :images_collection
6
6
 
7
7
  include Enumerable
8
8
 
@@ -11,7 +11,7 @@ module MetaInspector
11
11
  end
12
12
 
13
13
  # Returns the parsed image from Facebook's open graph property tags
14
- # Most all major websites now define this property and is usually very relevant
14
+ # Most major websites now define this property and is usually relevant
15
15
  # See doc at http://developers.facebook.com/docs/opengraph/
16
16
  # If none found, tries with Twitter image
17
17
  def best
@@ -30,11 +30,15 @@ module MetaInspector
30
30
  private
31
31
 
32
32
  def images_collection
33
- @images_collection ||= parsed_images.map{ |i| URL.absolutify(i, base_url) }
33
+ @images_collection ||= absolutified_images
34
+ end
35
+
36
+ def absolutified_images
37
+ parsed_images.map { |i| URL.absolutify(i, base_url) }
34
38
  end
35
39
 
36
40
  def parsed_images
37
- @parsed_images ||= cleanup(parsed.search('//img/@src'))
41
+ cleanup(parsed.search('//img/@src'))
38
42
  end
39
43
  end
40
44
  end
@@ -1,7 +1,7 @@
1
1
  module MetaInspector
2
2
  module Parsers
3
3
  class LinksParser < Base
4
- def_delegators :@main_parser, :parsed, :url, :scheme, :host
4
+ delegate [:parsed, :url, :scheme, :host] => :@main_parser
5
5
 
6
6
  def links
7
7
  self
@@ -9,37 +9,39 @@ module MetaInspector
9
9
 
10
10
  # Returns all links found, unprocessed
11
11
  def raw
12
- @raw ||= cleanup(parsed.search("//a/@href")).compact.uniq
12
+ @raw ||= cleanup(parsed.search('//a/@href')).compact.uniq
13
13
  end
14
14
 
15
15
  # Returns all links found, unrelavitized and absolutified
16
16
  def all
17
- @all ||= raw.map { |l| URL.absolutify(URL.unrelativize(l, scheme), base_url) }
17
+ @all ||= raw.map { |link| URL.absolutify(URL.unrelativize(link, scheme), base_url) }
18
18
  .compact.uniq
19
19
  end
20
20
 
21
21
  # Returns all HTTP links found
22
22
  def http
23
- @http ||= all.select {|l| l =~ /^http(s)?:\/\//i}
23
+ @http ||= all.select { |link| link =~ /^http(s)?:\/\//i}
24
24
  end
25
25
 
26
26
  # Returns all non-HTTP links found
27
27
  def non_http
28
- @non_http ||= all.select {|l| l !~ /^http(s)?:\/\//i}
28
+ @non_http ||= all.select { |link| link !~ /^http(s)?:\/\//i}
29
29
  end
30
30
 
31
31
  # Returns all internal HTTP links found
32
32
  def internal
33
- @internal ||= http.select {|link| URL.new(link).host == host }
33
+ @internal ||= http.select { |link| URL.new(link).host == host }
34
34
  end
35
35
 
36
36
  # Returns all external HTTP links found
37
37
  def external
38
- @external ||= http.select {|link| URL.new(link).host != host }
38
+ @external ||= http.select { |link| URL.new(link).host != host }
39
39
  end
40
40
 
41
41
  def to_hash
42
- { 'internal' => internal, 'external' => external, 'non_http' => non_http }
42
+ { 'internal' => internal,
43
+ 'external' => external,
44
+ 'non_http' => non_http }
43
45
  end
44
46
 
45
47
  # Returns the parsed document meta rss link
@@ -47,7 +49,8 @@ module MetaInspector
47
49
  @feed ||= (parsed_feed('rss') || parsed_feed('atom'))
48
50
  end
49
51
 
50
- # Returns the base url to absolutify relative links. This can be the one set on a <base> tag,
52
+ # Returns the base url to absolutify relative links.
53
+ # This can be the one set on a <base> tag,
51
54
  # or the url of the document if no <base> tag was found.
52
55
  def base_url
53
56
  base_href || url
@@ -60,7 +63,7 @@ module MetaInspector
60
63
  feed ? URL.absolutify(feed.attributes['href'].value, base_url) : nil
61
64
  end
62
65
 
63
- # Returns the value of the href attribute on the <base /> tag, if it exists
66
+ # Returns the value of the href attribute on the <base /> tag, if exists
64
67
  def base_href
65
68
  parsed.search('base').first.attributes['href'].value rescue nil
66
69
  end
@@ -1,7 +1,7 @@
1
1
  module MetaInspector
2
2
  module Parsers
3
3
  class MetaTagsParser < Base
4
- def_delegators :@main_parser, :parsed
4
+ delegate :parsed => :@main_parser
5
5
 
6
6
  def meta_tags
7
7
  {
@@ -20,10 +20,10 @@ module MetaInspector
20
20
  meta_tag['name']
21
21
  .merge(meta_tag['http-equiv'])
22
22
  .merge(meta_tag['property'])
23
- .merge({'charset' => meta_tag['charset']})
23
+ .merge('charset' => meta_tag['charset'])
24
24
  end
25
25
 
26
- # Returns the charset from the meta tags, looking for it in the following order:
26
+ # Returns the charset from the meta tags, searching in this order:
27
27
  # <meta charset='utf-8' />
28
28
  # <meta http-equiv="Content-Type" content="text/html; charset=windows-1252" />
29
29
  def charset
@@ -33,12 +33,12 @@ module MetaInspector
33
33
  private
34
34
 
35
35
  def charset_from_meta_charset
36
- parsed.css("meta[charset]")[0].attributes['charset'].value rescue nil
36
+ parsed.css('meta[charset]')[0].attributes['charset'].value rescue nil
37
37
  end
38
38
 
39
39
  def charset_from_meta_content_type
40
40
  parsed.css("meta[http-equiv='Content-Type']")[0]
41
- .attributes['content'].value.split(";")[1].split("=")[1] rescue nil
41
+ .attributes['content'].value.split(';')[1].split('=')[1] rescue nil
42
42
  end
43
43
 
44
44
  def meta_tags_by(attribute)
@@ -58,12 +58,12 @@ module MetaInspector
58
58
  def convert_each_array_to_first_element_on(hash)
59
59
  hash.each_pair do |k, v|
60
60
  hash[k] = if v.is_a?(Hash)
61
- convert_each_array_to_first_element_on(v)
62
- elsif v.is_a?(Array)
63
- v.first
64
- else
65
- v
66
- end
61
+ convert_each_array_to_first_element_on(v)
62
+ elsif v.is_a?(Array)
63
+ v.first
64
+ else
65
+ v
66
+ end
67
67
  end
68
68
  end
69
69
 
@@ -1,7 +1,7 @@
1
1
  module MetaInspector
2
2
  module Parsers
3
3
  class TextsParser < Base
4
- def_delegators :@main_parser, :parsed, :meta
4
+ delegate [:parsed, :meta] => :@main_parser
5
5
 
6
6
  # Returns the parsed document title, from the content of the <title> tag
7
7
  # within the <head> section.
@@ -9,8 +9,9 @@ module MetaInspector
9
9
  @title ||= parsed.css('head title').inner_text rescue nil
10
10
  end
11
11
 
12
- # A description getter that first checks for a meta description and if not present will
13
- # guess by looking at the first paragraph with more than 120 characters
12
+ # A description getter that first checks for a meta description
13
+ # and if not present will guess by looking at the first paragraph
14
+ # with more than 120 characters
14
15
  def description
15
16
  meta['description'] || secondary_description
16
17
  end
@@ -18,25 +18,24 @@ module MetaInspector
18
18
  @exception_log = options[:exception_log]
19
19
  @headers = options[:headers]
20
20
 
21
- response # as soon as it is set up, we make the request so we can fail early
21
+ response # request early so we can fail early
22
22
  end
23
23
 
24
24
  extend Forwardable
25
- def_delegators :@url, :url
25
+ delegate :url => :@url
26
26
 
27
27
  def read
28
28
  response.body if response
29
29
  end
30
30
 
31
31
  def content_type
32
- response.headers["content-type"].split(";")[0] if response
32
+ response.headers['content-type'].split(';')[0] if response
33
33
  end
34
34
 
35
35
  def response
36
- request_count ||= 0
37
- request_count += 1
38
36
  @response ||= fetch
39
- rescue Faraday::TimeoutError, Faraday::Error::ConnectionFailed, RuntimeError => e
37
+ rescue Faraday::TimeoutError, Faraday::Error::ConnectionFailed,
38
+ RuntimeError => e
40
39
  @exception_log << e
41
40
  nil
42
41
  end
@@ -28,20 +28,23 @@ module MetaInspector
28
28
  @url = normalized(with_default_scheme(new_url))
29
29
  end
30
30
 
31
- # Converts a protocol-relative url to its full form, depending on the scheme of the page that contains it
31
+ # Converts a protocol-relative url to its full form,
32
+ # depending on the scheme of the page that contains it
32
33
  def self.unrelativize(url, scheme)
33
34
  url =~ /^\/\// ? "#{scheme}://#{url[2..-1]}" : url
34
35
  end
35
36
 
36
- # Convert a relative url like "/users" to an absolute one like "http://example.com/users"
37
- # Respecting already absolute URLs like the ones starting with http:, ftp:, telnet:, mailto:, javascript: ...
37
+ # Converts a relative URL to an absolute URL, like:
38
+ # "/faq" => "http://example.com/faq"
39
+ # Respecting already absolute URLs like the ones starting with
40
+ # http:, ftp:, telnet:, mailto:, javascript: ...
38
41
  def self.absolutify(url, base_url)
39
42
  if url =~ /^\w*\:/i
40
43
  MetaInspector::URL.new(url).url
41
44
  else
42
45
  Addressable::URI.join(base_url, url).normalize.to_s
43
46
  end
44
- rescue Addressable::URI::InvalidURIError => e
47
+ rescue Addressable::URI::InvalidURIError
45
48
  nil
46
49
  end
47
50
 
@@ -52,7 +55,8 @@ module MetaInspector
52
55
  parsed(url) && parsed(url).scheme.nil? ? 'http://' + url : url
53
56
  end
54
57
 
55
- # Normalize url to deal with characters that should be encodes, add trailing slash, convert to downcase...
58
+ # Normalize url to deal with characters that should be encoded,
59
+ # add trailing slash, convert to downcase...
56
60
  def normalized(url)
57
61
  Addressable::URI.parse(url).normalize.to_s
58
62
  end
@@ -1,3 +1,3 @@
1
1
  module MetaInspector
2
- VERSION = "4.0.0.rc3"
2
+ VERSION = "4.0.0"
3
3
  end
@@ -28,4 +28,5 @@ Gem::Specification.new do |gem|
28
28
  gem.add_development_dependency 'pry'
29
29
  gem.add_development_dependency 'guard'
30
30
  gem.add_development_dependency 'guard-rspec'
31
+ gem.add_development_dependency 'rubocop'
31
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0.rc3
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-20 00:00:00.000000000 Z
11
+ date: 2014-11-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -192,6 +192,20 @@ dependencies:
192
192
  - - ">="
193
193
  - !ruby/object:Gem::Version
194
194
  version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: rubocop
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
195
209
  description: MetaInspector lets you scrape a web page and get its title, charset,
196
210
  link and meta tags
197
211
  email:
@@ -202,6 +216,7 @@ extra_rdoc_files: []
202
216
  files:
203
217
  - ".gitignore"
204
218
  - ".rspec.example"
219
+ - ".rubocop.yml.example"
205
220
  - ".travis.yml"
206
221
  - Gemfile
207
222
  - Guardfile
@@ -286,9 +301,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
286
301
  version: '0'
287
302
  required_rubygems_version: !ruby/object:Gem::Requirement
288
303
  requirements:
289
- - - ">"
304
+ - - ">="
290
305
  - !ruby/object:Gem::Version
291
- version: 1.3.1
306
+ version: '0'
292
307
  requirements: []
293
308
  rubyforge_project:
294
309
  rubygems_version: 2.2.2