metainspector 4.0.0.rc3 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bf5c2667ff165768d1a0e0c49ebd47ea5f8de28e
4
- data.tar.gz: 15b2f4fb7a2f090a75fe06ab98959e35d5f97a3f
3
+ metadata.gz: 4fbb85a1c08f497b3c38edbdc97e0c8d96ee6c6a
4
+ data.tar.gz: 9ce2c80b81b1eb085037312e75fb82d1e46f4202
5
5
  SHA512:
6
- metadata.gz: eeb60786169e979dd8bb257832f2bf2c0270af8b2bf63056330826677a4943373aea51269a1ddfc397ae296cb786b5285997a1721b5ae412cc006214c872af18
7
- data.tar.gz: ae891af393d3746df5048a1e512e70f11718fc8357a2c8212376119afb174e8b7e0ccd180f48c252813581a4ed5671b0f01e35ca555b475efc9997238c29c952
6
+ metadata.gz: e12a19a7598d3a9c7d83d90c121336964490dcd8b334f72d9ceb64ea8efab67c3b269445eb1ebf46eb5385169ea04a81ef155533dbe92779614eb3e0a10c50b3
7
+ data.tar.gz: 555a9b35ee7f51def2c45a24e46996cc130a65d15daebda9841c7be74fda8a2c76cb0097c53a67ad763b80272db52d84f8bdb7b99ecee124929a19b3c36a6338
data/.gitignore CHANGED
@@ -7,3 +7,5 @@
7
7
  Gemfile.lock
8
8
  pkg/*
9
9
  .idea/
10
+ .rubocop_todo.yml
11
+ .rubocop.yml
@@ -0,0 +1,4 @@
1
+ # Forcing the new ruby 1.9 syntax for hashes is not a requirement,
2
+ # we still { :love => 'hashrockets' }
3
+ Style/HashSyntax:
4
+ Enabled: false
@@ -5,16 +5,18 @@ module MetaInspector
5
5
 
6
6
  include MetaInspector::Exceptionable
7
7
 
8
- # Initializes a new instance of MetaInspector::Document, setting the URL to the one given
8
+ # Initializes a new instance of MetaInspector::Document for the given URL
9
9
  # Options:
10
- # => connection_timeout: defaults to 20 seconds
11
- # => read_timeout: defaults to 20 seconds
12
- # => retries: defaults to 3 times
13
- # => html_content_type_only: if an exception should be raised if request content-type is not text/html. Defaults to false
14
- # => allow_redirections: when true, follow HTTP redirects. Defaults to true
15
- # => document: the html of the url as a string
16
- # => warn_level: what to do when encountering exceptions. Can be :warn, :raise or nil
17
- # => headers: object containing custom headers for the request
10
+ # * connection_timeout: defaults to 20 seconds
11
+ # * read_timeout: defaults to 20 seconds
12
+ # * retries: defaults to 3 times
13
+ # * html_content_type_only: if an exception should be raised if request
14
+ # content-type is not text/html. Defaults to false.
15
+ # * allow_redirections: when true, follow HTTP redirects. Defaults to true
16
+ # * document: the html of the url as a string
17
+ # * warn_level: what to do when encountering exceptions.
18
+ # Can be :warn, :raise or nil
19
+ # * headers: object containing custom headers for the request
18
20
  def initialize(initial_url, options = {})
19
21
  options = defaults.merge(options)
20
22
  @connection_timeout = options[:connection_timeout]
@@ -37,25 +39,28 @@ module MetaInspector
37
39
  end
38
40
 
39
41
  extend Forwardable
40
- def_delegators :@url, :url, :scheme, :host, :root_url
41
- def_delegators :@request, :content_type, :response
42
- def_delegators :@parser, :parsed, :respond_to?, :title, :description, :links,
43
- :images, :image, :feed, :charset, :meta_tags, :meta_tag, :meta, :favicon
42
+ delegate [:url, :scheme, :host, :root_url] => :@url
43
+
44
+ delegate [:content_type, :response] => :@request
45
+
46
+ delegate [:parsed, :title, :description, :links,
47
+ :images, :feed, :charset, :meta_tags,
48
+ :meta_tag, :meta, :favicon] => :@parser
44
49
 
45
50
  # Returns all document data as a nested Hash
46
51
  def to_hash
47
52
  {
48
- 'url' => url,
49
- 'title' => title,
50
- 'links' => links.to_hash,
51
- 'images' => images.to_a,
52
- 'charset' => charset,
53
- 'feed' => feed,
54
- 'content_type' => content_type,
55
- 'meta_tags' => meta_tags,
56
- 'favicon' => images.favicon,
57
- 'response' => { 'status' => response.status,
58
- 'headers' => response.headers }
53
+ 'url' => url,
54
+ 'title' => title,
55
+ 'links' => links.to_hash,
56
+ 'images' => images.to_a,
57
+ 'charset' => charset,
58
+ 'feed' => feed,
59
+ 'content_type' => content_type,
60
+ 'meta_tags' => meta_tags,
61
+ 'favicon' => images.favicon,
62
+ 'response' => { 'status' => response.status,
63
+ 'headers' => response.headers }
59
64
  }
60
65
  end
61
66
 
@@ -67,18 +72,21 @@ module MetaInspector
67
72
  private
68
73
 
69
74
  def defaults
70
- { :timeout => 20,
71
- :retries => 3,
72
- :html_content_only => false,
73
- :warn_level => :raise,
74
- :headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"},
75
- :allow_redirections => true
76
- }
75
+ { :timeout => 20,
76
+ :retries => 3,
77
+ :html_content_only => false,
78
+ :warn_level => :raise,
79
+ :headers => { 'User-Agent' => default_user_agent },
80
+ :allow_redirections => true }
81
+ end
82
+
83
+ def default_user_agent
84
+ "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"
77
85
  end
78
86
 
79
87
  def document
80
- @document ||= if html_content_only && content_type != "text/html"
81
- raise "The url provided contains #{content_type} content instead of text/html content" and nil
88
+ @document ||= if html_content_only && content_type != 'text/html'
89
+ fail "The url provided contains #{content_type} content instead of text/html content"
82
90
  else
83
91
  @request.read
84
92
  end
@@ -12,7 +12,7 @@ module MetaInspector
12
12
  def <<(exception)
13
13
  case warn_level
14
14
  when :raise
15
- raise exception
15
+ fail exception
16
16
  when :warn
17
17
  warn exception
18
18
  when :store
@@ -24,7 +24,7 @@ module MetaInspector
24
24
  if warn_level == :store
25
25
  exceptions.empty?
26
26
  else
27
- warn "ExceptionLog#ok? should only be used when warn_level is :store"
27
+ warn 'ExceptionLog#ok? should only be used when warn_level is :store'
28
28
  end
29
29
  end
30
30
  end
@@ -4,6 +4,6 @@ module MetaInspector
4
4
  #
5
5
  module Exceptionable
6
6
  extend Forwardable
7
- def_delegators :@exception_log, :exceptions, :ok?
7
+ delegate [:exceptions, :ok?] => :@exception_log
8
8
  end
9
9
  end
@@ -20,11 +20,11 @@ module MetaInspector
20
20
  end
21
21
 
22
22
  extend Forwardable
23
- def_delegators :@document, :url, :scheme, :host
24
- def_delegators :@meta_tag_parser, :meta_tags, :meta_tag, :meta, :charset
25
- def_delegators :@links_parser, :links, :feed, :base_url
26
- def_delegators :@images_parser, :images
27
- def_delegators :@texts_parser, :title, :description
23
+ delegate [:url, :scheme, :host] => :@document
24
+ delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
25
+ delegate [:links, :feed, :base_url] => :@links_parser
26
+ delegate :images => :@images_parser
27
+ delegate [:title, :description] => :@texts_parser
28
28
 
29
29
  # Returns the whole parsed document
30
30
  def parsed
@@ -23,7 +23,7 @@ module MetaInspector
23
23
 
24
24
  # Cleans up nokogiri search results
25
25
  def cleanup(results)
26
- results.map { |_| _.value.strip }.reject { |_| _.empty? }.uniq
26
+ results.map { |r| r.value.strip }.reject(&:empty?).uniq
27
27
  end
28
28
  end
29
29
  end
@@ -1,8 +1,8 @@
1
1
  module MetaInspector
2
2
  module Parsers
3
3
  class ImagesParser < Base
4
- def_delegators :@main_parser, :parsed, :meta, :base_url
5
- def_delegators :images_collection, :each, :length, :size, :last, :[]
4
+ delegate [:parsed, :meta, :base_url] => :@main_parser
5
+ delegate [:each, :length, :size, :[], :last] => :images_collection
6
6
 
7
7
  include Enumerable
8
8
 
@@ -11,7 +11,7 @@ module MetaInspector
11
11
  end
12
12
 
13
13
  # Returns the parsed image from Facebook's open graph property tags
14
- # Most all major websites now define this property and is usually very relevant
14
+ # Most major websites now define this property, and it is usually relevant
15
15
  # See doc at http://developers.facebook.com/docs/opengraph/
16
16
  # If none found, tries with Twitter image
17
17
  def best
@@ -30,11 +30,15 @@ module MetaInspector
30
30
  private
31
31
 
32
32
  def images_collection
33
- @images_collection ||= parsed_images.map{ |i| URL.absolutify(i, base_url) }
33
+ @images_collection ||= absolutified_images
34
+ end
35
+
36
+ def absolutified_images
37
+ parsed_images.map { |i| URL.absolutify(i, base_url) }
34
38
  end
35
39
 
36
40
  def parsed_images
37
- @parsed_images ||= cleanup(parsed.search('//img/@src'))
41
+ cleanup(parsed.search('//img/@src'))
38
42
  end
39
43
  end
40
44
  end
@@ -1,7 +1,7 @@
1
1
  module MetaInspector
2
2
  module Parsers
3
3
  class LinksParser < Base
4
- def_delegators :@main_parser, :parsed, :url, :scheme, :host
4
+ delegate [:parsed, :url, :scheme, :host] => :@main_parser
5
5
 
6
6
  def links
7
7
  self
@@ -9,37 +9,39 @@ module MetaInspector
9
9
 
10
10
  # Returns all links found, unprocessed
11
11
  def raw
12
- @raw ||= cleanup(parsed.search("//a/@href")).compact.uniq
12
+ @raw ||= cleanup(parsed.search('//a/@href')).compact.uniq
13
13
  end
14
14
 
15
15
  # Returns all links found, unrelativized and absolutified
16
16
  def all
17
- @all ||= raw.map { |l| URL.absolutify(URL.unrelativize(l, scheme), base_url) }
17
+ @all ||= raw.map { |link| URL.absolutify(URL.unrelativize(link, scheme), base_url) }
18
18
  .compact.uniq
19
19
  end
20
20
 
21
21
  # Returns all HTTP links found
22
22
  def http
23
- @http ||= all.select {|l| l =~ /^http(s)?:\/\//i}
23
+ @http ||= all.select { |link| link =~ /^http(s)?:\/\//i}
24
24
  end
25
25
 
26
26
  # Returns all non-HTTP links found
27
27
  def non_http
28
- @non_http ||= all.select {|l| l !~ /^http(s)?:\/\//i}
28
+ @non_http ||= all.select { |link| link !~ /^http(s)?:\/\//i}
29
29
  end
30
30
 
31
31
  # Returns all internal HTTP links found
32
32
  def internal
33
- @internal ||= http.select {|link| URL.new(link).host == host }
33
+ @internal ||= http.select { |link| URL.new(link).host == host }
34
34
  end
35
35
 
36
36
  # Returns all external HTTP links found
37
37
  def external
38
- @external ||= http.select {|link| URL.new(link).host != host }
38
+ @external ||= http.select { |link| URL.new(link).host != host }
39
39
  end
40
40
 
41
41
  def to_hash
42
- { 'internal' => internal, 'external' => external, 'non_http' => non_http }
42
+ { 'internal' => internal,
43
+ 'external' => external,
44
+ 'non_http' => non_http }
43
45
  end
44
46
 
45
47
  # Returns the parsed document meta rss link
@@ -47,7 +49,8 @@ module MetaInspector
47
49
  @feed ||= (parsed_feed('rss') || parsed_feed('atom'))
48
50
  end
49
51
 
50
- # Returns the base url to absolutify relative links. This can be the one set on a <base> tag,
52
+ # Returns the base url to absolutify relative links.
53
+ # This can be the one set on a <base> tag,
51
54
  # or the url of the document if no <base> tag was found.
52
55
  def base_url
53
56
  base_href || url
@@ -60,7 +63,7 @@ module MetaInspector
60
63
  feed ? URL.absolutify(feed.attributes['href'].value, base_url) : nil
61
64
  end
62
65
 
63
- # Returns the value of the href attribute on the <base /> tag, if it exists
66
+ # Returns the value of the href attribute on the <base /> tag, if it exists
64
67
  def base_href
65
68
  parsed.search('base').first.attributes['href'].value rescue nil
66
69
  end
@@ -1,7 +1,7 @@
1
1
  module MetaInspector
2
2
  module Parsers
3
3
  class MetaTagsParser < Base
4
- def_delegators :@main_parser, :parsed
4
+ delegate :parsed => :@main_parser
5
5
 
6
6
  def meta_tags
7
7
  {
@@ -20,10 +20,10 @@ module MetaInspector
20
20
  meta_tag['name']
21
21
  .merge(meta_tag['http-equiv'])
22
22
  .merge(meta_tag['property'])
23
- .merge({'charset' => meta_tag['charset']})
23
+ .merge('charset' => meta_tag['charset'])
24
24
  end
25
25
 
26
- # Returns the charset from the meta tags, looking for it in the following order:
26
+ # Returns the charset from the meta tags, searching in this order:
27
27
  # <meta charset='utf-8' />
28
28
  # <meta http-equiv="Content-Type" content="text/html; charset=windows-1252" />
29
29
  def charset
@@ -33,12 +33,12 @@ module MetaInspector
33
33
  private
34
34
 
35
35
  def charset_from_meta_charset
36
- parsed.css("meta[charset]")[0].attributes['charset'].value rescue nil
36
+ parsed.css('meta[charset]')[0].attributes['charset'].value rescue nil
37
37
  end
38
38
 
39
39
  def charset_from_meta_content_type
40
40
  parsed.css("meta[http-equiv='Content-Type']")[0]
41
- .attributes['content'].value.split(";")[1].split("=")[1] rescue nil
41
+ .attributes['content'].value.split(';')[1].split('=')[1] rescue nil
42
42
  end
43
43
 
44
44
  def meta_tags_by(attribute)
@@ -58,12 +58,12 @@ module MetaInspector
58
58
  def convert_each_array_to_first_element_on(hash)
59
59
  hash.each_pair do |k, v|
60
60
  hash[k] = if v.is_a?(Hash)
61
- convert_each_array_to_first_element_on(v)
62
- elsif v.is_a?(Array)
63
- v.first
64
- else
65
- v
66
- end
61
+ convert_each_array_to_first_element_on(v)
62
+ elsif v.is_a?(Array)
63
+ v.first
64
+ else
65
+ v
66
+ end
67
67
  end
68
68
  end
69
69
 
@@ -1,7 +1,7 @@
1
1
  module MetaInspector
2
2
  module Parsers
3
3
  class TextsParser < Base
4
- def_delegators :@main_parser, :parsed, :meta
4
+ delegate [:parsed, :meta] => :@main_parser
5
5
 
6
6
  # Returns the parsed document title, from the content of the <title> tag
7
7
  # within the <head> section.
@@ -9,8 +9,9 @@ module MetaInspector
9
9
  @title ||= parsed.css('head title').inner_text rescue nil
10
10
  end
11
11
 
12
- # A description getter that first checks for a meta description and if not present will
13
- # guess by looking at the first paragraph with more than 120 characters
12
+ # A description getter that first checks for a meta description
13
+ # and if not present will guess by looking at the first paragraph
14
+ # with more than 120 characters
14
15
  def description
15
16
  meta['description'] || secondary_description
16
17
  end
@@ -18,25 +18,24 @@ module MetaInspector
18
18
  @exception_log = options[:exception_log]
19
19
  @headers = options[:headers]
20
20
 
21
- response # as soon as it is set up, we make the request so we can fail early
21
+ response # request early so we can fail early
22
22
  end
23
23
 
24
24
  extend Forwardable
25
- def_delegators :@url, :url
25
+ delegate :url => :@url
26
26
 
27
27
  def read
28
28
  response.body if response
29
29
  end
30
30
 
31
31
  def content_type
32
- response.headers["content-type"].split(";")[0] if response
32
+ response.headers['content-type'].split(';')[0] if response
33
33
  end
34
34
 
35
35
  def response
36
- request_count ||= 0
37
- request_count += 1
38
36
  @response ||= fetch
39
- rescue Faraday::TimeoutError, Faraday::Error::ConnectionFailed, RuntimeError => e
37
+ rescue Faraday::TimeoutError, Faraday::Error::ConnectionFailed,
38
+ RuntimeError => e
40
39
  @exception_log << e
41
40
  nil
42
41
  end
@@ -28,20 +28,23 @@ module MetaInspector
28
28
  @url = normalized(with_default_scheme(new_url))
29
29
  end
30
30
 
31
- # Converts a protocol-relative url to its full form, depending on the scheme of the page that contains it
31
+ # Converts a protocol-relative url to its full form,
32
+ # depending on the scheme of the page that contains it
32
33
  def self.unrelativize(url, scheme)
33
34
  url =~ /^\/\// ? "#{scheme}://#{url[2..-1]}" : url
34
35
  end
35
36
 
36
- # Convert a relative url like "/users" to an absolute one like "http://example.com/users"
37
- # Respecting already absolute URLs like the ones starting with http:, ftp:, telnet:, mailto:, javascript: ...
37
+ # Converts a relative URL to an absolute URL, like:
38
+ # "/faq" => "http://example.com/faq"
39
+ # Respecting already absolute URLs like the ones starting with
40
+ # http:, ftp:, telnet:, mailto:, javascript: ...
38
41
  def self.absolutify(url, base_url)
39
42
  if url =~ /^\w*\:/i
40
43
  MetaInspector::URL.new(url).url
41
44
  else
42
45
  Addressable::URI.join(base_url, url).normalize.to_s
43
46
  end
44
- rescue Addressable::URI::InvalidURIError => e
47
+ rescue Addressable::URI::InvalidURIError
45
48
  nil
46
49
  end
47
50
 
@@ -52,7 +55,8 @@ module MetaInspector
52
55
  parsed(url) && parsed(url).scheme.nil? ? 'http://' + url : url
53
56
  end
54
57
 
55
- # Normalize url to deal with characters that should be encodes, add trailing slash, convert to downcase...
58
+ # Normalize url to deal with characters that should be encoded,
59
+ # add trailing slash, convert to downcase...
56
60
  def normalized(url)
57
61
  Addressable::URI.parse(url).normalize.to_s
58
62
  end
@@ -1,3 +1,3 @@
1
1
  module MetaInspector
2
- VERSION = "4.0.0.rc3"
2
+ VERSION = "4.0.0"
3
3
  end
@@ -28,4 +28,5 @@ Gem::Specification.new do |gem|
28
28
  gem.add_development_dependency 'pry'
29
29
  gem.add_development_dependency 'guard'
30
30
  gem.add_development_dependency 'guard-rspec'
31
+ gem.add_development_dependency 'rubocop'
31
32
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0.rc3
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-20 00:00:00.000000000 Z
11
+ date: 2014-11-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -192,6 +192,20 @@ dependencies:
192
192
  - - ">="
193
193
  - !ruby/object:Gem::Version
194
194
  version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: rubocop
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
195
209
  description: MetaInspector lets you scrape a web page and get its title, charset,
196
210
  link and meta tags
197
211
  email:
@@ -202,6 +216,7 @@ extra_rdoc_files: []
202
216
  files:
203
217
  - ".gitignore"
204
218
  - ".rspec.example"
219
+ - ".rubocop.yml.example"
205
220
  - ".travis.yml"
206
221
  - Gemfile
207
222
  - Guardfile
@@ -286,9 +301,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
286
301
  version: '0'
287
302
  required_rubygems_version: !ruby/object:Gem::Requirement
288
303
  requirements:
289
- - - ">"
304
+ - - ">="
290
305
  - !ruby/object:Gem::Version
291
- version: 1.3.1
306
+ version: '0'
292
307
  requirements: []
293
308
  rubyforge_project:
294
309
  rubygems_version: 2.2.2