RubyGems - metainspector - Versions diffs - 4.0.0.rc3 → 4.0.0 - Mend

metainspector 4.0.0.rc3 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

checksums.yaml +4 -4
data/.gitignore +2 -0
data/.rubocop.yml.example +4 -0
data/lib/meta_inspector/document.rb +41 -33
data/lib/meta_inspector/exception_log.rb +2 -2
data/lib/meta_inspector/exceptionable.rb +1 -1
data/lib/meta_inspector/parser.rb +5 -5
data/lib/meta_inspector/parsers/base.rb +1 -1
data/lib/meta_inspector/parsers/images.rb +9 -5
data/lib/meta_inspector/parsers/links.rb +13 -10
data/lib/meta_inspector/parsers/meta_tags.rb +11 -11
data/lib/meta_inspector/parsers/texts.rb +4 -3
data/lib/meta_inspector/request.rb +5 -6
data/lib/meta_inspector/url.rb +9 -5
data/lib/meta_inspector/version.rb +1 -1
data/meta_inspector.gemspec +1 -0
metadata +19 -4

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: bf5c2667ff165768d1a0e0c49ebd47ea5f8de28e
-  data.tar.gz: 15b2f4fb7a2f090a75fe06ab98959e35d5f97a3f
+  metadata.gz: 4fbb85a1c08f497b3c38edbdc97e0c8d96ee6c6a
+  data.tar.gz: 9ce2c80b81b1eb085037312e75fb82d1e46f4202
 SHA512:
-  metadata.gz: eeb60786169e979dd8bb257832f2bf2c0270af8b2bf63056330826677a4943373aea51269a1ddfc397ae296cb786b5285997a1721b5ae412cc006214c872af18
-  data.tar.gz: ae891af393d3746df5048a1e512e70f11718fc8357a2c8212376119afb174e8b7e0ccd180f48c252813581a4ed5671b0f01e35ca555b475efc9997238c29c952
+  metadata.gz: e12a19a7598d3a9c7d83d90c121336964490dcd8b334f72d9ceb64ea8efab67c3b269445eb1ebf46eb5385169ea04a81ef155533dbe92779614eb3e0a10c50b3
+  data.tar.gz: 555a9b35ee7f51def2c45a24e46996cc130a65d15daebda9841c7be74fda8a2c76cb0097c53a67ad763b80272db52d84f8bdb7b99ecee124929a19b3c36a6338

data/.gitignore CHANGED Viewed

@@ -7,3 +7,5 @@
 Gemfile.lock
 pkg/*
 .idea/
+.rubocop_todo.yml
+.rubocop.yml

data/.rubocop.yml.example ADDED Viewed

@@ -0,0 +1,4 @@
+# Forcing the new ruby 1.9 syntax for hashes is not a requirement,
+# we still { :love => 'hashrockets' }
+Style/HashSyntax:
+  Enabled: false

data/lib/meta_inspector/document.rb CHANGED Viewed

@@ -5,16 +5,18 @@ module MetaInspector
     include MetaInspector::Exceptionable
-    # Initializes a new instance of MetaInspector::Document, setting the URL to the one given
+    # Initializes a new instance of MetaInspector::Document, setting the URL
     # Options:
-    # => connection_timeout: defaults to 20 seconds
-    # => read_timeout: defaults to 20 seconds
-    # => retries: defaults to 3 times
-    # => html_content_type_only: if an exception should be raised if request content-type is not text/html. Defaults to false
-    # => allow_redirections: when true, follow HTTP redirects. Defaults to true
-    # => document: the html of the url as a string
-    # => warn_level: what to do when encountering exceptions. Can be :warn, :raise or nil
-    # => headers: object containing custom headers for the request
+    # * connection_timeout: defaults to 20 seconds
+    # * read_timeout: defaults to 20 seconds
+    # * retries: defaults to 3 times
+    # * html_content_type_only: if an exception should be raised if request
+    #   content-type is not text/html. Defaults to false.
+    # * allow_redirections: when true, follow HTTP redirects. Defaults to true
+    # * document: the html of the url as a string
+    # * warn_level: what to do when encountering exceptions.
+    #   Can be :warn, :raise or nil
+    # * headers: object containing custom headers for the request
     def initialize(initial_url, options = {})
       options             = defaults.merge(options)
       @connection_timeout = options[:connection_timeout]
@@ -37,25 +39,28 @@ module MetaInspector
     end
     extend Forwardable
-    def_delegators :@url,     :url, :scheme, :host, :root_url
-    def_delegators :@request, :content_type, :response
-    def_delegators :@parser,  :parsed, :respond_to?, :title, :description, :links,
-                              :images, :image, :feed, :charset, :meta_tags, :meta_tag, :meta, :favicon
+    delegate [:url, :scheme, :host, :root_url]        => :@url
+    delegate [:content_type, :response]               => :@request
+    delegate [:parsed, :title, :description, :links,
+              :images, :feed, :charset, :meta_tags,
+              :meta_tag, :meta, :favicon]             => :@parser
     # Returns all document data as a nested Hash
     def to_hash
       {
-        'url' => url,
-        'title' => title,
-        'links' => links.to_hash,
-        'images' => images.to_a,
-        'charset' => charset,
-        'feed' => feed,
-        'content_type' => content_type,
-        'meta_tags' => meta_tags,
-        'favicon' => images.favicon,
-        'response' => { 'status'  => response.status,
-                        'headers' => response.headers }
+        'url'           => url,
+        'title'         => title,
+        'links'         => links.to_hash,
+        'images'        => images.to_a,
+        'charset'       => charset,
+        'feed'          => feed,
+        'content_type'  => content_type,
+        'meta_tags'     => meta_tags,
+        'favicon'       => images.favicon,
+        'response'      => { 'status'  => response.status,
+                             'headers' => response.headers }
       }
     end
@@ -67,18 +72,21 @@ module MetaInspector
     private
     def defaults
-      { :timeout => 20,
-        :retries => 3,
-        :html_content_only => false,
-        :warn_level => :raise,
-        :headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"},
-        :allow_redirections => true
-      }
+      { :timeout            => 20,
+        :retries            => 3,
+        :html_content_only  => false,
+        :warn_level         => :raise,
+        :headers            => { 'User-Agent' => default_user_agent },
+        :allow_redirections => true }
+    end
+    def default_user_agent
+      "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"
     end
     def document
-      @document ||= if html_content_only && content_type != "text/html"
-                      raise "The url provided contains #{content_type} content instead of text/html content" and nil
+      @document ||= if html_content_only && content_type != 'text/html'
+                      fail "The url provided contains #{content_type} content instead of text/html content"
                     else
                       @request.read
                     end

data/lib/meta_inspector/exception_log.rb CHANGED Viewed

@@ -12,7 +12,7 @@ module MetaInspector
     def <<(exception)
       case warn_level
       when :raise
-        raise exception
+        fail exception
       when :warn
         warn exception
       when :store
@@ -24,7 +24,7 @@ module MetaInspector
       if warn_level == :store
         exceptions.empty?
       else
-        warn "ExceptionLog#ok? should only be used when warn_level is :store"
+        warn 'ExceptionLog#ok? should only be used when warn_level is :store'
       end
     end
   end

data/lib/meta_inspector/exceptionable.rb CHANGED Viewed

@@ -4,6 +4,6 @@ module MetaInspector
   #
   module Exceptionable
     extend Forwardable
-    def_delegators :@exception_log, :exceptions, :ok?
+    delegate [:exceptions, :ok?] => :@exception_log
   end
 end

data/lib/meta_inspector/parser.rb CHANGED Viewed

@@ -20,11 +20,11 @@ module MetaInspector
     end
     extend Forwardable
-    def_delegators :@document,        :url, :scheme, :host
-    def_delegators :@meta_tag_parser, :meta_tags, :meta_tag, :meta, :charset
-    def_delegators :@links_parser,    :links, :feed, :base_url
-    def_delegators :@images_parser,   :images
-    def_delegators :@texts_parser,    :title, :description
+    delegate [:url, :scheme, :host]                   => :@document
+    delegate [:meta_tags, :meta_tag, :meta, :charset] => :@meta_tag_parser
+    delegate [:links, :feed, :base_url]               => :@links_parser
+    delegate :images                                  => :@images_parser
+    delegate [:title, :description]                   => :@texts_parser
     # Returns the whole parsed document
     def parsed

data/lib/meta_inspector/parsers/base.rb CHANGED Viewed

@@ -23,7 +23,7 @@ module MetaInspector
       # Cleans up nokogiri search results
       def cleanup(results)
-        results.map { |_| _.value.strip }.reject { |_| _.empty? }.uniq
+        results.map { |r| r.value.strip }.reject(&:empty?).uniq
       end
     end
   end

data/lib/meta_inspector/parsers/images.rb CHANGED Viewed

@@ -1,8 +1,8 @@
 module MetaInspector
   module Parsers
     class ImagesParser < Base
-      def_delegators :@main_parser, :parsed, :meta, :base_url
-      def_delegators :images_collection, :each, :length, :size, :last, :[]
+      delegate [:parsed, :meta, :base_url]         => :@main_parser
+      delegate [:each, :length, :size, :[], :last] => :images_collection
       include Enumerable
@@ -11,7 +11,7 @@ module MetaInspector
       end
       # Returns the parsed image from Facebook's open graph property tags
-      # Most all major websites now define this property and is usually very relevant
+      # Most major websites now define this property and is usually relevant
       # See doc at http://developers.facebook.com/docs/opengraph/
       # If none found, tries with Twitter image
       def best
@@ -30,11 +30,15 @@ module MetaInspector
       private
       def images_collection
-        @images_collection ||= parsed_images.map{ |i| URL.absolutify(i, base_url) }
+        @images_collection ||= absolutified_images
+      end
+      def absolutified_images
+        parsed_images.map { |i| URL.absolutify(i, base_url) }
       end
       def parsed_images
-        @parsed_images ||= cleanup(parsed.search('//img/@src'))
+        cleanup(parsed.search('//img/@src'))
       end
     end
   end

data/lib/meta_inspector/parsers/links.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module MetaInspector
   module Parsers
     class LinksParser < Base
-      def_delegators :@main_parser, :parsed, :url, :scheme, :host
+      delegate [:parsed, :url, :scheme, :host] => :@main_parser
       def links
         self
@@ -9,37 +9,39 @@ module MetaInspector
       # Returns all links found, unprocessed
       def raw
-        @raw ||= cleanup(parsed.search("//a/@href")).compact.uniq
+        @raw ||= cleanup(parsed.search('//a/@href')).compact.uniq
       end
       # Returns all links found, unrelavitized and absolutified
       def all
-        @all ||= raw.map { |l| URL.absolutify(URL.unrelativize(l, scheme), base_url) }
+        @all ||= raw.map { |link| URL.absolutify(URL.unrelativize(link, scheme), base_url) }
                     .compact.uniq
       end
       # Returns all HTTP links found
       def http
-        @http ||= all.select {|l| l =~ /^http(s)?:\/\//i}
+        @http ||= all.select { |link| link =~ /^http(s)?:\/\//i}
       end
       # Returns all non-HTTP links found
       def non_http
-        @non_http ||= all.select {|l| l !~ /^http(s)?:\/\//i}
+        @non_http ||= all.select { |link| link !~ /^http(s)?:\/\//i}
       end
       # Returns all internal HTTP links found
       def internal
-        @internal ||= http.select {|link| URL.new(link).host == host }
+        @internal ||= http.select { |link| URL.new(link).host == host }
       end
       # Returns all external HTTP links found
       def external
-        @external ||= http.select {|link| URL.new(link).host != host }
+        @external ||= http.select { |link| URL.new(link).host != host }
       end
       def to_hash
-        { 'internal' => internal, 'external' => external, 'non_http' => non_http }
+        { 'internal' => internal,
+          'external' => external,
+          'non_http' => non_http }
       end
       # Returns the parsed document meta rss link
@@ -47,7 +49,8 @@ module MetaInspector
         @feed ||= (parsed_feed('rss') || parsed_feed('atom'))
       end
-      # Returns the base url to absolutify relative links. This can be the one set on a <base> tag,
+      # Returns the base url to absolutify relative links.
+      # This can be the one set on a <base> tag,
       # or the url of the document if no <base> tag was found.
       def base_url
         base_href || url
@@ -60,7 +63,7 @@ module MetaInspector
         feed ? URL.absolutify(feed.attributes['href'].value, base_url) : nil
       end
-      # Returns the value of the href attribute on the <base /> tag, if it exists
+      # Returns the value of the href attribute on the <base /> tag, if exists
       def base_href
         parsed.search('base').first.attributes['href'].value rescue nil
       end

data/lib/meta_inspector/parsers/meta_tags.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module MetaInspector
   module Parsers
     class MetaTagsParser < Base
-      def_delegators :@main_parser, :parsed
+      delegate :parsed => :@main_parser
       def meta_tags
         {
@@ -20,10 +20,10 @@ module MetaInspector
         meta_tag['name']
           .merge(meta_tag['http-equiv'])
           .merge(meta_tag['property'])
-          .merge({'charset' => meta_tag['charset']})
+          .merge('charset' => meta_tag['charset'])
       end
-      # Returns the charset from the meta tags, looking for it in the following order:
+      # Returns the charset from the meta tags, searching in this order:
       # <meta charset='utf-8' />
       # <meta http-equiv="Content-Type" content="text/html; charset=windows-1252" />
       def charset
@@ -33,12 +33,12 @@ module MetaInspector
       private
       def charset_from_meta_charset
-        parsed.css("meta[charset]")[0].attributes['charset'].value rescue nil
+        parsed.css('meta[charset]')[0].attributes['charset'].value rescue nil
       end
       def charset_from_meta_content_type
         parsed.css("meta[http-equiv='Content-Type']")[0]
-          .attributes['content'].value.split(";")[1].split("=")[1] rescue nil
+          .attributes['content'].value.split(';')[1].split('=')[1] rescue nil
       end
       def meta_tags_by(attribute)
@@ -58,12 +58,12 @@ module MetaInspector
       def convert_each_array_to_first_element_on(hash)
         hash.each_pair do |k, v|
           hash[k] = if v.is_a?(Hash)
-            convert_each_array_to_first_element_on(v)
-          elsif v.is_a?(Array)
-            v.first
-          else
-            v
-          end
+                      convert_each_array_to_first_element_on(v)
+                    elsif v.is_a?(Array)
+                      v.first
+                    else
+                      v
+                    end
         end
       end

data/lib/meta_inspector/parsers/texts.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 module MetaInspector
   module Parsers
     class TextsParser < Base
-      def_delegators :@main_parser, :parsed, :meta
+      delegate [:parsed, :meta] => :@main_parser
       # Returns the parsed document title, from the content of the <title> tag
       # within the <head> section.
@@ -9,8 +9,9 @@ module MetaInspector
         @title ||= parsed.css('head title').inner_text rescue nil
       end
-      # A description getter that first checks for a meta description and if not present will
-      # guess by looking at the first paragraph with more than 120 characters
+      # A description getter that first checks for a meta description
+      # and if not present will guess by looking at the first paragraph
+      # with more than 120 characters
       def description
         meta['description'] || secondary_description
       end

data/lib/meta_inspector/request.rb CHANGED Viewed

@@ -18,25 +18,24 @@ module MetaInspector
       @exception_log      = options[:exception_log]
       @headers            = options[:headers]
-      response            # as soon as it is set up, we make the request so we can fail early
+      response            # request early so we can fail early
     end
     extend Forwardable
-    def_delegators :@url, :url
+    delegate :url => :@url
     def read
       response.body if response
     end
     def content_type
-      response.headers["content-type"].split(";")[0] if response
+      response.headers['content-type'].split(';')[0] if response
     end
     def response
-      request_count ||= 0
-      request_count += 1
       @response ||= fetch
-    rescue Faraday::TimeoutError, Faraday::Error::ConnectionFailed, RuntimeError => e
+    rescue Faraday::TimeoutError, Faraday::Error::ConnectionFailed,
+           RuntimeError => e
       @exception_log << e
       nil
     end

data/lib/meta_inspector/url.rb CHANGED Viewed

@@ -28,20 +28,23 @@ module MetaInspector
       @url = normalized(with_default_scheme(new_url))
     end
-    # Converts a protocol-relative url to its full form, depending on the scheme of the page that contains it
+    # Converts a protocol-relative url to its full form,
+    # depending on the scheme of the page that contains it
     def self.unrelativize(url, scheme)
       url =~ /^\/\// ? "#{scheme}://#{url[2..-1]}" : url
     end
-    # Convert a relative url like "/users" to an absolute one like "http://example.com/users"
-    # Respecting already absolute URLs like the ones starting with http:, ftp:, telnet:, mailto:, javascript: ...
+    # Converts a relative URL to an absolute URL, like:
+    #   "/faq" => "http://example.com/faq"
+    # Respecting already absolute URLs like the ones starting with
+    #   http:, ftp:, telnet:, mailto:, javascript: ...
     def self.absolutify(url, base_url)
       if url =~ /^\w*\:/i
         MetaInspector::URL.new(url).url
       else
         Addressable::URI.join(base_url, url).normalize.to_s
       end
-    rescue Addressable::URI::InvalidURIError => e
+    rescue Addressable::URI::InvalidURIError
       nil
     end
@@ -52,7 +55,8 @@ module MetaInspector
       parsed(url) && parsed(url).scheme.nil? ? 'http://' + url : url
     end
-    # Normalize url to deal with characters that should be encodes, add trailing slash, convert to downcase...
+    # Normalize url to deal with characters that should be encoded,
+    # add trailing slash, convert to downcase...
     def normalized(url)
       Addressable::URI.parse(url).normalize.to_s
     end

data/lib/meta_inspector/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module MetaInspector
-  VERSION = "4.0.0.rc3"
+  VERSION = "4.0.0"
 end

data/meta_inspector.gemspec CHANGED Viewed

@@ -28,4 +28,5 @@ Gem::Specification.new do |gem|
   gem.add_development_dependency 'pry'
   gem.add_development_dependency 'guard'
   gem.add_development_dependency 'guard-rspec'
+  gem.add_development_dependency 'rubocop'
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: metainspector
 version: !ruby/object:Gem::Version
-  version: 4.0.0.rc3
+  version: 4.0.0
 platform: ruby
 authors:
 - Jaime Iniesta
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-11-20 00:00:00.000000000 Z
+date: 2014-11-27 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: nokogiri
@@ -192,6 +192,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: rubocop
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        version: '0'
 description: MetaInspector lets you scrape a web page and get its title, charset,
   link and meta tags
 email:
@@ -202,6 +216,7 @@ extra_rdoc_files: []
 files:
 - ".gitignore"
 - ".rspec.example"
+- ".rubocop.yml.example"
 - ".travis.yml"
 - Gemfile
 - Guardfile
@@ -286,9 +301,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
-  - - ">"
+  - - ">="
     - !ruby/object:Gem::Version
-      version: 1.3.1
+      version: '0'
 requirements: []
 rubyforge_project:
 rubygems_version: 2.2.2