RubyGems - premailer - Versions diffs - 1.8.7 → 1.9.0 - Mend

premailer 1.8.7 → 1.9.0

Files changed (7) hide show

checksums.yaml +4 -4
data/README.md +6 -5
data/lib/premailer/adapter.rb +4 -0
data/lib/premailer/adapter/nokogiri_fast.rb +354 -0
data/lib/premailer/executor.rb +1 -1
data/lib/premailer/version.rb +1 -1
metadata +17 -10

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: ceccb9e67d075de22a76d50df38d99481a6811bf
-  data.tar.gz: 475cc5974d20a0b8b69bef467d10de3edff7d821
+  metadata.gz: 2e8e7d38035296170890e416aa7b33bb1aef052b
+  data.tar.gz: 4ee232c94b2585448385d2e8e8b8884380ada987
 SHA512:
-  metadata.gz: 2591ab12195ae73b9909b945131cdf06382917afde4e4270dc496b937eac87c98cea081c825209a0e4cfd40f8b2202f1953f980d8203cf90afcadec80b139c17
-  data.tar.gz: e34eda9604977da572ef0f712c710547e9e7d965370404a2b1a233a2dd04760a7461c70acf4b40008b72824fe2a38f4cb18bd36d49da794bd61bd41a09096721
+  metadata.gz: a6a5daedd7752a35cf677cbf62d58697809cc496f44372247f7a080395a13756e9a9a1e633c44ba44faeff55f83c118454cb146c7860108cd5d792dda0d82ab2
+  data.tar.gz: d4d53382db2412c1c8e4e022adc114828fd46e79c8fd6f8903089c34a2ffed0b8799252792f0fdf1eca3e0d7844bd6bffa8feccc8bf5b6e6294aec9b9ccda0c1

data/README.md CHANGED

@@ -36,16 +36,17 @@ require 'premailer'
 premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
-# Write the HTML output
-File.open("output.html", "w") do |fout|
-  fout.puts premailer.to_inline_css
-end
 # Write the plain-text output
+# This must come before to_inline_css (https://github.com/premailer/premailer/issues/201)
 File.open("output.txt", "w") do |fout|
   fout.puts premailer.to_plain_text
 end
+# Write the HTML output
+File.open("output.html", "w") do |fout|
+  fout.puts premailer.to_inline_css
+end
 # Output any CSS warnings
 premailer.warnings.each do |w|
   puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"

data/lib/premailer/adapter.rb CHANGED

@@ -4,17 +4,20 @@ class Premailer
   # Manages the adapter classes. Currently supports:
   #
   # * nokogiri
+  # * nokogiri_fast
   # * nokogumbo
   # * hpricot
   module Adapter
     autoload :Hpricot, 'premailer/adapter/hpricot'
     autoload :Nokogiri, 'premailer/adapter/nokogiri'
+    autoload :NokogiriFast, 'premailer/adapter/nokogiri_fast'
     autoload :Nokogumbo, 'premailer/adapter/nokogumbo'
     # adapter to required file mapping.
     REQUIREMENT_MAP = [
       ["nokogiri", :nokogiri],
+      ["nokogiri", :nokogiri_fast],
       ["nokogumbo", :nokogumbo],
       ["hpricot",  :hpricot],
     ]
@@ -32,6 +35,7 @@ class Premailer
     # @raise [RuntimeError] unless suitable adapter found.
     def self.default
       return :nokogiri if defined?(::Nokogiri)
+      return :nokogiri_fast if defined?(::NokogiriFast)
       return :nokogumbo if defined?(::Nokogumbo)
       return :hpricot  if defined?(::Hpricot)

data/lib/premailer/adapter/nokogiri_fast.rb ADDED

@@ -0,0 +1,354 @@
+require 'nokogiri'
+class Premailer
+  module Adapter
+    # NokogiriFast adapter
+    module NokogiriFast
+      # Merge CSS into the HTML document.
+      #
+      # @return [String] the HTML document with the CSS rules inlined.
+      def to_inline_css
+        doc = @processed_doc
+        @unmergable_rules = CssParser::Parser.new # collects the rules that are not inlined
+        # Give all styles already in style attributes a specificity of 1000
+        # per http://www.w3.org/TR/CSS21/cascade.html#specificity
+        doc.search("*[@style]").each do |el|
+          el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
+        end
+        # Create an index for nodes by tag name/id/class
+        # Also precompute the map of nodes to descendants
+        index, all_nodes, descendants = make_index(doc)
+        # Iterate through the rules and merge them into the HTML
+        @css_parser.each_selector(:all) do |selector, declaration, specificity, media_types|
+          # Drop any :link pseudo-class from the selector, keeping the whitespace that followed it
+          selector.gsub!(/:link([\s]*)+/i) { |m| $1 }
+          # Convert element names to lower case
+          selector.gsub!(/([\s]|^)([\w]+)/) { |m| $1.to_s + $2.to_s.downcase }
+          if Premailer.is_media_query?(media_types) || selector =~ Premailer::RE_UNMERGABLE_SELECTORS
+            @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration), media_types) unless @options[:preserve_styles] # save un-mergable rules separately
+          else
+            begin
+              if selector =~ Premailer::RE_RESET_SELECTORS
+                # this is in place to preserve the MailChimp CSS reset: http://github.com/mailchimp/Email-Blueprints/
+                # the rule is only kept when :preserve_reset is set, and it is still inlined below either way
+                @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration)) unless !@options[:preserve_reset]
+              end
+              # Try the new index based technique. If not supported, fall back to the old brute force one.
+              nodes = match_selector(index, all_nodes, descendants, selector) || doc.search(selector)
+              nodes.each do |el|
+                if el.elem? and (el.name != 'head' and el.parent.name != 'head')
+                  # Add a style attribute or append to the existing one
+                  block = "[SPEC=#{specificity}[#{declaration}]]"
+                  el['style'] = (el.attributes['style'].to_s ||= '') + ' ' + block # to_s never returns nil, so the ||= fallback is never taken
+                end
+              end
+            rescue ::Nokogiri::SyntaxError, RuntimeError, ArgumentError
+              $stderr.puts "CSS syntax error with selector: #{selector}" if @options[:verbose]
+              next
+            end
+          end
+        end
+        # Remove script tags
+        doc.search("script").remove if @options[:remove_scripts]
+        # Read STYLE attributes and perform folding
+        doc.search("*[@style]").each do |el|
+          style = el.attributes['style'].to_s
+          declarations = []
+          style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).each do |declaration| # each match is [specificity, declarations]
+            rs = CssParser::RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
+            declarations << rs
+          end
+          # Perform style folding
+          merged = CssParser.merge(declarations)
+          merged.expand_shorthand!
+          # Duplicate CSS attributes as HTML attributes
+          if Premailer::RELATED_ATTRIBUTES.has_key?(el.name) && @options[:css_to_attributes]
+            Premailer::RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
+              el[html_att] = merged[css_att].gsub(/url\(['|"](.*)['|"]\)/, '\1').gsub(/;$|\s*!important/, '').strip if el[html_att].nil? and not merged[css_att].empty?
+              merged.instance_variable_get("@declarations").tap do |declarations|
+                declarations.delete(css_att) # drop the CSS property from the merged rule set
+              end
+            end
+          end
+          # Collapse multiple rules into one as much as possible.
+          merged.create_shorthand! if @options[:create_shorthands]
+          # write the inline STYLE attribute
+          # split by ';' but ignore those in brackets
+          attributes = Premailer.escape_string(merged.declarations_to_s).split(/;(?![^(]*\))/).map(&:strip)
+          attributes = attributes.map { |attr| [attr.split(':').first, attr] }.sort_by { |pair| pair.first }.map { |pair| pair[1] } # order declarations by property name
+          el['style'] = attributes.join('; ') + ";"
+        end
+        doc = write_unmergable_css_rules(doc, @unmergable_rules)
+        if @options[:remove_classes] or @options[:remove_comments]
+          doc.traverse do |el|
+            if el.comment? and @options[:remove_comments]
+              el.remove
+            elsif el.element?
+              el.remove_attribute('class') if @options[:remove_classes]
+            end
+          end
+        end
+        if @options[:remove_ids]
+          # find all anchor's targets and hash them
+          targets = []
+          doc.search("a[@href^='#']").each do |el|
+            target = el.get_attribute('href')[1..-1] # strip the leading '#'
+            targets << target
+            el.set_attribute('href', "#" + Digest::MD5.hexdigest(target))
+          end
+          # hash ids that are links target, delete others
+          doc.search("*[@id]").each do |el|
+            id = el.get_attribute('id')
+            if targets.include?(id)
+              el.set_attribute('id', Digest::MD5.hexdigest(id))
+            else
+              el.remove_attribute('id')
+            end
+          end
+        end
+        if @options[:reset_contenteditable]
+          doc.search('*[@contenteditable]').each do |el|
+            el.remove_attribute('contenteditable')
+          end
+        end
+        @processed_doc = doc
+        if is_xhtml?
+          # we don't want to encode carriage returns
+          @processed_doc.to_xhtml(:encoding => @options[:output_encoding]).gsub(/&\#(xD|13);/i, "\r")
+        else
+          @processed_doc.to_html(:encoding => @options[:output_encoding])
+        end
+      end
+      # Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
+      # and write it into the <tt>body</tt>.
+      #
+      # <tt>doc</tt> is a Nokogiri document and <tt>unmergable_rules</tt> is the CssParser::Parser holding the un-mergable rules.
+      #
+      # @return [::Nokogiri::XML] a document.
+      def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
+        styles = unmergable_rules.to_s
+        unless styles.empty?
+          style_tag = "<style type=\"text/css\">\n#{styles}</style>"
+          unless (body = doc.search('body')).empty?
+            if doc.at_css('body').children && !doc.at_css('body').children.empty?
+              doc.at_css('body').children.before(::Nokogiri::XML.fragment(style_tag))
+            else
+              doc.at_css('body').add_child(::Nokogiri::XML.fragment(style_tag))
+            end
+          else
+            doc.inner_html = style_tag += doc.inner_html
+          end
+        end
+        doc
+      end
+      # Converts the HTML document to a format suitable for plain-text e-mail.
+      #
+      # If present, uses the <body> element as its base; otherwise uses the whole document.
+      #
+      # @return [String] the plain-text rendering of the document.
+      def to_plain_text
+        html_src = ''
+        begin
+          html_src = @doc.at("body").inner_html
+        rescue;
+        end
+        html_src = @doc.to_html unless html_src and not html_src.empty?
+        convert_to_text(html_src, @options[:line_length], @html_encoding)
+      end
+      # Gets the original HTML as a string.
+      # @return [String] HTML.
+      def to_s
+        if is_xhtml?
+          @doc.to_xhtml(:encoding => nil)
+        else
+          @doc.to_html(:encoding => nil)
+        end
+      end
+      # Load the HTML file and convert it into a Nokogiri document.
+      #
+      # @return [::Nokogiri::XML] a document.
+      def load_html(input) # :nodoc:
+        thing = nil
+        # TODO: duplicate options
+        if @options[:with_html_string] or @options[:inline] or input.respond_to?(:read)
+          thing = input
+        elsif @is_local_file
+          @base_dir = File.dirname(input)
+          thing = File.open(input, 'r')
+        else
+          thing = open(input)
+        end
+        if thing.respond_to?(:read)
+          thing = thing.read
+        end
+        return nil unless thing
+        doc = nil
+        # Handle HTML entities
+        if @options[:replace_html_entities] == true and thing.is_a?(String)
+          HTML_ENTITIES.map do |entity, replacement|
+            thing.gsub! entity, replacement
+          end
+        end
+        # Default encoding is ASCII-8BIT (binary) per http://groups.google.com/group/nokogiri-talk/msg/0b81ef0dc180dc74
+        # However, we really don't want to hardcode this. ASCII-8BIT should be the default, but not the only option.
+        if thing.is_a?(String) and RUBY_VERSION =~ /1.9/
+          thing = thing.force_encoding(@options[:input_encoding]).encode!
+          doc = ::Nokogiri::HTML5(thing)
+        else
+          default_encoding = RUBY_PLATFORM == 'java' ? nil : 'BINARY'
+          doc = ::Nokogiri::HTML5(thing)
+        end
+        # Fix for removing any CDATA tags from both style and script tags inserted per
+        # https://github.com/sparklemotion/nokogiri/issues/311 and
+        # https://github.com/premailer/premailer/issues/199
+        %w(style script).each do |tag|
+          doc.search(tag).children.each do |child|
+            child.swap(child.text()) if child.cdata?
+          end
+        end
+        doc
+      end
+      private
+      # For very large documents, it is useful to trade off some memory for performance.
+      # We can build an index of the nodes so we can quickly select by id/class/tagname
+      # instead of searching the tree again and again.
+      #
+      # @param page The Nokogiri HTML document to index.
+      # @return [index, set_of_all_nodes, descendants] The index is a hash from key to set of nodes.
+      #         The "descendants" is a hash mapping a node to the set of its descendant nodes.
+      def make_index(page)
+        index = {} # Contains a map of tag/class/id names to set of nodes.
+        all_nodes = [] # A plain array of all nodes in the doc. The superset.
+        descendants = {} # Maps node -> set of descendants
+        page.traverse do |node|
+          all_nodes.push(node)
+          if node != page then
+            index_ancestry(page, node, node.parent, descendants)
+          end
+          # Index the node by tag name. This is the least selective
+          # of the three index types empirically.
+          index[node.name] = (index[node.name] || Set.new).add(node)
+          # Index the node by all class attributes it possesses.
+          # Classes are modestly selective. Usually more than tag names
+          # but less selective than ids.
+          if node.has_attribute?("class") then
+            node.get_attribute("class").split(/\s+/).each do |c|
+              c = '.' + c
+              index[c] = (index[c] || Set.new).add(node)
+            end
+          end
+          # Index the node by its "id" attribute if it has one.
+          # This is usually the most selective of the three.
+          if node.has_attribute?("id") then
+            id = '#' + node.get_attribute("id")
+            index[id] = (index[id] || Set.new).add(node)
+          end
+        end
+        # If an index key isn't there, then we should treat it as an empty set.
+        # This makes the index total and we don't need to special case presence.
+        # Note that the default value will never be modified. So we don't need
+        # default_proc.
+        index.default = Set.new
+        descendants.default = Set.new
+        return index, Set.new(all_nodes), descendants
+      end
+      # @param doc The top level document
+      # @param elem The element whose ancestry is to be captured
+      # @param parent the current parent in the process of capturing. Should be set to elem.parent for starters.
+      # @param descendants The running hash map of node -> set of nodes that maps descendants of a node.
+      # @return The descendants argument after updating it.
+      def index_ancestry(doc, elem, parent, descendants)
+        if parent then
+          descendants[parent] = (descendants[parent] || Set.new).add(elem)
+          if doc != parent then
+            index_ancestry(doc, elem, parent.parent, descendants)
+          end
+        end
+        descendants
+      end
+      # @param index An index hash returned by make_index
+      # @param base The base set of nodes within which the given spec is to be matched.
+      # @param intersection_selector A CSS intersection selector string of the form
+      #             "hello.world" or "#blue.diamond". This should not contain spaces.
+      # @return Set of nodes matching the given spec that are present in the base set.
+      def narrow_down_nodes(index, base, intersection_selector)
+        intersection_selector.split(/(?=[.#])/).reduce(base) do |acc, sel|
+          acc = index[sel].intersection(acc)
+          acc
+        end
+      end
+      # @param index An index returned by make_index
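+      # @param all_nodes The set of all nodes in the DOM to search
+      # @param descendants The hash returned by make_index that maps a node to the set of its descendants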
+      # @param selector A simple CSS tree matching selector of the form "div.container p.item span"
+      # @return Set of matching nodes
+      #
+      # Note that fancy CSS selector syntax is not supported. Anything
+      # not matching the regex /^[-a-zA-Z0-9\s_.#]*$/ should not be passed.
+      # It will return nil when such a selector is passed, so you can take
+      # action on the falsity of the return value.
+      def match_selector(index, all_nodes, descendants, selector)
+        if /[^-a-zA-Z0-9_\s.#]/.match(selector) then
+          return nil
+        end
+        take_children = false
+        selector.split(/\s+/).reduce(all_nodes) do |base, spec|
+          desc = base
+          if take_children then
+            desc = Set.new
+            base.each do |n|
+              desc.merge(descendants[n])
+            end
+          else
+            take_children = true
+          end
+          narrow_down_nodes(index, desc, spec)
+        end
+      end
+    end
+  end
+end

data/lib/premailer/executor.rb CHANGED

@@ -45,7 +45,7 @@ opts = OptionParser.new do |opts|
   end
   opts.on("-j", "--remove-scripts", "Remove <script> elements") do |v|
-    options[:remove_classes] = v
+    options[:remove_scripts] = v
   end
   opts.on("-l", "--line-length N", Integer, "Line length for plaintext (default: #{options[:line_length].to_s})") do |v|

data/lib/premailer/version.rb CHANGED

@@ -1,4 +1,4 @@
 class Premailer
   # Premailer version.
-  VERSION = '1.8.7'.freeze
+  VERSION = '1.9.0'.freeze
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: premailer
 version: !ruby/object:Gem::Version
-  version: 1.8.7
+  version: 1.9.0
 platform: ruby
 authors:
 - Alex Dunae
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-06-28 00:00:00.000000000 Z
+date: 2017-01-14 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: css_parser
@@ -56,7 +56,7 @@ dependencies:
   name: rake
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">"
       - !ruby/object:Gem::Version
         version: '0.8'
     - - "!="
@@ -66,7 +66,7 @@ dependencies:
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">"
       - !ruby/object:Gem::Version
         version: '0.8'
     - - "!="
@@ -93,6 +93,9 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.4.4
+    - - "<="
+      - !ruby/object:Gem::Version
+        version: 1.6.8
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
@@ -100,20 +103,23 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: 1.4.4
+    - - "<="
+      - !ruby/object:Gem::Version
+        version: 1.6.8
 - !ruby/object:Gem::Dependency
   name: yard
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 0.8.7.6
+        version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: 0.8.7.6
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: redcarpet
   requirement: !ruby/object:Gem::Requirement
@@ -199,6 +205,7 @@ files:
 - lib/premailer/adapter.rb
 - lib/premailer/adapter/hpricot.rb
 - lib/premailer/adapter/nokogiri.rb
+- lib/premailer/adapter/nokogiri_fast.rb
 - lib/premailer/adapter/nokogumbo.rb
 - lib/premailer/executor.rb
 - lib/premailer/html_to_plain_text.rb
@@ -207,7 +214,8 @@ files:
 - misc/client_support.yaml
 homepage: http://premailer.dialect.ca/
 licenses: []
-metadata: {}
+metadata:
+  yard.run: yri
 post_install_message:
 rdoc_options: []
 require_paths:
@@ -229,4 +237,3 @@ signing_key:
 specification_version: 4
 summary: Preflight for HTML e-mail.
 test_files: []
-has_rdoc: true