RubyGems - premailer - Versions diffs - 1.5.2 → 1.5.3 - Mend

premailer 1.5.2 → 1.5.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

data/CHANGELOG.rdoc +3 -0
data/README.rdoc +3 -3
data/init.rb +1 -0
data/lib/premailer/html_to_plain_text.rb +39 -23
data/lib/premailer/premailer.rb +15 -12
data/rakefile.rb +42 -0
metadata +8 -6

data/CHANGELOG.rdoc CHANGED

@@ -1,5 +1,8 @@
 = Premailer CHANGELOG
+== Version 1.5.3
+ * improved plaintext conversion
 == Version 1.5.2
  * released to GitHub
  * fixed handling of mailto links

data/README.rdoc CHANGED

@@ -7,9 +7,9 @@ huge pain and a simple newsletter becomes un-managable very quickly. This
 script is my solution.
 * CSS styles are converted to inline style attributes
-  Checks style and link[rel=stylesheet] tags and preserves existing inline attributes
+  Checks <tt>style</tt> and <tt>link[rel=stylesheet]</tt> tags and preserves existing inline attributes
 * Relative paths are converted to absolute paths
-  Checks links in href, src and CSS url('')
+  Checks links in <tt>href</tt>, <tt>src</tt> and CSS <tt>url('')</tt>
 * CSS properties are checked against e-mail client capabilities
   Based on the Email Standards Project's guides
 * A plain text version is created
@@ -59,7 +59,7 @@ A few areas that are particularly in need of love:
 Premailer is written in Ruby.
-The web interface can be found at http://premailer.dialect.ca/ .
+The web interface can be found at http://premailer.dialect.ca .
 The source code can be found at http://github.com/alexdunae/premailer .

data/init.rb ADDED

@@ -0,0 +1 @@
+require 'premailer'

data/lib/premailer/html_to_plain_text.rb CHANGED

@@ -8,51 +8,67 @@ module HtmlToPlainText
   #
   # TODO:
   #  - add support for DL, OL
-  def convert_to_text(html, line_length, from_charset = 'UTF-8')
+  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
     r = Text::Reform.new(:trim => true,
                          :squeeze => false,
                          :break => Text::Reform.break_wrap)
     txt = html
-    he = HTMLEntities.new                                 # decode HTML entities
+    # decode HTML entities
+    he = HTMLEntities.new
     txt = he.decode(txt)
-    txt.gsub!(/<h([0-9]+)[^>]*>(.*)<\/h[0-9]+>/i) do |s|  # handle headings
+    # handle headings (H1-H6)
+    txt.gsub!(/[ \t]*<h([0-9]+)[^>]*>(.*)<\/h[0-9]+>/i) do |s|
       hlevel = $1.to_i
-      htext = $2.gsub(/<\/?[^>]*>/i, '')                  # remove tags inside headings
+      # cleanup text inside of headings
+      htext = $2.gsub(/<\/?[^>]*>/i, '').strip
       hlength = (htext.length > line_length ?
                   line_length :
                   htext.length)
       case hlevel
-        when 1                                            # H1
+        when 1   # H1, asterisks above and below
           ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength) + "\n"
-        when 2                                            # H2
+        when 2   # H2, dashes above and below
           ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength) + "\n"
-        else                                              # H3-H6 are styled the same
+        else     # H3-H6, dashes below
           htext + "\n" + ('-' * htext.length) + "\n"
       end
     end
-    txt.gsub!(/<a.*href=\"([^\"]*)\"[^>]*>(.*)<\/a>/i) do |s|   # links
-      $2 + ' [' + $1 + ']'
+    # links
+    txt.gsub!(/<a.*href=\"([^\"]*)\"[^>]*>(.*)<\/a>/i) do |s|
+      $2.strip + ' ( ' + $1.strip + ' )'
     end
-    txt.gsub!(/(<li[\s]+[^>]*>|<li>)/i, '  * ')                     # unordered LIsts
-    txt.gsub!(/<\/p>/i, "\n\n")                           # paragraphs
+    # lists -- TODO: should handle ordered lists
+    txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
+    # list not followed by a newline
+    txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")
+    # paragraphs and line breaks
+    txt.gsub!(/<\/p>/i, "\n\n")
+    txt.gsub!(/<br[\/ ]*>/i, "\n")
+    # strip remaining tags
+    txt.gsub!(/<\/?[^>]*>/, '')
+    # wrap text
+    txt = r.format(('[' * line_length), txt)
-    txt.gsub!(/<\/?[^>]*>/, '')                           # strip remaining tags
-    txt.gsub!(/\A[\s]+|[\s]+\Z|^[ \t]+/m, '')             # strip extra spaces
-    txt.gsub!(/[\n]{3,}/m, "\n\n")                        # tighten line breaks
-    txt = r.format(('[' * line_length), txt)   # wrap text
-    txt.gsub!(/^[\*][\s]/m, '  * ')                        # add spaces back to lists
-    txt.gsub!(/^\s+$/, "\n")                    # \r\n and \r -> \n
-    txt.gsub!(/\r\n?/, "\n")                    # \r\n and \r -> \n
-    txt.gsub!(/[\n]{3,}/, "\n")
-    txt
+    # remove linefeeds (\r\n and \r -> \n)
+    txt.gsub!(/\r\n?/, "\n")
+    # strip extra spaces
+    txt.gsub!(/\302\240+/, " ") # non-breaking spaces -> spaces
+    txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
+    txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines
+    # no more than two consecutive newlines
+    txt.gsub!(/[\n]{3,}/, "\n\n")
+    txt.strip
   end
 end

data/lib/premailer/premailer.rb CHANGED

@@ -33,7 +33,7 @@ class Premailer
   include HtmlToPlainText
   include CssParser
-  VERSION = '1.5.2'
+  VERSION = '1.5.3'
   CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
@@ -42,7 +42,9 @@ class Premailer
   # should also exclude :first-letter, etc...
   # URI of the HTML file used
-  attr_reader   :html_file
+  attr_reader   :html_file
+  attr_reader   :processed_doc
   module Warnings
     NONE = 0
@@ -175,6 +177,8 @@ class Premailer
     doc.search('*').remove_class if @options[:remove_classes]
+    @processed_doc = doc
     doc.to_html
   end
@@ -265,7 +269,6 @@ protected
       next if tags.empty?
       tags.each do |tag|
         # skip links that look like they have merge tags
         # and mailto, ftp, etc...
         if tag.attributes[attribute] =~ /^(\{|\[|<|\#|mailto:|ftp:|gopher:)/i
@@ -329,15 +332,15 @@ protected
   # from http://www.ruby-forum.com/topic/140101
   def self.canonicalize(uri) # :nodoc:
-     u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
-     u.normalize!
-     newpath = u.path
-     while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
-                $1 == '..' ? match : ''
-              } do end
-     newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
-     u.path = newpath
-     u.to_s
+    u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
+    u.normalize!
+    newpath = u.path
+    while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
+      $1 == '..' ? match : ''
+    } do end
+    newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
+    u.path = newpath
+    u.to_s
   end
   # Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings

data/rakefile.rb ADDED

@@ -0,0 +1,42 @@
+require 'rake'
+require 'fileutils'
+require 'lib/premailer'
+desc 'Default: parse a URL.'
+task :default => [:inline]
+desc 'Parse a URL and write out the output.'
+task :inline do
+  url = ENV['url']
+  output = ENV['output']
+  if !url or url.empty? or !output or output.empty?
+    puts 'Usage: rake inline url=http://example.com/ output=output.html'
+    exit
+  end
+  premailer = Premailer.new(url, :warn_level => Premailer::Warnings::SAFE)
+  fout = File.open(output, "w")
+  fout.puts premailer.to_inline_css
+  fout.close
+  puts "Succesfully parsed '#{url}' into '#{output}'"
+  puts premailer.warnings.length.to_s + ' CSS warnings were found'
+end
+task :text do
+  url = ENV['url']
+  output = ENV['output']
+  if !url or url.empty? or !output or output.empty?
+    puts 'Usage: rake text url=http://example.com/ output=output.txt'
+    exit
+  end
+  premailer = Premailer.new(url, :warn_level => Premailer::Warnings::SAFE)
+  fout = File.open(output, "w")
+  fout.puts premailer.to_plain_text
+  fout.close
+  puts "Succesfully parsed '#{url}' into '#{output}'"
+end

metadata CHANGED

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: premailer
 version: !ruby/object:Gem::Version
-  version: 1.5.2
+  version: 1.5.3
 platform: ruby
 authors:
 - Alex Dunae
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2009-11-27 00:00:00 -08:00
+date: 2009-12-03 00:00:00 -08:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -51,12 +51,14 @@ extensions: []
 extra_rdoc_files: []
 files:
-- README.rdoc
-- CHANGELOG.rdoc
-- LICENSE.rdoc
+- init.rb
+- rakefile.rb
 - lib/premailer.rb
-- lib/premailer/premailer.rb
 - lib/premailer/html_to_plain_text.rb
+- lib/premailer/premailer.rb
+- CHANGELOG.rdoc
+- LICENSE.rdoc
+- README.rdoc
 - misc/client_support.yaml
 has_rdoc: true
 homepage: http://premailer.dialect.ca/