premailer 1.5.2 → 1.5.3

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,8 @@
1
1
  = Premailer CHANGELOG
2
2
 
3
+ == Version 1.5.3
4
+ * improved plaintext conversion
5
+
3
6
  == Version 1.5.2
4
7
  * released to GitHub
5
8
  * fixed handling of mailto links
@@ -7,9 +7,9 @@ huge pain and a simple newsletter becomes unmanageable very quickly. This
7
7
  script is my solution.
8
8
 
9
9
  * CSS styles are converted to inline style attributes
10
- Checks style and link[rel=stylesheet] tags and preserves existing inline attributes
10
+ Checks <tt>style</tt> and <tt>link[rel=stylesheet]</tt> tags and preserves existing inline attributes
11
11
  * Relative paths are converted to absolute paths
12
- Checks links in href, src and CSS url('')
12
+ Checks links in <tt>href</tt>, <tt>src</tt> and CSS <tt>url('')</tt>
13
13
  * CSS properties are checked against e-mail client capabilities
14
14
  Based on the Email Standards Project's guides
15
15
  * A plain text version is created
@@ -59,7 +59,7 @@ A few areas that are particularly in need of love:
59
59
 
60
60
  Premailer is written in Ruby.
61
61
 
62
- The web interface can be found at http://premailer.dialect.ca/ .
62
+ The web interface can be found at http://premailer.dialect.ca .
63
63
 
64
64
  The source code can be found at http://github.com/alexdunae/premailer .
65
65
 
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'premailer'
@@ -8,51 +8,67 @@ module HtmlToPlainText
8
8
  #
9
9
  # TODO:
10
10
  # - add support for DL, OL
11
- def convert_to_text(html, line_length, from_charset = 'UTF-8')
11
+ def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
12
12
  r = Text::Reform.new(:trim => true,
13
13
  :squeeze => false,
14
14
  :break => Text::Reform.break_wrap)
15
15
 
16
16
  txt = html
17
17
 
18
- he = HTMLEntities.new # decode HTML entities
19
-
18
+ # decode HTML entities
19
+ he = HTMLEntities.new
20
20
  txt = he.decode(txt)
21
21
 
22
- txt.gsub!(/<h([0-9]+)[^>]*>(.*)<\/h[0-9]+>/i) do |s| # handle headings
22
+ # handle headings (H1-H6)
23
+ txt.gsub!(/[ \t]*<h([0-9]+)[^>]*>(.*)<\/h[0-9]+>/i) do |s|
23
24
  hlevel = $1.to_i
24
- htext = $2.gsub(/<\/?[^>]*>/i, '') # remove tags inside headings
25
+ # cleanup text inside of headings
26
+ htext = $2.gsub(/<\/?[^>]*>/i, '').strip
25
27
  hlength = (htext.length > line_length ?
26
28
  line_length :
27
29
  htext.length)
28
30
 
29
31
  case hlevel
30
- when 1 # H1
32
+ when 1 # H1, asterisks above and below
31
33
  ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength) + "\n"
32
- when 2 # H2
34
+ when 2 # H2, dashes above and below
33
35
  ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength) + "\n"
34
- else # H3-H6 are styled the same
36
+ else # H3-H6, dashes below
35
37
  htext + "\n" + ('-' * htext.length) + "\n"
36
38
  end
37
39
  end
38
40
 
39
- txt.gsub!(/<a.*href=\"([^\"]*)\"[^>]*>(.*)<\/a>/i) do |s| # links
40
- $2 + ' [' + $1 + ']'
41
+ # links
42
+ txt.gsub!(/<a.*href=\"([^\"]*)\"[^>]*>(.*)<\/a>/i) do |s|
43
+ $2.strip + ' ( ' + $1.strip + ' )'
41
44
  end
42
45
 
43
- txt.gsub!(/(<li[\s]+[^>]*>|<li>)/i, ' * ') # unordered LIsts
44
- txt.gsub!(/<\/p>/i, "\n\n") # paragraphs
46
+ # lists -- TODO: should handle ordered lists
47
+ txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
48
+ # list not followed by a newline
49
+ txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")
50
+
51
+ # paragraphs and line breaks
52
+ txt.gsub!(/<\/p>/i, "\n\n")
53
+ txt.gsub!(/<br[\/ ]*>/i, "\n")
54
+
55
+ # strip remaining tags
56
+ txt.gsub!(/<\/?[^>]*>/, '')
57
+
58
+ # wrap text
59
+ txt = r.format(('[' * line_length), txt)
45
60
 
46
- txt.gsub!(/<\/?[^>]*>/, '') # strip remaining tags
47
- txt.gsub!(/\A[\s]+|[\s]+\Z|^[ \t]+/m, '') # strip extra spaces
48
- txt.gsub!(/[\n]{3,}/m, "\n\n") # tighten line breaks
49
-
50
- txt = r.format(('[' * line_length), txt) # wrap text
51
- txt.gsub!(/^[\*][\s]/m, ' * ') # add spaces back to lists
52
-
53
- txt.gsub!(/^\s+$/, "\n") # \r\n and \r -> \n
54
- txt.gsub!(/\r\n?/, "\n") # \r\n and \r -> \n
55
- txt.gsub!(/[\n]{3,}/, "\n")
56
- txt
61
+ # normalize line endings (\r\n and \r -> \n)
62
+ txt.gsub!(/\r\n?/, "\n")
63
+
64
+ # strip extra spaces
65
+ txt.gsub!(/\302\240+/, " ") # non-breaking spaces -> spaces
66
+ txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
67
+ txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines
68
+
69
+ # no more than two consecutive newlines
70
+ txt.gsub!(/[\n]{3,}/, "\n\n")
71
+
72
+ txt.strip
57
73
  end
58
74
  end
@@ -33,7 +33,7 @@ class Premailer
33
33
  include HtmlToPlainText
34
34
  include CssParser
35
35
 
36
- VERSION = '1.5.2'
36
+ VERSION = '1.5.3'
37
37
 
38
38
  CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
39
39
 
@@ -42,7 +42,9 @@ class Premailer
42
42
  # should also exclude :first-letter, etc...
43
43
 
44
44
  # URI of the HTML file used
45
- attr_reader :html_file
45
+ attr_reader :html_file
46
+
47
+ attr_reader :processed_doc
46
48
 
47
49
  module Warnings
48
50
  NONE = 0
@@ -175,6 +177,8 @@ class Premailer
175
177
 
176
178
  doc.search('*').remove_class if @options[:remove_classes]
177
179
 
180
+ @processed_doc = doc
181
+
178
182
  doc.to_html
179
183
  end
180
184
 
@@ -265,7 +269,6 @@ protected
265
269
  next if tags.empty?
266
270
 
267
271
  tags.each do |tag|
268
-
269
272
  # skip links that look like they have merge tags
270
273
  # and mailto, ftp, etc...
271
274
  if tag.attributes[attribute] =~ /^(\{|\[|<|\#|mailto:|ftp:|gopher:)/i
@@ -329,15 +332,15 @@ protected
329
332
 
330
333
  # from http://www.ruby-forum.com/topic/140101
331
334
  def self.canonicalize(uri) # :nodoc:
332
- u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
333
- u.normalize!
334
- newpath = u.path
335
- while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
336
- $1 == '..' ? match : ''
337
- } do end
338
- newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
339
- u.path = newpath
340
- u.to_s
335
+ u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
336
+ u.normalize!
337
+ newpath = u.path
338
+ while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
339
+ $1 == '..' ? match : ''
340
+ } do end
341
+ newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
342
+ u.path = newpath
343
+ u.to_s
341
344
  end
342
345
 
343
346
  # Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
@@ -0,0 +1,42 @@
1
+ require 'rake'
2
+ require 'fileutils'
3
+ require 'lib/premailer'
4
+
5
+ desc 'Default: parse a URL.'
6
+ task :default => [:inline]
7
+
8
+ desc 'Parse a URL and write out the output.'
9
+ task :inline do
10
+ url = ENV['url']
11
+ output = ENV['output']
12
+
13
+ if !url or url.empty? or !output or output.empty?
14
+ puts 'Usage: rake inline url=http://example.com/ output=output.html'
15
+ exit
16
+ end
17
+
18
+ premailer = Premailer.new(url, :warn_level => Premailer::Warnings::SAFE)
19
+ fout = File.open(output, "w")
20
+ fout.puts premailer.to_inline_css
21
+ fout.close
22
+
23
+ puts "Succesfully parsed '#{url}' into '#{output}'"
24
+ puts premailer.warnings.length.to_s + ' CSS warnings were found'
25
+ end
26
+
27
+ task :text do
28
+ url = ENV['url']
29
+ output = ENV['output']
30
+
31
+ if !url or url.empty? or !output or output.empty?
32
+ puts 'Usage: rake text url=http://example.com/ output=output.txt'
33
+ exit
34
+ end
35
+
36
+ premailer = Premailer.new(url, :warn_level => Premailer::Warnings::SAFE)
37
+ fout = File.open(output, "w")
38
+ fout.puts premailer.to_plain_text
39
+ fout.close
40
+
41
+ puts "Succesfully parsed '#{url}' into '#{output}'"
42
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: premailer
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.2
4
+ version: 1.5.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Alex Dunae
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-11-27 00:00:00 -08:00
12
+ date: 2009-12-03 00:00:00 -08:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -51,12 +51,14 @@ extensions: []
51
51
  extra_rdoc_files: []
52
52
 
53
53
  files:
54
- - README.rdoc
55
- - CHANGELOG.rdoc
56
- - LICENSE.rdoc
54
+ - init.rb
55
+ - rakefile.rb
57
56
  - lib/premailer.rb
58
- - lib/premailer/premailer.rb
59
57
  - lib/premailer/html_to_plain_text.rb
58
+ - lib/premailer/premailer.rb
59
+ - CHANGELOG.rdoc
60
+ - LICENSE.rdoc
61
+ - README.rdoc
60
62
  - misc/client_support.yaml
61
63
  has_rdoc: true
62
64
  homepage: http://premailer.dialect.ca/