premailer 1.7.3 → 1.7.8

Sign up to get free protection for your applications and to get access to all the features.
@@ -44,6 +44,10 @@ opts = OptionParser.new do |opts|
44
44
  options[:remove_classes] = v
45
45
  end
46
46
 
47
+ opts.on("-j", "--remove-scripts", "Remove <script> elements") do |v|
48
+ options[:remove_classes] = v
49
+ end
50
+
47
51
  opts.on("-l", "--line-length N", Integer, "Line length for plaintext (default: #{options[:line_length].to_s})") do |v|
48
52
  options[:line_length] = v
49
53
  end
@@ -13,21 +13,37 @@ module HtmlToPlainText
13
13
  # decode HTML entities
14
14
  he = HTMLEntities.new
15
15
  txt = he.decode(txt)
16
-
17
- # replace image by their alt attribute
18
- txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*\/>/i, '\1')
19
16
 
20
- # replace image by their alt attribute
21
- txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*\/>/i, '\1')
22
- txt.gsub!(/<img.+?alt='([^\']*)\'[^>]*\/>/i, '\1')
17
+ # replace images with their alt attributes
18
+ # for img tags with "" for attribute quotes
19
+ # with or without closing tag
20
+ # eg. the following formats:
21
+ # <img alt="" />
22
+ # <img alt="">
23
+ txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*\>/i, '\1')
24
+
25
+ # for img tags with '' for attribute quotes
26
+ # with or without closing tag
27
+ # eg. the following formats:
28
+ # <img alt='' />
29
+ # <img alt=''>
30
+ txt.gsub!(/<img.+?alt=\'([^\']*)\'[^>]*\>/i, '\1')
23
31
 
24
32
  # links
25
- txt.gsub!(/<a.+?href=\"([^\"]*)\"[^>]*>(.+?)<\/a>/i) do |s|
26
- $2.strip + ' ( ' + $1.strip + ' )'
33
+ txt.gsub!(/<a.+?href=\"(mailto:)?([^\"]*)\"[^>]*>((.|\s)*?)<\/a>/i) do |s|
34
+ if $3.empty?
35
+ ''
36
+ else
37
+ $3.strip + ' ( ' + $2.strip + ' )'
38
+ end
27
39
  end
28
40
 
29
- txt.gsub!(/<a.+?href='([^\']*)\'[^>]*>(.+?)<\/a>/i) do |s|
30
- $2.strip + ' ( ' + $1.strip + ' )'
41
+ txt.gsub!(/<a.+?href='(mailto:)?([^\']*)\'[^>]*>((.|\s)*?)<\/a>/i) do |s|
42
+ if $3.empty?
43
+ ''
44
+ else
45
+ $3.strip + ' ( ' + $2.strip + ' )'
46
+ end
31
47
  end
32
48
 
33
49
 
@@ -73,7 +89,7 @@ module HtmlToPlainText
73
89
  txt.gsub!(/<\/?[^>]*>/, '')
74
90
 
75
91
  txt = word_wrap(txt, line_length)
76
-
92
+
77
93
  # remove linefeeds (\r\n and \r -> \n)
78
94
  txt.gsub!(/\r\n?/, "\n")
79
95
 
@@ -87,7 +103,7 @@ module HtmlToPlainText
87
103
 
88
104
  # no more than two consecutive spaces
89
105
  txt.gsub!(/ {2,}/, " ")
90
-
106
+
91
107
  # the word messes up the parens
92
108
  txt.gsub!(/\([ \n](http[^)]+)[\n ]\)/) do |s|
93
109
  "( " + $1 + " )"
@@ -1,47 +1,88 @@
1
- # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-10
2
- #
3
1
  # Premailer processes HTML and CSS to improve e-mail deliverability.
4
2
  #
5
3
  # Premailer's main function is to render all CSS as inline <tt>style</tt>
6
4
  # attributes. It also converts relative links to absolute links and checks
7
5
  # the 'safety' of CSS properties against a CSS support chart.
8
6
  #
9
- # = Example
10
- # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
7
+ # ## Example of use
8
+ #
9
+ # ```ruby
10
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
11
+ #
12
+ # # Write the HTML output
13
+ # fout = File.open("output.html", "w")
14
+ # fout.puts premailer.to_inline_css
15
+ # fout.close
11
16
  #
12
- # # Write the HTML output
13
- # fout = File.open("output.html", "w")
14
- # fout.puts premailer.to_inline_css
15
- # fout.close
17
+ # # Write the plain-text output
18
+ # fout = File.open("output.txt", "w")
19
+ # fout.puts premailer.to_plain_text
20
+ # fout.close
16
21
  #
17
- # # Write the plain-text output
18
- # fout = File.open("ouput.txt", "w")
19
- # fout.puts premailer.to_plain_text
20
- # fout.close
22
+ # # List any CSS warnings
23
+ # puts premailer.warnings.length.to_s + ' warnings found'
24
+ # premailer.warnings.each do |w|
25
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
26
+ # end
21
27
  #
22
- # # List any CSS warnings
23
- # puts premailer.warnings.length.to_s + ' warnings found'
24
- # premailer.warnings.each do |w|
25
- # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
26
- # end
28
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
29
+ # puts premailer.to_inline_css
30
+ # ```
27
31
  #
28
- # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
29
- # puts premailer.to_inline_css
32
+ require 'premailer/version'
33
+
30
34
  class Premailer
31
35
  include HtmlToPlainText
32
36
  include CssParser
33
37
 
34
- VERSION = '1.7.3'
35
-
36
38
  CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
37
39
 
40
+ # Unmergable selectors regexp.
38
41
  RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i
42
+ # Reset selectors regexp.
39
43
  RE_RESET_SELECTORS = /^(\:\#outlook|body.*|\.ReadMsgBody|\.ExternalClass|img|\#backgroundTable)$/
40
44
 
45
+ # list of HTMLEntities to fix
46
+ # source: http://stackoverflow.com/questions/2812781/how-to-convert-webpage-apostrophe-8217-to-ascii-39-in-ruby-1-
47
+ HTML_ENTITIES = {
48
+ "1.8" => {
49
+ "\342\200\231" => "'",
50
+ "\342\200\246" => "...",
51
+ "\342\200\176" => "'",
52
+ "\342\200\177" => "'",
53
+ "\342\200\230" => "'",
54
+ "\342\200\231" => "'",
55
+ "\342\200\232" => ',',
56
+ "\342\200\233" => "'",
57
+ "\342\200\234" => '"',
58
+ "\342\200\235" => '"',
59
+ "\342\200\041" => '-',
60
+ "\342\200\174" => '-',
61
+ "\342\200\220" => '-',
62
+ "\342\200\223" => '-',
63
+ "\342\200\224" => '--',
64
+ "\342\200\225" => '--',
65
+ "\342\200\042" => '--'
66
+ },
67
+ "1.9" => {
68
+ "&#8217;" => "'",
69
+ "&#8230;" => "...",
70
+ "&#8216;" => "'",
71
+ "&#8218;" => ',',
72
+ "&#8219;" => "'",
73
+ "&#8220;" => '"',
74
+ "&#8221;" => '"',
75
+ "&#8208;" => '-',
76
+ "&#8211;" => '-',
77
+ "&#8212;" => '--',
78
+ "&#8213;" => '--'
79
+ }
80
+ }
81
+
41
82
  # list of CSS attributes that can be rendered as HTML attributes
42
83
  #
43
- # TODO: too much repetition
44
- # TODO: background=""
84
+ # @todo too much repetition
85
+ # @todo background=""
45
86
  RELATED_ATTRIBUTES = {
46
87
  'h1' => {'text-align' => 'align'},
47
88
  'h2' => {'text-align' => 'align'},
@@ -55,6 +96,7 @@ class Premailer
55
96
  'body' => {'background-color' => 'bgcolor'},
56
97
  'table' => {
57
98
  'background-color' => 'bgcolor',
99
+ 'background-image' => 'background',
58
100
  '-premailer-width' => 'width',
59
101
  '-premailer-height' => 'height',
60
102
  '-premailer-cellpadding' => 'cellpadding',
@@ -77,8 +119,7 @@ class Premailer
77
119
  'background-color' => 'bgcolor',
78
120
  'vertical-align' => 'valign',
79
121
  '-premailer-width' => 'width',
80
- '-premailer-height' => 'height',
81
- '-premailer-colspan' => 'colspan'
122
+ '-premailer-height' => 'height'
82
123
  },
83
124
  'img' => {'float' => 'align'}
84
125
  }
@@ -90,6 +131,7 @@ class Premailer
90
131
  attr_reader :base_url
91
132
 
92
133
  # base directory used to resolve links for local files
134
+ # @return [String] base directory
93
135
  attr_reader :base_dir
94
136
 
95
137
  # unmergeable CSS rules to be preserved in the head (CssParser)
@@ -101,38 +143,49 @@ class Premailer
101
143
  # source HTML document (Hpricot/Nokogiri)
102
144
  attr_reader :doc
103
145
 
146
+ # Warning levels
104
147
  module Warnings
148
+ # No warnings
105
149
  NONE = 0
150
+ # Safe
106
151
  SAFE = 1
152
+ # Poor
107
153
  POOR = 2
154
+ # Risky
108
155
  RISKY = 3
109
156
  end
110
157
  include Warnings
111
158
 
159
+ # Warning level names
112
160
  WARN_LABEL = %w(NONE SAFE POOR RISKY)
113
161
 
114
162
  # Create a new Premailer object.
115
163
  #
116
- # +html+ is the HTML data to process. It can be either an IO object, the URL of a
117
- # remote file, a local path or a raw HTML string. If passing an HTML string you
118
- # must set the +:with_html_string+ option to +true+.
164
+ # @param html is the HTML data to process. It can be either an IO object, the URL of a
165
+ # remote file, a local path or a raw HTML string. If passing an HTML string you
166
+ # must set the with_html_string option to true.
119
167
  #
120
- # ==== Options
121
- # [+line_length+] Line length used by to_plain_text. Boolean, default is 65.
122
- # [+warn_level+] What level of CSS compatibility warnings to show (see Warnings).
123
- # [+link_query_string+] A string to append to every <tt>a href=""</tt> link. Do not include the initial <tt>?</tt>.
124
- # [+base_url+] Used to calculate absolute URLs for local files.
125
- # [+css+] Manually specify CSS stylesheets.
126
- # [+css_to_attributes+] Copy related CSS attributes into HTML attributes (e.g. +background-color+ to +bgcolor+)
127
- # [+css_string+] Pass CSS as a string
128
- # [+remove_ids+] Remove ID attributes whenever possible and convert IDs used as anchors to hashed to avoid collisions in webmail programs. Default is +false+.
129
- # [+remove_classes+] Remove class attributes. Default is +false+.
130
- # [+remove_comments+] Remove html comments. Default is +false+.
131
- # [+preserve_styles+] Whether to preserve any <tt>link rel=stylesheet</tt> and <tt>style</tt> elements. Default is +false+.
132
- # [+preserve_reset+] Whether to preserve styles associated with the MailChimp reset code
133
- # [+with_html_string+] Whether the +html+ param should be treated as a raw string.
134
- # [+verbose+] Whether to print errors and warnings to <tt>$stderr</tt>. Default is +false+.
135
- # [+adapter+] Which HTML parser to use, either <tt>:nokogiri</tt> or <tt>:hpricot</tt>. Default is <tt>:hpricot</tt>.
168
+ # @param [Hash] options the options to handle html with.
169
+ # @option options [Integer] :line_length Line length used by to_plain_text. Default is 65.
170
+ # @option options [Integer] :warn_level What level of CSS compatibility warnings to show (see {Premailer::Warnings}).
171
+ # @option options [String] :link_query_string A string to append to every <tt>a href=""</tt> link. Do not include the initial <tt>?</tt>.
172
+ # @option options [String] :base_url Used to calculate absolute URLs for local files.
173
+ # @option options [Array(String)] :css Manually specify CSS stylesheets.
174
+ # @option options [Boolean] :css_to_attributes Copy related CSS attributes into HTML attributes (e.g. background-color to bgcolor)
175
+ # @option options [String] :css_string Pass CSS as a string
176
+ # @option options [Boolean] :remove_ids Remove ID attributes whenever possible and convert IDs used as anchors to hashes to avoid collisions in webmail programs. Default is false.
177
+ # @option options [Boolean] :remove_classes Remove class attributes. Default is false.
178
+ # @option options [Boolean] :remove_comments Remove html comments. Default is false.
179
+ # @option options [Boolean] :remove_scripts Remove <tt>script</tt> elements. Default is true.
180
+ # @option options [Boolean] :preserve_styles Whether to preserve any <tt>link rel=stylesheet</tt> and <tt>style</tt> elements. Default is false.
181
+ # @option options [Boolean] :preserve_reset Whether to preserve styles associated with the MailChimp reset code.
182
+ # @option options [Boolean] :with_html_string Whether the html param should be treated as a raw string.
183
+ # @option options [Boolean] :verbose Whether to print errors and warnings to <tt>$stderr</tt>. Default is false.
184
+ # @option options [Boolean] :include_link_tags Whether to include css from <tt>link rel=stylesheet</tt> tags. Default is true.
185
+ # @option options [Boolean] :include_style_tags Whether to include css from <tt>style</tt> tags. Default is true.
186
+ # @option options [String] :input_encoding Manually specify the source document's encoding. This is a good idea.
187
+ # @option options [Boolean] :replace_html_entities Convert HTML entities to actual characters. Default is false.
188
+ # @option options [Symbol] :adapter Which HTML parser to use, either <tt>:nokogiri</tt> or <tt>:hpricot</tt>. Default is <tt>:hpricot</tt>.
136
189
  def initialize(html, options = {})
137
190
  @options = {:warn_level => Warnings::SAFE,
138
191
  :line_length => 65,
@@ -141,6 +194,7 @@ class Premailer
141
194
  :remove_classes => false,
142
195
  :remove_ids => false,
143
196
  :remove_comments => false,
197
+ :remove_scripts => true,
144
198
  :css => [],
145
199
  :css_to_attributes => true,
146
200
  :with_html_string => false,
@@ -150,7 +204,12 @@ class Premailer
150
204
  :verbose => false,
151
205
  :debug => false,
152
206
  :io_exceptions => false,
153
- :adapter => Adapter.use}.merge(options)
207
+ :include_link_tags => true,
208
+ :include_style_tags => true,
209
+ :input_encoding => 'ASCII-8BIT',
210
+ :replace_html_entities => false,
211
+ :adapter => Adapter.use,
212
+ }.merge(options)
154
213
 
155
214
  @html_file = html
156
215
  @is_local_file = @options[:with_html_string] || Premailer.local_data?(html)
@@ -190,7 +249,8 @@ class Premailer
190
249
  load_css_from_html!
191
250
  end
192
251
 
193
- # Array containing a hash of CSS warnings.
252
+ # CSS warnings.
253
+ # @return [Array(Hash)] Array of warnings.
194
254
  def warnings
195
255
  return [] if @options[:warn_level] == Warnings::NONE
196
256
  @css_warnings = check_client_support if @css_warnings.empty?
@@ -200,6 +260,7 @@ class Premailer
200
260
  protected
201
261
  def load_css_from_local_file!(path)
202
262
  css_block = ''
263
+ path.gsub!(/\Afile:/, '')
203
264
  begin
204
265
  File.open(path, "r") do |file|
205
266
  while line = file.gets
@@ -215,6 +276,7 @@ protected
215
276
  @css_parser.add_block!(css_string, {:base_uri => @base_url, :base_dir => @base_dir, :only_media_types => [:screen, :handheld]})
216
277
  end
217
278
 
279
+ # @private
218
280
  def load_css_from_options! # :nodoc:
219
281
  load_css_from_string(@options[:css_string]) if @options[:css_string]
220
282
 
@@ -229,21 +291,26 @@ protected
229
291
 
230
292
  # Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
231
293
  def load_css_from_html! # :nodoc:
232
- if tags = @doc.search("link[@rel='stylesheet'], style")
294
+ if (@options[:adapter] == :nokogiri)
295
+ tags = @doc.search("link[@rel='stylesheet']", "//style[not(contains(@data-premailer,'ignore'))]")
296
+ else
297
+ tags = @doc.search("link[@rel='stylesheet'], style:not([@data-premailer='ignore'])")
298
+ end
299
+ if tags
233
300
  tags.each do |tag|
234
- if tag.to_s.strip =~ /^\<link/i && tag.attributes['href'] && media_type_ok?(tag.attributes['media'])
301
+ if tag.to_s.strip =~ /^\<link/i && tag.attributes['href'] && media_type_ok?(tag.attributes['media']) && @options[:include_link_tags]
235
302
  # A user might want to <link /> to a local css file that is also mirrored on the site
236
303
  # but the local one is different (e.g. newer) than the live file, premailer will now choose the local file
237
-
304
+
238
305
  if tag.attributes['href'].to_s.include? @base_url.to_s and @html_file.kind_of?(String)
239
306
  link_uri = File.join(File.dirname(@html_file), tag.attributes['href'].to_s.sub!(@base_url.to_s, ''))
240
307
  end
241
-
308
+
242
309
  # if the file does not exist locally, try to grab the remote reference
243
310
  if link_uri.nil? or not File.exists?(link_uri)
244
311
  link_uri = Premailer.resolve_link(tag.attributes['href'].to_s, @html_file)
245
312
  end
246
-
313
+
247
314
  if Premailer.local_data?(link_uri)
248
315
  $stderr.puts "Loading css from local file: " + link_uri if @options[:verbose]
249
316
  load_css_from_local_file!(link_uri)
@@ -252,7 +319,7 @@ protected
252
319
  @css_parser.load_uri!(link_uri, {:only_media_types => [:screen, :handheld]})
253
320
  end
254
321
 
255
- elsif tag.to_s.strip =~ /^\<style/i
322
+ elsif tag.to_s.strip =~ /^\<style/i && @options[:include_style_tags]
256
323
  @css_parser.add_block!(tag.inner_html, :base_uri => @base_url, :base_dir => @base_dir, :only_media_types => [:screen, :handheld])
257
324
  end
258
325
  end
@@ -264,6 +331,8 @@ protected
264
331
 
265
332
  # here be deprecated methods
266
333
  public
334
+ # @private
335
+ # @deprecated
267
336
  def local_uri?(uri) # :nodoc:
268
337
  warn "[DEPRECATION] `local_uri?` is deprecated. Please use `Premailer.local_data?` instead."
269
338
  Premailer.local_data?(uri)
@@ -271,11 +340,11 @@ public
271
340
 
272
341
  # here be instance methods
273
342
 
274
- def media_type_ok?(media_types) # :nodoc:
343
+ # @private
344
+ def media_type_ok?(media_types)
345
+ media_types = media_types.to_s
275
346
  return true if media_types.nil? or media_types.empty?
276
347
  media_types.split(/[\s]+|,/).any? { |media_type| media_type.strip =~ /screen|handheld|all/i }
277
- rescue
278
- true
279
348
  end
280
349
 
281
350
  def append_query_string(doc, qs)
@@ -295,7 +364,7 @@ public
295
364
  doc.search('a').each do|el|
296
365
  href = el.attributes['href'].to_s.strip
297
366
  next if href.nil? or href.empty?
298
-
367
+
299
368
  next if href[0,1] =~ /[\#\{\[\<\%]/ # don't bother with anchors or special-looking links
300
369
 
301
370
  begin
@@ -327,7 +396,7 @@ public
327
396
  doc
328
397
  end
329
398
 
330
- # Check for an XHTML doctype
399
+ # Check for an XHTML doctype
331
400
  def is_xhtml?
332
401
  intro = @doc.to_html.strip.split("\n")[0..2].join(' ')
333
402
  is_xhtml = !!(intro =~ /w3c\/\/[\s]*dtd[\s]+xhtml/i)
@@ -335,7 +404,7 @@ public
335
404
  is_xhtml
336
405
  end
337
406
 
338
- # Convert relative links to absolute links.
407
+ # Convert relative links to absolute links.
339
408
  #
340
409
  # Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
341
410
  # as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
@@ -356,7 +425,7 @@ public
356
425
  tags.each do |tag|
357
426
  # skip links that look like they have merge tags
358
427
  # and mailto, ftp, etc...
359
- if tag.attributes[attribute].to_s =~ /^([\%\<\{\#\[]|data:|tel:|file:|sms:|callto:|facetime:|mailto:|ftp:|gopher:)/i
428
+ if tag.attributes[attribute].to_s =~ /^([\%\<\{\#\[]|data:|tel:|file:|sms:|callto:|facetime:|mailto:|ftp:|gopher:|cid:)/i
360
429
  next
361
430
  end
362
431
 
@@ -387,20 +456,22 @@ public
387
456
  end
388
457
 
389
458
 
459
+ # @private
390
460
  def self.escape_string(str) # :nodoc:
391
461
  str.gsub(/"/ , "'")
392
462
  end
393
463
 
464
+ # @private
394
465
  def self.resolve_link(path, base_path) # :nodoc:
395
466
  path.strip!
396
467
  resolved = nil
397
- if path =~ /(http[s]?|ftp):\/\//i
468
+ if path =~ /\A(?:(https?|ftp|file):)\/\//i
398
469
  resolved = path
399
470
  Premailer.canonicalize(resolved)
400
471
  elsif base_path.kind_of?(URI)
401
472
  resolved = base_path.merge(path)
402
473
  Premailer.canonicalize(resolved)
403
- elsif base_path.kind_of?(String) and base_path =~ /^(http[s]?|ftp):\/\//i
474
+ elsif base_path.kind_of?(String) and base_path =~ /\A(?:(?:https?|ftp|file):)\/\//i
404
475
  resolved = URI.parse(base_path)
405
476
  resolved = resolved.merge(path)
406
477
  Premailer.canonicalize(resolved)
@@ -413,8 +484,9 @@ public
413
484
  #
414
485
  # IO objects return true, as do strings that look like URLs.
415
486
  def self.local_data?(data)
416
- return true if data.is_a?(IO) || data.is_a?(StringIO)
417
- return false if data =~ /^(http|https|ftp)\:\/\//i
487
+ return true if data.is_a?(IO) || data.is_a?(StringIO)
488
+ return true if data =~ /\Afile:\/\//i
489
+ return false if data =~ /\A(?:(https?|ftp):)\/\//i
418
490
  true
419
491
  end
420
492