premailer 1.7.3 → 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -44,6 +44,10 @@ opts = OptionParser.new do |opts|
44
44
  options[:remove_classes] = v
45
45
  end
46
46
 
47
+ opts.on("-j", "--remove-scripts", "Remove <script> elements") do |v|
48
+ options[:remove_classes] = v
49
+ end
50
+
47
51
  opts.on("-l", "--line-length N", Integer, "Line length for plaintext (default: #{options[:line_length].to_s})") do |v|
48
52
  options[:line_length] = v
49
53
  end
@@ -13,21 +13,37 @@ module HtmlToPlainText
13
13
  # decode HTML entities
14
14
  he = HTMLEntities.new
15
15
  txt = he.decode(txt)
16
-
17
- # replace image by their alt attribute
18
- txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*\/>/i, '\1')
19
16
 
20
- # replace image by their alt attribute
21
- txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*\/>/i, '\1')
22
- txt.gsub!(/<img.+?alt='([^\']*)\'[^>]*\/>/i, '\1')
17
+ # replace images with their alt attributes
18
+ # for img tags with "" for attribute quotes
19
+ # with or without closing tag
20
+ # eg. the following formats:
21
+ # <img alt="" />
22
+ # <img alt="">
23
+ txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*\>/i, '\1')
24
+
25
+ # for img tags with '' for attribute quotes
26
+ # with or without closing tag
27
+ # eg. the following formats:
28
+ # <img alt='' />
29
+ # <img alt=''>
30
+ txt.gsub!(/<img.+?alt=\'([^\']*)\'[^>]*\>/i, '\1')
23
31
 
24
32
  # links
25
- txt.gsub!(/<a.+?href=\"([^\"]*)\"[^>]*>(.+?)<\/a>/i) do |s|
26
- $2.strip + ' ( ' + $1.strip + ' )'
33
+ txt.gsub!(/<a.+?href=\"(mailto:)?([^\"]*)\"[^>]*>((.|\s)*?)<\/a>/i) do |s|
34
+ if $3.empty?
35
+ ''
36
+ else
37
+ $3.strip + ' ( ' + $2.strip + ' )'
38
+ end
27
39
  end
28
40
 
29
- txt.gsub!(/<a.+?href='([^\']*)\'[^>]*>(.+?)<\/a>/i) do |s|
30
- $2.strip + ' ( ' + $1.strip + ' )'
41
+ txt.gsub!(/<a.+?href='(mailto:)?([^\']*)\'[^>]*>((.|\s)*?)<\/a>/i) do |s|
42
+ if $3.empty?
43
+ ''
44
+ else
45
+ $3.strip + ' ( ' + $2.strip + ' )'
46
+ end
31
47
  end
32
48
 
33
49
 
@@ -73,7 +89,7 @@ module HtmlToPlainText
73
89
  txt.gsub!(/<\/?[^>]*>/, '')
74
90
 
75
91
  txt = word_wrap(txt, line_length)
76
-
92
+
77
93
  # remove linefeeds (\r\n and \r -> \n)
78
94
  txt.gsub!(/\r\n?/, "\n")
79
95
 
@@ -87,7 +103,7 @@ module HtmlToPlainText
87
103
 
88
104
  # no more than two consecutive spaces
89
105
  txt.gsub!(/ {2,}/, " ")
90
-
106
+
91
107
  # the word messes up the parens
92
108
  txt.gsub!(/\([ \n](http[^)]+)[\n ]\)/) do |s|
93
109
  "( " + $1 + " )"
@@ -1,47 +1,88 @@
1
- # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-10
2
- #
3
1
  # Premailer processes HTML and CSS to improve e-mail deliverability.
4
2
  #
5
3
  # Premailer's main function is to render all CSS as inline <tt>style</tt>
6
4
  # attributes. It also converts relative links to absolute links and checks
7
5
  # the 'safety' of CSS properties against a CSS support chart.
8
6
  #
9
- # = Example
10
- # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
7
+ # ## Example of use
8
+ #
9
+ # ```ruby
10
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
11
+ #
12
+ # # Write the HTML output
13
+ # fout = File.open("output.html", "w")
14
+ # fout.puts premailer.to_inline_css
15
+ # fout.close
11
16
  #
12
- # # Write the HTML output
13
- # fout = File.open("output.html", "w")
14
- # fout.puts premailer.to_inline_css
15
- # fout.close
17
+ # # Write the plain-text output
18
+ # fout = File.open("output.txt", "w")
19
+ # fout.puts premailer.to_plain_text
20
+ # fout.close
16
21
  #
17
- # # Write the plain-text output
18
- # fout = File.open("ouput.txt", "w")
19
- # fout.puts premailer.to_plain_text
20
- # fout.close
22
+ # # List any CSS warnings
23
+ # puts premailer.warnings.length.to_s + ' warnings found'
24
+ # premailer.warnings.each do |w|
25
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
26
+ # end
21
27
  #
22
- # # List any CSS warnings
23
- # puts premailer.warnings.length.to_s + ' warnings found'
24
- # premailer.warnings.each do |w|
25
- # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
26
- # end
28
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
29
+ # puts premailer.to_inline_css
30
+ # ```
27
31
  #
28
- # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
29
- # puts premailer.to_inline_css
32
+ require 'premailer/version'
33
+
30
34
  class Premailer
31
35
  include HtmlToPlainText
32
36
  include CssParser
33
37
 
34
- VERSION = '1.7.3'
35
-
36
38
  CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
37
39
 
40
+ # Unmergable selectors regexp.
38
41
  RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i
42
+ # Reset selectors regexp.
39
43
  RE_RESET_SELECTORS = /^(\:\#outlook|body.*|\.ReadMsgBody|\.ExternalClass|img|\#backgroundTable)$/
40
44
 
45
+ # list of HTMLEntities to fix
46
+ # source: http://stackoverflow.com/questions/2812781/how-to-convert-webpage-apostrophe-8217-to-ascii-39-in-ruby-1-
47
+ HTML_ENTITIES = { # replacement tables: typographic punctuation => plain ASCII; outer keys "1.8"/"1.9" are presumably Ruby versions — confirm against the code that reads this constant
48
+ "1.8" => { # keys here are raw byte sequences written as octal escapes
49
+ "\342\200\231" => "'",
50
+ "\342\200\246" => "...",
51
+ "\342\200\176" => "'", # NOTE(review): third byte \176 is outside \200-\277, so this is not a valid UTF-8 sequence — verify intended character
52
+ "\342\200\177" => "'", # NOTE(review): third byte \177 is outside \200-\277, so this is not a valid UTF-8 sequence — verify intended character
53
+ "\342\200\230" => "'",
54
+ "\342\200\231" => "'", # NOTE(review): same key as the first entry of this hash (duplicate literal key)
55
+ "\342\200\232" => ',',
56
+ "\342\200\233" => "'",
57
+ "\342\200\234" => '"',
58
+ "\342\200\235" => '"',
59
+ "\342\200\041" => '-', # NOTE(review): third byte \041 is outside \200-\277, so this is not a valid UTF-8 sequence — verify intended character
60
+ "\342\200\174" => '-', # NOTE(review): third byte \174 is outside \200-\277, so this is not a valid UTF-8 sequence — verify intended character
61
+ "\342\200\220" => '-',
62
+ "\342\200\223" => '-',
63
+ "\342\200\224" => '--',
64
+ "\342\200\225" => '--',
65
+ "\342\200\042" => '--' # NOTE(review): third byte \042 is outside \200-\277, so this is not a valid UTF-8 sequence — verify intended character
66
+ },
67
+ "1.9" => { # keys here are numeric character references (text), not raw bytes
68
+ "&#8217;" => "'",
69
+ "&#8230;" => "...",
70
+ "&#8216;" => "'",
71
+ "&#8218;" => ',',
72
+ "&#8219;" => "'",
73
+ "&#8220;" => '"',
74
+ "&#8221;" => '"',
75
+ "&#8208;" => '-',
76
+ "&#8211;" => '-',
77
+ "&#8212;" => '--',
78
+ "&#8213;" => '--'
79
+ }
80
+ }
81
+
41
82
  # list of CSS attributes that can be rendered as HTML attributes
42
83
  #
43
- # TODO: too much repetition
44
- # TODO: background=""
84
+ # @todo too much repetition
85
+ # @todo background=""
45
86
  RELATED_ATTRIBUTES = {
46
87
  'h1' => {'text-align' => 'align'},
47
88
  'h2' => {'text-align' => 'align'},
@@ -55,6 +96,7 @@ class Premailer
55
96
  'body' => {'background-color' => 'bgcolor'},
56
97
  'table' => {
57
98
  'background-color' => 'bgcolor',
99
+ 'background-image' => 'background',
58
100
  '-premailer-width' => 'width',
59
101
  '-premailer-height' => 'height',
60
102
  '-premailer-cellpadding' => 'cellpadding',
@@ -77,8 +119,7 @@ class Premailer
77
119
  'background-color' => 'bgcolor',
78
120
  'vertical-align' => 'valign',
79
121
  '-premailer-width' => 'width',
80
- '-premailer-height' => 'height',
81
- '-premailer-colspan' => 'colspan'
122
+ '-premailer-height' => 'height'
82
123
  },
83
124
  'img' => {'float' => 'align'}
84
125
  }
@@ -90,6 +131,7 @@ class Premailer
90
131
  attr_reader :base_url
91
132
 
92
133
  # base directory used to resolve links for local files
134
+ # @return [String] base directory
93
135
  attr_reader :base_dir
94
136
 
95
137
  # unmergeable CSS rules to be preserved in the head (CssParser)
@@ -101,38 +143,49 @@ class Premailer
101
143
  # source HTML document (Hpricot/Nokogiri)
102
144
  attr_reader :doc
103
145
 
146
+ # Warning levels
104
147
  module Warnings
148
+ # No warnings
105
149
  NONE = 0
150
+ # Safe
106
151
  SAFE = 1
152
+ # Poor
107
153
  POOR = 2
154
+ # Risky
108
155
  RISKY = 3
109
156
  end
110
157
  include Warnings
111
158
 
159
+ # Warning level names
112
160
  WARN_LABEL = %w(NONE SAFE POOR RISKY)
113
161
 
114
162
  # Create a new Premailer object.
115
163
  #
116
- # +html+ is the HTML data to process. It can be either an IO object, the URL of a
117
- # remote file, a local path or a raw HTML string. If passing an HTML string you
118
- # must set the +:with_html_string+ option to +true+.
164
+ # @param html is the HTML data to process. It can be either an IO object, the URL of a
165
+ # remote file, a local path or a raw HTML string. If passing an HTML string you
166
+ # must set the with_html_string option to true.
119
167
  #
120
- # ==== Options
121
- # [+line_length+] Line length used by to_plain_text. Boolean, default is 65.
122
- # [+warn_level+] What level of CSS compatibility warnings to show (see Warnings).
123
- # [+link_query_string+] A string to append to every <tt>a href=""</tt> link. Do not include the initial <tt>?</tt>.
124
- # [+base_url+] Used to calculate absolute URLs for local files.
125
- # [+css+] Manually specify CSS stylesheets.
126
- # [+css_to_attributes+] Copy related CSS attributes into HTML attributes (e.g. +background-color+ to +bgcolor+)
127
- # [+css_string+] Pass CSS as a string
128
- # [+remove_ids+] Remove ID attributes whenever possible and convert IDs used as anchors to hashed to avoid collisions in webmail programs. Default is +false+.
129
- # [+remove_classes+] Remove class attributes. Default is +false+.
130
- # [+remove_comments+] Remove html comments. Default is +false+.
131
- # [+preserve_styles+] Whether to preserve any <tt>link rel=stylesheet</tt> and <tt>style</tt> elements. Default is +false+.
132
- # [+preserve_reset+] Whether to preserve styles associated with the MailChimp reset code
133
- # [+with_html_string+] Whether the +html+ param should be treated as a raw string.
134
- # [+verbose+] Whether to print errors and warnings to <tt>$stderr</tt>. Default is +false+.
135
- # [+adapter+] Which HTML parser to use, either <tt>:nokogiri</tt> or <tt>:hpricot</tt>. Default is <tt>:hpricot</tt>.
168
+ # @param [Hash] options the options to handle html with.
169
+ # @option options [Integer] :line_length Line length used by to_plain_text. Default is 65.
170
+ # @option options [Integer] :warn_level What level of CSS compatibility warnings to show (see {Premailer::Warnings}).
171
+ # @option options [String] :link_query_string A string to append to every <tt>a href=""</tt> link. Do not include the initial <tt>?</tt>.
172
+ # @option options [String] :base_url Used to calculate absolute URLs for local files.
173
+ # @option options [Array(String)] :css Manually specify CSS stylesheets.
174
+ # @option options [Boolean] :css_to_attributes Copy related CSS attributes into HTML attributes (e.g. background-color to bgcolor)
175
+ # @option options [String] :css_string Pass CSS as a string
176
+ # @option options [Boolean] :remove_ids Remove ID attributes whenever possible and convert IDs used as anchors to hashed values to avoid collisions in webmail programs. Default is false.
177
+ # @option options [Boolean] :remove_classes Remove class attributes. Default is false.
178
+ # @option options [Boolean] :remove_comments Remove html comments. Default is false.
179
+ # @option options [Boolean] :remove_scripts Remove <tt>script</tt> elements. Default is true.
180
+ # @option options [Boolean] :preserve_styles Whether to preserve any <tt>link rel=stylesheet</tt> and <tt>style</tt> elements. Default is false.
181
+ # @option options [Boolean] :preserve_reset Whether to preserve styles associated with the MailChimp reset code.
182
+ # @option options [Boolean] :with_html_string Whether the html param should be treated as a raw string.
183
+ # @option options [Boolean] :verbose Whether to print errors and warnings to <tt>$stderr</tt>. Default is false.
184
+ # @option options [Boolean] :include_link_tags Whether to include css from <tt>link rel=stylesheet</tt> tags. Default is true.
185
+ # @option options [Boolean] :include_style_tags Whether to include css from <tt>style</tt> tags. Default is true.
186
+ # @option options [String] :input_encoding Manually specify the source document's encoding. This is a good idea.
187
+ # @option options [Boolean] :replace_html_entities Convert HTML entities to actual characters. Default is false.
188
+ # @option options [Symbol] :adapter Which HTML parser to use, either <tt>:nokogiri</tt> or <tt>:hpricot</tt>. Default is <tt>:hpricot</tt>.
136
189
  def initialize(html, options = {})
137
190
  @options = {:warn_level => Warnings::SAFE,
138
191
  :line_length => 65,
@@ -141,6 +194,7 @@ class Premailer
141
194
  :remove_classes => false,
142
195
  :remove_ids => false,
143
196
  :remove_comments => false,
197
+ :remove_scripts => true,
144
198
  :css => [],
145
199
  :css_to_attributes => true,
146
200
  :with_html_string => false,
@@ -150,7 +204,12 @@ class Premailer
150
204
  :verbose => false,
151
205
  :debug => false,
152
206
  :io_exceptions => false,
153
- :adapter => Adapter.use}.merge(options)
207
+ :include_link_tags => true,
208
+ :include_style_tags => true,
209
+ :input_encoding => 'ASCII-8BIT',
210
+ :replace_html_entities => false,
211
+ :adapter => Adapter.use,
212
+ }.merge(options)
154
213
 
155
214
  @html_file = html
156
215
  @is_local_file = @options[:with_html_string] || Premailer.local_data?(html)
@@ -190,7 +249,8 @@ class Premailer
190
249
  load_css_from_html!
191
250
  end
192
251
 
193
- # Array containing a hash of CSS warnings.
252
+ # CSS warnings.
253
+ # @return [Array(Hash)] Array of warnings.
194
254
  def warnings
195
255
  return [] if @options[:warn_level] == Warnings::NONE
196
256
  @css_warnings = check_client_support if @css_warnings.empty?
@@ -200,6 +260,7 @@ class Premailer
200
260
  protected
201
261
  def load_css_from_local_file!(path)
202
262
  css_block = ''
263
+ path.gsub!(/\Afile:/, '')
203
264
  begin
204
265
  File.open(path, "r") do |file|
205
266
  while line = file.gets
@@ -215,6 +276,7 @@ protected
215
276
  @css_parser.add_block!(css_string, {:base_uri => @base_url, :base_dir => @base_dir, :only_media_types => [:screen, :handheld]})
216
277
  end
217
278
 
279
+ # @private
218
280
  def load_css_from_options! # :nodoc:
219
281
  load_css_from_string(@options[:css_string]) if @options[:css_string]
220
282
 
@@ -229,21 +291,26 @@ protected
229
291
 
230
292
  # Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
231
293
  def load_css_from_html! # :nodoc:
232
- if tags = @doc.search("link[@rel='stylesheet'], style")
294
+ if (@options[:adapter] == :nokogiri)
295
+ tags = @doc.search("link[@rel='stylesheet']", "//style[not(contains(@data-premailer,'ignore'))]")
296
+ else
297
+ tags = @doc.search("link[@rel='stylesheet'], style:not([@data-premailer='ignore'])")
298
+ end
299
+ if tags
233
300
  tags.each do |tag|
234
- if tag.to_s.strip =~ /^\<link/i && tag.attributes['href'] && media_type_ok?(tag.attributes['media'])
301
+ if tag.to_s.strip =~ /^\<link/i && tag.attributes['href'] && media_type_ok?(tag.attributes['media']) && @options[:include_link_tags]
235
302
  # A user might want to <link /> to a local css file that is also mirrored on the site
236
303
  # but the local one is different (e.g. newer) than the live file, premailer will now choose the local file
237
-
304
+
238
305
  if tag.attributes['href'].to_s.include? @base_url.to_s and @html_file.kind_of?(String)
239
306
  link_uri = File.join(File.dirname(@html_file), tag.attributes['href'].to_s.sub!(@base_url.to_s, ''))
240
307
  end
241
-
308
+
242
309
  # if the file does not exist locally, try to grab the remote reference
243
310
  if link_uri.nil? or not File.exists?(link_uri)
244
311
  link_uri = Premailer.resolve_link(tag.attributes['href'].to_s, @html_file)
245
312
  end
246
-
313
+
247
314
  if Premailer.local_data?(link_uri)
248
315
  $stderr.puts "Loading css from local file: " + link_uri if @options[:verbose]
249
316
  load_css_from_local_file!(link_uri)
@@ -252,7 +319,7 @@ protected
252
319
  @css_parser.load_uri!(link_uri, {:only_media_types => [:screen, :handheld]})
253
320
  end
254
321
 
255
- elsif tag.to_s.strip =~ /^\<style/i
322
+ elsif tag.to_s.strip =~ /^\<style/i && @options[:include_style_tags]
256
323
  @css_parser.add_block!(tag.inner_html, :base_uri => @base_url, :base_dir => @base_dir, :only_media_types => [:screen, :handheld])
257
324
  end
258
325
  end
@@ -264,6 +331,8 @@ protected
264
331
 
265
332
  # here be deprecated methods
266
333
  public
334
+ # @private
335
+ # @deprecated
267
336
  def local_uri?(uri) # :nodoc:
268
337
  warn "[DEPRECATION] `local_uri?` is deprecated. Please use `Premailer.local_data?` instead."
269
338
  Premailer.local_data?(uri)
@@ -271,11 +340,11 @@ public
271
340
 
272
341
  # here be instance methods
273
342
 
274
- def media_type_ok?(media_types) # :nodoc:
343
+ # @private
344
+ def media_type_ok?(media_types)
345
+ media_types = media_types.to_s
275
346
  return true if media_types.nil? or media_types.empty?
276
347
  media_types.split(/[\s]+|,/).any? { |media_type| media_type.strip =~ /screen|handheld|all/i }
277
- rescue
278
- true
279
348
  end
280
349
 
281
350
  def append_query_string(doc, qs)
@@ -295,7 +364,7 @@ public
295
364
  doc.search('a').each do|el|
296
365
  href = el.attributes['href'].to_s.strip
297
366
  next if href.nil? or href.empty?
298
-
367
+
299
368
  next if href[0,1] =~ /[\#\{\[\<\%]/ # don't bother with anchors or special-looking links
300
369
 
301
370
  begin
@@ -327,7 +396,7 @@ public
327
396
  doc
328
397
  end
329
398
 
330
- # Check for an XHTML doctype
399
+ # Check for an XHTML doctype
331
400
  def is_xhtml?
332
401
  intro = @doc.to_html.strip.split("\n")[0..2].join(' ')
333
402
  is_xhtml = !!(intro =~ /w3c\/\/[\s]*dtd[\s]+xhtml/i)
@@ -335,7 +404,7 @@ public
335
404
  is_xhtml
336
405
  end
337
406
 
338
- # Convert relative links to absolute links.
407
+ # Convert relative links to absolute links.
339
408
  #
340
409
  # Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
341
410
  # as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
@@ -356,7 +425,7 @@ public
356
425
  tags.each do |tag|
357
426
  # skip links that look like they have merge tags
358
427
  # and mailto, ftp, etc...
359
- if tag.attributes[attribute].to_s =~ /^([\%\<\{\#\[]|data:|tel:|file:|sms:|callto:|facetime:|mailto:|ftp:|gopher:)/i
428
+ if tag.attributes[attribute].to_s =~ /^([\%\<\{\#\[]|data:|tel:|file:|sms:|callto:|facetime:|mailto:|ftp:|gopher:|cid:)/i
360
429
  next
361
430
  end
362
431
 
@@ -387,20 +456,22 @@ public
387
456
  end
388
457
 
389
458
 
459
+ # @private
390
460
  def self.escape_string(str) # :nodoc:
391
461
  str.gsub(/"/ , "'")
392
462
  end
393
463
 
464
+ # @private
394
465
  def self.resolve_link(path, base_path) # :nodoc:
395
466
  path.strip!
396
467
  resolved = nil
397
- if path =~ /(http[s]?|ftp):\/\//i
468
+ if path =~ /\A(?:(https?|ftp|file):)\/\//i
398
469
  resolved = path
399
470
  Premailer.canonicalize(resolved)
400
471
  elsif base_path.kind_of?(URI)
401
472
  resolved = base_path.merge(path)
402
473
  Premailer.canonicalize(resolved)
403
- elsif base_path.kind_of?(String) and base_path =~ /^(http[s]?|ftp):\/\//i
474
+ elsif base_path.kind_of?(String) and base_path =~ /\A(?:(?:https?|ftp|file):)\/\//i
404
475
  resolved = URI.parse(base_path)
405
476
  resolved = resolved.merge(path)
406
477
  Premailer.canonicalize(resolved)
@@ -413,8 +484,9 @@ public
413
484
  #
414
485
  # IO objects and file:// URLs return true; strings that look like http, https or ftp URLs return false; anything else returns true.
415
486
  def self.local_data?(data)
416
- return true if data.is_a?(IO) || data.is_a?(StringIO)
417
- return false if data =~ /^(http|https|ftp)\:\/\//i
487
+ return true if data.is_a?(IO) || data.is_a?(StringIO)
488
+ return true if data =~ /\Afile:\/\//i
489
+ return false if data =~ /\A(?:(https?|ftp):)\/\//i
418
490
  true
419
491
  end
420
492