nachof-premailer 1.5.5

Sign up to get free protection for your applications and to get access to all the features.
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'premailer'
@@ -0,0 +1,8 @@
1
+ # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-09
2
+ require 'yaml'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+ require 'css_parser'
6
+
7
+ require File.dirname(__FILE__) + "/premailer/html_to_plain_text"
8
+ require File.dirname(__FILE__) + "/premailer/premailer"
@@ -0,0 +1,81 @@
1
+ require 'text/reform'
2
+ require 'htmlentities'
3
+
4
# Support functions for Premailer
module HtmlToPlainText

  # Converts an HTML fragment to wrapped plain text.
  #
  # The conversion, in order: HTML entities are decoded; H1-H6 headings are
  # decorated with rows of asterisks or dashes; links become
  # <tt>text ( url )</tt>; list items become <tt>* </tt> bullets; paragraph
  # ends and line breaks become newlines; all remaining tags are stripped;
  # the text is wrapped by Text::Reform and whitespace is tidied.
  #
  # +html+ is the HTML source, +line_length+ is the wrap width (also the
  # maximum width of heading rules) and +from_charset+ is accepted for
  # backwards compatibility but is not used by this method.
  #
  # Returns the converted text with leading/trailing whitespace removed.
  #
  # TODO:
  #  - add support for DL, OL
  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
    r = Text::Reform.new(:trim => true,
                         :squeeze => false,
                         :break => Text::Reform.break_wrap)

    # decode HTML entities
    txt = HTMLEntities.new.decode(html)

    # handle headings (H1-H6)
    txt.gsub!(/(<\/h[1-6]>)/i, "\n\\1") # move closing tags to new lines
    txt.gsub!(/[\s]*<h([1-6]+)[^>]*>[\s]*(.*)[\s]*<\/h[1-6]+>/i) do
      # capture both groups before the nested gsub! calls overwrite $~
      hlevel = $1.to_i
      htext = $2

      htext.gsub!(/<br[\s]*\/?>/i, "\n") # handle <br>s
      htext.gsub!(/<\/?[^>]*>/i, '')      # strip tags

      # the rule is as wide as the longest heading line, capped at line_length
      # (each_line rather than String#each, which only exists on Ruby 1.8)
      hlength = 0
      htext.each_line { |l| hlength = [hlength, l.strip.length].max }
      hlength = [hlength, line_length].min

      case hlevel
      when 1 # H1, asterisks above and below
        htext = ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength)
      when 2 # H2, dashes above and below
        htext = ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength)
      else   # H3-H6, dashes below
        htext = htext + "\n" + ('-' * hlength)
      end

      "\n\n" + htext + "\n\n"
    end

    # links -- matched non-greedily so that each <a> on a line is converted
    # on its own, and only for real <a ...> tags with a double-quoted href
    txt.gsub!(/<a\s+(?:[^>]*?\s)?href="([^"]*)"[^>]*>(.*?)<\/a>/i) do
      $2.strip + ' ( ' + $1.strip + ' )'
    end

    # lists -- TODO: should handle ordered lists
    txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
    # list not followed by a newline
    txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

    # paragraphs and line breaks
    txt.gsub!(/<\/p>/i, "\n\n")
    txt.gsub!(/<br[\/ ]*>/i, "\n")

    # strip remaining tags
    txt.gsub!(/<\/?[^>]*>/, '')

    # wrap text
    txt = r.format(('[' * line_length), txt)

    # remove linefeeds (\r\n and \r -> \n)
    txt.gsub!(/\r\n?/, "\n")

    # strip extra spaces
    # runs of UTF-8 non-breaking spaces (U+00A0) -> a single space; the
    # pattern is built from the encoded character so the whole character,
    # not just its last byte, is repeated
    nbsp = [0xA0].pack('U')
    txt.gsub!(Regexp.new("(?:#{nbsp})+"), " ")
    txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
    txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

    # no more than two consecutive newlines
    txt.gsub!(/[\n]{3,}/, "\n\n")

    txt.strip
  end
end
@@ -0,0 +1,476 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-09
4
+ #
5
+ # Premailer processes HTML and CSS to improve e-mail deliverability.
6
+ #
7
+ # Premailer's main function is to render all CSS as inline <tt>style</tt>
8
+ # attributes. It also converts relative links to absolute links and checks
9
+ # the 'safety' of CSS properties against a CSS support chart.
10
+ #
11
+ # = Example
12
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
13
+ #
14
+ # # Write the HTML output
15
+ # fout = File.open("output.html", "w")
16
+ # fout.puts premailer.to_inline_css
17
+ # fout.close
18
+ #
19
+ # # Write the plain-text output
20
+ # fout = File.open("output.txt", "w")
21
+ # fout.puts premailer.to_plain_text
22
+ # fout.close
23
+ #
24
+ # # List any CSS warnings
25
+ # puts premailer.warnings.length.to_s + ' warnings found'
26
+ # premailer.warnings.each do |w|
27
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
28
+ # end
29
+ #
30
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
31
+ # puts premailer.to_inline_css
32
+ class Premailer
33
+ include HtmlToPlainText
34
+ include CssParser
35
+
36
VERSION = '1.5.5'

# YAML chart of CSS/HTML support per e-mail client, relative to this file.
CLIENT_SUPPORT_FILE = "#{File.dirname(__FILE__)}/../../misc/client_support.yaml"

# Selectors matching this pattern (pseudo-classes/elements and @-rules)
# are not merged into inline style attributes.
RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i

# Maps an element name to the CSS properties that can also be rendered as
# HTML attributes on that element (CSS property => HTML attribute).
#
# TODO: background=""
RELATED_ATTRIBUTES = {}

%w(h1 h2 h3 h4 h5 h6 p div blockquote).each do |tag|
  RELATED_ATTRIBUTES[tag] = {'text-align' => 'align'}
end

%w(body table).each do |tag|
  RELATED_ATTRIBUTES[tag] = {'background-color' => 'bgcolor'}
end

RELATED_ATTRIBUTES['tr'] = {'text-align' => 'align', 'background-color' => 'bgcolor'}

%w(th td).each do |tag|
  RELATED_ATTRIBUTES[tag] = {'text-align' => 'align',
                             'background-color' => 'bgcolor',
                             'vertical-align' => 'valign'}
end

RELATED_ATTRIBUTES['img'] = {'float' => 'align'}
63
+
64
+ # URI of the HTML file used
65
+ attr_reader :html_file
66
+
67
+ # processed HTML document (Hpricot)
68
+ attr_reader :processed_doc
69
+
70
+ # source HTML document (Hpricot)
71
+ attr_reader :doc
72
+
73
# Integer levels accepted by the +warn_level+ option.
module Warnings
  NONE, SAFE, POOR, RISKY = 0, 1, 2, 3
end
include Warnings

# Label for each warning level, indexed by the level's integer value.
WARN_LABEL = ['NONE', 'SAFE', 'POOR', 'RISKY']
82
+
83
# Create a new Premailer object.
#
# +path+ is the path to the HTML file to process. Can be either the URL of a
# remote file (http, https or ftp) or a local path. It is ignored when the
# +html_code+ option is given.
#
# ==== Options
# [+line_length+] Line length used by to_plain_text. Integer, default is 65.
# [+warn_level+] What level of CSS compatibility warnings to show (see Warnings).
# [+link_query_string+] A string to append to every <a href=""> link. Do not include the initial +?+.
# [+base_url+] Used to calculate absolute URLs for local files.
# [+css+] Manually specify a CSS stylesheet.
# [+css_to_attributes+] Copy related CSS attributes into HTML attributes (e.g. +background-color+ to +bgcolor+)
# [+remove_classes+] Remove class attributes from the output of to_inline_css. Default is false.
# [+html_code+] HTML source to process directly instead of reading +path+.
def initialize(path, options = {})
  if options[:html_code]
    # HTML supplied as a string: treated like a local file with a
    # placeholder name
    @is_local_file = true
    @html_file = "string"
    @html_code = options[:html_code]
  else
    @html_file = path
    @is_local_file = local_uri?(path)

    @html_code = if @is_local_file
                   File.open(@html_file) { |f| f.read }
                 else
                   # File.open cannot fetch URLs; URI#open is provided by
                   # open-uri, which the gem's top-level file requires
                   URI.parse(@html_file).open { |f| f.read }
                 end
  end

  load_html(@html_code, options)
end
110
+
111
# Builds a Premailer from a string of HTML instead of a file path.
#
# +code+ is passed to the constructor as the +html_code+ option, with a
# +nil+ path; +options+ are the same as for Premailer.new.
def self.html(code, options = {})
  with_source = options.merge(:html_code => code)
  new(nil, with_source)
end
114
+
115
# Parses +html+ and prepares this object for processing.
#
# Merges +options+ over the defaults, creates the CSS parser, parses the
# document with Hpricot, converts relative links to absolute ones when a
# base is known, and loads CSS from the +css+ option and from the document.
def load_html(html, options = {})
  defaults = {
    :warn_level => Warnings::SAFE,
    :line_length => 65,
    :link_query_string => nil,
    :base_url => nil,
    :remove_classes => false,
    :css => [],
    :css_to_attributes => true
  }
  @options = defaults.merge(options)

  @css_files = @options[:css]
  @css_warnings = []

  parser_options = {:absolute_paths => true, :import => true, :io_exceptions => false}
  @css_parser = CssParser::Parser.new(parser_options)

  @doc, @html_charset = Hpricot(html)
  @processed_doc = @doc

  # local documents are only rewritten when a base URL was supplied;
  # remote documents use their own address as the base
  if @is_local_file && @options[:base_url]
    @processed_doc = convert_inline_links(@processed_doc, @options[:base_url])
  elsif !@is_local_file
    @processed_doc = convert_inline_links(@processed_doc, @html_file)
  end

  load_css_from_options!
  load_css_from_html!
end
144
+
145
# Returns +false+ when +uri+ starts with an http, https or ftp scheme
# (case-insensitive) and +true+ otherwise.
def local_uri?(uri)
  remote = uri =~ /^(http|https|ftp)\:\/\//i
  remote ? false : true
end
152
+
153
# Array of CSS warning hashes.
#
# Always empty when the +warn_level+ option is Warnings::NONE. Otherwise
# the client-support check is run while the stored list is empty, and the
# stored list is returned.
def warnings
  if @options[:warn_level] == Warnings::NONE
    []
  else
    @css_warnings = check_client_support if @css_warnings.empty?
    @css_warnings
  end
end
159
+
160
# Serialises the source document (not the processed one) back to HTML.
#
# Returns a string.
def to_s
  source_doc = @doc
  source_doc.to_html
end
164
+
165
# Produces a plain-text rendering of the source document, wrapped at the
# +line_length+ option.
#
# Uses the inner HTML of the <tt>body</tt> element; if reading it raises,
# the whole document is used instead.
#
# Returns a string.
def to_plain_text
  body_html = begin
    @doc.search("body").innerHTML
  rescue
    @doc.to_html
  end

  convert_to_text(body_html, @options[:line_length], @html_charset)
end
177
+
178
# Merge CSS into the HTML document.
#
# Every loaded rule whose selector can be merged is folded into the
# <tt>style</tt> attribute of the elements it matches, honouring
# specificity. Rules that cannot be inlined (see RE_UNMERGABLE_SELECTORS)
# are written back into a <tt>style</tt> element. When the
# +css_to_attributes+ option is set, related CSS properties are also
# copied into HTML attributes (see RELATED_ATTRIBUTES); when
# +remove_classes+ is set, class attributes are removed.
#
# Updates the processed document and returns it as an HTML string.
def to_inline_css
  doc = @processed_doc
  unmergable_rules = CssParser::Parser.new

  # Give all styles already in style attributes a specificity of 1000
  # per http://www.w3.org/TR/CSS21/cascade.html#specificity
  doc.search("*[@style]").each do |el|
    el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
  end

  # Iterate through the rules and merge them into the HTML
  @css_parser.each_selector(:all) do |selector, declaration, specificity|
    # Drop :link pseudo-classes from the selector
    selector.gsub!(/:link([\s]|$)+/i, '')

    # Convert element names to lower case
    selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }

    if selector =~ RE_UNMERGABLE_SELECTORS
      # Save un-mergable rules separately
      unmergable_rules.add_rule_set!(RuleSet.new(selector, declaration))
    else
      doc.search(selector) do |el|
        if el.elem?
          # Add a style attribute or append to the existing one
          block = "[SPEC=#{specificity}[#{declaration}]]"
          el['style'] = (el.attributes['style'] ||= '') + ' ' + block
        end
      end
    end
  end

  # Read STYLE attributes and perform folding
  doc.search("*[@style]").each do |el|
    style = el.attributes['style'].to_s

    declarations = []

    # each [SPEC=n[...]] chunk becomes a rule set carrying specificity n
    style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).each do |declaration|
      rs = RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
      declarations << rs
    end

    # Perform style folding
    merged = CssParser.merge(declarations)
    merged.expand_shorthand!

    #if @options[:prefer_cellpadding] and (el.name == 'td' or el.name == 'th') and el['cellpadding'].nil?
    #  if cellpadding = equivalent_cellpadding(merged)
    #    el['cellpadding'] = cellpadding
    #    merged['padding-left'] = nil
    #    merged['padding-right'] = nil
    #    merged['padding-top'] = nil
    #    merged['padding-bottom'] = nil
    #  end
    #end

    # Duplicate CSS attributes as HTML attributes, but only when the
    # css_to_attributes option asks for it; existing HTML attributes are
    # never overwritten
    if @options[:css_to_attributes] and RELATED_ATTRIBUTES.has_key?(el.name)
      RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
        el[html_att] = merged[css_att].gsub(/;$/, '').strip if el[html_att].nil? and not merged[css_att].empty?
      end
    end

    merged.create_dimensions_shorthand!

    # write the inline STYLE attribute
    el['style'] = Premailer.escape_string(merged.declarations_to_s)
  end

  doc = write_unmergable_css_rules(doc, unmergable_rules)

  doc.search('*').remove_class if @options[:remove_classes]

  @processed_doc = doc

  doc.to_html
end
259
+
260
+
261
+ protected
262
# Reads the stylesheet at +path+ and adds it to the CSS parser, using the
# HTML file as the base URI. Any error while reading or parsing is
# swallowed, so a missing or unreadable stylesheet is ignored.
def load_css_from_local_file!(path)
  css_block = File.open(path, "r") { |file| file.read }
  @css_parser.add_block!(css_block, {:base_uri => @html_file})
rescue
  # best effort: stylesheets that cannot be loaded are skipped
end
273
+
274
# Loads every stylesheet named in the +css+ option: local paths are read
# from disk, anything else is fetched by the CSS parser.
def load_css_from_options! # :nodoc:
  @css_files.each do |css_file|
    local_uri?(css_file) ? load_css_from_local_file!(css_file) : @css_parser.load_uri!(css_file)
  end
end
283
+
284
# Collects the CSS referenced by the source document.
#
# Stylesheet <tt>link</tt> tags with an acceptable media type are loaded
# (from disk for local documents, by URI otherwise) and <tt>style</tt>
# tags are added as inline blocks. All matched tags are then removed from
# the document.
def load_css_from_html! # :nodoc:
  tags = @doc.search("link[@rel='stylesheet'], style")
  return unless tags

  tags.each do |tag|
    markup = tag.to_s.strip

    if markup =~ /^\<link/i and tag.attributes['href'] and media_type_ok?(tag.attributes['media'])
      link_uri = Premailer.resolve_link(tag.attributes['href'].to_s, @html_file)

      if @is_local_file
        load_css_from_local_file!(link_uri)
      else
        @css_parser.load_uri!(link_uri)
      end
    elsif markup =~ /^\<style/i
      @css_parser.add_block!(tag.innerHTML, :base_uri => URI.parse(@html_file))
    end
  end

  tags.remove
end
305
+
306
# Decides whether a stylesheet with the given <tt>media</tt> attribute
# should be loaded.
#
# A missing or empty value is accepted. Otherwise the value is split on
# whitespace and commas and accepted when any entry contains +screen+,
# +handheld+ or +all+ (case-insensitive). Any error while checking also
# counts as accepted.
def media_type_ok?(media_types) # :nodoc:
  unspecified = media_types.nil? || media_types.empty?
  return true if unspecified

  candidates = media_types.split(/[\s]+|,/)
  candidates.any? { |candidate| candidate.strip =~ /screen|handheld|all/i }
rescue
  true
end
312
+
313
# Create a <tt>style</tt> element holding the rules that could not be
# inlined (e.g. <tt>:hover</tt>) and append it to the <tt>head</tt>.
# Nothing is appended when there are no such rules.
#
# <tt>doc</tt> is an Hpricot document and <tt>unmergable_rules</tt> is the
# CSS parser holding the rules; its selectors are read with
# <tt>:force_important</tt> set.
#
# Returns the document that was passed in.
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
  rule_lines = []
  unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, specificity|
    rule_lines << "#{selector} { #{declarations} }\n"
  end
  styles = rule_lines.join

  unless styles.empty?
    style_tag = "\n<style type=\"text/css\">\n#{styles}</style>\n"
    doc.search("head").append(style_tag)
  end

  doc
end
331
+
332
# Convert relative links to absolute links.
#
# Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
# as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
# The +link_query_string+ option, when set, is appended to the query of
# <tt>a</tt> tags only. Links that cannot be resolved or parsed are left
# exactly as they were.
#
# <tt>doc</tt> is an Hpricot document and <tt>base_uri</tt> is either a string or a URI.
#
# Returns an Hpricot document.
def convert_inline_links(doc, base_uri) # :nodoc:
  base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

  append_qs = @options[:link_query_string] ||= ''

  ['href', 'src', 'background'].each do |attribute|
    tags = doc.search("*[@#{attribute}]")

    next if tags.empty?

    tags.each do |tag|
      link = tag.attributes[attribute]

      # skip links that look like they have merge tags
      # and mailto, ftp, etc...
      next if link =~ /^(\{|\[|<|\#|mailto:|ftp:|gopher:)/i

      # skip links that cannot be turned into a URI rather than
      # overwriting them with an empty value
      merged = resolve_inline_link(link, base_uri)
      next if merged.nil?

      # only append a querystring to <a> tags
      if tag.name =~ /^a$/i and not append_qs.empty?
        if merged.query
          merged.query = merged.query + '&' + append_qs
        else
          merged.query = append_qs
        end
      end

      tag[attribute] = merged.to_s
    end # end of each tag
  end # end of each attrs

  doc.search("*[@style]").each do |el|
    el['style'] = CssParser.convert_uris(el.attributes['style'].to_s, base_uri)
  end
  doc
end

# Resolves one attribute value against +base_uri+.
#
# Values starting with +http+ are parsed as they are; anything else is
# resolved with Premailer.resolve_link, retrying once with the value
# URI-escaped if the first attempt raises.
#
# Returns a URI, or +nil+ when the value cannot be resolved or parsed.
def resolve_inline_link(link, base_uri) # :nodoc:
  resolved = if link =~ /^http/i
               link
             else
               begin
                 Premailer.resolve_link(link.to_s, base_uri)
               rescue
                 Premailer.resolve_link(URI.escape(link.to_s), base_uri)
               end
             end

  # make sure the result is a URI
  resolved.kind_of?(URI) ? resolved : URI.parse(resolved.to_s)
rescue
  nil
end
392
+
393
# Returns a copy of +str+ with every double quote replaced by a single
# quote.
def self.escape_string(str) # :nodoc:
  str.tr('"', "'")
end
396
+
397
# Resolves +path+ against +base_path+.
#
# When +base_path+ is a URI, or a string starting with an http, https or
# ftp scheme, +path+ is merged into it and the canonicalized URL is
# returned as a string. Otherwise both are treated as file system paths
# and +path+ is expanded relative to the directory of +base_path+.
#
# Surrounding whitespace in +path+ is ignored; the caller's string is not
# modified, so frozen strings are accepted.
def self.resolve_link(path, base_path) # :nodoc:
  path = path.strip

  if base_path.kind_of?(URI)
    Premailer.canonicalize(base_path.merge(path))
  elsif base_path.kind_of?(String) and base_path =~ /^(http[s]?|ftp):\/\//i
    Premailer.canonicalize(URI.parse(base_path).merge(path))
  else
    File.expand_path(path, File.dirname(base_path))
  end
end
412
+
413
# Normalizes a URI and removes <tt>.</tt> and <tt>..</tt> segments from its
# path. Leading <tt>..</tt> segments that have no parent to cancel are kept.
#
# +uri+ is a URI or anything whose string form parses as one.
#
# Returns the canonical URI as a string.
#
# from http://www.ruby-forum.com/topic/140101
def self.canonicalize(uri) # :nodoc:
  u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
  u.normalize!
  newpath = u.path

  # Collapse "segment/../" pairs until the path stops changing. Comparing
  # the result (rather than looping while a substitution happened) matters
  # because a "../../" match is replaced by itself, which would otherwise
  # repeat forever.
  loop do
    collapsed = newpath.gsub(%r{([^/]+)/\.\./?}) { |match| $1 == '..' ? match : '' }
    break if collapsed == newpath
    newpath = collapsed
  end

  newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
  u.path = newpath
  u.to_s
end
425
+
426
# Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
#
# Looks up three things in the support chart: CSS properties used in inline
# <tt>style</tt> attributes of the processed document, and HTML attributes
# and elements present in the source document. An entry produces a warning
# when its +support+ value is at least the +warn_level+ option.
#
# Returns an array of hashes with <tt>:message</tt>, <tt>:level</tt> (a
# WARN_LABEL string) and <tt>:clients</tt> (comma-separated string).
def check_client_support # :nodoc:
  # the chart is parsed once and kept; load_file closes the file itself
  @client_support ||= YAML.load_file(CLIENT_SUPPORT_FILE)

  warnings = []
  properties = []

  # Get a list of CSS properties
  @processed_doc.search("*[@style]").each do |el|
    el.attributes['style'].scan(/([\w\-]+)[\s]*\:/i) { |match| properties.push(match[0]) }
  end

  properties.uniq!

  property_support = @client_support['css_properties']
  properties.each do |prop|
    if property_support.include?(prop) and
        property_support[prop].include?('support') and
        property_support[prop]['support'] >= @options[:warn_level]
      warnings.push({:message => "#{prop} CSS property",
                     :level => WARN_LABEL[property_support[prop]['support']],
                     :clients => property_support[prop]['unsupported_in'].join(', ')})
    end
  end

  # NOTE(review): attribute and element entries are assumed to carry an
  # 'unsupported_in' list like the CSS property entries do -- confirm
  # against client_support.yaml. A missing list yields an empty string.
  @client_support['attributes'].each do |attribute, data|
    next unless data['support'] >= @options[:warn_level]
    if @doc.search("*[@#{attribute}]").length > 0
      warnings.push({:message => "#{attribute} HTML attribute",
                     :level => WARN_LABEL[data['support']],
                     :clients => Array(data['unsupported_in']).join(', ')})
    end
  end

  @client_support['elements'].each do |element, data|
    next unless data['support'] >= @options[:warn_level]
    # search for the element itself, not the literal word "element"
    if @doc.search(element).length > 0
      warnings.push({:message => "#{element} HTML element",
                     :level => WARN_LABEL[data['support']],
                     :clients => Array(data['unsupported_in']).join(', ')})
    end
  end

  warnings
end
473
+ end
474
+
475
+
476
+