sundawg_premailer 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'premailer'
@@ -0,0 +1,8 @@
1
+ # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-09
2
+ require 'yaml'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+ require 'css_parser'
6
+
7
+ require File.dirname(__FILE__) + "/premailer/html_to_plain_text"
8
+ require File.dirname(__FILE__) + "/premailer/premailer"
@@ -0,0 +1,74 @@
1
+ require 'text/reform'
2
+ require 'htmlentities'
3
+
4
# Support functions for Premailer
module HtmlToPlainText

  # Converts HTML markup to wrapped plain text with all HTML tags removed.
  #
  # +html+ is the markup to convert.
  # +line_length+ is the wrap width handed to Text::Reform; it also caps the
  # length of the rules drawn around headings.
  # +from_charset+ is accepted for compatibility; it is not used by this
  # method, so no character set conversion happens here.
  #
  # Returns the converted text with leading and trailing whitespace stripped.
  #
  # TODO:
  #  - add support for DL, OL
  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
    r = Text::Reform.new(:trim => true,
                         :squeeze => false,
                         :break => Text::Reform.break_wrap)

    # decode HTML entities
    he = HTMLEntities.new
    txt = he.decode(html)

    # handle headings (H1-H6)
    #
    # The heading body is matched lazily so that several headings on the same
    # line are converted one by one instead of being swallowed as one match.
    txt.gsub!(/[ \t]*<h([0-9]+)[^>]*>(.*?)<\/h[0-9]+>/i) do
      # capture both groups before the nested gsub below resets $1/$2
      hlevel, inner = $1.to_i, $2

      # cleanup text inside of headings
      htext = inner.gsub(/<\/?[^>]*>/i, '').strip

      # rules never exceed the wrap width
      hlength = [htext.length, line_length].min

      case hlevel
      when 1   # H1, asterisks above and below
        ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength) + "\n"
      when 2   # H2, dashes above and below
        ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength) + "\n"
      else     # H3-H6, dashes below
        htext + "\n" + ('-' * hlength) + "\n"
      end
    end

    # links
    #
    # The attribute list is confined to a single tag and the link text is
    # matched lazily, so every link on a line is converted separately.
    txt.gsub!(/<a\s[^>]*?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/i) do
      $2.strip + ' ( ' + $1.strip + ' )'
    end

    # lists -- TODO: should handle ordered lists
    txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
    # list not followed by a newline
    txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

    # paragraphs and line breaks
    txt.gsub!(/<\/p>/i, "\n\n")
    txt.gsub!(/<br[\/ ]*>/i, "\n")

    # strip remaining tags
    txt.gsub!(/<\/?[^>]*>/, '')

    # wrap text
    txt = r.format(('[' * line_length), txt)

    # remove linefeeds (\r\n and \r -> \n)
    txt.gsub!(/\r\n?/, "\n")

    # strip extra spaces
    txt.gsub!(/\302\240+/, " ") # non-breaking spaces -> spaces
    txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
    txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

    # no more than two consecutive newlines
    txt.gsub!(/[\n]{3,}/, "\n\n")

    txt.strip
  end
end
@@ -0,0 +1,481 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-09
4
+ #
5
+ # Premailer processes HTML and CSS to improve e-mail deliverability.
6
+ #
7
+ # Premailer's main function is to render all CSS as inline <tt>style</tt>
8
+ # attributes. It also converts relative links to absolute links and checks
9
+ # the 'safety' of CSS properties against a CSS support chart.
10
+ #
11
+ # = Example
12
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
13
+ #
14
+ # # Write the HTML output
15
+ # fout = File.open("output.html", "w")
16
+ # fout.puts premailer.to_inline_css
17
+ # fout.close
18
+ #
19
+ # # Write the plain-text output
20
+ # fout = File.open("output.txt", "w")
21
+ # fout.puts premailer.to_plain_text
22
+ # fout.close
23
+ #
24
+ # # List any CSS warnings
25
+ # puts premailer.warnings.length.to_s + ' warnings found'
26
+ # premailer.warnings.each do |w|
27
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
28
+ # end
29
+ #
30
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
31
+ # puts premailer.to_inline_css
32
+ class Premailer
33
+ include HtmlToPlainText
34
+ include CssParser
35
+
36
+ VERSION = '1.5.5'
37
+
38
+ CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
39
+
40
+ RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i
41
+
42
# list of CSS attributes that can be rendered as HTML attributes
#
# Maps a lower-case element name to a hash of
# <tt>CSS property => HTML attribute</tt>. Elements that share a mapping point
# at the same frozen hash, and the table itself is frozen.
#
# TODO: background=""
RELATED_ATTRIBUTES = begin
  align_only   = {'text-align' => 'align'}.freeze
  bgcolor_only = {'background-color' => 'bgcolor'}.freeze
  row          = {'text-align' => 'align', 'background-color' => 'bgcolor'}.freeze
  cell         = {'text-align' => 'align', 'background-color' => 'bgcolor', 'vertical-align' => 'valign'}.freeze

  table = {}
  %w(h1 h2 h3 h4 h5 h6 p div blockquote).each { |name| table[name] = align_only }
  %w(body table).each { |name| table[name] = bgcolor_only }
  table['tr'] = row
  %w(th td).each { |name| table[name] = cell }
  table['img'] = {'float' => 'align'}.freeze
  table.freeze
end
63
+
64
+ # URI of the HTML file used
65
+ attr_reader :html_file
66
+
67
+ # processed HTML document (Hpricot)
68
+ attr_reader :processed_doc
69
+
70
+ # source HTML document (Hpricot)
71
+ attr_reader :doc
72
+
73
# Numeric warning levels, from NONE (0) up to RISKY (3).
module Warnings
  NONE, SAFE, POOR, RISKY = 0, 1, 2, 3
end
79
+ include Warnings
80
+
81
+ WARN_LABEL = %w(NONE SAFE POOR RISKY)
82
+
83
# Create a new Premailer object.
#
# +source+ is the HTML to process. It can be the URL of a remote file, a
# local path or, when the +in_memory+ option is set, the HTML markup itself.
#
# ==== Options
# [+line_length+] Line length used by to_plain_text. Integer, default is 65.
# [+warn_level+] What level of CSS compatibility warnings to show (see Warnings).
# [+link_query_string+] A string to append to every <a href=""> link. Do not include the initial +?+.
# [+base_url+] Used to calculate absolute URLs for local files.
# [+css+] Manually specify CSS stylesheets: a single path/URL or an array of them.
# [+css_to_attributes+] Copy related CSS attributes into HTML attributes (e.g. +background-color+ to +bgcolor+)
# [+remove_classes+] Remove class attributes in to_inline_css. Default is false.
# [+in_memory+] Informs premailer to parse an HTML String
def initialize(source, options = {})
  @options = {:warn_level => Warnings::SAFE,
              :line_length => 65,
              :link_query_string => nil,
              :base_url => nil,
              :remove_classes => false,
              :in_memory => false,
              :css => [],
              :css_to_attributes => true}.merge(options)
  @html_file = source

  # read the flag from the merged options so the default above applies
  @is_in_memory = @options[:in_memory]

  # an in-memory document is always handled like a local file
  @is_local_file = @is_in_memory ? true : local_uri?(source)

  # accept a single stylesheet as well as a list of them
  @css_files = [@options[:css]].flatten.compact

  @css_warnings = []

  @css_parser = CssParser::Parser.new({:absolute_paths => true,
                                       :import => true,
                                       :io_exceptions => false
                                      })

  @doc, @html_charset = load_html(@html_file)
  @processed_doc = @doc

  # local documents are only rewritten when a base URL was supplied;
  # remote documents use their own location as the base
  if @is_local_file && @options[:base_url]
    @processed_doc = convert_inline_links(@processed_doc, @options[:base_url])
  elsif !@is_local_file
    @processed_doc = convert_inline_links(@processed_doc, @html_file)
  end
  load_css_from_options!
  load_css_from_html!
end
132
+
133
# Tells whether +uri+ should be treated as a local path.
#
# Returns false when +uri+ begins with an http, https or ftp scheme
# (case-insensitive) and true otherwise. The match is anchored to the start
# of the whole string, so a scheme at the beginning of a later line of a
# multi-line string does not count.
def local_uri?(uri)
  uri !~ /\A(http|https|ftp)\:\/\//i
end
140
+
141
# List of CSS warnings, each one a hash.
#
# Always empty when the +warn_level+ option is Warnings::NONE. Otherwise the
# list is filled by check_client_support whenever the stored list is empty.
def warnings
  if @options[:warn_level] == Warnings::NONE
    []
  else
    @css_warnings = check_client_support if @css_warnings.empty?
    @css_warnings
  end
end
147
+
148
# Serializes the document held in <tt>@doc</tt> by calling +to_html+ on it.
#
# Returns a string.
def to_s
  source_doc = @doc
  source_doc.to_html
end
152
+
153
# Converts the HTML document to a format suitable for plain-text e-mail.
#
# The inner HTML of the +body+ element is converted; if reading it raises,
# the whole document is converted instead.
#
# Returns a string.
def to_plain_text
  markup = begin
    @doc.search("body").innerHTML
  rescue
    @doc.to_html
  end
  convert_to_text(markup, @options[:line_length], @html_charset)
end
165
+
166
# Merge CSS into the HTML document.
#
# Each rule held by the CSS parser is appended to the +style+ attribute of
# the elements it selects, tagged with its specificity. The tagged
# declarations of every element are then folded into one declaration list.
# Rules whose selector matches RE_UNMERGABLE_SELECTORS are handed to
# write_unmergable_css_rules instead. When the +css_to_attributes+ option is
# set, CSS properties listed in RELATED_ATTRIBUTES are also copied into HTML
# attributes that are not already present. Class attributes are removed when
# the +remove_classes+ option is set.
#
# The result is stored in <tt>@processed_doc</tt>.
#
# Returns a string.
def to_inline_css
  doc = @processed_doc
  unmergable_rules = CssParser::Parser.new

  # Give all styles already in style attributes a specificity of 1000
  # per http://www.w3.org/TR/CSS21/cascade.html#specificity
  doc.search("*[@style]").each do |el|
    el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
  end

  # Iterate through the rules and merge them into the HTML
  @css_parser.each_selector(:all) do |selector, declaration, specificity|
    # Drop the :link pseudo-class from the selector
    selector.gsub!(/:link([\s]|$)+/i, '')

    # Convert element names to lower case
    selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }

    if selector =~ RE_UNMERGABLE_SELECTORS
      # Save un-mergable rules separately
      unmergable_rules.add_rule_set!(RuleSet.new(selector, declaration))
    else

      doc.search(selector) do |el|
        if el.elem?
          # Add a style attribute or append to the existing one
          block = "[SPEC=#{specificity}[#{declaration}]]"
          el['style'] = (el.attributes['style'] ||= '') + ' ' + block
        end
      end
    end
  end

  # Read STYLE attributes and perform folding
  doc.search("*[@style]").each do |el|
    style = el.attributes['style'].to_s

    declarations = []

    style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).each do |declaration|
      rs = RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
      declarations << rs
    end

    # Perform style folding
    merged = CssParser.merge(declarations)
    merged.expand_shorthand!

    #if @options[:prefer_cellpadding] and (el.name == 'td' or el.name == 'th') and el['cellpadding'].nil?
    #  if cellpadding = equivalent_cellpadding(merged)
    #    el['cellpadding'] = cellpadding
    #    merged['padding-left'] = nil
    #    merged['padding-right'] = nil
    #    merged['padding-top'] = nil
    #    merged['padding-bottom'] = nil
    #  end
    #end

    # Duplicate CSS attributes as HTML attributes, but only when the
    # css_to_attributes option asks for it
    if @options[:css_to_attributes] && RELATED_ATTRIBUTES.has_key?(el.name)
      RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
        # never overwrite an attribute the document already carries
        next unless el[html_att].nil?
        next if merged[css_att].empty?
        el[html_att] = merged[css_att].gsub(/;$/, '').strip
      end
    end

    merged.create_dimensions_shorthand!

    # write the inline STYLE attribute
    el['style'] = Premailer.escape_string(merged.declarations_to_s)
  end

  doc = write_unmergable_css_rules(doc, unmergable_rules)

  doc.search('*').remove_class if @options[:remove_classes]

  @processed_doc = doc

  doc.to_html
end
247
+
248
+
249
+ protected
250
# Load the HTML source and convert it into an Hpricot document.
#
# +path+ is parsed directly as markup when <tt>@is_in_memory</tt> is set, read
# from disk when <tt>@is_local_file</tt> is set, and passed to +open+
# otherwise.
#
# Returns an Hpricot document. No character set is returned.
# NOTE(review): initialize assigns the result to both @doc and @html_charset;
# presumably @html_charset ends up nil -- confirm.
def load_html(path) # :nodoc:
  if @is_in_memory
    Hpricot(path)
  elsif @is_local_file
    Hpricot(File.open(path, "r") {|f| f.read })
  else
    # block form, so the handle is closed as soon as the content is read
    Hpricot(open(path) {|io| io.read })
  end
end
262
+
263
# Reads the stylesheet stored at +path+ and adds its content to the CSS
# parser, passing <tt>@html_file</tt> as the +base_uri+ option.
#
# Any error raised while reading or adding the stylesheet is swallowed, in
# which case nil is returned.
def load_css_from_local_file!(path)
  stylesheet = File.open(path, "r") { |io| io.read }
  @css_parser.add_block!(stylesheet, {:base_uri => @html_file})
rescue
  nil
end
274
+
275
# Loads every stylesheet listed in <tt>@css_files</tt>, in order: local ones
# through load_css_from_local_file!, all others through the CSS parser's
# +load_uri!+.
def load_css_from_options! # :nodoc:
  @css_files.each do |stylesheet|
    unless local_uri?(stylesheet)
      @css_parser.load_uri!(stylesheet)
      next
    end
    load_css_from_local_file!(stylesheet)
  end
end
284
+
285
# Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
#
# Linked stylesheets are only loaded when they have an +href+ and their
# +media+ attribute passes media_type_ok?. Once processed, the matched
# <tt>link</tt> and <tt>style</tt> tags are removed from the document.
def load_css_from_html! # :nodoc:
  if tags = @doc.search("link[@rel='stylesheet'], style")
    tags.each do |tag|

      if tag.to_s.strip =~ /^\<link/i and tag.attributes['href'] and media_type_ok?(tag.attributes['media'])

        href = tag.attributes['href'].to_s

        if @is_local_file && !local_uri?(href.strip)
          # A local or in-memory document pointing at an absolute remote
          # stylesheet: fetch it as a URI rather than resolving it against
          # the local file system.
          @css_parser.load_uri!(href.strip)
        else
          link_uri = Premailer.resolve_link(href, @html_file)
          if @is_local_file
            load_css_from_local_file!(link_uri)
          else
            @css_parser.load_uri!(link_uri)
          end
        end

      elsif tag.to_s.strip =~ /^\<style/i
        if @is_in_memory
          # @html_file holds the markup itself here, so there is no base URI
          @css_parser.add_block!(tag.innerHTML)
        else
          @css_parser.add_block!(tag.innerHTML, :base_uri => URI.parse(@html_file))
        end
      end
    end
    tags.remove
  end
end
310
+
311
# Decides whether a stylesheet with the given +media+ attribute value should
# be loaded.
#
# Returns true when +media_types+ is nil or empty, or when any of its
# whitespace/comma separated entries contains +screen+, +handheld+ or +all+
# (case-insensitive). Also returns true if examining the value raises.
def media_type_ok?(media_types) # :nodoc:
  return true if media_types.nil? || media_types.empty?

  entries = media_types.split(/[\s]+|,/)
  entries.any? { |entry| entry.strip =~ /screen|handheld|all/i }
rescue
  true
end
317
+
318
# Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
# and append it to the <tt>head</tt>.
#
# <tt>doc</tt> is an Hpricot document and <tt>unmergable_rules</tt> is an
# object whose +each_selector+ yields selector, declarations and specificity.
# Nothing is appended when it yields no rules.
#
# Returns an Hpricot document.
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
  rules = []
  unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, specificity|
    rules << "#{selector} { #{declarations} }\n"
  end

  unless rules.empty?
    style_tag = "\n<style type=\"text/css\">\n#{rules.join}</style>\n"
    doc.search("head").append(style_tag)
  end
  doc
end
336
+
337
# Convert relative links to absolute links.
#
# Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
# as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
#
# The +link_query_string+ option, when not empty, is appended to the query of
# <tt>a</tt> tags only. An attribute whose value cannot be resolved or parsed
# is left unchanged.
#
# <tt>doc</tt> is an Hpricot document and <tt>base_uri</tt> is either a string or a URI.
#
# Returns an Hpricot document.
def convert_inline_links(doc, base_uri) # :nodoc:
  base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

  append_qs = @options[:link_query_string] ||= ''

  ['href', 'src', 'background'].each do |attribute|
    tags = doc.search("*[@#{attribute}]")

    next if tags.empty?

    tags.each do |tag|
      # skip links that look like they have merge tags
      # and mailto, ftp, etc...
      next if tag.attributes[attribute] =~ /^(\{|\[|<|\#|mailto:|ftp:|gopher:)/i

      merged = nil

      if tag.attributes[attribute] =~ /^http/i
        begin
          merged = URI.parse(tag.attributes[attribute])
        rescue; next; end
      else
        begin
          merged = Premailer.resolve_link(tag.attributes[attribute].to_s, base_uri)
        rescue
          # second attempt with the value escaped
          begin
            merged = Premailer.resolve_link(URI.escape(tag.attributes[attribute].to_s), base_uri)
          rescue; end
        end
      end

      # both attempts failed: keep the original value instead of blanking it
      next if merged.nil?

      # make sure 'merged' is a URI
      begin
        merged = URI.parse(merged.to_s) unless merged.kind_of?(URI)
      rescue URI::InvalidURIError
        next
      end

      # only append a querystring to <a> tags
      if tag.name =~ /^a$/i and not append_qs.empty?
        if merged.query
          merged.query = merged.query + '&' + append_qs
        else
          merged.query = append_qs
        end
      end
      tag[attribute] = merged.to_s

    end # end of each tag
  end # end of each attrs

  doc.search("*[@style]").each do |el|
    el['style'] = CssParser.convert_uris(el.attributes['style'].to_s, base_uri)
  end
  doc
end
397
+
398
# Returns a copy of +str+ with every double quote replaced by a single quote.
def self.escape_string(str) # :nodoc:
  str.tr('"', "'")
end
401
+
402
# Resolves +path+ against +base_path+.
#
# When +base_path+ is a URI, or a string starting with an http, https or ftp
# scheme, +path+ is merged into it and the result of Premailer.canonicalize
# is returned. Otherwise +path+ is expanded relative to the directory of
# +base_path+ with File.expand_path.
#
# Surrounding whitespace in +path+ is ignored; the caller's string is not
# modified.
def self.resolve_link(path, base_path) # :nodoc:
  # strip a copy: strip! would change the caller's string and fails on a
  # frozen one
  path = path.strip

  if base_path.kind_of?(URI)
    Premailer.canonicalize(base_path.merge(path))
  elsif base_path.kind_of?(String) and base_path =~ /^(http[s]?|ftp):\/\//i
    Premailer.canonicalize(URI.parse(base_path).merge(path))
  else
    File.expand_path(path, File.dirname(base_path))
  end
end
417
+
418
# from http://www.ruby-forum.com/topic/140101
#
# Normalizes +uri+ (a URI, or anything whose +to_s+ parses as one) and
# removes <tt>segment/..</tt> pairs and <tt>/./</tt> segments from its path.
# A <tt>..</tt> segment that has no preceding segment to cancel is kept.
# A URI object passed in is modified in place.
#
# Returns the resulting URI as a string.
def self.canonicalize(uri) # :nodoc:
  u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
  u.normalize!
  newpath = u.path

  # Collapse "segment/../" pairs until the path stops changing. Comparing
  # the result with the input is what ends the loop: a "../../" match is
  # replaced by itself, which still counts as a substitution.
  loop do
    collapsed = newpath.gsub(%r{([^/]+)/\.\./?}) { |match|
      $1 == '..' ? match : ''
    }
    break if collapsed == newpath
    newpath = collapsed
  end

  newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
  u.path = newpath
  u.to_s
end
430
+
431
# Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
#
# The support data is loaded once and kept in <tt>@client_support</tt>. A
# warning is produced for
# * every CSS property used in an inline +style+ attribute of the processed
#   document,
# * every HTML attribute found in the source document, and
# * every HTML element found in the source document
# whose +support+ value in the support data is at or above the +warn_level+
# option.
#
# Returns an array of hashes with the keys <tt>:message</tt>,
# <tt>:level</tt> and <tt>:clients</tt>.
def check_client_support # :nodoc:
  @client_support ||= YAML.load_file(CLIENT_SUPPORT_FILE)

  found = []

  # Get a list of CSS properties
  properties = []
  @processed_doc.search("*[@style]").each do |el|
    properties.concat(el.attributes['style'].to_s.scan(/([\w\-]+)[\s]*\:/i).flatten)
  end
  properties.uniq!

  property_support = @client_support['css_properties']
  properties.each do |prop|
    next unless property_support.include?(prop)
    data = property_support[prop]
    next unless data.include?('support') && data['support'] >= @options[:warn_level]
    found.push(client_support_warning("#{prop} CSS property", data))
  end

  @client_support['attributes'].each do |attribute, data|
    next unless data['support'] >= @options[:warn_level]
    if @doc.search("*[@#{attribute}]").length > 0
      found.push(client_support_warning("#{attribute} HTML attribute", data))
    end
  end

  @client_support['elements'].each do |element, data|
    next unless data['support'] >= @options[:warn_level]
    # search for the element itself, not for a tag literally named "element"
    if @doc.search(element.to_s).length > 0
      found.push(client_support_warning("#{element} HTML element", data))
    end
  end

  found
end

# Builds one warning hash from a support-data entry.
def client_support_warning(message, data) # :nodoc:
  {:message => message,
   :level => WARN_LABEL[data['support']],
   :clients => Array(data['unsupported_in']).join(', ')}
end
478
+ end
479
+
480
+
481
+