sundawg_premailer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'premailer'
@@ -0,0 +1,8 @@
1
+ # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-09
2
+ require 'yaml'
3
+ require 'open-uri'
4
+ require 'hpricot'
5
+ require 'css_parser'
6
+
7
+ require File.dirname(__FILE__) + "/premailer/html_to_plain_text"
8
+ require File.dirname(__FILE__) + "/premailer/premailer"
@@ -0,0 +1,74 @@
1
+ require 'text/reform'
2
+ require 'htmlentities'
3
+
4
+ # Support functions for Premailer
5
module HtmlToPlainText

  # Returns the text in UTF-8 format with all HTML tags removed.
  #
  # +html+ is the markup to convert. +line_length+ is the column at which the
  # output is wrapped; it is also the maximum width of the rules drawn around
  # headings. +from_charset+ is accepted for interface compatibility but is
  # not used by this implementation.
  #
  # Conversions performed, in order: HTML entities are decoded, headings are
  # decorated with rules, links become "text ( url )", list items become
  # "* item" lines, paragraphs and line breaks become newlines, any remaining
  # tags are dropped and the result is wrapped and whitespace-normalised.
  #
  # NOTE(review): entities are decoded before tags are stripped, so escaped
  # markup such as "&lt;b&gt;" is subsequently treated as a tag and removed.
  #
  # TODO:
  #  - add support for DL, OL
  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
    r = Text::Reform.new(:trim => true,
                         :squeeze => false,
                         :break => Text::Reform.break_wrap)

    # decode HTML entities
    he = HTMLEntities.new
    txt = he.decode(html)

    # handle headings (H1-H6); the lazy quantifier keeps two headings that
    # share a line from being swallowed as a single heading
    txt.gsub!(/[ \t]*<h([0-9]+)[^>]*>(.*?)<\/h[0-9]+>/i) do
      hlevel = $1.to_i
      # cleanup text inside of headings
      htext = $2.gsub(/<\/?[^>]*>/i, '').strip
      # rules are never wider than the wrapped line
      hlength = [htext.length, line_length].min

      case hlevel
      when 1 # H1, asterisks above and below
        ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength) + "\n"
      when 2 # H2, dashes above and below
        ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength) + "\n"
      else   # H3-H6, dashes below
        htext + "\n" + ('-' * hlength) + "\n"
      end
    end

    # links: every quantifier is bounded by the tag it belongs to, so several
    # links on one line are each converted instead of being merged into one
    txt.gsub!(/<a\s[^>]*?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/i) do
      href = $1
      label = $2
      label.strip + ' ( ' + href.strip + ' )'
    end

    # lists -- TODO: should handle ordered lists
    txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
    # list not followed by a newline
    txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

    # paragraphs and line breaks
    txt.gsub!(/<\/p>/i, "\n\n")
    txt.gsub!(/<br[\/ ]*>/i, "\n")

    # strip remaining tags
    txt.gsub!(/<\/?[^>]*>/, '')

    # wrap text
    txt = r.format(('[' * line_length), txt)

    # remove linefeeds (\r\n and \r -> \n)
    txt.gsub!(/\r\n?/, "\n")

    # strip extra spaces
    # U+00A0 is built with pack so the quantifier below applies to the whole
    # character (not only its last byte) whatever the regexp byte semantics
    nbsp = [0xA0].pack('U')
    txt.gsub!(/(?:#{nbsp})+/, " ")  # non-breaking spaces -> spaces
    txt.gsub!(/\n[ \t]+/, "\n")     # space at start of lines
    txt.gsub!(/[ \t]+\n/, "\n")     # space at end of lines

    # no more than two consecutive newlines
    txt.gsub!(/[\n]{3,}/, "\n\n")

    txt.strip
  end
end
@@ -0,0 +1,481 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-09
4
+ #
5
+ # Premailer processes HTML and CSS to improve e-mail deliverability.
6
+ #
7
+ # Premailer's main function is to render all CSS as inline <tt>style</tt>
8
+ # attributes. It also converts relative links to absolute links and checks
9
+ # the 'safety' of CSS properties against a CSS support chart.
10
+ #
11
+ # = Example
12
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
13
+ #
14
+ # # Write the HTML output
15
+ # fout = File.open("output.html", "w")
16
+ # fout.puts premailer.to_inline_css
17
+ # fout.close
18
+ #
19
+ # # Write the plain-text output
20
+ # fout = File.open("output.txt", "w")
21
+ # fout.puts premailer.to_plain_text
22
+ # fout.close
23
+ #
24
+ # # List any CSS warnings
25
+ # puts premailer.warnings.length.to_s + ' warnings found'
26
+ # premailer.warnings.each do |w|
27
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
28
+ # end
29
+ #
30
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
31
+ # puts premailer.to_inline_css
32
class Premailer
  include HtmlToPlainText
  include CssParser

  VERSION = '1.5.5'

  CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'

  # Selectors matching this pattern (pseudo-classes/elements and at-rules)
  # cannot be expressed as inline styles and are written to a <style> block.
  RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i

  # list of CSS attributes that can be rendered as HTML attributes
  #
  # TODO: too much repetition
  # TODO: background=""
  RELATED_ATTRIBUTES = {
    'h1' => {'text-align' => 'align'},
    'h2' => {'text-align' => 'align'},
    'h3' => {'text-align' => 'align'},
    'h4' => {'text-align' => 'align'},
    'h5' => {'text-align' => 'align'},
    'h6' => {'text-align' => 'align'},
    'p' => {'text-align' => 'align'},
    'div' => {'text-align' => 'align'},
    'blockquote' => {'text-align' => 'align'},
    'body' => {'background-color' => 'bgcolor'},
    'table' => {'background-color' => 'bgcolor'},
    'tr' => {'text-align' => 'align', 'background-color' => 'bgcolor'},
    'th' => {'text-align' => 'align', 'background-color' => 'bgcolor', 'vertical-align' => 'valign'},
    'td' => {'text-align' => 'align', 'background-color' => 'bgcolor', 'vertical-align' => 'valign'},
    'img' => {'float' => 'align'}
  }

  # URI of the HTML file used
  attr_reader :html_file

  # processed HTML document (Hpricot)
  attr_reader :processed_doc

  # source HTML document (Hpricot)
  attr_reader :doc

  module Warnings
    NONE = 0
    SAFE = 1
    POOR = 2
    RISKY = 3
  end
  include Warnings

  # Labels for the Warnings levels, indexed by level value.
  WARN_LABEL = %w(NONE SAFE POOR RISKY)

  # Create a new Premailer object.
  #
  # +source+ is the path to the HTML file to process. Can be either the URL of
  # a remote file or a local path. When the +in_memory+ option is set it is
  # the HTML markup itself.
  #
  # ==== Options
  # [+line_length+] Line length used by to_plain_text. Integer, default is 65.
  # [+warn_level+] What level of CSS compatibility warnings to show (see Warnings).
  # [+link_query_string+] A string to append to every <a href=""> link. Do not include the initial +?+.
  # [+base_url+] Used to calculate absolute URLs for local files.
  # [+css+] Manually specify a CSS stylesheet (a path/URL or an array of them).
  # [+css_to_attributes+] Copy related CSS attributes into HTML attributes (e.g. +background-color+ to +bgcolor+). Default is true.
  # [+remove_classes+] Remove class attributes from the output of to_inline_css. Default is false.
  # [+in_memory+] Informs premailer to parse an HTML String
  def initialize(source, options = {})
    @options = {:warn_level => Warnings::SAFE,
                :line_length => 65,
                :link_query_string => nil,
                :base_url => nil,
                :remove_classes => false,
                :in_memory => false,
                :css => [],
                :css_to_attributes => true}.merge(options)
    @html_file = source

    @is_local_file = local_uri?(source)

    # read the flag from the merged options so the documented default applies
    @is_in_memory = @options[:in_memory]
    @is_local_file = true if @is_in_memory

    # Array() tolerates a nil or single-valued :css option
    @css_files = Array(@options[:css])

    @css_warnings = []

    @css_parser = CssParser::Parser.new({:absolute_paths => true,
                                         :import => true,
                                         :io_exceptions => false
                                        })

    # NOTE(review): load_html returns only the document, so @html_charset is
    # presumably left nil by this assignment -- confirm before relying on it.
    @doc, @html_charset = load_html(@html_file)
    @processed_doc = @doc

    if @is_local_file && @options[:base_url]
      @processed_doc = convert_inline_links(@processed_doc, @options[:base_url])
    elsif !@is_local_file
      @processed_doc = convert_inline_links(@processed_doc, @html_file)
    end
    load_css_from_options!
    load_css_from_html!
  end

  # Returns false when +uri+ starts with an http, https or ftp scheme and
  # true for everything else.
  def local_uri?(uri)
    if uri =~ /^(http|https|ftp)\:\/\//i
      false
    else
      true
    end
  end

  # Array containing a hash of CSS warnings.
  #
  # Each hash has the keys <tt>:message</tt>, <tt>:level</tt> and
  # <tt>:clients</tt>. Always empty when the warn level is Warnings::NONE.
  def warnings
    return [] if @options[:warn_level] == Warnings::NONE
    @css_warnings = check_client_support if @css_warnings.empty?
    @css_warnings
  end

  # Returns the original HTML as a string.
  def to_s
    @doc.to_html
  end

  # Converts the HTML document to a format suitable for plain-text e-mail.
  #
  # Uses the contents of the first <tt>body</tt> element, or the whole
  # document when there is none (e.g. an in-memory HTML fragment).
  #
  # Returns a string.
  def to_plain_text
    html_src = ''
    begin
      # test for a missing body explicitly: an empty result set has no markup
      # to offer, so relying on an exception for the fallback is not enough
      body = @doc.search("body").first
      html_src = body ? body.innerHTML : @doc.to_html
    rescue
      html_src = @doc.to_html
    end
    convert_to_text(html_src, @options[:line_length], @html_charset)
  end

  # Merge CSS into the HTML document.
  #
  # Every rule known to the CSS parser is folded into the <tt>style</tt>
  # attribute of the elements it selects; rules that cannot be inlined are
  # collected and written out by write_unmergable_css_rules.
  #
  # Returns a string.
  def to_inline_css
    doc = @processed_doc
    unmergable_rules = CssParser::Parser.new

    # Give all styles already in style attributes a specificity of 1000
    # per http://www.w3.org/TR/CSS21/cascade.html#specificity
    doc.search("*[@style]").each do |el|
      el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
    end

    # Iterate through the rules and merge them into the HTML
    @css_parser.each_selector(:all) do |selector, declaration, specificity|
      # Drop the :link pseudo-class so the selector can be matched
      selector.gsub!(/:link([\s]|$)+/i, '')

      # Convert element names to lower case
      selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }

      if selector =~ RE_UNMERGABLE_SELECTORS
        # Save un-mergable rules separately
        unmergable_rules.add_rule_set!(RuleSet.new(selector, declaration))
      else
        doc.search(selector) do |el|
          if el.elem?
            # Add a style attribute or append to the existing one
            block = "[SPEC=#{specificity}[#{declaration}]]"
            el['style'] = (el.attributes['style'] ||= '') + ' ' + block
          end
        end
      end
    end

    # Read STYLE attributes and perform folding
    doc.search("*[@style]").each do |el|
      style = el.attributes['style'].to_s

      declarations = []

      # each match is [specificity, declarations] from a [SPEC=n[...]] marker
      style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).each do |declaration|
        rs = RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
        declarations << rs
      end

      # Perform style folding
      merged = CssParser.merge(declarations)
      merged.expand_shorthand!

      #if @options[:prefer_cellpadding] and (el.name == 'td' or el.name == 'th') and el['cellpadding'].nil?
      #  if cellpadding = equivalent_cellpadding(merged)
      #    el['cellpadding'] = cellpadding
      #    merged['padding-left'] = nil
      #    merged['padding-right'] = nil
      #    merged['padding-top'] = nil
      #    merged['padding-bottom'] = nil
      #  end
      #end

      # Duplicate CSS attributes as HTML attributes, honouring the
      # :css_to_attributes option; existing HTML attributes are kept
      if @options[:css_to_attributes] && RELATED_ATTRIBUTES.has_key?(el.name)
        RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
          if el[html_att].nil? && !merged[css_att].empty?
            el[html_att] = merged[css_att].gsub(/;$/, '').strip
          end
        end
      end

      merged.create_dimensions_shorthand!

      # write the inline STYLE attribute
      el['style'] = Premailer.escape_string(merged.declarations_to_s)
    end

    doc = write_unmergable_css_rules(doc, unmergable_rules)

    doc.search('*').remove_class if @options[:remove_classes]

    @processed_doc = doc

    doc.to_html
  end


protected
  # Load the HTML and convert it into an Hpricot document.
  #
  # +path+ is the markup itself when parsing in memory, otherwise a local
  # path or a remote URI (opened through open-uri).
  #
  # Returns an Hpricot document.
  def load_html(path) # :nodoc:
    if @is_in_memory
      Hpricot(path)
    elsif @is_local_file
      Hpricot(File.open(path, "r") {|f| f.read })
    else
      Hpricot(open(path))
    end
  end

  # Read the stylesheet at +path+ and hand it to the CSS parser.
  #
  # Loading is best-effort: a stylesheet that cannot be read or parsed is
  # skipped so a single bad file does not abort processing of the document.
  def load_css_from_local_file!(path)
    css_block = File.read(path)
    @css_parser.add_block!(css_block, {:base_uri => @html_file})
  rescue StandardError
    nil
  end

  # Load the stylesheets passed in through the +css+ option.
  def load_css_from_options! # :nodoc:
    @css_files.each do |css_file|
      if local_uri?(css_file)
        load_css_from_local_file!(css_file)
      else
        @css_parser.load_uri!(css_file)
      end
    end
  end

  # Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
  #
  # The tags are removed from the document once they have been processed.
  def load_css_from_html! # :nodoc:
    if tags = @doc.search("link[@rel='stylesheet'], style")
      tags.each do |tag|
        markup = tag.to_s.strip

        if markup =~ /^\<link/i && tag.attributes['href'] && media_type_ok?(tag.attributes['media'])

          link_uri = Premailer.resolve_link(tag.attributes['href'].to_s, @html_file)
          if @is_local_file
            load_css_from_local_file!(link_uri)
          else
            @css_parser.load_uri!(link_uri)
          end

        elsif markup =~ /^\<style/i
          if @is_in_memory
            # no file location to resolve relative URIs against
            @css_parser.add_block!(tag.innerHTML)
          else
            @css_parser.add_block!(tag.innerHTML, :base_uri => URI.parse(@html_file))
          end
        end
      end
      tags.remove
    end
  end

  # Returns true when +media_types+ is blank, mentions screen, handheld or
  # all, or cannot be inspected at all.
  def media_type_ok?(media_types) # :nodoc:
    return true if media_types.nil? || media_types.empty?
    media_types.split(/[\s]+|,/).any? { |media_type| media_type.strip =~ /screen|handheld|all/i }
  rescue
    true
  end

  # Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
  # and append it to the <tt>head</tt>.
  #
  # <tt>doc</tt> is an Hpricot document and <tt>unmergable_rules</tt> is a CssParser::Parser.
  #
  # Returns an Hpricot document.
  def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
    styles = ''
    unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, specificity|
      styles << "#{selector} { #{declarations} }\n"
    end

    unless styles.empty?
      style_tag = "\n<style type=\"text/css\">\n#{styles}</style>\n"
      doc.search("head").append(style_tag)
    end
    doc
  end

  # Convert relative links to absolute links.
  #
  # Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
  # as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
  # The +link_query_string+ option, when set, is appended to <tt>a</tt> links.
  #
  # <tt>doc</tt> is an Hpricot document and <tt>base_uri</tt> is either a string or a URI.
  #
  # Returns an Hpricot document.
  def convert_inline_links(doc, base_uri) # :nodoc:
    base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

    append_qs = @options[:link_query_string] ||= ''

    ['href', 'src', 'background'].each do |attribute|
      tags = doc.search("*[@#{attribute}]")

      next if tags.empty?

      tags.each do |tag|
        value = tag.attributes[attribute]

        # skip links that look like they have merge tags
        # and mailto, ftp, etc...
        next if value =~ /^(\{|\[|<|\#|mailto:|ftp:|gopher:)/i

        merged = nil
        if value =~ /^http/i
          begin
            merged = URI.parse(value)
          rescue
            next
          end
        else
          begin
            merged = Premailer.resolve_link(value.to_s, base_uri)
          rescue
            begin
              # second attempt with the link escaped
              merged = Premailer.resolve_link(URI.escape(value.to_s), base_uri)
            rescue
              merged = nil
            end
          end
        end

        # leave a link that cannot be resolved untouched instead of
        # overwriting it with an empty value
        next if merged.nil?

        # make sure 'merged' is a URI
        merged = URI.parse(merged.to_s) unless merged.kind_of?(URI)

        # only append a querystring to <a> tags
        if tag.name =~ /^a$/i && !append_qs.empty?
          if merged.query
            merged.query = merged.query + '&' + append_qs
          else
            merged.query = append_qs
          end
        end
        tag[attribute] = merged.to_s

      end # end of each tag
    end # end of each attrs

    doc.search("*[@style]").each do |el|
      el['style'] = CssParser.convert_uris(el.attributes['style'].to_s, base_uri)
    end
    doc
  end

  # Replace double quotes with single quotes so +str+ can be used as the
  # value of a double-quoted attribute.
  def self.escape_string(str) # :nodoc:
    str.gsub(/"/, "'")
  end

  # Resolve +path+ against +base_path+.
  #
  # Returns a canonical URI string when +base_path+ is a URI or an
  # http(s)/ftp URL, otherwise an expanded file system path relative to the
  # directory of +base_path+.
  def self.resolve_link(path, base_path) # :nodoc:
    # work on a copy: the caller's string (often a live attribute value)
    # must not be modified
    path = path.strip
    if base_path.kind_of?(URI)
      Premailer.canonicalize(base_path.merge(path))
    elsif base_path.kind_of?(String) && base_path =~ /^(http[s]?|ftp):\/\//i
      Premailer.canonicalize(URI.parse(base_path).merge(path))
    else
      File.expand_path(path, File.dirname(base_path))
    end
  end

  # Normalize +uri+ and collapse <tt>.</tt> and <tt>..</tt> path segments.
  #
  # Returns a string.
  #
  # from http://www.ruby-forum.com/topic/140101
  def self.canonicalize(uri) # :nodoc:
    u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
    u.normalize!
    newpath = u.path
    # repeatedly remove "segment/../" pairs until none are left
    while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
      $1 == '..' ? match : ''
    } do end
    newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
    u.path = newpath
    u.to_s
  end

  # Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
  #
  # Inspects the CSS properties used in inline styles of the processed
  # document, and the HTML attributes and elements present in the document,
  # against the support chart. An entry is reported when its support level is
  # at or above the configured warn level.
  #
  # Returns an array of hashes with <tt>:message</tt>, <tt>:level</tt> and
  # <tt>:clients</tt> keys.
  def check_client_support # :nodoc:
    # the block form closes the file once the chart has been parsed
    @client_support ||= File.open(CLIENT_SUPPORT_FILE) { |f| YAML::load(f) }

    warnings = []
    properties = []

    # Get a list of CSS properties
    @processed_doc.search("*[@style]").each do |el|
      el.attributes['style'].to_s.scan(/([\w\-]+)[\s]*\:/i) do |match|
        properties.push(match[0])
      end
    end

    properties.uniq!

    property_support = @client_support['css_properties']
    properties.each do |prop|
      if property_support.include?(prop) &&
         property_support[prop].include?('support') &&
         property_support[prop]['support'] >= @options[:warn_level]
        warnings.push(support_warning("#{prop} CSS property", property_support[prop]))
      end
    end

    # each entry carries its own support data
    @client_support['attributes'].each do |attribute, data|
      next unless data['support'] >= @options[:warn_level]
      if @doc.search("*[@#{attribute}]").length > 0
        warnings.push(support_warning("#{attribute} HTML attribute", data))
      end
    end

    @client_support['elements'].each do |element, data|
      next unless data['support'] >= @options[:warn_level]
      # search for the element itself, not the literal word "element"
      if @doc.search(element).length > 0
        warnings.push(support_warning("#{element} HTML element", data))
      end
    end

    warnings
  end

  # Build one warning hash from a support chart entry.
  def support_warning(message, data) # :nodoc:
    {:message => message,
     :level => WARN_LABEL[data['support']],
     :clients => Array(data['unsupported_in']).join(', ')}
  end
end
479
+
480
+
481
+