akzhan-premailer 1.5.5

Sign up to get free protection for your applications and to get access to all the features.
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'premailer'
@@ -0,0 +1,6 @@
1
+ require 'yaml'
2
+ require 'open-uri'
3
+ require 'nokogiri'
4
+ require 'css_parser'
5
+ require 'premailer/html_to_plain_text'
6
+ require 'premailer/premailer'
@@ -0,0 +1,81 @@
1
+ # coding: utf-8
2
+ require 'htmlentities'
3
+
4
+ # Support functions for Premailer
5
module HtmlToPlainText

  # Returns +html+ as plain text with all HTML tags removed.
  #
  # +html+::         markup to convert; +nil+ is treated as an empty string.
  # +line_length+::  upper bound for the width of the rules drawn around
  #                  headings (body text itself is not wrapped).
  # +from_charset+:: accepted for backward compatibility; currently unused.
  #
  # Headings are underlined (H1 and H2 are also overlined), links are
  # rendered as <tt>text ( url )</tt>, list items get a <tt>* </tt> prefix
  # and paragraphs / line breaks become newlines.
  #
  # TODO:
  #  - add support for DL, OL
  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
    he = HTMLEntities.new

    # gsub! edits in place, so work on a private copy of the caller's string
    txt = html.to_s.dup

    # handle headings (H1-H6)
    txt.gsub!(/(<\/h[1-6]>)/i, "\n\\1") # move closing tags to new lines
    txt.gsub!(/[\s]*<h([1-6]+)[^>]*>[\s]*(.*)[\s]*<\/h[1-6]+>/i) do
      hlevel = $1.to_i

      htext = $2
      htext.gsub!(/<br[\s]*\/?>/i, "\n") # handle <br>s
      htext.gsub!(/<\/?[^>]*>/i, '')     # strip tags

      # Width of the widest heading line, capped at line_length. Entities are
      # still encoded at this point, so measure a decoded copy of each line.
      hlength = htext.each_line.map { |l| he.decode(l).strip.length }.max || 0
      hlength = line_length if hlength > line_length

      case hlevel
      when 1 # H1, asterisks above and below
        htext = ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength)
      when 2 # H2, dashes above and below
        htext = ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength)
      else   # H3-H6, dashes below
        htext = htext + "\n" + ('-' * hlength)
      end

      "\n\n" + htext + "\n\n"
    end

    # links -- non-greedy so that several links on one line stay separate,
    # and "<a" must be followed by whitespace so <abbr>, <area> etc. are ignored
    txt.gsub!(/<a\s[^>]*?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/i) do
      $2.strip + ' ( ' + $1.strip + ' )'
    end

    # lists -- TODO: should handle ordered lists
    txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
    # list not followed by a newline
    txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

    # paragraphs and line breaks
    txt.gsub!(/<\/p>/i, "\n\n")
    txt.gsub!(/<br[\/ ]*>/i, "\n")

    # strip remaining tags
    txt.gsub!(/<\/?[^>]*>/, '')

    # Decode HTML entities only now that the markup is gone; decoding first
    # would turn text such as "&lt;b&gt;" into a tag that is then stripped.
    txt = he.decode(txt)

    # remove linefeeds (\r\n and \r -> \n)
    txt.gsub!(/\r\n?/, "\n")

    # strip extra spaces
    txt.gsub!(/\u00A0+/, " ")   # non-breaking spaces -> spaces
    txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
    txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

    # no more than two consecutive newlines
    txt.gsub!(/[\n]{3,}/, "\n\n")

    txt.strip
  end
end
@@ -0,0 +1,506 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-09
4
+ #
5
+ # Premailer processes HTML and CSS to improve e-mail deliverability.
6
+ #
7
+ # Premailer's main function is to render all CSS as inline <tt>style</tt>
8
+ # attributes. It also converts relative links to absolute links and checks
9
+ # the 'safety' of CSS properties against a CSS support chart.
10
+ #
11
+ # = Example
12
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
13
+ #
14
+ # # Write the HTML output
15
+ # fout = File.open("output.html", "w")
16
+ # fout.puts premailer.to_inline_css
17
+ # fout.close
18
+ #
19
+ # # Write the plain-text output
20
+ # fout = File.open("output.txt", "w")
21
+ # fout.puts premailer.to_plain_text
22
+ # fout.close
23
+ #
24
+ # # List any CSS warnings
25
+ # puts premailer.warnings.length.to_s + ' warnings found'
26
+ # premailer.warnings.each do |w|
27
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
28
+ # end
29
+ #
30
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
31
+ # puts premailer.to_inline_css
32
class Premailer
  include HtmlToPlainText
  include CssParser

  VERSION = '1.5.5'

  CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'

  # Selectors that cannot be expressed as an inline style (pseudo-classes,
  # pseudo-elements and at-rules); they are emitted in a <style> element instead.
  RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i

  # list of CSS attributes that can be rendered as HTML attributes
  #
  # TODO: too much repetition
  # TODO: background=""
  RELATED_ATTRIBUTES = {
    'h1' => {'text-align' => 'align'},
    'h2' => {'text-align' => 'align'},
    'h3' => {'text-align' => 'align'},
    'h4' => {'text-align' => 'align'},
    'h5' => {'text-align' => 'align'},
    'h6' => {'text-align' => 'align'},
    'p' => {'text-align' => 'align'},
    'div' => {'text-align' => 'align'},
    'blockquote' => {'text-align' => 'align'},
    'body' => {'background-color' => 'bgcolor'},
    'table' => {'background-color' => 'bgcolor'},
    'tr' => {'text-align' => 'align', 'background-color' => 'bgcolor'},
    'th' => {'text-align' => 'align', 'background-color' => 'bgcolor', 'vertical-align' => 'valign'},
    'td' => {'text-align' => 'align', 'background-color' => 'bgcolor', 'vertical-align' => 'valign'},
    'img' => {'float' => 'align'}
  }

  # URI of the HTML file used
  attr_reader :html_file

  # processed HTML document (Nokogiri)
  attr_reader :processed_doc

  # source HTML document (Nokogiri)
  attr_reader :doc

  module Warnings
    NONE = 0
    SAFE = 1
    POOR = 2
    RISKY = 3
  end
  include Warnings

  # Labels for the Warnings levels, indexed by level value.
  WARN_LABEL = %w(NONE SAFE POOR RISKY)

  # Create a new Premailer object.
  #
  # +html+ is the HTML data to process. Can be either an IO object, the URL of a
  # remote file or a local path.
  #
  # ==== Options
  # [+line_length+] Line length used by to_plain_text. Integer, default is 65.
  # [+warn_level+] What level of CSS compatibility warnings to show (see Warnings).
  # [+link_query_string+] A string to append to every <a href=""> link. Do not include the initial +?+.
  # [+base_url+] Used to calculate absolute URLs for local files.
  # [+css+] Manually specify a CSS stylesheet.
  # [+css_to_attributes+] Copy related CSS attributes into HTML attributes (e.g. +background-color+ to +bgcolor+)
  # [+remove_classes+] Remove +class+ attributes from the output of to_inline_css. Default is false.
  # [+verbose+] Print a line for each stylesheet that is loaded. Default is false.
  # [+io_exceptions+] Passed to the CSS parser; also makes failures to read a local stylesheet raise. Default is false.
  # [+inline+] When set, +html+ is handed to Nokogiri as-is instead of being opened as a path or URL.
  def initialize(html, options = {})
    @options = {:warn_level => Warnings::SAFE,
                :line_length => 65,
                :link_query_string => nil,
                :base_url => nil,
                :remove_classes => false,
                :css => [],
                :css_to_attributes => true,
                :verbose => false,
                :io_exceptions => false}.merge(options)
    @html_file = html

    @is_local_file = Premailer.local_data?(html)

    @css_files = @options[:css]

    @css_warnings = []

    if @is_local_file && @options[:base_url]
      @base_url = @options[:base_url]
    elsif !@is_local_file
      # a remote document is its own base for resolving relative links
      @base_url = @html_file
    end

    @css_parser = CssParser::Parser.new({
      :absolute_paths => true,
      :import => true,
      :io_exceptions => @options[:io_exceptions]
    })

    @doc = load_html(@html_file)
    @html_charset = @doc.encoding
    @processed_doc = @doc
    @processed_doc = convert_inline_links(@processed_doc, @base_url) if @base_url
    load_css_from_options!
    load_css_from_html!
  end

  # Array of CSS warnings; each entry is a hash with the keys
  # <tt>:message</tt>, <tt>:level</tt> and <tt>:clients</tt>.
  #
  # Always empty when the +warn_level+ option is Warnings::NONE.
  def warnings
    return [] if @options[:warn_level] == Warnings::NONE
    @css_warnings = check_client_support if @css_warnings.empty?
    @css_warnings
  end

  # Returns the source document serialised as an HTML string.
  def to_s
    @doc.to_html
  end

  # Converts the HTML document to a format suitable for plain-text e-mail.
  #
  # Returns a string.
  def to_plain_text
    html_src = ''
    begin
      html_src = @doc.search("body").inner_html
    rescue
      # fall back to the whole document if the body cannot be extracted
      html_src = @doc.to_html
    end
    convert_to_text(html_src, @options[:line_length], @html_charset)
  end

  # Merge CSS into the HTML document.
  #
  # Returns a string.
  def to_inline_css
    doc = @processed_doc
    unmergable_rules = CssParser::Parser.new

    # Give all styles already in style attributes a specificity of 1000
    # per http://www.w3.org/TR/CSS21/cascade.html#specificity
    doc.search("*[@style]").each do |el|
      el['style'] = '[SPEC=1000[' + el['style'].to_s + ']]'
    end

    # Iterate through the rules and merge them into the HTML
    @css_parser.each_selector(:all) do |selector, declaration, specificity|
      # Work on copies so the parser's own selector strings are left untouched.
      # :link is dropped so that e.g. a:link is merged like a plain "a" rule.
      selector = selector.gsub(/:link([\s]|$)+/i, '')

      # Convert element names to lower case
      selector = selector.gsub(/([\s]|^)([\w]+)/) { $1.to_s + $2.to_s.downcase }

      if selector =~ RE_UNMERGABLE_SELECTORS
        # Save un-mergable rules separately
        unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration))
      else
        doc.css(selector).each do |el|
          next unless el.elem?
          # Add a style attribute or append to the existing one
          block = "[SPEC=#{specificity}[#{declaration}]]"
          el['style'] = el['style'].to_s + ' ' + block
        end
      end
    end

    # Read STYLE attributes and perform folding
    doc.search("*[@style]").each do |el|
      style = el['style'].to_s

      declarations = style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).map do |spec, decl|
        CssParser::RuleSet.new(nil, decl.to_s, spec.to_i)
      end

      # Perform style folding
      merged = CssParser.merge(declarations)
      merged.expand_shorthand!

      #if @options[:prefer_cellpadding] and (el.name == 'td' or el.name == 'th') and el['cellpadding'].nil?
      #  if cellpadding = equivalent_cellpadding(merged)
      #    el['cellpadding'] = cellpadding
      #    merged['padding-left'] = nil
      #    merged['padding-right'] = nil
      #    merged['padding-top'] = nil
      #    merged['padding-bottom'] = nil
      #  end
      #end

      # Duplicate CSS attributes as HTML attributes (only when the
      # css_to_attributes option is on, and never over an existing attribute)
      if @options[:css_to_attributes] && RELATED_ATTRIBUTES.has_key?(el.name)
        RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
          next unless el[html_att].nil?
          value = merged[css_att]
          next if value.empty?
          el[html_att] = value.gsub(/;$/, '').strip
        end
      end

      merged.create_dimensions_shorthand!

      # write the inline STYLE attribute
      el['style'] = Premailer.escape_string(merged.declarations_to_s)
    end

    doc = write_unmergable_css_rules(doc, unmergable_rules)

    doc.search('*').remove_class if @options[:remove_classes]

    @processed_doc = doc

    doc.to_html
  end


protected
  # Load the HTML file and convert it into a Nokogiri document.
  #
  # With the +inline+ option +path+ is parsed directly; otherwise it is read
  # from an IO object, a local path or (for remote data) fetched via open-uri.
  #
  # Returns a Nokogiri document.
  def load_html(path) # :nodoc:
    return Nokogiri::HTML(path) if @options[:inline]

    if @is_local_file
      if path.is_a?(IO) || path.is_a?(StringIO)
        Nokogiri::HTML(path.read)
      else
        Nokogiri::HTML(File.open(path, "r") {|f| f.read })
      end
    else
      # URI#open (open-uri) instead of Kernel#open, which no longer accepts
      # URLs on Ruby 3; the block form also closes the handle after parsing.
      URI.parse(path).open { |io| Nokogiri::HTML(io) }
    end
  end

  # Read the stylesheet at +path+ and add it to the CSS parser.
  #
  # Failures are ignored (reported when +verbose+ is set) unless the
  # +io_exceptions+ option is on, in which case they are re-raised.
  def load_css_from_local_file!(path)
    css_block = File.open(path, "r") { |file| file.read }
    @css_parser.add_block!(css_block, {:base_uri => @base_url})
  rescue => e
    raise if @options[:io_exceptions]
    puts "Unable to load css from local file: #{path} (#{e.message})" if @options[:verbose]
  end

  # Load every stylesheet named in the +css+ option.
  def load_css_from_options! # :nodoc:
    @css_files.each do |css_file|
      if Premailer.local_data?(css_file)
        load_css_from_local_file!(css_file)
      else
        @css_parser.load_uri!(css_file)
      end
    end
  end

  # Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
  #
  # The tags are removed from the document once they have been read.
  def load_css_from_html! # :nodoc:
    tags = @doc.search("link[@rel='stylesheet'], style")

    tags.each do |tag|
      case tag.name.to_s.downcase
      when 'link'
        next unless tag['href'] && media_type_ok?(tag['media'])

        link_uri = Premailer.resolve_link(tag['href'].to_s, @html_file)
        if Premailer.local_data?(link_uri)
          puts "Loading css from local file: " + link_uri if @options[:verbose]
          load_css_from_local_file!(link_uri)
        else
          puts "Loading css from uri: " + link_uri if @options[:verbose]
          @css_parser.load_uri!(link_uri)
        end
      when 'style'
        # IO sources and inline markup have no location to use as a base URI
        if @options[:inline] || @html_file.is_a?(IO) || @html_file.is_a?(StringIO)
          @css_parser.add_block!(tag.inner_html)
        else
          @css_parser.add_block!(tag.inner_html, :base_uri => URI.parse(@html_file))
        end
      end
    end

    tags.remove
  end

  # True when +media_types+ (the value of a <tt>media</tt> attribute, or nil)
  # is blank or names at least one of screen, handheld or all.
  def media_type_ok?(media_types) # :nodoc:
    # to_s so that nil, strings and attribute objects are all handled alike
    media = media_types.to_s
    return true if media.strip.empty?
    media.split(/[\s]+|,/).any? { |media_type| media_type.strip =~ /screen|handheld|all/i }
  end

  # Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
  # and append it to the <tt>head</tt>, or insert it at the start of the
  # <tt>body</tt> when the document has no <tt>head</tt>.
  #
  # <tt>doc</tt> is a Nokogiri document and <tt>unmergable_rules</tt> is a CssParser::Parser.
  #
  # Returns a Nokogiri document.
  def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
    styles = ''
    unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, specificity|
      styles << "#{selector} { #{declarations} }\n"
    end

    unless styles.empty?
      style_tag = "\n<style type=\"text/css\">\n#{styles}</style>\n"
      if (head = doc.at_css('head'))
        # add_child also works when <head> has no children left
        head.add_child(style_tag)
      elsif (body = doc.at_css('body'))
        first = body.children.first
        first ? first.before(style_tag) : body.add_child(style_tag)
      end
    end
    doc
  end

  # Convert relative links to absolute links.
  #
  # Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
  # as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
  # Links that cannot be resolved are left unchanged.
  #
  # <tt>doc</tt> is a Nokogiri document and <tt>base_uri</tt> is either a string or a URI.
  #
  # Returns a Nokogiri document.
  def convert_inline_links(doc, base_uri) # :nodoc:
    base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

    append_qs = @options[:link_query_string].to_s

    ['href', 'src', 'background'].each do |attribute|
      doc.search("*[@#{attribute}]").each do |tag|
        raw = tag[attribute].to_s

        # skip links that look like they have merge tags
        # and mailto, ftp, etc...
        next if raw =~ /^(\{|\[|<|\#|mailto:|ftp:|gopher:)/i

        begin
          if raw =~ /^http/i
            merged = URI.parse(raw)
          else
            merged = begin
              Premailer.resolve_link(raw, base_uri)
            rescue
              # retry once with unsafe characters percent-encoded
              Premailer.resolve_link(escape_uri(raw), base_uri)
            end
            # make sure 'merged' is a URI
            merged = URI.parse(merged.to_s) unless merged.kind_of?(URI)
          end
        rescue
          # unresolvable link: keep the original attribute value
          next
        end

        # only append a querystring to <a> tags
        if tag.name =~ /^a$/i and not append_qs.empty?
          merged.query = merged.query ? merged.query + '&' + append_qs : append_qs
        end
        tag[attribute] = merged.to_s
      end # end of each tag
    end # end of each attrs

    doc.search("*[@style]").each do |el|
      el['style'] = CssParser.convert_uris(el['style'].to_s, base_uri)
    end
    doc
  end

  # Percent-encode +str+; URI.escape is used where it still exists and the
  # default URI parser's escape otherwise.
  def escape_uri(str) # :nodoc:
    URI.respond_to?(:escape) ? URI.escape(str) : URI::DEFAULT_PARSER.escape(str)
  end

  # here be deprecated methods
public

  def local_uri?(uri) # :nodoc:
    warn "[DEPRECATION] `local_uri?` is deprecated.  Please use `Premailer.local_data?` instead."
    Premailer.local_data?(uri)
  end

  # here be class methods

  # Replace double quotes with single quotes so +str+ can sit inside a
  # double-quoted HTML attribute.
  def self.escape_string(str) # :nodoc:
    str.gsub(/"/, "'")
  end

  # Resolve +path+ against +base_path+.
  #
  # Returns a canonicalized URL string when +path+ is already an absolute
  # http/https/ftp URL or +base_path+ is a URI or URL string; otherwise
  # returns +path+ expanded relative to the directory of +base_path+.
  def self.resolve_link(path, base_path) # :nodoc:
    # strip a copy: the caller's string must not be modified
    path = path.to_s.strip

    # anchored, so a URL inside a query string does not make a relative
    # path look absolute
    if path =~ /\A(http[s]?|ftp):\/\//i
      Premailer.canonicalize(path)
    elsif base_path.kind_of?(URI)
      Premailer.canonicalize(base_path.merge(path))
    elsif base_path.kind_of?(String) and base_path =~ /^(http[s]?|ftp):\/\//i
      Premailer.canonicalize(URI.parse(base_path).merge(path))
    else
      File.expand_path(path, File.dirname(base_path))
    end
  end

  # Test the passed variable to see if we are in local or remote mode.
  #
  # IO objects return true; strings that start with an http, https or ftp
  # URL scheme return false; anything else returns true.
  def self.local_data?(data)
    return true if data.is_a?(IO) || data.is_a?(StringIO)

    # \A rather than ^ so only the very start of the data is tested
    data.to_s !~ /\A(http|https|ftp)\:\/\//i
  end

  # Normalize +uri+ and collapse <tt>.</tt> and <tt>..</tt> path segments.
  #
  # Returns a string.
  #
  # from http://www.ruby-forum.com/topic/140101
  def self.canonicalize(uri) # :nodoc:
    u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
    u.normalize!
    newpath = u.path.dup
    # repeatedly drop "segment/.." pairs until none are left
    while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
      $1 == '..' ? match : ''
    } do end
    newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
    u.path = newpath
    u.to_s
  end

  # Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
  #
  # Returns an array of hashes with the keys <tt>:message</tt>,
  # <tt>:level</tt> and <tt>:clients</tt>.
  def check_client_support # :nodoc:
    @client_support ||= YAML.load_file(CLIENT_SUPPORT_FILE)

    warnings = []
    properties = []

    # Get a list of CSS properties
    @processed_doc.search("*[@style]").each do |el|
      properties.concat(el['style'].to_s.scan(/([\w\-]+)[\s]*\:/i).flatten)
    end

    properties.uniq!

    property_support = @client_support['css_properties']
    properties.each do |prop|
      if property_support.include?(prop) and
          property_support[prop].include?('support') and
          property_support[prop]['support'] >= @options[:warn_level]
        warnings.push({:message => "#{prop} CSS property",
                       :level => WARN_LABEL[property_support[prop]['support']],
                       :clients => property_support[prop]['unsupported_in'].join(', ')})
      end
    end

    # NOTE(review): attribute and element entries are assumed to carry
    # 'unsupported_in' like the css_properties entries do -- confirm against
    # client_support.yaml. Array() keeps a missing key from raising.
    @client_support['attributes'].each do |attribute, data|
      next unless data['support'] >= @options[:warn_level]
      if @doc.search("*[@#{attribute}]").length > 0
        warnings.push({:message => "#{attribute} HTML attribute",
                       :level => WARN_LABEL[data['support']],
                       :clients => Array(data['unsupported_in']).join(', ')})
      end
    end

    @client_support['elements'].each do |element, data|
      next unless data['support'] >= @options[:warn_level]
      if @doc.search(element).length > 0
        warnings.push({:message => "#{element} HTML element",
                       :level => WARN_LABEL[data['support']],
                       :clients => Array(data['unsupported_in']).join(', ')})
      end
    end

    return warnings
  end
end
504
+
505
+
506
+