akzhan-premailer 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/init.rb ADDED
@@ -0,0 +1 @@
1
+ require 'premailer'
@@ -0,0 +1,6 @@
1
+ require 'yaml'
2
+ require 'open-uri'
3
+ require 'nokogiri'
4
+ require 'css_parser'
5
+ require 'premailer/html_to_plain_text'
6
+ require 'premailer/premailer'
@@ -0,0 +1,81 @@
1
+ # coding: utf-8
2
+ require 'htmlentities'
3
+
4
+ # Support functions for Premailer
5
module HtmlToPlainText

  # Returns a plain-text rendering of +html+ with all HTML tags removed.
  #
  # Headings are underlined (H1 and H2 are also overlined), links are
  # rewritten as "text ( url )", list items are prefixed with "* ", and
  # paragraphs and line breaks become newlines.
  #
  # +html+::         markup to convert; the caller's string is not modified.
  # +line_length+::  upper bound for the rule drawn around headings.
  # +from_charset+:: accepted for backward compatibility; currently unused.
  #
  # TODO:
  #  - add support for DL, OL
  #  - wrap body text at +line_length+
  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
    # Work on a private copy so the in-place substitutions below never
    # touch the string the caller handed in.
    txt = html.to_s.dup

    # handle headings (H1-H6)
    txt.gsub!(/(<\/h[1-6]>)/i, "\n\\1") # move closing tags to new lines
    txt.gsub!(/[\s]*<h([1-6])[^>]*>[\s]*(.*)[\s]*<\/h[1-6]>/i) do
      # capture both groups before any nested gsub! resets $1/$2
      hlevel = $1.to_i
      htext  = $2

      htext.gsub!(/<br[\s]*\/?>/i, "\n") # handle <br>s
      htext.gsub!(/<\/?[^>]*>/i, '')     # strip tags

      # the rule is as wide as the longest heading line, capped at line_length
      hlength = htext.each_line.map { |l| l.strip.length }.max || 0
      hlength = line_length if hlength > line_length

      case hlevel
      when 1 # H1, asterisks above and below
        htext = ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength)
      when 2 # H2, dashes above and below
        htext = ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength)
      else   # H3-H6, dashes below
        htext = htext + "\n" + ('-' * hlength)
      end

      "\n\n" + htext + "\n\n"
    end

    # links -- non-greedy and confined to a single tag so that several
    # links on one line are each converted instead of being merged
    txt.gsub!(/<a\s[^>]*?href=\"([^\"]*)\"[^>]*>(.*?)<\/a>/im) do
      $2.strip + ' ( ' + $1.strip + ' )'
    end

    # lists -- TODO: should handle ordered lists
    txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
    # list not followed by a newline
    txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

    # paragraphs and line breaks
    txt.gsub!(/<\/p>/i, "\n\n")
    txt.gsub!(/<br[\/ ]*>/i, "\n")

    # strip remaining tags
    txt.gsub!(/<\/?[^>]*>/, '')

    # decode HTML entities only after the real tags are gone, otherwise
    # escaped text such as "&lt;b&gt;" would be stripped as markup
    txt = HTMLEntities.new.decode(txt)

    # remove linefeeds (\r\n and \r -> \n)
    txt.gsub!(/\r\n?/, "\n")

    # strip extra spaces
    txt.gsub!(/\302\240+/, " ") # non-breaking spaces -> spaces
    txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
    txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

    # no more than two consecutive newlines
    txt.gsub!(/[\n]{3,}/, "\n\n")

    txt.strip
  end
end
@@ -0,0 +1,506 @@
1
+ #!/usr/bin/ruby
2
+ #
3
+ # Premailer by Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2008-09
4
+ #
5
+ # Premailer processes HTML and CSS to improve e-mail deliverability.
6
+ #
7
+ # Premailer's main function is to render all CSS as inline <tt>style</tt>
8
+ # attributes. It also converts relative links to absolute links and checks
9
+ # the 'safety' of CSS properties against a CSS support chart.
10
+ #
11
+ # = Example
12
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
13
+ #
14
+ # # Write the HTML output
15
+ # fout = File.open("output.html", "w")
16
+ # fout.puts premailer.to_inline_css
17
+ # fout.close
18
+ #
19
+ # # Write the plain-text output
20
+ # fout = File.open("ouput.txt", "w")
21
+ # fout.puts premailer.to_plain_text
22
+ # fout.close
23
+ #
24
+ # # List any CSS warnings
25
+ # puts premailer.warnings.length.to_s + ' warnings found'
26
+ # premailer.warnings.each do |w|
27
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
28
+ # end
29
+ #
30
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
31
+ # puts premailer.to_inline_css
32
+ class Premailer
33
+ include HtmlToPlainText
34
+ include CssParser
35
+
36
+ VERSION = '1.5.5'
37
+
38
+ CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
39
+
40
+ RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i
41
+
42
+ # list of CSS attributes that can be rendered as HTML attributes
43
+ #
44
+ # TODO: too much repetition
45
+ # TODO: background=""
46
+ RELATED_ATTRIBUTES = {
47
+ 'h1' => {'text-align' => 'align'},
48
+ 'h2' => {'text-align' => 'align'},
49
+ 'h3' => {'text-align' => 'align'},
50
+ 'h4' => {'text-align' => 'align'},
51
+ 'h5' => {'text-align' => 'align'},
52
+ 'h6' => {'text-align' => 'align'},
53
+ 'p' => {'text-align' => 'align'},
54
+ 'div' => {'text-align' => 'align'},
55
+ 'blockquote' => {'text-align' => 'align'},
56
+ 'body' => {'background-color' => 'bgcolor'},
57
+ 'table' => {'background-color' => 'bgcolor'},
58
+ 'tr' => {'text-align' => 'align', 'background-color' => 'bgcolor'},
59
+ 'th' => {'text-align' => 'align', 'background-color' => 'bgcolor', 'vertical-align' => 'valign'},
60
+ 'td' => {'text-align' => 'align', 'background-color' => 'bgcolor', 'vertical-align' => 'valign'},
61
+ 'img' => {'float' => 'align'}
62
+ }
63
+
64
+ # URI of the HTML file used
65
+ attr_reader :html_file
66
+
67
+ # processed HTML document (Nokogiri)
68
+ attr_reader :processed_doc
69
+
70
+ # source HTML document (Nokogiri)
71
+ attr_reader :doc
72
+
73
+ module Warnings
74
+ NONE = 0
75
+ SAFE = 1
76
+ POOR = 2
77
+ RISKY = 3
78
+ end
79
+ include Warnings
80
+
81
+ WARN_LABEL = %w(NONE SAFE POOR RISKY)
82
+
83
+ # Create a new Premailer object.
84
+ #
85
+ # +html+ is the HTML data to process. Can be either an IO object, the URL of a
86
+ # remote file or a local path.
87
+ #
88
+ # ==== Options
89
+ # [+line_length+] Line length used by to_plain_text. Boolean, default is 65.
90
+ # [+warn_level+] What level of CSS compatibility warnings to show (see Warnings).
91
+ # [+link_query_string+] A string to append to every <a href=""> link. Do not include the initial +?+.
92
+ # [+base_url+] Used to calculate absolute URLs for local files.
93
+ # [+css+] Manually specify a CSS stylesheet.
94
+ # [+css_to_attributes+] Copy related CSS attributes into HTML attributes (e.g. +background-color+ to +bgcolor+)
95
+ def initialize(html, options = {})
96
+ @options = {:warn_level => Warnings::SAFE,
97
+ :line_length => 65,
98
+ :link_query_string => nil,
99
+ :base_url => nil,
100
+ :remove_classes => false,
101
+ :css => [],
102
+ :css_to_attributes => true,
103
+ :verbose => false,
104
+ :io_exceptions => false}.merge(options)
105
+ @html_file = html
106
+
107
+ @is_local_file = Premailer.local_data?(html)
108
+
109
+ @css_files = @options[:css]
110
+
111
+ @css_warnings = []
112
+
113
+ if @is_local_file and @options[:base_url]
114
+ @base_url = @options[:base_url]
115
+ elsif not @is_local_file
116
+ @html_file
117
+ end
118
+ @css_parser = CssParser::Parser.new({
119
+ :absolute_paths => true,
120
+ :import => true,
121
+ :io_exceptions => @options[:io_exceptions]
122
+ })
123
+
124
+ @doc = load_html(@html_file)
125
+ @html_charset = @doc.encoding
126
+ @processed_doc = @doc
127
+ @processed_doc = convert_inline_links(@processed_doc, @base_url) if @base_url
128
+ load_css_from_options!
129
+ load_css_from_html!
130
+ end
131
+
132
+ # Array containing a hash of CSS warnings.
133
+ def warnings
134
+ return [] if @options[:warn_level] == Warnings::NONE
135
+ @css_warnings = check_client_support if @css_warnings.empty?
136
+ @css_warnings
137
+ end
138
+
139
+ # Returns the original HTML as a string.
140
+ def to_s
141
+ @doc.to_html
142
+ end
143
+
144
+ # Converts the HTML document to a format suitable for plain-text e-mail.
145
+ #
146
+ # Returns a string.
147
+ def to_plain_text
148
+ html_src = ''
149
+ begin
150
+ html_src = @doc.search("body").inner_html
151
+ rescue
152
+ html_src = @doc.to_html
153
+ end
154
+ convert_to_text(html_src, @options[:line_length], @html_charset)
155
+ end
156
+
157
+ # Merge CSS into the HTML document.
158
+ #
159
+ # Returns a string.
160
+ def to_inline_css
161
+ doc = @processed_doc
162
+ unmergable_rules = CssParser::Parser.new
163
+
164
+ # Give all styles already in style attributes a specificity of 1000
165
+ # per http://www.w3.org/TR/CSS21/cascade.html#specificity
166
+ doc.search("*[@style]").each do |el|
167
+ el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
168
+ end
169
+
170
+ # Iterate through the rules and merge them into the HTML
171
+ @css_parser.each_selector(:all) do |selector, declaration, specificity|
172
+ # Save un-mergable rules separately
173
+ selector.gsub!(/:link([\s]|$)+/i, '')
174
+
175
+ # Convert element names to lower case
176
+ selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }
177
+
178
+ if selector =~ RE_UNMERGABLE_SELECTORS
179
+ unmergable_rules.add_rule_set!(RuleSet.new(selector, declaration))
180
+ else
181
+
182
+ doc.css(selector).each do |el|
183
+ if el.elem?
184
+ # Add a style attribute or append to the existing one
185
+ block = "[SPEC=#{specificity}[#{declaration}]]"
186
+ el['style'] = (el.attributes['style'].to_s ||= '') + ' ' + block
187
+ end
188
+ end
189
+ end
190
+ end
191
+
192
+ # Read STYLE attributes and perform folding
193
+ doc.search("*[@style]").each do |el|
194
+ style = el.attributes['style'].to_s
195
+
196
+ declarations = []
197
+
198
+ style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).each do |declaration|
199
+ rs = RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
200
+ declarations << rs
201
+ end
202
+
203
+ # Perform style folding
204
+ merged = CssParser.merge(declarations)
205
+ merged.expand_shorthand!
206
+
207
+ #if @options[:prefer_cellpadding] and (el.name == 'td' or el.name == 'th') and el['cellpadding'].nil?
208
+ # if cellpadding = equivalent_cellpadding(merged)
209
+ # el['cellpadding'] = cellpadding
210
+ # merged['padding-left'] = nil
211
+ # merged['padding-right'] = nil
212
+ # merged['padding-top'] = nil
213
+ # merged['padding-bottom'] = nil
214
+ # end
215
+ #end
216
+
217
+ # Duplicate CSS attributes as HTML attributes
218
+ if RELATED_ATTRIBUTES.has_key?(el.name)
219
+ RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
220
+ el[html_att] = merged[css_att].gsub(/;$/, '').strip if el[html_att].nil? and not merged[css_att].empty?
221
+ end
222
+ end
223
+
224
+ merged.create_dimensions_shorthand!
225
+
226
+ # write the inline STYLE attribute
227
+ el['style'] = Premailer.escape_string(merged.declarations_to_s)
228
+ end
229
+
230
+ doc = write_unmergable_css_rules(doc, unmergable_rules)
231
+
232
+ doc.search('*').remove_class if @options[:remove_classes]
233
+
234
+ @processed_doc = doc
235
+
236
+ doc.to_html
237
+ end
238
+
239
+
240
+ protected
241
+ # Load the HTML file and convert it into an Nokogiri document.
242
+ #
243
+ # Returns an Nokogiri document and a string with the HTML file's character set.
244
+ def load_html(path) # :nodoc:
245
+ if @options[:inline]
246
+ Nokogiri::HTML(path)
247
+ else
248
+ if @is_local_file
249
+ if path.is_a?(IO) || path.is_a?(StringIO)
250
+ Nokogiri::HTML(path.read)
251
+ else
252
+ Nokogiri::HTML(File.open(path, "r") {|f| f.read })
253
+ end
254
+ else
255
+ Nokogiri::HTML(open(path))
256
+ end
257
+ end
258
+ end
259
+
260
+ def load_css_from_local_file!(path)
261
+ css_block = ''
262
+ begin
263
+ File.open(path, "r") do |file|
264
+ while line = file.gets
265
+ css_block << line
266
+ end
267
+ end
268
+ @css_parser.add_block!(css_block, {:base_uri => @base_url})
269
+ rescue; end
270
+ end
271
+
272
+ def load_css_from_options! # :nodoc:
273
+ @css_files.each do |css_file|
274
+ if Premailer.local_data?(css_file)
275
+ load_css_from_local_file!(css_file)
276
+ else
277
+ @css_parser.load_uri!(css_file)
278
+ end
279
+ end
280
+ end
281
+
282
+ # Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
283
+ def load_css_from_html! # :nodoc:
284
+ if tags = @doc.search("link[@rel='stylesheet'], style")
285
+ tags.each do |tag|
286
+
287
+ if tag.to_s.strip =~ /^\<link/i and tag.attributes['href'] and media_type_ok?(tag.attributes['media'])
288
+
289
+ link_uri = Premailer.resolve_link(tag.attributes['href'].to_s, @html_file)
290
+ if Premailer.local_data?(link_uri)
291
+ puts "Loading css from local file: " + link_uri if @options[:verbose]
292
+ load_css_from_local_file!(link_uri)
293
+ else
294
+ puts "Loading css from uri: " + link_uri if @options[:verbose]
295
+ @css_parser.load_uri!(link_uri)
296
+ end
297
+
298
+ elsif tag.to_s.strip =~ /^\<style/i
299
+ if @html_file.is_a?(IO) || @html_file.is_a?(StringIO)
300
+ @css_parser.add_block!(tag.inner_html)
301
+ else
302
+ @css_parser.add_block!(tag.inner_html, :base_uri => URI.parse(@html_file))
303
+ end
304
+ end
305
+ end
306
+ tags.remove
307
+ end
308
+ end
309
+
310
+ def media_type_ok?(media_types) # :nodoc:
311
+ return true if media_types.nil? or media_types.empty?
312
+ return media_types.split(/[\s]+|,/).any? { |media_type| media_type.strip =~ /screen|handheld|all/i }
313
+ rescue
314
+ return true
315
+ end
316
+
317
+ # Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
318
+ # and write it into the <tt>body</tt>.
319
+ #
320
+ # <tt>doc</tt> is an Nokogiri document and <tt>unmergable_css_rules</tt> is a Css::RuleSet.
321
+ #
322
+ # Returns an Nokogiri document.
323
+ def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
324
+ styles = ''
325
+ unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, specificity|
326
+ styles += "#{selector} { #{declarations} }\n"
327
+ end
328
+
329
+ unless styles.empty?
330
+ style_tag = "\n<style type=\"text/css\">\n#{styles}</style>\n"
331
+ doc.css("head").children.last.after(style_tag)
332
+ end
333
+ doc
334
+ end
335
+
336
+ # Convert relative links to absolute links.
337
+ #
338
+ # Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
339
+ # as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
340
+ #
341
+ # <tt>doc</tt> is an Nokogiri document and <tt>base_uri</tt> is either a string or a URI.
342
+ #
343
+ # Returns an Nokogiri document.
344
+ def convert_inline_links(doc, base_uri) # :nodoc:
345
+ base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)
346
+
347
+ append_qs = @options[:link_query_string] ||= ''
348
+
349
+ ['href', 'src', 'background'].each do |attribute|
350
+ tags = doc.search("*[@#{attribute}]")
351
+
352
+ next if tags.empty?
353
+
354
+ tags.each do |tag|
355
+ # skip links that look like they have merge tags
356
+ # and mailto, ftp, etc...
357
+ if tag.attributes[attribute] =~ /^(\{|\[|<|\#|mailto:|ftp:|gopher:)/i
358
+ next
359
+ end
360
+
361
+ if tag.attributes[attribute] =~ /^http/i
362
+ begin
363
+ merged = URI.parse(tag.attributes[attribute])
364
+ rescue; next; end
365
+ else
366
+ begin
367
+ merged = Premailer.resolve_link(tag.attributes[attribute].to_s, base_uri)
368
+ rescue
369
+ begin
370
+ merged = Premailer.resolve_link(URI.escape(tag.attributes[attribute].to_s), base_uri)
371
+ rescue; end
372
+ end
373
+ end
374
+
375
+ # make sure 'merged' is a URI
376
+ merged = URI.parse(merged.to_s) unless merged.kind_of?(URI)
377
+
378
+ # only append a querystring to <a> tags
379
+ if tag.name =~ /^a$/i and not append_qs.empty?
380
+ if merged.query
381
+ merged.query = merged.query + '&' + append_qs
382
+ else
383
+ merged.query = append_qs
384
+ end
385
+ end
386
+ tag[attribute] = merged.to_s
387
+
388
+ end # end of each tag
389
+ end # end of each attrs
390
+
391
+ doc.search("*[@style]").each do |el|
392
+ el['style'] = CssParser.convert_uris(el.attributes['style'].to_s, base_uri)
393
+ end
394
+ doc
395
+ end
396
+
397
+ # here be deprecated methods
398
+ public
399
+
400
+ def local_uri?(uri) # :nodoc:
401
+ warn "[DEPRECATION] `local_uri?` is deprecated. Please use `Premailer.local_data?` instead."
402
+ Premailer.local_data?(uri)
403
+ end
404
+
405
+ # here be class methods
406
+
407
+ def self.escape_string(str) # :nodoc:
408
+ str.gsub(/"/, "'")
409
+ end
410
+
411
+ def self.resolve_link(path, base_path) # :nodoc:
412
+ path.strip!
413
+ resolved = nil
414
+ if path =~ /(http[s]?|ftp):\/\//i
415
+ resolved = path
416
+ return Premailer.canonicalize(resolved)
417
+ elsif base_path.kind_of?(URI)
418
+ resolved = base_path.merge(path)
419
+ return Premailer.canonicalize(resolved)
420
+ elsif base_path.kind_of?(String) and base_path =~ /^(http[s]?|ftp):\/\//i
421
+ resolved = URI.parse(base_path)
422
+ resolved = resolved.merge(path)
423
+ return Premailer.canonicalize(resolved)
424
+ else
425
+
426
+ return File.expand_path(path, File.dirname(base_path))
427
+ end
428
+ end
429
+
430
+ # Test the passed variable to see if we are in local or remote mode.
431
+ #
432
+ # IO objects return true, as do strings that look like URLs.
433
+ def self.local_data?(data)
434
+ if data.is_a?(IO) || data.is_a?(StringIO)
435
+ return true
436
+ elsif data =~ /^(http|https|ftp)\:\/\//i
437
+ return false
438
+ else
439
+ return true
440
+ end
441
+ end
442
+
443
+ # from http://www.ruby-forum.com/topic/140101
444
+ def self.canonicalize(uri) # :nodoc:
445
+ u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
446
+ u.normalize!
447
+ newpath = u.path
448
+ while newpath.gsub!(%r{([^/]+)/\.\./?}) { |match|
449
+ $1 == '..' ? match : ''
450
+ } do end
451
+ newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
452
+ u.path = newpath
453
+ u.to_s
454
+ end
455
+
456
+ # Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
457
+ def check_client_support # :nodoc:
458
+ @client_support = @client_support ||= YAML::load(File.open(CLIENT_SUPPORT_FILE))
459
+
460
+ warnings = []
461
+ properties = []
462
+
463
+ # Get a list off CSS properties
464
+ @processed_doc.search("*[@style]").each do |el|
465
+ style_url = el.attributes['style'].to_s.gsub(/([\w\-]+)[\s]*\:/i) do |s|
466
+ properties.push($1)
467
+ end
468
+ end
469
+
470
+ properties.uniq!
471
+
472
+ property_support = @client_support['css_properties']
473
+ properties.each do |prop|
474
+ if property_support.include?(prop) and
475
+ property_support[prop].include?('support') and
476
+ property_support[prop]['support'] >= @options[:warn_level]
477
+ warnings.push({:message => "#{prop} CSS property",
478
+ :level => WARN_LABEL[property_support[prop]['support']],
479
+ :clients => property_support[prop]['unsupported_in'].join(', ')})
480
+ end
481
+ end
482
+
483
+ @client_support['attributes'].each do |attribute, data|
484
+ next unless data['support'] >= @options[:warn_level]
485
+ if @doc.search("*[@#{attribute}]").length > 0
486
+ warnings.push({:message => "#{attribute} HTML attribute",
487
+ :level => WARN_LABEL[property_support[prop]['support']],
488
+ :clients => property_support[prop]['unsupported_in'].join(', ')})
489
+ end
490
+ end
491
+
492
+ @client_support['elements'].each do |element, data|
493
+ next unless data['support'] >= @options[:warn_level]
494
+ if @doc.search("element").length > 0
495
+ warnings.push({:message => "#{element} HTML element",
496
+ :level => WARN_LABEL[property_support[prop]['support']],
497
+ :clients => property_support[prop]['unsupported_in'].join(', ')})
498
+ end
499
+ end
500
+
501
+ return warnings
502
+ end
503
+ end
504
+
505
+
506
+