podkot-premailer 1.7.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +11 -0
  3. data/.travis.yml +8 -0
  4. data/.yardopts +9 -0
  5. data/Gemfile +11 -0
  6. data/LICENSE.md +11 -0
  7. data/README.md +103 -0
  8. data/bin/premailer +7 -0
  9. data/init.rb +1 -0
  10. data/lib/premailer.rb +10 -0
  11. data/lib/premailer/adapter.rb +63 -0
  12. data/lib/premailer/adapter/hpricot.rb +199 -0
  13. data/lib/premailer/adapter/nokogiri.rb +223 -0
  14. data/lib/premailer/executor.rb +100 -0
  15. data/lib/premailer/html_to_plain_text.rb +102 -0
  16. data/lib/premailer/premailer.rb +550 -0
  17. data/lib/premailer/version.rb +4 -0
  18. data/local-premailer +9 -0
  19. data/misc/client_support.yaml +230 -0
  20. data/premailer.gemspec +26 -0
  21. data/rakefile.rb +71 -0
  22. data/test/files/base.html +142 -0
  23. data/test/files/chars.html +6 -0
  24. data/test/files/contact_bg.png +0 -0
  25. data/test/files/dialect.png +0 -0
  26. data/test/files/dots_end.png +0 -0
  27. data/test/files/dots_h.gif +0 -0
  28. data/test/files/html4.html +12 -0
  29. data/test/files/html_with_uri.html +9 -0
  30. data/test/files/import.css +13 -0
  31. data/test/files/inc/2009-placeholder.png +0 -0
  32. data/test/files/iso-8859-2.html +1 -0
  33. data/test/files/iso-8859-5.html +8 -0
  34. data/test/files/no_css.html +11 -0
  35. data/test/files/noimport.css +13 -0
  36. data/test/files/styles.css +106 -0
  37. data/test/files/xhtml.html +11 -0
  38. data/test/future_tests.rb +50 -0
  39. data/test/helper.rb +40 -0
  40. data/test/test_adapter.rb +29 -0
  41. data/test/test_html_to_plain_text.rb +155 -0
  42. data/test/test_links.rb +185 -0
  43. data/test/test_misc.rb +278 -0
  44. data/test/test_premailer.rb +277 -0
  45. data/test/test_warnings.rb +95 -0
  46. metadata +216 -0
@@ -0,0 +1,223 @@
1
+ require 'nokogiri'
2
+
3
+ class Premailer
4
+ module Adapter
5
+ # Nokogiri adapter
6
+ module Nokogiri
7
+
8
# Merge the parsed CSS rules into inline <tt>style</tt> attributes.
#
# Operates on <tt>@processed_doc</tt>: every rule known to <tt>@css_parser</tt>
# is appended to the matching elements' <tt>style</tt> attribute wrapped in a
# <tt>[SPEC=n[...]]</tt> marker, the markers are then folded into a single
# declaration list per element, and rules that cannot be inlined (see
# <tt>Premailer::RE_UNMERGABLE_SELECTORS</tt>) are written into a
# <tt>style</tt> element. Optional clean-up steps are driven by
# <tt>@options</tt> (<tt>:remove_scripts</tt>, <tt>:remove_classes</tt>,
# <tt>:remove_comments</tt>, <tt>:remove_ids</tt>, <tt>:css_to_attributes</tt>).
#
# @return [String] the processed document serialized as XHTML when
#   <tt>is_xhtml?</tt> is true, as HTML otherwise.
def to_inline_css
  doc = @processed_doc
  @unmergable_rules = CssParser::Parser.new

  # Give all styles already in style attributes a specificity of 1000
  # per http://www.w3.org/TR/CSS21/cascade.html#specificity
  doc.search("*[@style]").each do |el|
    el['style'] = "[SPEC=1000[#{el.attributes['style']}]]"
  end

  # Iterate through the rules and merge them into the HTML
  @css_parser.each_selector(:all) do |selector, declaration, specificity|
    # Drop the :link pseudo-class, keeping any whitespace that followed it
    selector.gsub!(/:link([\s]*)+/i) {|m| $1 }

    # Convert element names to lower case
    selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }

    if selector =~ Premailer::RE_UNMERGABLE_SELECTORS
      # Save un-mergable rules separately
      @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration)) unless @options[:preserve_styles]
    else
      begin
        # Change single ID CSS selectors into xpath so that we can match more
        # than one element. Added to work around dodgy generated code.
        selector.gsub!(/\A\#([\w_\-]+)\Z/, '*[@id=\1]')

        doc.search(selector).each do |el|
          if el.elem? and (el.name != 'head' and el.parent.name != 'head')
            # Add a style attribute or append to the existing one
            block = "[SPEC=#{specificity}[#{declaration}]]"
            el['style'] = el.attributes['style'].to_s + ' ' + block
          end
        end
      rescue ::Nokogiri::SyntaxError, RuntimeError, ArgumentError
        $stderr.puts "CSS syntax error with selector: #{selector}" if @options[:verbose]
        next
      end
    end
  end

  # Remove script tags
  doc.search("script").remove if @options[:remove_scripts]

  # Read STYLE attributes and perform folding
  doc.search("*[@style]").each do |el|
    style = el.attributes['style'].to_s

    # Each marker yields [specificity, declarations]
    declarations = style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).map do |spec, css|
      CssParser::RuleSet.new(nil, css.to_s, spec.to_i)
    end

    # Perform style folding
    merged = CssParser.merge(declarations)
    merged.expand_shorthand!

    # Duplicate CSS attributes as HTML attributes, but only when the
    # :css_to_attributes option allows it (it defaults to true).
    if @options[:css_to_attributes] and Premailer::RELATED_ATTRIBUTES.has_key?(el.name)
      Premailer::RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
        # never overwrite an attribute that is already present
        next unless el[html_att].nil?
        next if merged[css_att].empty?
        el[html_att] = merged[css_att].gsub(/url\('(.*)'\)/,'\1').gsub(/;$/, '').strip
      end
    end

    # write the inline STYLE attribute
    el['style'] = Premailer.escape_string(merged.declarations_to_s).split(';').map(&:strip).sort.join('; ')
  end

  doc = write_unmergable_css_rules(doc, @unmergable_rules)

  if @options[:remove_classes] or @options[:remove_comments]
    doc.traverse do |el|
      if el.comment? and @options[:remove_comments]
        el.remove
      elsif el.element?
        el.remove_attribute('class') if @options[:remove_classes]
      end
    end
  end

  if @options[:remove_ids]
    # find all anchor's targets and hash them
    targets = []
    doc.search("a[@href^='#']").each do |el|
      target = el.get_attribute('href')[1..-1]
      targets << target
      el.set_attribute('href', "#" + Digest::MD5.hexdigest(target))
    end
    # hash ids that are links target, delete others
    doc.search("*[@id]").each do |el|
      id = el.get_attribute('id')
      if targets.include?(id)
        el.set_attribute('id', Digest::MD5.hexdigest(id))
      else
        el.remove_attribute('id')
      end
    end
  end

  @processed_doc = doc
  if is_xhtml?
    # we don't want to encode carriage returns
    @processed_doc.to_xhtml(:encoding => nil).gsub(/&\#xD;/i, "\r")
  else
    @processed_doc.to_html
  end
end
120
+
121
# Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
# and write it into the document.
#
# The element is inserted as the first child of <tt>head</tt> when the
# document has one, otherwise as the first child of <tt>body</tt>; when
# neither exists it is prepended to the document's inner HTML. Nothing is
# written when there are no un-mergable rules.
#
# <tt>doc</tt> is an Nokogiri document and <tt>unmergable_rules</tt> is an
# object responding to <tt>each_selector</tt> (a CssParser::Parser in
# <tt>to_inline_css</tt>).
#
# @return [::Nokogiri::XML] a document.
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
  styles = ''
  unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, specificity|
    styles += "#{selector} { #{declarations} }\n"
  end
  return doc if styles.empty?

  style_tag = "<style type=\"text/css\">\n#{styles}</style>"

  # at_css returns nil when the element is missing (search returns a
  # node set, which is truthy even when empty).
  container = doc.at_css('head') || doc.at_css('body')
  if container
    fragment = ::Nokogiri::XML.fragment(style_tag)
    if container.children.empty?
      # NodeSet#before needs an existing first node to anchor on
      container.add_child(fragment)
    else
      container.children.before(fragment)
    end
  else
    doc.inner_html = style_tag + doc.inner_html
  end

  doc
end
145
+
146
+
147
# Render the source document as plain text for a text-only e-mail part.
#
# The inner HTML of the <body> element is used when it can be read and is
# not empty; otherwise the whole document is converted.
#
# @return [String] a plain text.
def to_plain_text
  body_html = begin
    @doc.at("body").inner_html
  rescue
    # no <body> (or it could not be read): fall back to the full document
    ''
  end

  body_html = @doc.to_html if body_html.nil? || body_html.empty?
  convert_to_text(body_html, @options[:line_length], @html_encoding)
end
161
+
162
# Serialize the unprocessed source document (<tt>@doc</tt>).
#
# @return [String] XHTML when <tt>is_xhtml?</tt> is true, HTML otherwise.
def to_s
  return @doc.to_xhtml(:encoding => nil) if is_xhtml?

  @doc.to_html(:encoding => nil)
end
171
+
172
# Load the HTML input and convert it into an Nokogiri document.
#
# +input+ is used directly when it responds to +read+ or when the
# <tt>:with_html_string</tt> / <tt>:inline</tt> options are set; otherwise it
# is opened as a local file (when <tt>@is_local_file</tt>, which also sets
# <tt>@base_dir</tt>) or handed to +open+. Handles opened here are closed
# again; an IO passed in by the caller is read but left open.
#
# @return [::Nokogiri::XML, nil] a document, or nil when there is no input.
def load_html(input) # :nodoc:
  thing = nil

  # TODO: duplicate options
  if @options[:with_html_string] or @options[:inline] or input.respond_to?(:read)
    thing = input
  elsif @is_local_file
    @base_dir = File.dirname(input)
    # block form closes the handle as soon as the content is read
    thing = File.open(input, 'r') { |file| file.read }
  else
    thing = open(input) { |remote| remote.read }
  end

  thing = thing.read if thing.respond_to?(:read)

  return nil unless thing

  # Anchored so that only real 1.8.x / 1.9.x interpreters match
  # (an unanchored /1.9/ also matches e.g. "2.1.9").
  ruby_19 = !!(RUBY_VERSION =~ /\A1\.9/)
  ruby_18 = !!(RUBY_VERSION =~ /\A1\.8/)

  # Handle HTML entities
  if @options[:replace_html_entities] == true and thing.is_a?(String)
    entity_table = if ruby_19
      HTML_ENTITIES["1.9"]
    elsif ruby_18
      HTML_ENTITIES["1.8"]
    end

    if entity_table
      entity_table.each do |entity, replacement|
        thing.gsub! entity, replacement
      end
    end
  end

  # Default encoding is ASCII-8BIT (binary) per http://groups.google.com/group/nokogiri-talk/msg/0b81ef0dc180dc74
  # However, we really don't want to hardcode this. ASCII-8BIT should be the default, but not the only option.
  if thing.is_a?(String) and ruby_19
    thing = thing.force_encoding(@options[:input_encoding]).encode!
    ::Nokogiri::HTML(thing, nil, @options[:input_encoding]) {|c| c.recover }
  else
    default_encoding = RUBY_PLATFORM == 'java' ? nil : 'BINARY'
    ::Nokogiri::HTML(thing, nil, @options[:input_encoding] || default_encoding) {|c| c.recover }
  end
end
220
+
221
+ end
222
+ end
223
+ end
@@ -0,0 +1,100 @@
1
require 'fcntl'
require 'optparse'
require 'premailer'
3
+
4
# Command-line front end: parses the options, reads the input from the first
# argument or from a pipe on stdin, and prints the inlined HTML (or the
# plain-text rendering with --mode txt) to stdout.

# defaults
options = {
  :base_url => nil,
  :link_query_string => nil,
  :remove_classes => false,
  :verbose => false,
  :line_length => 65
}

mode = :html

opts = OptionParser.new do |parser|
  parser.banner = "Improve the rendering of HTML emails by making CSS inline among other things. Takes a path to a local file, a URL or a pipe as input.\n\n"
  parser.define_head "Usage: premailer <optional uri|optional path> [options]"
  parser.separator ""
  parser.separator "Examples:"
  parser.separator " premailer http://example.com/ > out.html"
  parser.separator " premailer http://example.com/ --mode txt > out.txt"
  parser.separator " cat input.html | premailer -q src=email > out.html"
  parser.separator " premailer ./public/index.html"
  parser.separator ""
  parser.separator "Options:"

  parser.on("--mode MODE", [:html, :txt], "Output: html or txt") do |v|
    mode = v
  end

  parser.on("-b", "--base-url STRING", String, "Base URL, useful for local files") do |v|
    options[:base_url] = v
  end

  parser.on("-q", "--query-string STRING", String, "Query string to append to links") do |v|
    options[:link_query_string] = v
  end

  parser.on("--css FILE,FILE", Array, "Additional CSS stylesheets") do |v|
    options[:css] = v
  end

  parser.on("-r", "--remove-classes", "Remove HTML classes") do |v|
    options[:remove_classes] = v
  end

  # This flag controls script removal, so it must set :remove_scripts
  # (not :remove_classes).
  parser.on("-j", "--remove-scripts", "Remove <script> elements") do |v|
    options[:remove_scripts] = v
  end

  parser.on("-l", "--line-length N", Integer, "Line length for plaintext (default: #{options[:line_length].to_s})") do |v|
    options[:line_length] = v
  end

  parser.on("-d", "--io-exceptions", "Abort on I/O errors") do |v|
    options[:io_exceptions] = v
  end

  parser.on("-v", "--verbose", "Print additional information at runtime") do |v|
    options[:verbose] = v
  end

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-V", "--version", "Show version") do
    puts "Premailer #{Premailer::VERSION} (c) 2008-2010 Alex Dunae"
    exit
  end
end
opts.parse!

$stderr.puts "Processing in #{mode} mode with options #{options.inspect}" if options[:verbose]

premailer = nil
input = nil

# Use the first remaining argument when stdin is a terminal (or its fcntl
# flags are 0); otherwise read the HTML from stdin.
if $stdin.tty? or STDIN.fcntl(Fcntl::F_GETFL, 0) == 0
  input = ARGV.shift
else
  input = $stdin
  options[:with_html_string] = true
end

if input
  premailer = Premailer.new(input, options)
else
  # nothing to process: show the usage text and signal failure
  puts opts
  exit 1
end

if mode == :txt
  print premailer.to_plain_text
else
  print premailer.to_inline_css
end

exit
@@ -0,0 +1,102 @@
1
+ # coding: utf-8
2
+ require 'htmlentities'
3
+
4
# Support functions for Premailer
module HtmlToPlainText

  # Returns a plain-text rendering of +html+ with all HTML tags removed.
  #
  # Images are replaced by their alt text, links become "text ( url )",
  # headings are underlined (H1/H2 also overlined), list items get a "* "
  # bullet and the result is word-wrapped.
  #
  # @param html [String] the HTML to convert.
  # @param line_length [Integer] maximum line length used for wrapping and
  #   for heading rules.
  # @param from_charset [String] not used by this implementation; kept so
  #   existing callers continue to work.
  # @return [String] the plain text, stripped of leading/trailing whitespace.
  #
  # TODO: add support for DL, OL
  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
    txt = html

    # decode HTML entities
    he = HTMLEntities.new
    txt = he.decode(txt)

    # replace image by their alt attribute; both <img ...> and <img ... />
    # are accepted, and the match never leaves the img tag itself
    txt.gsub!(/<img[^>]+?alt=\"([^\"]*)\"[^>]*>/i, '\1')
    txt.gsub!(/<img[^>]+?alt='([^\']*)\'[^>]*>/i, '\1')

    # links
    txt.gsub!(/<a.+?href=\"(mailto:)?([^\"]*)\"[^>]*>((.|\s)+?)<\/a>/i) do |s|
      $3.strip + ' ( ' + $2.strip + ' )'
    end

    txt.gsub!(/<a.+?href='(mailto:)?([^\']*)\'[^>]*>((.|\s)+?)<\/a>/i) do |s|
      $3.strip + ' ( ' + $2.strip + ' )'
    end


    # handle headings (H1-H6)
    txt.gsub!(/(<\/h[1-6]>)/i, "\n\\1") # move closing tags to new lines
    txt.gsub!(/[\s]*<h([1-6]+)[^>]*>[\s]*(.*)[\s]*<\/h[1-6]+>/i) do |s|
      hlevel = $1.to_i

      htext = $2
      htext.gsub!(/<br[\s]*\/?>/i, "\n") # handle <br>s
      htext.gsub!(/<\/?[^>]*>/i, '') # strip tags

      # determine maximum line length
      hlength = 0
      htext.each_line { |l| llength = l.strip.length; hlength = llength if llength > hlength }
      hlength = line_length if hlength > line_length

      case hlevel
        when 1 # H1, asterisks above and below
          htext = ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength)
        when 2 # H2, dashes above and below
          htext = ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength)
        else # H3-H6, dashes below
          htext = htext + "\n" + ('-' * hlength)
      end

      "\n\n" + htext + "\n\n"
    end

    # wrap spans
    txt.gsub!(/(<\/span>)[\s]+(<span)/mi, '\1 \2')

    # lists -- TODO: should handle ordered lists
    txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
    # list not followed by a newline
    txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

    # paragraphs and line breaks
    txt.gsub!(/<\/p>/i, "\n\n")
    txt.gsub!(/<br[\/ ]*>/i, "\n")

    # strip remaining tags
    txt.gsub!(/<\/?[^>]*>/, '')

    txt = word_wrap(txt, line_length)

    # remove linefeeds (\r\n and \r -> \n)
    txt.gsub!(/\r\n?/, "\n")

    # strip extra spaces
    txt.gsub!(/\302\240+/, " ") # non-breaking spaces -> spaces
    txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
    txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

    # no more than two consecutive newlines
    txt.gsub!(/[\n]{3,}/, "\n\n")

    # no more than two consecutive spaces
    txt.gsub!(/ {2,}/, " ")

    # the word messes up the parens
    txt.gsub!(/\([ \n](http[^)]+)[\n ]\)/) do |s|
      "( " + $1 + " )"
    end

    txt.strip
  end

  # Wraps every line of +txt+ that is longer than +line_length+ at
  # whitespace; shorter lines are returned untouched.
  #
  # Taken from Rails' word_wrap helper (http://api.rubyonrails.org/classes/ActionView/Helpers/TextHelper.html#method-i-word_wrap)
  #
  # @return [String] the wrapped text, lines joined with "\n".
  def word_wrap(txt, line_length)
    txt.split("\n").collect do |line|
      line.length > line_length ? line.gsub(/(.{1,#{line_length}})(\s+|$)/, "\\1\n").strip : line
    end * "\n"
  end
end
@@ -0,0 +1,550 @@
1
+ # Premailer processes HTML and CSS to improve e-mail deliverability.
2
+ #
3
+ # Premailer's main function is to render all CSS as inline <tt>style</tt>
4
+ # attributes. It also converts relative links to absolute links and checks
5
+ # the 'safety' of CSS properties against a CSS support chart.
6
+ #
7
+ # ## Example of use
8
+ #
9
+ # ```ruby
10
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
11
+ #
12
+ # # Write the HTML output
13
+ # fout = File.open("output.html", "w")
14
+ # fout.puts premailer.to_inline_css
15
+ # fout.close
16
+ #
17
+ # # Write the plain-text output
18
+ # fout = File.open("output.txt", "w")
19
+ # fout.puts premailer.to_plain_text
20
+ # fout.close
21
+ #
22
+ # # List any CSS warnings
23
+ # puts premailer.warnings.length.to_s + ' warnings found'
24
+ # premailer.warnings.each do |w|
25
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
26
+ # end
27
+ #
28
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
29
+ # puts premailer.to_inline_css
30
+ # ```
31
+ #
32
+ require 'premailer/version'
33
+
34
+ class Premailer
35
+ include HtmlToPlainText
36
+ include CssParser
37
+
38
+ CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
39
+
40
+ # Unmergable selectors regexp.
41
+ RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i
42
+ # Reset selectors regexp.
43
+ RE_RESET_SELECTORS = /^(\:\#outlook|body.*|\.ReadMsgBody|\.ExternalClass|img|\#backgroundTable)$/
44
+
45
# list of HTMLEntities to fix
# source: http://stackoverflow.com/questions/2812781/how-to-convert-webpage-apostrophe-8217-to-ascii-39-in-ruby-1-
#
# Keyed by Ruby version family ("1.8" / "1.9"); each table maps a sequence
# found in the input to its plain ASCII replacement. Every key appears once
# (a repeated hash key is silently overwritten and triggers a Ruby warning).
HTML_ENTITIES = {
  "1.8" => {
    "\342\200\231" => "'",
    "\342\200\246" => "...",
    "\342\200\176" => "'",
    "\342\200\177" => "'",
    "\342\200\230" => "'",
    "\342\200\232" => ',',
    "\342\200\233" => "'",
    "\342\200\234" => '"',
    "\342\200\235" => '"',
    "\342\200\041" => '-',
    "\342\200\174" => '-',
    "\342\200\220" => '-',
    "\342\200\223" => '-',
    "\342\200\224" => '--',
    "\342\200\225" => '--',
    "\342\200\042" => '--'
  },
  "1.9" => {
    "&#8217;" => "'",
    "&#8230;" => "...",
    "&#8216;" => "'",
    "&#8218;" => ',',
    "&#8219;" => "'",
    "&#8220;" => '"',
    "&#8221;" => '"',
    "&#8208;" => '-',
    "&#8211;" => '-',
    "&#8212;" => '--',
    "&#8213;" => '--'
  }
}
81
+
82
# CSS properties that can also be written as HTML attributes, keyed by
# element name; each value maps a CSS property to the HTML attribute that
# receives its value.
#
# @todo background=""
RELATED_ATTRIBUTES = %w(h1 h2 h3 h4 h5 h6 p div blockquote).inject({}) do |map, tag|
  # block-level text containers only carry their alignment over
  map[tag] = {'text-align' => 'align'}
  map
end.merge(
  'body' => {'background-color' => 'bgcolor'},
  'table' => {
    'background-color' => 'bgcolor',
    'background-image' => 'background',
    '-premailer-width' => 'width',
    '-premailer-height' => 'height',
    '-premailer-cellpadding' => 'cellpadding',
    '-premailer-cellspacing' => 'cellspacing'
  },
  'tr' => {
    'text-align' => 'align',
    'background-color' => 'bgcolor',
    '-premailer-height' => 'height'
  },
  'th' => {
    'text-align' => 'align',
    'background-color' => 'bgcolor',
    'vertical-align' => 'valign',
    '-premailer-width' => 'width',
    '-premailer-height' => 'height'
  },
  'td' => {
    'text-align' => 'align',
    'background-color' => 'bgcolor',
    'vertical-align' => 'valign',
    '-premailer-width' => 'width',
    '-premailer-height' => 'height'
  },
  'img' => {'float' => 'align'}
)
126
+
127
+ # URI of the HTML file used
128
+ attr_reader :html_file
129
+
130
+ # base URL used to resolve links
131
+ attr_reader :base_url
132
+
133
+ # base directory used to resolve links for local files
134
+ # @return [String] base directory
135
+ attr_reader :base_dir
136
+
137
+ # unmergeable CSS rules to be preserved in the head (CssParser)
138
+ attr_reader :unmergable_rules
139
+
140
+ # processed HTML document (Hpricot/Nokogiri)
141
+ attr_reader :processed_doc
142
+
143
+ # source HTML document (Hpricot/Nokogiri)
144
+ attr_reader :doc
145
+
146
# Warning levels, from lowest to highest:
# NONE (0) no warnings, SAFE (1), POOR (2), RISKY (3).
module Warnings
  NONE, SAFE, POOR, RISKY = 0, 1, 2, 3
end
157
+ include Warnings
158
+
159
+ # Warning level names
160
+ WARN_LABEL = %w(NONE SAFE POOR RISKY)
161
+
162
+ # Create a new Premailer object.
163
+ #
164
+ # @param html is the HTML data to process. It can be either an IO object, the URL of a
165
+ # remote file, a local path or a raw HTML string. If passing an HTML string you
166
+ # must set the with_html_string option to true.
167
+ #
168
+ # @param [Hash] options the options to handle html with.
169
+ # @option options [FixNum] :line_length Line length used by to_plain_text. Default is 65.
170
+ # @option options [FixNum] :warn_level What level of CSS compatibility warnings to show (see {Premailer::Warnings}).
171
+ # @option options [String] :link_query_string A string to append to every <tt>a href=""</tt> link. Do not include the initial <tt>?</tt>.
172
+ # @option options [String] :base_url Used to calculate absolute URLs for local files.
173
+ # @option options [Array(String)] :css Manually specify CSS stylesheets.
174
+ # @option options [Boolean] :css_to_attributes Copy related CSS attributes into HTML attributes (e.g. background-color to bgcolor)
175
+ # @option options [String] :css_string Pass CSS as a string
176
+ # @option options [Boolean] :remove_ids Remove ID attributes whenever possible and convert IDs used as anchors to hashed to avoid collisions in webmail programs. Default is false.
177
+ # @option options [Boolean] :remove_classes Remove class attributes. Default is false.
178
+ # @option options [Boolean] :remove_comments Remove html comments. Default is false.
179
+ # @option options [Boolean] :remove_scripts Remove <tt>script</tt> elements. Default is true.
180
+ # @option options [Boolean] :preserve_styles Whether to preserve any <tt>link rel=stylesheet</tt> and <tt>style</tt> elements. Default is false.
181
+ # @option options [Boolean] :preserve_reset Whether to preserve styles associated with the MailChimp reset code.
182
+ # @option options [Boolean] :with_html_string Whether the html param should be treated as a raw string.
183
+ # @option options [Boolean] :verbose Whether to print errors and warnings to <tt>$stderr</tt>. Default is false.
184
+ # @option options [Boolean] :include_link_tags Whether to include css from <tt>link rel=stylesheet</tt> tags. Default is true.
185
+ # @option options [Boolean] :include_style_tags Whether to include css from <tt>style</tt> tags. Default is true.
186
+ # @option options [String] :input_encoding Manually specify the source documents encoding. This is a good idea.
187
+ # @option options [Boolean] :replace_html_entities Convert HTML entities to actual characters. Default is false.
188
+ # @option options [Symbol] :adapter Which HTML parser to use, either <tt>:nokogiri</tt> or <tt>:hpricot</tt>. Default is <tt>:hpricot</tt>.
189
def initialize(html, options = {})
  # Built-in defaults; anything passed by the caller wins.
  defaults = {
    :warn_level => Warnings::SAFE,
    :line_length => 65,
    :link_query_string => nil,
    :base_url => nil,
    :remove_classes => false,
    :remove_ids => false,
    :remove_comments => false,
    :remove_scripts => true,
    :css => [],
    :css_to_attributes => true,
    :with_html_string => false,
    :css_string => nil,
    :preserve_styles => false,
    :preserve_reset => true,
    :verbose => false,
    :debug => false,
    :io_exceptions => false,
    :include_link_tags => true,
    :include_style_tags => true,
    :input_encoding => 'ASCII-8BIT',
    :replace_html_entities => false,
    :adapter => Adapter.use,
  }
  @options = defaults.merge(options)

  @html_file = html
  @is_local_file = @options[:with_html_string] || Premailer.local_data?(html)
  @css_files = [@options[:css]].flatten
  @css_warnings = []

  @base_url = nil
  @base_dir = nil
  @unmergable_rules = nil

  # An explicit :base_url is consumed (removed from @options); without one,
  # a remote input serves as its own base.
  if @options[:base_url]
    @base_url = URI.parse(@options.delete(:base_url))
  else
    @base_url = URI.parse(@html_file) unless @is_local_file
  end

  parser_options = {
    :absolute_paths => true,
    :import => true,
    :io_exceptions => @options[:io_exceptions]
  }
  @css_parser = CssParser::Parser.new(parser_options)

  # Mix the selected adapter's methods into the class before loading the document
  @adapter_class = Adapter.find @options[:adapter]
  self.class.send(:include, @adapter_class)

  @doc = load_html(@html_file)
  @processed_doc = @doc
  @processed_doc = convert_inline_links(@processed_doc, @base_url) if @base_url

  query_string = options[:link_query_string]
  @processed_doc = append_query_string(@processed_doc, query_string) if query_string

  load_css_from_options!
  load_css_from_html!
end
251
+
252
# CSS compatibility warnings for the processed document.
#
# Always empty when the :warn_level option is Warnings::NONE; otherwise the
# list is computed by check_client_support while the cached list is empty.
#
# @return [Array(Hash)] Array of warnings.
def warnings
  if @options[:warn_level] == Warnings::NONE
    []
  else
    @css_warnings = check_client_support if @css_warnings.empty?
    @css_warnings
  end
end
259
+
260
+ protected
261
# Read a local stylesheet and hand its content to load_css_from_string.
#
# A leading "file:" prefix is ignored. Loading is best-effort: a file that
# cannot be read (or CSS that cannot be added) is skipped, with a message on
# $stderr when the :verbose option is set.
#
# @param path [String] path of the stylesheet; the argument is not modified.
# @return the result of load_css_from_string, or nil on failure.
def load_css_from_local_file!(path)
  # work on a copy: the path may be a caller-owned (possibly frozen) string
  local_path = path.sub(/\Afile:/, '')
  load_css_from_string(File.read(local_path))
rescue StandardError => e
  $stderr.puts "Could not load css from local file: #{path} (#{e.message})" if @options[:verbose]
  nil
end
274
+
275
# Add a block of CSS to the parser, resolved against the current base URL
# and base directory and limited to the screen and handheld media types.
def load_css_from_string(css_string)
  block_options = {
    :base_uri => @base_url,
    :base_dir => @base_dir,
    :only_media_types => [:screen, :handheld]
  }
  @css_parser.add_block!(css_string, block_options)
end
278
+
279
# Load the CSS supplied through the options: the :css_string block first,
# then every entry of @css_files (local files are read from disk, anything
# else is fetched by the CSS parser).
#
# @private
def load_css_from_options! # :nodoc:
  inline_css = @options[:css_string]
  load_css_from_string(inline_css) if inline_css

  @css_files.each do |stylesheet|
    unless Premailer.local_data?(stylesheet)
      @css_parser.load_uri!(stylesheet)
      next
    end

    load_css_from_local_file!(stylesheet)
  end
end
291
+
292
# Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
#
# <tt>link</tt> tags are honoured when the :include_link_tags option is set
# and their media type is acceptable (see media_type_ok?); <tt>style</tt>
# tags when :include_style_tags is set. Unless :preserve_styles is set, the
# matched tags are removed from the document afterwards.
def load_css_from_html! # :nodoc:
  if tags = @doc.search("link[@rel='stylesheet'], style")
    tags.each do |tag|
      if tag.to_s.strip =~ /^\<link/i && tag.attributes['href'] && media_type_ok?(tag.attributes['media']) && @options[:include_link_tags]
        # A user might want to <link /> to a local css file that is also mirrored on the site
        # but the local one is different (e.g. newer) than the live file, premailer will now choose the local file
        link_uri = nil
        if tag.attributes['href'].to_s.include? @base_url.to_s and @html_file.kind_of?(String)
          link_uri = File.join(File.dirname(@html_file), tag.attributes['href'].to_s.sub!(@base_url.to_s, ''))
        end

        # if the file does not exist locally, try to grab the remote reference
        # (File.exist? -- the File.exists? alias was removed in Ruby 3.2)
        if link_uri.nil? or not File.exist?(link_uri)
          link_uri = Premailer.resolve_link(tag.attributes['href'].to_s, @html_file)
        end

        if Premailer.local_data?(link_uri)
          $stderr.puts "Loading css from local file: " + link_uri if @options[:verbose]
          load_css_from_local_file!(link_uri)
        else
          $stderr.puts "Loading css from uri: " + link_uri if @options[:verbose]
          @css_parser.load_uri!(link_uri, {:only_media_types => [:screen, :handheld]})
        end

      elsif tag.to_s.strip =~ /^\<style/i && @options[:include_style_tags]
        @css_parser.add_block!(tag.inner_html, :base_uri => @base_url, :base_dir => @base_dir, :only_media_types => [:screen, :handheld])
      end
    end
    tags.remove unless @options[:preserve_styles]
  end
end
324
+
325
+
326
+
327
+ # here be deprecated methods
328
+ public
329
# Deprecated instance-level alias: prints a deprecation warning and
# delegates to Premailer.local_data?.
#
# @private
# @deprecated
def local_uri?(uri) # :nodoc:
  deprecation = "[DEPRECATION] `local_uri?` is deprecated. Please use `Premailer.local_data?` instead."
  warn deprecation
  Premailer.local_data?(uri)
end
335
+
336
+ # here be instance methods
337
+
338
# Decide whether a stylesheet's media attribute allows it to be used.
#
# A missing or empty value is accepted; otherwise at least one of the
# whitespace/comma separated entries must mention screen, handheld or all.
# Any error raised while inspecting the value counts as acceptance.
#
# @private
def media_type_ok?(media_types)
  if media_types.nil? or media_types.empty?
    true
  else
    media_types.split(/[\s]+|,/).any? { |candidate| candidate.strip =~ /screen|handheld|all/i }
  end
rescue
  true
end
345
+
346
# Append a query string to the <tt>href</tt> of every <tt>a</tt> element.
#
# Links are left alone when they are empty, start with one of
# <tt># { [ < %</tt>, point at a host other than the one of
# <tt>@base_url</tt>, use a scheme other than http/https, or cannot be
# parsed as a URI. Diagnostics go to <tt>$stderr</tt> when the :verbose
# option is set, so stdout stays free for the generated document.
#
# @param doc the document to modify (must respond to <tt>search</tt>).
# @param qs [String, nil] query string; leading question marks are ignored
#   and the argument itself is not modified.
# @return the same +doc+.
def append_query_string(doc, qs)
  return doc if qs.nil?

  # work on a cleaned copy instead of mutating the caller's string
  qs = qs.to_s.gsub(/^[\?]*/, '').strip
  return doc if qs.empty?

  begin
    current_host = @base_url.host
  rescue
    current_host = nil
  end

  $stderr.puts "Attempting to append_query_string: #{qs}" if @options[:verbose]

  doc.search('a').each do |el|
    href = el.attributes['href'].to_s.strip
    next if href.nil? or href.empty?

    next if href[0,1] =~ /[\#\{\[\<\%]/ # don't bother with anchors or special-looking links

    begin
      href = URI.parse(href)

      if current_host and href.host != nil and href.host != current_host
        $stderr.puts "Skipping append_query_string for: #{href.to_s} because host is no good" if @options[:verbose]
        next
      end

      if href.scheme and href.scheme != 'http' and href.scheme != 'https'
        $stderr.puts "Skipping append_query_string for: #{href.to_s} because scheme is no good" if @options[:verbose]
        next
      end

      if href.query and not href.query.empty?
        href.query = href.query + '&amp;' + qs
      else
        href.query = qs
      end

      el['href'] = href.to_s
    rescue URI::Error => e
      $stderr.puts "Skipping append_query_string for: #{href.to_s} (#{e.message})" if @options[:verbose]
      next
    end

  end
  doc
end
394
+
395
# Tell whether the source document declares an XHTML doctype.
#
# Only the first three lines of the serialized document are inspected.
# With the :debug option the decision and the inspected text are printed
# to $stderr.
def is_xhtml?
  leading_lines = @doc.to_html.strip.split("\n")[0..2]
  intro = leading_lines.join(' ')
  xhtml = (intro =~ /w3c\/\/[\s]*dtd[\s]+xhtml/i) ? true : false
  $stderr.puts "Is XHTML? #{xhtml.inspect}\nChecked:\n#{intro}" if @options[:debug]
  xhtml
end
402
+
403
# Convert relative links to absolute links.
#
# Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
# as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
# Values that look like merge tags or use a non-web scheme (mailto:, tel:,
# data:, cid:, ...) are left untouched.
#
# <tt>doc</tt> is the parsed document and <tt>base_uri</tt> is either a string or a URI.
#
# Returns the same document.
def convert_inline_links(doc, base_uri) # :nodoc:
  base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

  ['href', 'src', 'background'].each do |attribute|
    tags = doc.search("*[@#{attribute}]")

    next if tags.empty?

    tags.each do |tag|
      # skip links that look like they have merge tags
      # and mailto, ftp, etc...
      if tag.attributes[attribute].to_s =~ /^([\%\<\{\#\[]|data:|tel:|file:|sms:|callto:|facetime:|mailto:|ftp:|gopher:|cid:)/i
        next
      end

      merged = nil
      if tag.attributes[attribute].to_s =~ /^http/i
        begin
          merged = URI.parse(tag.attributes[attribute].to_s)
        rescue; next; end
      else
        begin
          merged = Premailer.resolve_link(tag.attributes[attribute].to_s, base_uri)
        rescue
          begin
            # Second attempt with the value percent-escaped. URI.escape was
            # removed in Ruby 3.0; the default parser offers the same escape.
            raw = tag.attributes[attribute].to_s
            escaped = URI.respond_to?(:escape) ? URI.escape(raw) : URI::DEFAULT_PARSER.escape(raw)
            merged = Premailer.resolve_link(escaped, base_uri)
          rescue; end
        end
      end

      # make sure 'merged' is a URI
      # NOTE: when both attempts above failed, merged is still nil and the
      # attribute ends up as an empty string.
      merged = URI.parse(merged.to_s) unless merged.kind_of?(URI)
      tag[attribute] = merged.to_s
    end # end of each tag
  end # end of each attrs

  doc.search("*[@style]").each do |el|
    el['style'] = CssParser.convert_uris(el.attributes['style'].to_s, base_uri)
  end
  doc
end
453
+
454
+
455
# Replace every double quote in +str+ with a single quote.
#
# @private
def self.escape_string(str) # :nodoc:
  str.tr('"', "'")
end
459
+
460
# Resolve +path+ against +base_path+.
#
# An absolute http(s)/ftp/file URL is only canonicalized. Otherwise the path
# is merged into +base_path+ when that is a URI (or a String holding an
# http(s)/ftp/file URL), and expanded relative to the directory of
# +base_path+ in every other case.
#
# @param path [String] link to resolve; surrounding whitespace is ignored
#   and the argument itself is not modified.
# @param base_path [URI, String] URI or file path the link is relative to.
# @return [String] canonical URL or absolute file path.
# @private
def self.resolve_link(path, base_path) # :nodoc:
  # strip a copy: the caller's string may be frozen or shared
  path = path.strip

  if path =~ /\A(?:(https?|ftp|file):)\/\//i
    Premailer.canonicalize(path)
  elsif base_path.kind_of?(URI)
    Premailer.canonicalize(base_path.merge(path))
  elsif base_path.kind_of?(String) and base_path =~ /\A(?:(?:https?|ftp|file):)\/\//i
    Premailer.canonicalize(URI.parse(base_path).merge(path))
  else
    File.expand_path(path, File.dirname(base_path))
  end
end
478
+
479
# Decide whether +data+ is local (as opposed to a remote resource).
#
# IO and StringIO objects and file:// URLs are local; http(s):// and ftp://
# URLs are not; anything else is treated as local.
def self.local_data?(data)
  if data.is_a?(IO) || data.is_a?(StringIO)
    true
  elsif data =~ /\Afile:\/\//i
    true
  else
    !(data =~ /\A(?:(https?|ftp):)\/\//i)
  end
end
488
+
489
# Normalize a URI and collapse "segment/.." and "/./" sequences in its path.
#
# from http://www.ruby-forum.com/topic/140101
def self.canonicalize(uri) # :nodoc:
  parsed = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
  parsed.normalize!

  path = parsed.path
  # keep removing "name/../" pairs until a pass changes nothing
  loop do
    changed = path.gsub!(%r{([^/]+)/\.\./?}) { |match|
      $1 == '..' ? match : ''
    }
    break unless changed
  end

  without_dots = path.gsub(%r{/\./}, '/')
  parsed.path = without_dots.sub(%r{/\.\z}, '/')
  parsed.to_s
end
501
+
502
# Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
#
# Collects the CSS properties used in inline <tt>style</tt> attributes of the
# processed document, plus the HTML attributes and elements present in the
# source document, and reports every one whose support level in the chart is
# at or above the :warn_level option.
#
# @return [Array(Hash)] one hash per finding with the keys :message, :level
#   and :clients.
def check_client_support # :nodoc:
  # block form so the chart's file handle is closed after parsing
  @client_support ||= File.open(CLIENT_SUPPORT_FILE) { |chart| YAML::load(chart) }

  found = []
  properties = []

  # Get a list of CSS properties
  @processed_doc.search("*[@style]").each do |el|
    el.attributes['style'].to_s.gsub(/([\w\-]+)[\s]*\:/i) do |s|
      properties.push($1)
    end
  end

  properties.uniq!

  property_support = @client_support['css_properties']
  properties.each do |prop|
    if property_support.include?(prop) and
        property_support[prop].include?('support') and
        property_support[prop]['support'] >= @options[:warn_level]
      found.push({:message => "#{prop} CSS property",
                  :level => WARN_LABEL[property_support[prop]['support']],
                  :clients => property_support[prop]['unsupported_in'].join(', ')})
    end
  end

  @client_support['attributes'].each do |attribute, data|
    next unless data['support'] >= @options[:warn_level]
    if @doc.search("*[@#{attribute}]").length > 0
      found.push({:message => "#{attribute} HTML attribute",
                  :level => WARN_LABEL[data['support']],
                  :clients => data['unsupported_in'].join(', ')})
    end
  end

  @client_support['elements'].each do |element, data|
    next unless data['support'] >= @options[:warn_level]
    if @doc.search(element).length > 0
      found.push({:message => "#{element} HTML element",
                  :level => WARN_LABEL[data['support']],
                  :clients => data['unsupported_in'].join(', ')})
    end
  end

  found
end
549
+ end
550
+