regru-premailer 1.7.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,221 @@
1
+ require 'nokogiri'
2
+
3
+ class Premailer
4
+ module Adapter
5
+ # Nokogiri adapter
6
+ module Nokogiri
7
+
8
+ # Merge CSS into the HTML document.
9
+ #
10
+ # @return [String] an HTML.
11
def to_inline_css
  doc = @processed_doc
  @unmergable_rules = CssParser::Parser.new

  # Give all styles already in style attributes a specificity of 1000
  # per http://www.w3.org/TR/CSS21/cascade.html#specificity
  # Each declaration block is wrapped as [SPEC=n[...]] so the folding pass
  # below can recover both the declarations and their specificity.
  doc.search("*[@style]").each do |el|
    el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
  end

  # Iterate through the rules and merge them into the HTML
  @css_parser.each_selector(:all) do |selector, declaration, specificity|
    # Drop the :link pseudo-class, keeping the whitespace that followed it
    selector.gsub!(/:link([\s]*)+/i) {|m| $1 }

    # Convert element names to lower case
    selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }

    if selector =~ Premailer::RE_UNMERGABLE_SELECTORS
      # Save un-mergable rules separately
      @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration)) unless @options[:preserve_styles]
    else
      begin
        # Change single ID CSS selectors into xpath so that we can match more
        # than one element. Added to work around dodgy generated code.
        selector.gsub!(/\A\#([\w_\-]+)\Z/, '*[@id=\1]')

        doc.search(selector).each do |el|
          if el.elem? and (el.name != 'head' and el.parent.name != 'head')
            # Add a style attribute or append to the existing one
            block = "[SPEC=#{specificity}[#{declaration}]]"
            el['style'] = el.attributes['style'].to_s + ' ' + block
          end
        end
      rescue ::Nokogiri::SyntaxError, RuntimeError, ArgumentError
        $stderr.puts "CSS syntax error with selector: #{selector}" if @options[:verbose]
        next
      end
    end
  end

  # Remove script tags
  if @options[:remove_scripts]
    doc.search("script").remove
  end

  # Read STYLE attributes and perform folding
  doc.search("*[@style]").each do |el|
    style = el.attributes['style'].to_s

    # Rebuild one rule set per [SPEC=n[...]] block written above
    declarations = style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).map do |declaration|
      CssParser::RuleSet.new(nil, declaration[1].to_s, declaration[0].to_i)
    end

    # Perform style folding
    merged = CssParser.merge(declarations)
    merged.expand_shorthand!

    # Duplicate CSS attributes as HTML attributes, but only when the
    # :css_to_attributes option asks for it (it was previously ignored here).
    if @options[:css_to_attributes] and Premailer::RELATED_ATTRIBUTES.has_key?(el.name)
      Premailer::RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
        # never overwrite an HTML attribute the author set explicitly
        el[html_att] = merged[css_att].gsub(/url\('(.*)'\)/,'\1').gsub(/;$/, '').strip if el[html_att].nil? and not merged[css_att].empty?
      end
    end

    # write the inline STYLE attribute
    el['style'] = Premailer.escape_string(merged.declarations_to_s).split(';').map(&:strip).sort.join('; ')
  end

  doc = write_unmergable_css_rules(doc, @unmergable_rules)

  if @options[:remove_classes] or @options[:remove_comments]
    doc.traverse do |el|
      if el.comment? and @options[:remove_comments]
        el.remove
      elsif el.element?
        el.remove_attribute('class') if @options[:remove_classes]
      end
    end
  end

  if @options[:remove_ids]
    # find all anchor's targets and hash them
    targets = []
    doc.search("a[@href^='#']").each do |el|
      target = el.get_attribute('href')[1..-1]
      targets << target
      el.set_attribute('href', "#" + Digest::MD5.hexdigest(target))
    end
    # hash ids that are links target, delete others
    doc.search("*[@id]").each do |el|
      id = el.get_attribute('id')
      if targets.include?(id)
        el.set_attribute('id', Digest::MD5.hexdigest(id))
      else
        el.remove_attribute('id')
      end
    end
  end

  @processed_doc = doc
  if is_xhtml?
    # we don't want to encode carriage returns
    @processed_doc.to_xhtml(:encoding => nil).gsub(/&\#xD;/i, "\r")
  else
    @processed_doc.to_html
  end
end
120
+
121
+ # Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
122
+ # and write it into the <tt>body</tt>.
123
+ #
124
+ # <tt>doc</tt> is an Nokogiri document and <tt>unmergable_css_rules</tt> is a Css::RuleSet.
125
+ #
126
+ # @return [::Nokogiri::XML] a document.
127
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
  # Serialise every un-mergable rule as "selector { declarations }", one per line.
  rules = []
  unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, specificity|
    rules << "#{selector} { #{declarations} }\n"
  end
  styles = rules.join

  # Nothing to write: hand the document back untouched.
  return doc if styles.empty?

  style_tag = "<style type=\"text/css\">\n#{styles}</style>"

  # at_css returns nil when there is no <body>; a search result would always
  # be truthy and could not be used to pick the fallback branch.
  body = doc.at_css('body')
  if body
    fragment = ::Nokogiri::XML.fragment(style_tag)
    if body.children.empty?
      # an empty <body> has no first child to insert before
      body.add_child(fragment)
    else
      body.children.before(fragment)
    end
  else
    doc.inner_html = style_tag + doc.inner_html
  end
  doc
end
143
+
144
+
145
+ # Converts the HTML document to a format suitable for plain-text e-mail.
146
+ #
147
+ # If present, uses the <body> element as its base; otherwise uses the whole document.
148
+ #
149
+ # @return [String] a plain text.
150
def to_plain_text
  # Prefer the markup inside <body>; any failure to reach it (for example a
  # document without a body) leaves an empty string and triggers the fallback.
  body_markup =
    begin
      @doc.at("body").inner_html
    rescue
      ''
    end

  # Fall back to the whole document when no usable body markup was found.
  body_markup = @doc.to_html if !body_markup or body_markup.empty?

  convert_to_text(body_markup, @options[:line_length], @html_encoding)
end
159
+
160
+ # Gets the original HTML as a string.
161
+ # @return [String] HTML.
162
def to_s
  # XHTML sources are serialised as XHTML, everything else as HTML; both are
  # asked not to force an output encoding.
  return @doc.to_xhtml(:encoding => nil) if is_xhtml?

  @doc.to_html(:encoding => nil)
end
169
+
170
+ # Load the HTML file and convert it into an Nokogiri document.
171
+ #
172
+ # @return [::Nokogiri::XML] a document.
173
def load_html(input) # :nodoc:
  thing = nil
  # true only for handles opened below; caller-supplied IO objects stay open
  opened_here = false

  # TODO: duplicate options
  if @options[:with_html_string] or @options[:inline] or input.respond_to?(:read)
    thing = input
  elsif @is_local_file
    @base_dir = File.dirname(input)
    thing = File.open(input, 'r')
    opened_here = true
  else
    thing = open(input)
    opened_here = true
  end

  if thing.respond_to?(:read)
    source = thing
    begin
      thing = source.read
    ensure
      # release the handle we created so it is not leaked
      source.close if opened_here and source.respond_to?(:close)
    end
  end

  return nil unless thing
  doc = nil

  # Handle HTML entities
  # NOTE: gsub! edits the string in place, so an HTML string passed in by the
  # caller is modified when :replace_html_entities is enabled.
  if @options[:replace_html_entities] == true and thing.is_a?(String)
    if RUBY_VERSION =~ /1.9/
      html_entity_ruby_version = "1.9"
    elsif RUBY_VERSION =~ /1.8/
      html_entity_ruby_version = "1.8"
    end
    if html_entity_ruby_version
      HTML_ENTITIES[html_entity_ruby_version].map do |entity, replacement|
        thing.gsub! entity, replacement
      end
    end
  end
  # Default encoding is ASCII-8BIT (binary) per http://groups.google.com/group/nokogiri-talk/msg/0b81ef0dc180dc74
  # However, we really don't want to hardcode this. ASCII-8BIT should be the default, but not the only option.
  if thing.is_a?(String) and RUBY_VERSION =~ /1.9/
    thing = thing.force_encoding(@options[:input_encoding]).encode!
    doc = ::Nokogiri::HTML(thing, nil, @options[:input_encoding]) {|c| c.recover }
  else
    default_encoding = RUBY_PLATFORM == 'java' ? nil : 'BINARY'
    doc = ::Nokogiri::HTML(thing, nil, @options[:input_encoding] || default_encoding) {|c| c.recover }
  end

  return doc
end
218
+
219
+ end
220
+ end
221
+ end
@@ -0,0 +1,100 @@
1
require 'optparse'
require 'fcntl' # Fcntl::F_GETFL is used below to detect piped input
require 'premailer'

# Command-line front end: parses the options, reads HTML from a path, a URL
# or a pipe, and prints either the inlined HTML or the plain-text rendering.

# defaults
options = {
  :base_url => nil,
  :link_query_string => nil,
  :remove_classes => false,
  :verbose => false,
  :line_length => 65
}

mode = :html

# The block parameter is named `parser` so it does not shadow the outer `opts`.
opts = OptionParser.new do |parser|
  parser.banner = "Improve the rendering of HTML emails by making CSS inline among other things. Takes a path to a local file, a URL or a pipe as input.\n\n"
  parser.define_head "Usage: premailer <optional uri|optional path> [options]"
  parser.separator ""
  parser.separator "Examples:"
  parser.separator " premailer http://example.com/ > out.html"
  parser.separator " premailer http://example.com/ --mode txt > out.txt"
  parser.separator " cat input.html | premailer -q src=email > out.html"
  parser.separator " premailer ./public/index.html"
  parser.separator ""
  parser.separator "Options:"

  parser.on("--mode MODE", [:html, :txt], "Output: html or txt") do |v|
    mode = v
  end

  parser.on("-b", "--base-url STRING", String, "Base URL, useful for local files") do |v|
    options[:base_url] = v
  end

  parser.on("-q", "--query-string STRING", String, "Query string to append to links") do |v|
    options[:link_query_string] = v
  end

  parser.on("--css FILE,FILE", Array, "Additional CSS stylesheets") do |v|
    options[:css] = v
  end

  parser.on("-r", "--remove-classes", "Remove HTML classes") do |v|
    options[:remove_classes] = v
  end

  # This flag used to write :remove_classes, silently stripping classes instead.
  parser.on("-j", "--remove-scripts", "Remove <script> elements") do |v|
    options[:remove_scripts] = v
  end

  parser.on("-l", "--line-length N", Integer, "Line length for plaintext (default: #{options[:line_length].to_s})") do |v|
    options[:line_length] = v
  end

  parser.on("-d", "--io-exceptions", "Abort on I/O errors") do |v|
    options[:io_exceptions] = v
  end

  parser.on("-v", "--verbose", "Print additional information at runtime") do |v|
    options[:verbose] = v
  end

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-V", "--version", "Show version") do
    puts "Premailer #{Premailer::VERSION} (c) 2008-2010 Alex Dunae"
    exit
  end
end
opts.parse!

$stderr.puts "Processing in #{mode} mode with options #{options.inspect}" if options[:verbose]

premailer = nil
input = nil

# With a terminal on stdin the input comes from the first argument;
# otherwise the HTML is read from the pipe.
if $stdin.tty? or STDIN.fcntl(Fcntl::F_GETFL, 0) == 0
  input = ARGV.shift
else
  input = $stdin
  options[:with_html_string] = true
end

if input
  premailer = Premailer.new(input, options)
else
  # no input at all: show usage and signal failure
  puts opts
  exit 1
end

if mode == :txt
  print premailer.to_plain_text
else
  print premailer.to_inline_css
end

exit
@@ -0,0 +1,105 @@
1
+ # coding: utf-8
2
+ require 'htmlentities'
3
+
4
+ # Support functions for Premailer
5
module HtmlToPlainText

  # Returns the text with all HTML tags removed.
  #
  # Entities are decoded, images become their alt text, links become
  # "text ( url )", headings are underlined (H1/H2 also overlined), list
  # items become "* " bullets, and the result is wrapped to +line_length+.
  # +from_charset+ is accepted but not used by this implementation.
  #
  # TODO: add support for DL, OL
  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
    # decode HTML entities
    text = HTMLEntities.new.decode(html)

    # replace images by their alt attribute (the double-quoted form is
    # applied twice, then the single-quoted form once)
    double_quoted_alt = /<img.+?alt=\"([^\"]*)\"[^>]*\/>/i
    2.times { text.gsub!(double_quoted_alt, '\1') }
    text.gsub!(/<img.+?alt='([^\']*)\'[^>]*\/>/i, '\1')

    # links: double-quoted hrefs first, then single-quoted ones
    [
      /<a.+?href=\"(mailto:)?([^\"]*)\"[^>]*>((.|\s)+?)<\/a>/i,
      /<a.+?href='(mailto:)?([^\']*)\'[^>]*>((.|\s)+?)<\/a>/i
    ].each do |anchor|
      text.gsub!(anchor) do
        found = Regexp.last_match
        found[3].strip + ' ( ' + found[2].strip + ' )'
      end
    end

    # handle headings (H1-H6)
    text.gsub!(/(<\/h[1-6]>)/i, "\n\\1") # move closing tags to new lines
    text.gsub!(/[\s]*<h([1-6]+)[^>]*>[\s]*(.*)[\s]*<\/h[1-6]+>/i) do
      found = Regexp.last_match
      level = found[1].to_i

      heading = found[2].
        gsub(/<br[\s]*\/?>/i, "\n"). # handle <br>s
        gsub(/<\/?[^>]*>/i, '')      # strip tags

      # the rule is as wide as the longest heading line, capped at line_length
      widest = heading.each_line.map { |row| row.strip.length }.max || 0
      rule_width = widest > line_length ? line_length : widest

      decorated =
        case level
        when 1 # H1, asterisks above and below
          ('*' * rule_width) + "\n" + heading + "\n" + ('*' * rule_width)
        when 2 # H2, dashes above and below
          ('-' * rule_width) + "\n" + heading + "\n" + ('-' * rule_width)
        else   # H3-H6, dashes below
          heading + "\n" + ('-' * rule_width)
        end

      "\n\n" + decorated + "\n\n"
    end

    # Simple tag rewrites, applied strictly in this order.
    [
      [/(<\/span>)[\s]+(<span)/mi, '\1 \2'],   # wrap spans
      [/[\s]*(<li[^>]*>)[\s]*/i, '* '],        # lists -- TODO: should handle ordered lists
      [/<\/li>[\s]*(?![\n])/i, "\n"],          # list not followed by a newline
      [/<\/p>/i, "\n\n"],                      # paragraphs
      [/<br[\/ ]*>/i, "\n"],                   # line breaks
      [/<\/?[^>]*>/, '']                       # strip remaining tags
    ].each { |pattern, replacement| text.gsub!(pattern, replacement) }

    text = word_wrap(text, line_length)

    # Whitespace normalisation, again order-sensitive.
    [
      [/\r\n?/, "\n"],          # remove linefeeds (\r\n and \r -> \n)
      [/\302\240+/, " "],       # non-breaking spaces -> spaces
      [/\n[ \t]+/, "\n"],       # space at start of lines
      [/[ \t]+\n/, "\n"],       # space at end of lines
      [/[\n]{3,}/, "\n\n"],     # no more than two consecutive newlines
      [/ {2,}/, " "]            # no more than two consecutive spaces
    ].each { |pattern, replacement| text.gsub!(pattern, replacement) }

    # the word wrap messes up the parens around link URLs
    text.gsub!(/\([ \n](http[^)]+)[\n ]\)/) do
      "( " + Regexp.last_match(1) + " )"
    end

    text.strip
  end

  # Wraps every line of +txt+ longer than +line_length+ at whitespace.
  #
  # Taken from Rails' word_wrap helper (http://api.rubyonrails.org/classes/ActionView/Helpers/TextHelper.html#method-i-word_wrap)
  def word_wrap(txt, line_length)
    wrapped = txt.split("\n").map do |line|
      if line.length > line_length
        line.gsub(/(.{1,#{line_length}})(\s+|$)/, "\\1\n").strip
      else
        line
      end
    end
    wrapped.join("\n")
  end
end
@@ -0,0 +1,549 @@
1
+ # Premailer processes HTML and CSS to improve e-mail deliverability.
2
+ #
3
+ # Premailer's main function is to render all CSS as inline <tt>style</tt>
4
+ # attributes. It also converts relative links to absolute links and checks
5
+ # the 'safety' of CSS properties against a CSS support chart.
6
+ #
7
+ # ## Example of use
8
+ #
9
+ # ```ruby
10
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
11
+ #
12
+ # # Write the HTML output
13
+ # fout = File.open("output.html", "w")
14
+ # fout.puts premailer.to_inline_css
15
+ # fout.close
16
+ #
17
+ # # Write the plain-text output
18
+ # fout = File.open("output.txt", "w")
19
+ # fout.puts premailer.to_plain_text
20
+ # fout.close
21
+ #
22
+ # # List any CSS warnings
23
+ # puts premailer.warnings.length.to_s + ' warnings found'
24
+ # premailer.warnings.each do |w|
25
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
26
+ # end
27
+ #
28
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
29
+ # puts premailer.to_inline_css
30
+ # ```
31
+ #
32
+ class Premailer
33
+ include HtmlToPlainText
34
+ include CssParser
35
+
36
+ # Premailer version.
37
+ VERSION = '1.7.4'
38
+
39
+ CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
40
+
41
+ # Unmergable selectors regexp.
42
+ RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i
43
+ # Reset selectors regexp.
44
+ RE_RESET_SELECTORS = /^(\:\#outlook|body.*|\.ReadMsgBody|\.ExternalClass|img|\#backgroundTable)$/
45
+
46
+ # list of HTMLEntities to fix
47
+ # source: http://stackoverflow.com/questions/2812781/how-to-convert-webpage-apostrophe-8217-to-ascii-39-in-ruby-1-
48
# Replacement tables used by load_html when :replace_html_entities is set,
# keyed by the Ruby version family they were written for: the "1.8" table
# matches raw byte sequences, the "1.9" table matches numeric entities.
# (The "1.8" table previously listed "\342\200\231" twice.)
HTML_ENTITIES = {
  "1.8" => {
    "\342\200\231" => "'",
    "\342\200\246" => "...",
    "\342\200\176" => "'",
    "\342\200\177" => "'",
    "\342\200\230" => "'",
    "\342\200\232" => ',',
    "\342\200\233" => "'",
    "\342\200\234" => '"',
    "\342\200\235" => '"',
    "\342\200\041" => '-',
    "\342\200\174" => '-',
    "\342\200\220" => '-',
    "\342\200\223" => '-',
    "\342\200\224" => '--',
    "\342\200\225" => '--',
    "\342\200\042" => '--'
  },
  "1.9" => {
    "&#8217;" => "'",
    "&#8230;" => "...",
    "&#8216;" => "'",
    "&#8218;" => ',',
    "&#8219;" => "'",
    "&#8220;" => '"',
    "&#8221;" => '"',
    "&#8208;" => '-',
    "&#8211;" => '-',
    "&#8212;" => '--',
    "&#8213;" => '--'
  }
}
82
+
83
+ # list of CSS attributes that can be rendered as HTML attributes
84
+ #
85
+ # @todo too much repetition
86
+ # @todo background=""
87
+ RELATED_ATTRIBUTES = {
88
+ 'h1' => {'text-align' => 'align'},
89
+ 'h2' => {'text-align' => 'align'},
90
+ 'h3' => {'text-align' => 'align'},
91
+ 'h4' => {'text-align' => 'align'},
92
+ 'h5' => {'text-align' => 'align'},
93
+ 'h6' => {'text-align' => 'align'},
94
+ 'p' => {'text-align' => 'align'},
95
+ 'div' => {'text-align' => 'align'},
96
+ 'blockquote' => {'text-align' => 'align'},
97
+ 'body' => {'background-color' => 'bgcolor'},
98
+ 'table' => {
99
+ 'background-color' => 'bgcolor',
100
+ 'background-image' => 'background',
101
+ '-premailer-width' => 'width',
102
+ '-premailer-height' => 'height',
103
+ '-premailer-cellpadding' => 'cellpadding',
104
+ '-premailer-cellspacing' => 'cellspacing',
105
+ },
106
+ 'tr' => {
107
+ 'text-align' => 'align',
108
+ 'background-color' => 'bgcolor',
109
+ '-premailer-height' => 'height'
110
+ },
111
+ 'th' => {
112
+ 'text-align' => 'align',
113
+ 'background-color' => 'bgcolor',
114
+ 'vertical-align' => 'valign',
115
+ '-premailer-width' => 'width',
116
+ '-premailer-height' => 'height'
117
+ },
118
+ 'td' => {
119
+ 'text-align' => 'align',
120
+ 'background-color' => 'bgcolor',
121
+ 'vertical-align' => 'valign',
122
+ '-premailer-width' => 'width',
123
+ '-premailer-height' => 'height'
124
+ },
125
+ 'img' => {'float' => 'align'}
126
+ }
127
+
128
+ # URI of the HTML file used
129
+ attr_reader :html_file
130
+
131
+ # base URL used to resolve links
132
+ attr_reader :base_url
133
+
134
+ # base directory used to resolve links for local files
135
+ # @return [String] base directory
136
+ attr_reader :base_dir
137
+
138
+ # unmergeable CSS rules to be preserved in the head (CssParser)
139
+ attr_reader :unmergable_rules
140
+
141
+ # processed HTML document (Hpricot/Nokogiri)
142
+ attr_reader :processed_doc
143
+
144
+ # source HTML document (Hpricot/Nokogiri)
145
+ attr_reader :doc
146
+
147
+ # Warning levels
148
+ module Warnings
149
+ # No warnings
150
+ NONE = 0
151
+ # Safe
152
+ SAFE = 1
153
+ # Poor
154
+ POOR = 2
155
+ # Risky
156
+ RISKY = 3
157
+ end
158
+ include Warnings
159
+
160
+ # Warning level names
161
+ WARN_LABEL = %w(NONE SAFE POOR RISKY)
162
+
163
+ # Create a new Premailer object.
164
+ #
165
+ # @param html is the HTML data to process. It can be either an IO object, the URL of a
166
+ # remote file, a local path or a raw HTML string. If passing an HTML string you
167
+ # must set the with_html_string option to true.
168
+ #
169
+ # @param [Hash] options the options to handle html with.
170
+ # @option options [FixNum] :line_length Line length used by to_plain_text. Default is 65.
171
+ # @option options [FixNum] :warn_level What level of CSS compatibility warnings to show (see {Premailer::Warnings}).
172
+ # @option options [String] :link_query_string A string to append to every <tt>a href=""</tt> link. Do not include the initial <tt>?</tt>.
173
+ # @option options [String] :base_url Used to calculate absolute URLs for local files.
174
+ # @option options [Array(String)] :css Manually specify CSS stylesheets.
175
+ # @option options [Boolean] :css_to_attributes Copy related CSS attributes into HTML attributes (e.g. background-color to bgcolor)
176
+ # @option options [String] :css_string Pass CSS as a string
177
+ # @option options [Boolean] :remove_ids Remove ID attributes whenever possible and convert IDs used as anchors to hashed to avoid collisions in webmail programs. Default is false.
178
+ # @option options [Boolean] :remove_classes Remove class attributes. Default is false.
179
+ # @option options [Boolean] :remove_comments Remove html comments. Default is false.
180
+ # @option options [Boolean] :remove_scripts Remove <tt>script</tt> elements. Default is true.
181
+ # @option options [Boolean] :preserve_styles Whether to preserve any <tt>link rel=stylesheet</tt> and <tt>style</tt> elements. Default is false.
182
+ # @option options [Boolean] :preserve_reset Whether to preserve styles associated with the MailChimp reset code.
183
+ # @option options [Boolean] :with_html_string Whether the html param should be treated as a raw string.
184
+ # @option options [Boolean] :verbose Whether to print errors and warnings to <tt>$stderr</tt>. Default is false.
185
+ # @option options [Boolean] :include_link_tags Whether to include css from <tt>link rel=stylesheet</tt> tags. Default is true.
186
+ # @option options [Boolean] :include_style_tags Whether to include css from <tt>style</tt> tags. Default is true.
187
+ # @option options [String] :input_encoding Manually specify the source documents encoding. This is a good idea.
188
+ # @option options [Boolean] :replace_html_entities Convert HTML entities to actual characters. Default is false.
189
+ # @option options [Symbol] :adapter Which HTML parser to use, either <tt>:nokogiri</tt> or <tt>:hpricot</tt>. Default is <tt>:hpricot</tt>.
190
def initialize(html, options = {})
  # Built-in defaults; anything supplied by the caller wins.
  defaults = {
    :warn_level => Warnings::SAFE,
    :line_length => 65,
    :link_query_string => nil,
    :base_url => nil,
    :remove_classes => false,
    :remove_ids => false,
    :remove_comments => false,
    :remove_scripts => true,
    :css => [],
    :css_to_attributes => true,
    :with_html_string => false,
    :css_string => nil,
    :preserve_styles => false,
    :preserve_reset => true,
    :verbose => false,
    :debug => false,
    :io_exceptions => false,
    :include_link_tags => true,
    :include_style_tags => true,
    :input_encoding => 'ASCII-8BIT',
    :replace_html_entities => false,
    :adapter => Adapter.use,
  }
  @options = defaults.merge(options)

  @html_file = html
  @is_local_file = @options[:with_html_string] || Premailer.local_data?(html)

  @css_files = [@options[:css]].flatten
  @css_warnings = []

  @base_dir = nil
  @unmergable_rules = nil

  # An explicit :base_url is consumed (removed from the options); otherwise a
  # remote source doubles as its own base URL and local input has none.
  @base_url =
    if @options[:base_url]
      URI.parse(@options.delete(:base_url))
    elsif !@is_local_file
      URI.parse(@html_file)
    end

  parser_settings = {
    :absolute_paths => true,
    :import => true,
    :io_exceptions => @options[:io_exceptions]
  }
  @css_parser = CssParser::Parser.new(parser_settings)

  # Mix the selected adapter into the class before any adapter method is used.
  @adapter_class = Adapter.find @options[:adapter]
  self.class.send(:include, @adapter_class)

  @doc = load_html(@html_file)
  @processed_doc = @doc
  @processed_doc = convert_inline_links(@processed_doc, @base_url) if @base_url

  # read from the caller's hash, as before, not from the merged options
  query_string = options[:link_query_string]
  @processed_doc = append_query_string(@processed_doc, query_string) if query_string

  load_css_from_options!
  load_css_from_html!
end
252
+
253
+ # CSS warnings.
254
+ # @return [Array(Hash)] Array of warnings.
255
def warnings
  if @options[:warn_level] == Warnings::NONE
    # warnings are switched off entirely
    []
  else
    # run the client-support check only while no warnings are cached
    @css_warnings = check_client_support if @css_warnings.empty?
    @css_warnings
  end
end
260
+
261
+ protected
262
# Reads the stylesheet at +path+ and feeds it to the CSS parser.
#
# Loading stays best-effort: a missing or unreadable file (or a failure
# while adding the CSS) never raises. With the :verbose option the failure
# is reported on $stderr instead of being dropped silently.
def load_css_from_local_file!(path)
  css_block = File.open(path, "r") { |file| file.read }
  load_css_from_string(css_block)
rescue => e
  $stderr.puts "Could not load css from local file: #{path} (#{e.message})" if @options[:verbose]
  nil
end
274
+
275
# Adds a block of CSS source to the parser, resolving relative references
# against the current base URL / base directory and keeping only the
# screen and handheld media types.
def load_css_from_string(css_string)
  parse_options = {
    :base_uri => @base_url,
    :base_dir => @base_dir,
    :only_media_types => [:screen, :handheld]
  }
  @css_parser.add_block!(css_string, parse_options)
end
278
+
279
+ # @private
280
def load_css_from_options! # :nodoc:
  # CSS handed over directly as a string is loaded first
  inline_css = @options[:css_string]
  load_css_from_string(inline_css) if inline_css

  # then every stylesheet listed in :css, from disk or by URI
  @css_files.each do |css_file|
    unless Premailer.local_data?(css_file)
      @css_parser.load_uri!(css_file)
      next
    end
    load_css_from_local_file!(css_file)
  end
end
291
+
292
+ # Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
293
def load_css_from_html! # :nodoc:
  if tags = @doc.search("link[@rel='stylesheet'], style")
    tags.each do |tag|
      if tag.to_s.strip =~ /^\<link/i && tag.attributes['href'] && media_type_ok?(tag.attributes['media']) && @options[:include_link_tags]
        # A user might want to <link /> to a local css file that is also mirrored on the site
        # but the local one is different (e.g. newer) than the live file, premailer will now choose the local file
        href = tag.attributes['href'].to_s
        link_uri = nil

        if href.include? @base_url.to_s and @html_file.kind_of?(String)
          # non-mutating sub: the attribute value itself must not be rewritten
          link_uri = File.join(File.dirname(@html_file), href.sub(@base_url.to_s, ''))
        end

        # if the file does not exist locally, try to grab the remote reference
        # (File.exist? is used because File.exists? is gone from current Rubies)
        if link_uri.nil? or not File.exist?(link_uri)
          link_uri = Premailer.resolve_link(tag.attributes['href'].to_s, @html_file)
        end

        if Premailer.local_data?(link_uri)
          $stderr.puts "Loading css from local file: " + link_uri if @options[:verbose]
          load_css_from_local_file!(link_uri)
        else
          $stderr.puts "Loading css from uri: " + link_uri if @options[:verbose]
          @css_parser.load_uri!(link_uri, {:only_media_types => [:screen, :handheld]})
        end

      elsif tag.to_s.strip =~ /^\<style/i && @options[:include_style_tags]
        @css_parser.add_block!(tag.inner_html, :base_uri => @base_url, :base_dir => @base_dir, :only_media_types => [:screen, :handheld])
      end
    end
    # the source tags are dropped once read, unless styles are to be preserved
    tags.remove unless @options[:preserve_styles]
  end
end
324
+
325
+
326
+
327
+ # here be deprecated methods
328
+ public
329
+ # @private
330
+ # @deprecated
331
def local_uri?(uri) # :nodoc:
  # Kept only for backwards compatibility: warn, then delegate.
  notice = "[DEPRECATION] `local_uri?` is deprecated. Please use `Premailer.local_data?` instead."
  warn notice
  Premailer.local_data?(uri)
end
335
+
336
+ # here be instance methods
337
+
338
+ # @private
339
# Decides whether a stylesheet with the given media attribute should be used.
#
# +media_types+ may be nil, a String, or any object whose +to_s+ yields the
# attribute value (such as a parser attribute node). A missing or empty
# value is accepted; otherwise at least one listed type must mention
# screen, handheld or all.
def media_type_ok?(media_types)
  # Normalise first: non-String values used to raise below and were then
  # accepted by the rescue regardless of what they contained.
  media_types = media_types.to_s
  return true if media_types.empty?
  media_types.split(/[\s]+|,/).any? { |media_type| media_type.strip =~ /screen|handheld|all/i }
rescue
  # stay permissive if the value cannot be inspected at all
  true
end
345
+
346
# Appends +qs+ to the query of every eligible <a href> in +doc+.
#
# Anchors, template-looking links, links to another host (when a base URL
# is known) and non-http(s) schemes are skipped. Returns +doc+.
def append_query_string(doc, qs)
  return doc if qs.nil?

  # Clean a copy: the caller's string (possibly frozen) is left untouched.
  qs = qs.to_s.gsub(/^[\?]*/, '').strip
  return doc if qs.empty?

  begin
    current_host = @base_url.host
  rescue
    # no usable base URL, so host filtering is skipped
    current_host = nil
  end

  $stderr.puts "Attempting to append_query_string: #{qs}" if @options[:verbose]

  doc.search('a').each do|el|
    href = el.attributes['href'].to_s.strip
    next if href.empty?

    next if href[0,1] =~ /[\#\{\[\<\%]/ # don't bother with anchors or special-looking links

    begin
      href = URI.parse(href)

      if current_host and href.host != nil and href.host != current_host
        $stderr.puts "Skipping append_query_string for: #{href.to_s} because host is no good" if @options[:verbose]
        next
      end

      if href.scheme and href.scheme != 'http' and href.scheme != 'https'
        # diagnostics go to $stderr so they never end up in the generated output
        $stderr.puts "Skipping append_query_string for: #{href.to_s} because scheme is no good" if @options[:verbose]
        next
      end

      if href.query and not href.query.empty?
        href.query = href.query + '&amp;' + qs
      else
        href.query = qs
      end

      el['href'] = href.to_s
    rescue URI::Error => e
      $stderr.puts "Skipping append_query_string for: #{href.to_s} (#{e.message})" if @options[:verbose]
      next
    end

  end
  doc
end
394
+
395
+ # Check for an XHTML doctype
396
def is_xhtml?
  # Only the first three lines of the serialised source are inspected.
  leading_lines = @doc.to_html.strip.split("\n")[0..2]
  intro = leading_lines.join(' ')

  is_xhtml = (intro =~ /w3c\/\/[\s]*dtd[\s]+xhtml/i) ? true : false
  $stderr.puts "Is XHTML? #{is_xhtml.inspect}\nChecked:\n#{intro}" if @options[:debug]
  is_xhtml
end
402
+
403
+ # Convert relative links to absolute links.
404
+ #
405
+ # Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
406
+ # as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
407
+ #
408
+ # <tt>doc</tt> is an Hpricot document and <tt>base_uri</tt> is either a string or a URI.
409
+ #
410
+ # Returns an Hpricot document.
411
def convert_inline_links(doc, base_uri) # :nodoc:
  base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

  ['href', 'src', 'background'].each do |attribute|
    tags = doc.search("*[@#{attribute}]")

    next if tags.empty?

    tags.each do |tag|
      # skip links that look like they have merge tags
      # and mailto, ftp, etc...
      if tag.attributes[attribute].to_s =~ /^([\%\<\{\#\[]|data:|tel:|file:|sms:|callto:|facetime:|mailto:|ftp:|gopher:|cid:)/i
        next
      end

      merged = nil
      if tag.attributes[attribute].to_s =~ /^http/i
        begin
          merged = URI.parse(tag.attributes[attribute].to_s)
        rescue; next; end
      else
        begin
          merged = Premailer.resolve_link(tag.attributes[attribute].to_s, base_uri)
        rescue
          begin
            # Second attempt with the value percent-escaped. URI.escape is
            # missing from current Rubies, so use the parser's escape there.
            raw = tag.attributes[attribute].to_s
            escaped = URI.respond_to?(:escape) ? URI.escape(raw) : URI::DEFAULT_PARSER.escape(raw)
            merged = Premailer.resolve_link(escaped, base_uri)
          rescue; end
        end
      end

      # Leave the attribute as written when the link could not be resolved,
      # matching the absolute-link branch above; it used to be blanked.
      next if merged.nil?

      # make sure 'merged' is a URI
      merged = URI.parse(merged.to_s) unless merged.kind_of?(URI)
      tag[attribute] = merged.to_s
    end # end of each tag
  end # end of each attrs

  # url() references inside inline style attributes are made absolute too
  doc.search("*[@style]").each do |el|
    el['style'] = CssParser.convert_uris(el.attributes['style'].to_s, base_uri)
  end
  doc
end
453
+
454
+
455
+ # @private
456
def self.escape_string(str) # :nodoc:
  # swap every double quote for a single quote so the text can sit inside
  # a double-quoted HTML attribute
  str.tr('"', "'")
end
459
+
460
+ # @private
461
def self.resolve_link(path, base_path) # :nodoc:
  # strip a copy; the caller's string (possibly frozen) is left alone
  path = path.strip

  # Anchored like the base_path test below: only a path that *starts* with
  # a scheme is absolute. A URL embedded in a query string is not.
  if path =~ /\A(http[s]?|ftp):\/\//i
    Premailer.canonicalize(path)
  elsif base_path.kind_of?(URI)
    Premailer.canonicalize(base_path.merge(path))
  elsif base_path.kind_of?(String) and base_path =~ /\A(http[s]?|ftp):\/\//i
    Premailer.canonicalize(URI.parse(base_path).merge(path))
  else
    # local base: resolve against the directory holding the base file
    File.expand_path(path, File.dirname(base_path))
  end
end
478
+
479
+ # Test the passed variable to see if we are in local or remote mode.
480
+ #
481
+ # IO objects and local paths return true; strings that look like URLs return false.
482
def self.local_data?(data)
  # Anything readable counts as local data. Duck typing covers IO and
  # StringIO as before, without referencing the StringIO constant.
  return true if data.respond_to?(:read)

  # http, https and ftp URLs are remote. The respond_to? guard keeps
  # objects without a match operator from raising.
  return false if data.respond_to?(:=~) && data =~ /\A(http|https|ftp)\:\/\//i

  true
end
487
+
488
+ # from http://www.ruby-forum.com/topic/140101
489
def self.canonicalize(uri) # :nodoc:
  u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
  u.normalize!
  newpath = u.path

  # Collapse "segment/../" pairs until the path stops changing. A leading
  # "../.." matches the pattern but is kept as-is, so testing whether a
  # match occurred (as the old gsub! loop did) would spin forever on paths
  # such as "/../../a".
  loop do
    collapsed = newpath.gsub(%r{([^/]+)/\.\./?}) { |match|
      $1 == '..' ? match : ''
    }
    break if collapsed == newpath
    newpath = collapsed
  end

  # drop "/./" segments and a trailing "/."
  newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
  u.path = newpath
  u.to_s
end
500
+
501
+ # Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
502
def check_client_support # :nodoc:
  # Load the support chart once; the block form closes the file afterwards.
  @client_support ||= File.open(CLIENT_SUPPORT_FILE) { |file| YAML::load(file) }

  warnings = []
  properties = []

  # Get a list of CSS properties used in inline styles
  @processed_doc.search("*[@style]").each do |el|
    el.attributes['style'].to_s.scan(/([\w\-]+)[\s]*\:/i) do |captures|
      properties.push(captures[0])
    end
  end

  properties.uniq!

  # warn about every property whose support level reaches the configured one
  property_support = @client_support['css_properties']
  properties.each do |prop|
    if property_support.include?(prop) and
        property_support[prop].include?('support') and
        property_support[prop]['support'] >= @options[:warn_level]
      warnings.push({:message => "#{prop} CSS property",
          :level => WARN_LABEL[property_support[prop]['support']],
          :clients => property_support[prop]['unsupported_in'].join(', ')})
    end
  end

  # attributes are looked up in the source document
  @client_support['attributes'].each do |attribute, data|
    next unless data['support'] >= @options[:warn_level]
    if @doc.search("*[@#{attribute}]").length > 0
      warnings.push({:message => "#{attribute} HTML attribute",
          :level => WARN_LABEL[data['support']],
          :clients => data['unsupported_in'].join(', ')})
    end
  end

  # ...and so are elements
  @client_support['elements'].each do |element, data|
    next unless data['support'] >= @options[:warn_level]
    if @doc.search(element).length > 0
      warnings.push({:message => "#{element} HTML element",
          :level => WARN_LABEL[data['support']],
          :clients => data['unsupported_in'].join(', ')})
    end
  end

  return warnings
end
548
+ end
549
+