regru-premailer 1.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ require 'nokogiri'
2
+
3
+ class Premailer
4
+ module Adapter
5
+ # Nokogiri adapter
6
+ module Nokogiri
7
+
8
# Merge CSS into the HTML document.
#
# Every rule yielded by <tt>@css_parser</tt> is either folded into the
# <tt>style</tt> attribute of the elements it matches or, when its selector
# matches Premailer::RE_UNMERGABLE_SELECTORS, collected in
# <tt>@unmergable_rules</tt> and written back through
# write_unmergable_css_rules. The <tt>:remove_scripts</tt>,
# <tt>:remove_classes</tt>, <tt>:remove_comments</tt> and
# <tt>:remove_ids</tt> options are applied afterwards.
#
# CSS properties listed in Premailer::RELATED_ATTRIBUTES are copied to HTML
# attributes only when the <tt>:css_to_attributes</tt> option is truthy.
#
# @return [String] an HTML.
def to_inline_css
  doc = @processed_doc
  @unmergable_rules = CssParser::Parser.new

  # Give all styles already in style attributes a specificity of 1000
  # per http://www.w3.org/TR/CSS21/cascade.html#specificity
  doc.search("*[@style]").each do |el|
    el['style'] = '[SPEC=1000[' + el.attributes['style'] + ']]'
  end

  # Iterate through the rules and merge them into the HTML
  @css_parser.each_selector(:all) do |selector, declaration, specificity|
    # Drop the :link pseudo-class, keeping any whitespace that followed it
    selector.gsub!(/:link([\s]*)+/i) {|m| $1 }

    # Convert element names to lower case
    selector.gsub!(/([\s]|^)([\w]+)/) {|m| $1.to_s + $2.to_s.downcase }

    if selector =~ Premailer::RE_UNMERGABLE_SELECTORS
      # Save un-mergable rules separately
      @unmergable_rules.add_rule_set!(CssParser::RuleSet.new(selector, declaration)) unless @options[:preserve_styles]
    else
      begin
        # Change single ID CSS selectors into xpath so that we can match more
        # than one element. Added to work around dodgy generated code.
        selector.gsub!(/\A\#([\w_\-]+)\Z/, '*[@id=\1]')

        doc.search(selector).each do |el|
          if el.elem? && el.name != 'head' && el.parent.name != 'head'
            # Add a style attribute or append to the existing one
            block = "[SPEC=#{specificity}[#{declaration}]]"
            el['style'] = el.attributes['style'].to_s + ' ' + block
          end
        end
      rescue ::Nokogiri::SyntaxError, RuntimeError, ArgumentError
        $stderr.puts "CSS syntax error with selector: #{selector}" if @options[:verbose]
        next
      end
    end
  end

  # Remove script tags
  if @options[:remove_scripts]
    doc.search("script").remove
  end

  # Read STYLE attributes and perform folding
  doc.search("*[@style]").each do |el|
    style = el.attributes['style'].to_s

    # Each "[SPEC=n[...]]" chunk becomes one rule set carrying specificity n
    declarations = style.scan(/\[SPEC\=([\d]+)\[(.[^\]\]]*)\]\]/).map do |spec, css|
      CssParser::RuleSet.new(nil, css.to_s, spec.to_i)
    end

    # Perform style folding
    merged = CssParser.merge(declarations)
    merged.expand_shorthand!

    # Duplicate CSS attributes as HTML attributes (opt-out via :css_to_attributes)
    if @options[:css_to_attributes] && Premailer::RELATED_ATTRIBUTES.has_key?(el.name)
      Premailer::RELATED_ATTRIBUTES[el.name].each do |css_att, html_att|
        # never overwrite an attribute the author set explicitly
        next unless el[html_att].nil? && !merged[css_att].empty?
        el[html_att] = merged[css_att].gsub(/url\('(.*)'\)/,'\1').gsub(/;$/, '').strip
      end
    end

    # write the inline STYLE attribute
    el['style'] = Premailer.escape_string(merged.declarations_to_s).split(';').map(&:strip).sort.join('; ')
  end

  doc = write_unmergable_css_rules(doc, @unmergable_rules)

  if @options[:remove_classes] || @options[:remove_comments]
    doc.traverse do |el|
      if el.comment? && @options[:remove_comments]
        el.remove
      elsif el.element?
        el.remove_attribute('class') if @options[:remove_classes]
      end
    end
  end

  if @options[:remove_ids]
    # find all anchor's targets and hash them
    targets = []
    doc.search("a[@href^='#']").each do |el|
      target = el.get_attribute('href')[1..-1]
      targets << target
      el.set_attribute('href', "#" + Digest::MD5.hexdigest(target))
    end
    # hash ids that are links target, delete others
    doc.search("*[@id]").each do |el|
      id = el.get_attribute('id')
      if targets.include?(id)
        el.set_attribute('id', Digest::MD5.hexdigest(id))
      else
        el.remove_attribute('id')
      end
    end
  end

  @processed_doc = doc
  if is_xhtml?
    # we don't want to encode carriage returns
    @processed_doc.to_xhtml(:encoding => nil).gsub(/&\#xD;/i, "\r")
  else
    @processed_doc.to_html
  end
end
120
+
121
# Create a <tt>style</tt> element with un-mergable rules (e.g. <tt>:hover</tt>)
# and write it into the <tt>body</tt>.
#
# The element is inserted before the existing children of <tt>body</tt>, or
# appended when <tt>body</tt> has no children. When the document has no
# <tt>body</tt> at all, the markup is prepended to the document's inner HTML.
# Nothing is written when there are no rules.
#
# <tt>doc</tt> is a Nokogiri document and <tt>unmergable_rules</tt> responds to
# <tt>each_selector</tt> (the caller passes a CssParser::Parser).
#
# @return [::Nokogiri::XML] a document.
def write_unmergable_css_rules(doc, unmergable_rules) # :nodoc:
  rules = []
  unmergable_rules.each_selector(:all, :force_important => true) do |selector, declarations, specificity|
    rules << "#{selector} { #{declarations} }\n"
  end
  styles = rules.join

  unless styles.empty?
    style_tag = "<style type=\"text/css\">\n#{styles}</style>"
    # at_css returns nil when there is no body; search would return an
    # (always truthy) empty node set instead
    body = doc.at_css('body')
    if body
      fragment = ::Nokogiri::XML.fragment(style_tag)
      if body.children.empty?
        body.add_child(fragment)
      else
        body.children.before(fragment)
      end
    else
      doc.inner_html = style_tag + doc.inner_html
    end
  end
  doc
end
143
+
144
+
145
# Converts the HTML document to a format suitable for plain-text e-mail.
#
# The inner HTML of the <body> element is used when it can be read and is
# not empty; otherwise the whole document is serialised. The markup is then
# handed to convert_to_text together with the <tt>:line_length</tt> option
# and <tt>@html_encoding</tt>.
#
# @return [String] a plain text.
def to_plain_text
  body_markup = begin
    @doc.at("body").inner_html
  rescue
    # no usable body: fall back to the whole document below
    ''
  end

  source = if body_markup && !body_markup.empty?
    body_markup
  else
    @doc.to_html
  end

  convert_to_text(source, @options[:line_length], @html_encoding)
end
159
+
160
+ # Gets the original HTML as a string.
161
+ # @return [String] HTML.
162
+ def to_s
163
+ if is_xhtml?
164
+ @doc.to_xhtml(:encoding => nil)
165
+ else
166
+ @doc.to_html(:encoding => nil)
167
+ end
168
+ end
169
+
170
# Load the HTML file and convert it into a Nokogiri document.
#
# <tt>input</tt> is used as-is when the <tt>:with_html_string</tt> or
# <tt>:inline</tt> option is set or when it responds to <tt>read</tt>;
# otherwise it is read from disk (setting <tt>@base_dir</tt>) when
# <tt>@is_local_file</tt> is set, or fetched with <tt>open</tt>. File and
# remote handles are closed as soon as they have been read, and a String
# passed in by the caller is copied before any in-place modification.
#
# @return [::Nokogiri::XML] a document, or nil when there is nothing to parse.
def load_html(input) # :nodoc:
  thing = nil

  # TODO: duplicate options
  if @options[:with_html_string] or @options[:inline] or input.respond_to?(:read)
    thing = input
  elsif @is_local_file
    @base_dir = File.dirname(input)
    thing = File.open(input, 'r') { |file| file.read }
  else
    thing = open(input) { |remote| remote.read }
  end

  if thing.respond_to?(:read)
    thing = thing.read
  end

  return nil unless thing
  doc = nil

  # Entity replacement and re-encoding below work in place; never do that
  # to the caller's own String.
  thing = thing.dup if thing.equal?(input) && thing.is_a?(String)

  # Handle HTML entities
  if @options[:replace_html_entities] == true and thing.is_a?(String)
    if RUBY_VERSION =~ /1.9/
      html_entity_ruby_version = "1.9"
    elsif RUBY_VERSION =~ /1.8/
      html_entity_ruby_version = "1.8"
    end
    if html_entity_ruby_version
      HTML_ENTITIES[html_entity_ruby_version].each do |entity, replacement|
        thing.gsub! entity, replacement
      end
    end
  end
  # Default encoding is ASCII-8BIT (binary) per http://groups.google.com/group/nokogiri-talk/msg/0b81ef0dc180dc74
  # However, we really don't want to hardcode this. ASCII-8BIT should be the default, but not the only option.
  if thing.is_a?(String) and RUBY_VERSION =~ /1.9/
    thing = thing.force_encoding(@options[:input_encoding]).encode!
    doc = ::Nokogiri::HTML(thing, nil, @options[:input_encoding]) {|c| c.recover }
  else
    default_encoding = RUBY_PLATFORM == 'java' ? nil : 'BINARY'
    doc = ::Nokogiri::HTML(thing, nil, @options[:input_encoding] || default_encoding) {|c| c.recover }
  end

  return doc
end
218
+
219
+ end
220
+ end
221
+ end
@@ -0,0 +1,100 @@
1
require 'optparse'
require 'fcntl' # Fcntl::F_GETFL is used below to detect piped input
require 'premailer'

# Command-line front end: parses the options, builds a Premailer for the
# given path/URL (or for piped standard input) and prints either the
# inlined HTML or the plain-text rendering to standard output.

# defaults
options = {
  :base_url => nil,
  :link_query_string => nil,
  :remove_classes => false,
  :verbose => false,
  :line_length => 65
}

mode = :html

opts = OptionParser.new do |parser|
  parser.banner = "Improve the rendering of HTML emails by making CSS inline among other things. Takes a path to a local file, a URL or a pipe as input.\n\n"
  parser.define_head "Usage: premailer <optional uri|optional path> [options]"
  parser.separator ""
  parser.separator "Examples:"
  parser.separator " premailer http://example.com/ > out.html"
  parser.separator " premailer http://example.com/ --mode txt > out.txt"
  parser.separator " cat input.html | premailer -q src=email > out.html"
  parser.separator " premailer ./public/index.html"
  parser.separator ""
  parser.separator "Options:"

  parser.on("--mode MODE", [:html, :txt], "Output: html or txt") do |v|
    mode = v
  end

  parser.on("-b", "--base-url STRING", String, "Base URL, useful for local files") do |v|
    options[:base_url] = v
  end

  parser.on("-q", "--query-string STRING", String, "Query string to append to links") do |v|
    options[:link_query_string] = v
  end

  parser.on("--css FILE,FILE", Array, "Additional CSS stylesheets") do |v|
    options[:css] = v
  end

  parser.on("-r", "--remove-classes", "Remove HTML classes") do |v|
    options[:remove_classes] = v
  end

  # sets :remove_scripts (this flag used to overwrite :remove_classes)
  parser.on("-j", "--remove-scripts", "Remove <script> elements") do |v|
    options[:remove_scripts] = v
  end

  parser.on("-l", "--line-length N", Integer, "Line length for plaintext (default: #{options[:line_length].to_s})") do |v|
    options[:line_length] = v
  end

  parser.on("-d", "--io-exceptions", "Abort on I/O errors") do |v|
    options[:io_exceptions] = v
  end

  parser.on("-v", "--verbose", "Print additional information at runtime") do |v|
    options[:verbose] = v
  end

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-V", "--version", "Show version") do
    puts "Premailer #{Premailer::VERSION} (c) 2008-2010 Alex Dunae"
    exit
  end
end
opts.parse!

$stderr.puts "Processing in #{mode} mode with options #{options.inspect}" if options[:verbose]

premailer = nil
input = nil

# Read the first remaining argument unless something is piped into stdin
if $stdin.tty? or STDIN.fcntl(Fcntl::F_GETFL, 0) == 0
  input = ARGV.shift
else
  input = $stdin
  options[:with_html_string] = true
end

if input
  premailer = Premailer.new(input, options)
else
  puts opts
  exit 1
end

if mode == :txt
  print premailer.to_plain_text
else
  print premailer.to_inline_css
end

exit
@@ -0,0 +1,105 @@
1
+ # coding: utf-8
2
+ require 'htmlentities'
3
+
4
# Support functions for Premailer
module HtmlToPlainText

  # Returns the text with all HTML tags removed.
  #
  # Entities are decoded first; images are replaced by their alt text
  # (self-closing or not), links become "text ( url )", headings are
  # underlined/boxed, list items get a "* " bullet, and the result is
  # word-wrapped and whitespace-normalised.
  #
  # html:: the markup to convert
  # line_length:: maximum line width used for wrapping and heading rules
  # from_charset:: accepted for compatibility; not used by this method
  #
  # TODO: add support for DL, OL
  def convert_to_text(html, line_length = 65, from_charset = 'UTF-8')
    txt = html

    # decode HTML entities
    he = HTMLEntities.new
    txt = he.decode(txt)

    # replace images by their alt attribute, with or without a closing
    # slash (<img alt="x" /> and <img alt="x">)
    txt.gsub!(/<img.+?alt=\"([^\"]*)\"[^>]*>/i, '\1')
    txt.gsub!(/<img.+?alt='([^\']*)\'[^>]*>/i, '\1')

    # links
    txt.gsub!(/<a.+?href=\"(mailto:)?([^\"]*)\"[^>]*>((.|\s)+?)<\/a>/i) do |s|
      $3.strip + ' ( ' + $2.strip + ' )'
    end

    txt.gsub!(/<a.+?href='(mailto:)?([^\']*)\'[^>]*>((.|\s)+?)<\/a>/i) do |s|
      $3.strip + ' ( ' + $2.strip + ' )'
    end

    # handle headings (H1-H6)
    txt.gsub!(/(<\/h[1-6]>)/i, "\n\\1") # move closing tags to new lines
    txt.gsub!(/[\s]*<h([1-6]+)[^>]*>[\s]*(.*)[\s]*<\/h[1-6]+>/i) do |s|
      hlevel = $1.to_i

      htext = $2
      htext.gsub!(/<br[\s]*\/?>/i, "\n") # handle <br>s
      htext.gsub!(/<\/?[^>]*>/i, '') # strip tags

      # determine maximum line length
      hlength = 0
      htext.each_line { |l| llength = l.strip.length; hlength = llength if llength > hlength }
      hlength = line_length if hlength > line_length

      case hlevel
        when 1 # H1, asterisks above and below
          htext = ('*' * hlength) + "\n" + htext + "\n" + ('*' * hlength)
        when 2 # H2, dashes above and below
          htext = ('-' * hlength) + "\n" + htext + "\n" + ('-' * hlength)
        else # H3-H6, dashes below
          htext = htext + "\n" + ('-' * hlength)
      end

      "\n\n" + htext + "\n\n"
    end

    # wrap spans
    txt.gsub!(/(<\/span>)[\s]+(<span)/mi, '\1 \2')

    # lists -- TODO: should handle ordered lists
    txt.gsub!(/[\s]*(<li[^>]*>)[\s]*/i, '* ')
    # list not followed by a newline
    txt.gsub!(/<\/li>[\s]*(?![\n])/i, "\n")

    # paragraphs and line breaks
    txt.gsub!(/<\/p>/i, "\n\n")
    txt.gsub!(/<br[\/ ]*>/i, "\n")

    # strip remaining tags
    txt.gsub!(/<\/?[^>]*>/, '')

    txt = word_wrap(txt, line_length)

    # remove linefeeds (\r\n and \r -> \n)
    txt.gsub!(/\r\n?/, "\n")

    # strip extra spaces
    txt.gsub!(/\302\240+/, " ") # non-breaking spaces -> spaces
    txt.gsub!(/\n[ \t]+/, "\n") # space at start of lines
    txt.gsub!(/[ \t]+\n/, "\n") # space at end of lines

    # no more than two consecutive newlines
    txt.gsub!(/[\n]{3,}/, "\n\n")

    # no more than two consecutive spaces
    txt.gsub!(/ {2,}/, " ")

    # the word messes up the parens
    txt.gsub!(/\([ \n](http[^)]+)[\n ]\)/) do |s|
      "( " + $1 + " )"
    end

    txt.strip
  end

  # Wraps every line of +txt+ that is longer than +line_length+ characters
  # at whitespace; shorter lines are returned unchanged.
  #
  # Taken from Rails' word_wrap helper (http://api.rubyonrails.org/classes/ActionView/Helpers/TextHelper.html#method-i-word_wrap)
  def word_wrap(txt, line_length)
    txt.split("\n").collect do |line|
      line.length > line_length ? line.gsub(/(.{1,#{line_length}})(\s+|$)/, "\\1\n").strip : line
    end * "\n"
  end
end
@@ -0,0 +1,549 @@
1
+ # Premailer processes HTML and CSS to improve e-mail deliverability.
2
+ #
3
+ # Premailer's main function is to render all CSS as inline <tt>style</tt>
4
+ # attributes. It also converts relative links to absolute links and checks
5
+ # the 'safety' of CSS properties against a CSS support chart.
6
+ #
7
+ # ## Example of use
8
+ #
9
+ # ```ruby
10
+ # premailer = Premailer.new('http://example.com/myfile.html', :warn_level => Premailer::Warnings::SAFE)
11
+ #
12
+ # # Write the HTML output
13
+ # fout = File.open("output.html", "w")
14
+ # fout.puts premailer.to_inline_css
15
+ # fout.close
16
+ #
17
+ # # Write the plain-text output
18
+ # fout = File.open("output.txt", "w")
19
+ # fout.puts premailer.to_plain_text
20
+ # fout.close
21
+ #
22
+ # # List any CSS warnings
23
+ # puts premailer.warnings.length.to_s + ' warnings found'
24
+ # premailer.warnings.each do |w|
25
+ # puts "#{w[:message]} (#{w[:level]}) may not render properly in #{w[:clients]}"
26
+ # end
27
+ #
28
+ # premailer = Premailer.new(html_file, :warn_level => Premailer::Warnings::SAFE)
29
+ # puts premailer.to_inline_css
30
+ # ```
31
+ #
32
+ class Premailer
33
+ include HtmlToPlainText
34
+ include CssParser
35
+
36
+ # Premailer version.
37
+ VERSION = '1.7.4'
38
+
39
+ CLIENT_SUPPORT_FILE = File.dirname(__FILE__) + '/../../misc/client_support.yaml'
40
+
41
+ # Unmergable selectors regexp.
42
+ RE_UNMERGABLE_SELECTORS = /(\:(visited|active|hover|focus|after|before|selection|target|first\-(line|letter))|^\@)/i
43
+ # Reset selectors regexp.
44
+ RE_RESET_SELECTORS = /^(\:\#outlook|body.*|\.ReadMsgBody|\.ExternalClass|img|\#backgroundTable)$/
45
+
46
+ # list of HTMLEntities to fix
47
+ # source: http://stackoverflow.com/questions/2812781/how-to-convert-webpage-apostrophe-8217-to-ascii-39-in-ruby-1-
48
+ HTML_ENTITIES = {
49
+ "1.8" => {
50
+ "\342\200\231" => "'",
51
+ "\342\200\246" => "...",
52
+ "\342\200\176" => "'",
53
+ "\342\200\177" => "'",
54
+ "\342\200\230" => "'",
55
+ "\342\200\231" => "'",
56
+ "\342\200\232" => ',',
57
+ "\342\200\233" => "'",
58
+ "\342\200\234" => '"',
59
+ "\342\200\235" => '"',
60
+ "\342\200\041" => '-',
61
+ "\342\200\174" => '-',
62
+ "\342\200\220" => '-',
63
+ "\342\200\223" => '-',
64
+ "\342\200\224" => '--',
65
+ "\342\200\225" => '--',
66
+ "\342\200\042" => '--'
67
+ },
68
+ "1.9" => {
69
+ "&#8217;" => "'",
70
+ "&#8230;" => "...",
71
+ "&#8216;" => "'",
72
+ "&#8218;" => ',',
73
+ "&#8219;" => "'",
74
+ "&#8220;" => '"',
75
+ "&#8221;" => '"',
76
+ "&#8208;" => '-',
77
+ "&#8211;" => '-',
78
+ "&#8212;" => '--',
79
+ "&#8213;" => '--'
80
+ }
81
+ }
82
+
83
+ # list of CSS attributes that can be rendered as HTML attributes
84
+ #
85
+ # @todo too much repetition
86
+ # @todo background=""
87
+ RELATED_ATTRIBUTES = {
88
+ 'h1' => {'text-align' => 'align'},
89
+ 'h2' => {'text-align' => 'align'},
90
+ 'h3' => {'text-align' => 'align'},
91
+ 'h4' => {'text-align' => 'align'},
92
+ 'h5' => {'text-align' => 'align'},
93
+ 'h6' => {'text-align' => 'align'},
94
+ 'p' => {'text-align' => 'align'},
95
+ 'div' => {'text-align' => 'align'},
96
+ 'blockquote' => {'text-align' => 'align'},
97
+ 'body' => {'background-color' => 'bgcolor'},
98
+ 'table' => {
99
+ 'background-color' => 'bgcolor',
100
+ 'background-image' => 'background',
101
+ '-premailer-width' => 'width',
102
+ '-premailer-height' => 'height',
103
+ '-premailer-cellpadding' => 'cellpadding',
104
+ '-premailer-cellspacing' => 'cellspacing',
105
+ },
106
+ 'tr' => {
107
+ 'text-align' => 'align',
108
+ 'background-color' => 'bgcolor',
109
+ '-premailer-height' => 'height'
110
+ },
111
+ 'th' => {
112
+ 'text-align' => 'align',
113
+ 'background-color' => 'bgcolor',
114
+ 'vertical-align' => 'valign',
115
+ '-premailer-width' => 'width',
116
+ '-premailer-height' => 'height'
117
+ },
118
+ 'td' => {
119
+ 'text-align' => 'align',
120
+ 'background-color' => 'bgcolor',
121
+ 'vertical-align' => 'valign',
122
+ '-premailer-width' => 'width',
123
+ '-premailer-height' => 'height'
124
+ },
125
+ 'img' => {'float' => 'align'}
126
+ }
127
+
128
+ # URI of the HTML file used
129
+ attr_reader :html_file
130
+
131
+ # base URL used to resolve links
132
+ attr_reader :base_url
133
+
134
+ # base directory used to resolve links for local files
135
+ # @return [String] base directory
136
+ attr_reader :base_dir
137
+
138
+ # unmergeable CSS rules to be preserved in the head (CssParser)
139
+ attr_reader :unmergable_rules
140
+
141
+ # processed HTML document (Hpricot/Nokogiri)
142
+ attr_reader :processed_doc
143
+
144
+ # source HTML document (Hpricot/Nokogiri)
145
+ attr_reader :doc
146
+
147
+ # Warning levels
148
+ module Warnings
149
+ # No warnings
150
+ NONE = 0
151
+ # Safe
152
+ SAFE = 1
153
+ # Poor
154
+ POOR = 2
155
+ # Risky
156
+ RISKY = 3
157
+ end
158
+ include Warnings
159
+
160
+ # Warning level names
161
+ WARN_LABEL = %w(NONE SAFE POOR RISKY)
162
+
163
+ # Create a new Premailer object.
164
+ #
165
+ # @param html is the HTML data to process. It can be either an IO object, the URL of a
166
+ # remote file, a local path or a raw HTML string. If passing an HTML string you
167
+ # must set the with_html_string option to true.
168
+ #
169
+ # @param [Hash] options the options to handle html with.
170
+ # @option options [FixNum] :line_length Line length used by to_plain_text. Default is 65.
171
+ # @option options [FixNum] :warn_level What level of CSS compatibility warnings to show (see {Premailer::Warnings}).
172
+ # @option options [String] :link_query_string A string to append to every <tt>a href=""</tt> link. Do not include the initial <tt>?</tt>.
173
+ # @option options [String] :base_url Used to calculate absolute URLs for local files.
174
+ # @option options [Array(String)] :css Manually specify CSS stylesheets.
175
+ # @option options [Boolean] :css_to_attributes Copy related CSS attributes into HTML attributes (e.g. background-color to bgcolor)
176
+ # @option options [String] :css_string Pass CSS as a string
177
+ # @option options [Boolean] :remove_ids Remove ID attributes whenever possible and convert IDs used as anchors to hashed to avoid collisions in webmail programs. Default is false.
178
+ # @option options [Boolean] :remove_classes Remove class attributes. Default is false.
179
+ # @option options [Boolean] :remove_comments Remove html comments. Default is false.
180
+ # @option options [Boolean] :remove_scripts Remove <tt>script</tt> elements. Default is true.
181
+ # @option options [Boolean] :preserve_styles Whether to preserve any <tt>link rel=stylesheet</tt> and <tt>style</tt> elements. Default is false.
182
+ # @option options [Boolean] :preserve_reset Whether to preserve styles associated with the MailChimp reset code.
183
+ # @option options [Boolean] :with_html_string Whether the html param should be treated as a raw string.
184
+ # @option options [Boolean] :verbose Whether to print errors and warnings to <tt>$stderr</tt>. Default is false.
185
+ # @option options [Boolean] :include_link_tags Whether to include css from <tt>link rel=stylesheet</tt> tags. Default is true.
186
+ # @option options [Boolean] :include_style_tags Whether to include css from <tt>style</tt> tags. Default is true.
187
+ # @option options [String] :input_encoding Manually specify the source documents encoding. This is a good idea.
188
+ # @option options [Boolean] :replace_html_entities Convert HTML entities to actual characters. Default is false.
189
+ # @option options [Symbol] :adapter Which HTML parser to use, either <tt>:nokogiri</tt> or <tt>:hpricot</tt>. Default is <tt>:hpricot</tt>.
190
def initialize(html, options = {})
  # Built-in defaults; anything passed by the caller wins.
  defaults = {
    :warn_level => Warnings::SAFE,
    :line_length => 65,
    :link_query_string => nil,
    :base_url => nil,
    :remove_classes => false,
    :remove_ids => false,
    :remove_comments => false,
    :remove_scripts => true,
    :css => [],
    :css_to_attributes => true,
    :with_html_string => false,
    :css_string => nil,
    :preserve_styles => false,
    :preserve_reset => true,
    :verbose => false,
    :debug => false,
    :io_exceptions => false,
    :include_link_tags => true,
    :include_style_tags => true,
    :input_encoding => 'ASCII-8BIT',
    :replace_html_entities => false,
    :adapter => Adapter.use,
  }
  @options = defaults.merge(options)

  @html_file = html
  @is_local_file = @options[:with_html_string] || Premailer.local_data?(html)

  # :css may be a single entry or a list
  @css_files = [@options[:css]].flatten

  @css_warnings = []

  @base_dir = nil
  @unmergable_rules = nil

  # An explicit :base_url wins (and is removed from the options); remote
  # input is its own base; local input has none.
  @base_url = if @options[:base_url]
    URI.parse(@options.delete(:base_url))
  elsif !@is_local_file
    URI.parse(@html_file)
  end

  parser_settings = {
    :absolute_paths => true,
    :import => true,
    :io_exceptions => @options[:io_exceptions]
  }
  @css_parser = CssParser::Parser.new(parser_settings)

  # Mix the selected adapter's methods (load_html, to_inline_css, ...) into the class
  @adapter_class = Adapter.find @options[:adapter]
  self.class.send(:include, @adapter_class)

  @doc = load_html(@html_file)

  @processed_doc = @doc
  @processed_doc = convert_inline_links(@processed_doc, @base_url) if @base_url

  query_string = options[:link_query_string]
  @processed_doc = append_query_string(@processed_doc, query_string) if query_string

  load_css_from_options!
  load_css_from_html!
end
252
+
253
# CSS warnings.
#
# Always empty when the <tt>:warn_level</tt> option is Warnings::NONE.
# Otherwise the result of check_client_support is stored in
# <tt>@css_warnings</tt>; the check runs again on a later call only while
# the stored list is still empty.
#
# @return [Array(Hash)] Array of warnings.
def warnings
  if @options[:warn_level] == Warnings::NONE
    []
  else
    @css_warnings = check_client_support if @css_warnings.empty?
    @css_warnings
  end
end
260
+
261
+ protected
262
# Reads the stylesheet at +path+ and feeds it to load_css_from_string.
#
# Failures are best-effort by default: the stylesheet is skipped. They are
# reported on <tt>$stderr</tt> when the <tt>:verbose</tt> option is set and
# re-raised when the <tt>:io_exceptions</tt> option is set.
def load_css_from_local_file!(path)
  load_css_from_string(File.read(path))
rescue => e
  $stderr.puts "Could not load css from local file: #{path} (#{e.message})" if @options[:verbose]
  raise if @options[:io_exceptions]
end
274
+
275
# Adds a block of CSS to <tt>@css_parser</tt>, passing <tt>@base_url</tt>
# and <tt>@base_dir</tt> as base URI/directory and restricting media types
# to screen and handheld.
def load_css_from_string(css_string)
  parse_options = {
    :base_uri => @base_url,
    :base_dir => @base_dir,
    :only_media_types => [:screen, :handheld]
  }
  @css_parser.add_block!(css_string, parse_options)
end
278
+
279
# Loads the CSS supplied through the options: the <tt>:css_string</tt>
# option first, then every entry of <tt>@css_files</tt> -- from disk when
# Premailer.local_data? says it is local, through the CSS parser's
# <tt>load_uri!</tt> otherwise.
#
# @private
def load_css_from_options! # :nodoc:
  inline_css = @options[:css_string]
  load_css_from_string(inline_css) if inline_css

  @css_files.each do |css_file|
    next load_css_from_local_file!(css_file) if Premailer.local_data?(css_file)

    @css_parser.load_uri!(css_file)
  end
end
291
+
292
# Load CSS included in <tt>style</tt> and <tt>link</tt> tags from an HTML document.
#
# <tt>link</tt> tags are honoured when the <tt>:include_link_tags</tt> option
# is set and their media type passes media_type_ok?; <tt>style</tt> tags
# when the <tt>:include_style_tags</tt> option is set. The tags are removed
# from the document afterwards unless the <tt>:preserve_styles</tt> option
# is set.
def load_css_from_html! # :nodoc:
  tags = @doc.search("link[@rel='stylesheet'], style")
  return unless tags

  tags.each do |tag|
    if tag.to_s.strip =~ /^\<link/i && tag.attributes['href'] && media_type_ok?(tag.attributes['media']) && @options[:include_link_tags]
      # A user might want to <link /> to a local css file that is also mirrored on the site
      # but the local one is different (e.g. newer) than the live file, premailer will now choose the local file
      href = tag.attributes['href'].to_s
      link_uri = nil

      if href.include?(@base_url.to_s) && @html_file.kind_of?(String)
        link_uri = File.join(File.dirname(@html_file), href.sub(@base_url.to_s, ''))
      end

      # if the file does not exist locally, try to grab the remote reference
      # (File.exist? -- the File.exists? alias no longer exists in Ruby 3.2+)
      if link_uri.nil? || !File.exist?(link_uri)
        link_uri = Premailer.resolve_link(href, @html_file)
      end

      if Premailer.local_data?(link_uri)
        $stderr.puts "Loading css from local file: " + link_uri if @options[:verbose]
        load_css_from_local_file!(link_uri)
      else
        $stderr.puts "Loading css from uri: " + link_uri if @options[:verbose]
        @css_parser.load_uri!(link_uri, {:only_media_types => [:screen, :handheld]})
      end

    elsif tag.to_s.strip =~ /^\<style/i && @options[:include_style_tags]
      @css_parser.add_block!(tag.inner_html, :base_uri => @base_url, :base_dir => @base_dir, :only_media_types => [:screen, :handheld])
    end
  end
  tags.remove unless @options[:preserve_styles]
end
324
+
325
+
326
+
327
+ # here be deprecated methods
328
+ public
329
# Deprecated alias: emits a deprecation warning and delegates to
# Premailer.local_data?.
#
# @private
# @deprecated
def local_uri?(uri) # :nodoc:
  notice = "[DEPRECATION] `local_uri?` is deprecated. Please use `Premailer.local_data?` instead."
  warn notice
  Premailer.local_data?(uri)
end
335
+
336
+ # here be instance methods
337
+
338
# Tells whether a stylesheet's <tt>media</tt> value applies to e-mail
# rendering: true when no media is given or when any listed media type
# contains screen, handheld or all.
#
# +media_types+ may be a String or any object whose +to_s+ yields the
# attribute value (callers pass the parser's attribute object).
#
# @private
def media_type_ok?(media_types)
  # Normalise first: attribute objects need not respond to empty?/split,
  # and the resulting NoMethodError used to be rescued into "true".
  media_types = media_types.to_s
  return true if media_types.empty?

  media_types.split(/[\s]+|,/).any? { |media_type| media_type.strip =~ /screen|handheld|all/i }
rescue
  # best effort: when in doubt, keep the stylesheet
  true
end
345
+
346
# Appends the query string +qs+ to the <tt>href</tt> of every <tt>a</tt>
# element in +doc+.
#
# Leading question marks and surrounding whitespace are removed from a copy
# of +qs+; the caller's string is left untouched. Anchors, merge-tag style
# links, links to a host other than the one of <tt>@base_url</tt> and links
# whose scheme is neither http nor https are skipped. Diagnostics go to
# <tt>$stderr</tt> when the <tt>:verbose</tt> option is set.
#
# Returns +doc+.
def append_query_string(doc, qs)
  return doc if qs.nil?

  qs = qs.to_s.gsub(/^[\?]*/, '').strip
  return doc if qs.empty?

  current_host = begin
    @base_url.host
  rescue
    # no base URL (or one without a host): accept links to any host
    nil
  end

  $stderr.puts "Attempting to append_query_string: #{qs}" if @options[:verbose]

  doc.search('a').each do |el|
    href = el.attributes['href'].to_s.strip
    next if href.empty?

    next if href[0,1] =~ /[\#\{\[\<\%]/ # don't bother with anchors or special-looking links

    begin
      href = URI.parse(href)

      if current_host and href.host != nil and href.host != current_host
        $stderr.puts "Skipping append_query_string for: #{href.to_s} because host is no good" if @options[:verbose]
        next
      end

      if href.scheme and href.scheme != 'http' and href.scheme != 'https'
        $stderr.puts "Skipping append_query_string for: #{href.to_s} because scheme is no good" if @options[:verbose]
        next
      end

      if href.query and not href.query.empty?
        href.query = href.query + '&amp;' + qs
      else
        href.query = qs
      end

      el['href'] = href.to_s
    rescue URI::Error => e
      $stderr.puts "Skipping append_query_string for: #{href.to_s} (#{e.message})" if @options[:verbose]
      next
    end

  end
  doc
end
394
+
395
# Check for an XHTML doctype
#
# Looks for a W3C XHTML DTD reference within the first three lines of the
# serialised source document. The verdict and the inspected text are
# printed to <tt>$stderr</tt> when the <tt>:debug</tt> option is set.
def is_xhtml?
  leading_lines = @doc.to_html.strip.split("\n").first(3)
  intro = leading_lines.join(' ')
  is_xhtml = (intro =~ /w3c\/\/[\s]*dtd[\s]+xhtml/i) ? true : false
  $stderr.puts "Is XHTML? #{is_xhtml.inspect}\nChecked:\n#{intro}" if @options[:debug]
  is_xhtml
end
402
+
403
# Convert relative links to absolute links.
#
# Processes <tt>href</tt> <tt>src</tt> and <tt>background</tt> attributes
# as well as CSS <tt>url()</tt> declarations found in inline <tt>style</tt> attributes.
#
# Values that look like merge tags, anchors or non-web schemes (mailto:,
# tel:, data:, ...) are skipped. A value that cannot be resolved, even
# after URI-escaping it, is left exactly as it was.
#
# <tt>doc</tt> is a parsed document and <tt>base_uri</tt> is either a string or a URI.
#
# Returns the document.
def convert_inline_links(doc, base_uri) # :nodoc:
  base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

  ['href', 'src', 'background'].each do |attribute|
    tags = doc.search("*[@#{attribute}]")

    next if tags.empty?

    tags.each do |tag|
      value = tag.attributes[attribute].to_s

      # skip links that look like they have merge tags
      # and mailto, ftp, etc...
      if value =~ /^([\%\<\{\#\[]|data:|tel:|file:|sms:|callto:|facetime:|mailto:|ftp:|gopher:|cid:)/i
        next
      end

      merged = nil
      if value =~ /^http/i
        begin
          merged = URI.parse(value)
        rescue
          next
        end
      else
        begin
          # hand over a copy in case the resolver trims its argument in place
          merged = Premailer.resolve_link(value.dup, base_uri)
        rescue
          begin
            # second attempt with the value escaped; URI.escape is not
            # available on Ruby 3, so fall back to the default parser
            escaped = URI.respond_to?(:escape) ? URI.escape(value) : URI::DEFAULT_PARSER.escape(value)
            merged = Premailer.resolve_link(escaped, base_uri)
          rescue
            merged = nil
          end
        end
      end

      # unresolvable: keep the original value instead of blanking it
      next if merged.nil?

      # make sure 'merged' is a URI
      merged = URI.parse(merged.to_s) unless merged.kind_of?(URI)
      tag[attribute] = merged.to_s
    end # end of each tag
  end # end of each attrs

  doc.search("*[@style]").each do |el|
    el['style'] = CssParser.convert_uris(el.attributes['style'].to_s, base_uri)
  end
  doc
end
453
+
454
+
455
# Returns a copy of +str+ with every double quote replaced by a single
# quote.
#
# @private
def self.escape_string(str) # :nodoc:
  str.tr('"', "'")
end
459
+
460
# Resolves +path+ against +base_path+.
#
# * +path+ starting with an http, https or ftp scheme is canonicalized as-is;
# * otherwise, when +base_path+ is a URI (or a String starting with such a
#   scheme), +path+ is merged onto it and canonicalized;
# * otherwise +path+ is expanded relative to the directory of +base_path+.
#
# Surrounding whitespace is ignored; the caller's +path+ is not modified.
#
# @private
def self.resolve_link(path, base_path) # :nodoc:
  # work on a trimmed copy: strip! would alter (or fail on) the caller's string
  path = path.strip

  # anchored, so a relative path that merely contains a URL
  # (e.g. "go?u=http://...") is still resolved against the base
  if path =~ /\A(http[s]?|ftp):\/\//i
    Premailer.canonicalize(path)
  elsif base_path.kind_of?(URI)
    Premailer.canonicalize(base_path.merge(path))
  elsif base_path.kind_of?(String) and base_path =~ /\A(http[s]?|ftp):\/\//i
    Premailer.canonicalize(URI.parse(base_path).merge(path))
  else
    File.expand_path(path, File.dirname(base_path))
  end
end
478
+
479
# Test the passed variable to see if we are in local or remote mode.
#
# IO and StringIO objects are local. Anything whose string form starts with
# an http, https or ftp scheme is remote (false); everything else -- paths,
# raw markup, nil -- is local (true).
def self.local_data?(data)
  # StringIO is only checked when the library has actually been loaded
  return true if data.is_a?(IO) || (defined?(StringIO) && data.is_a?(StringIO))
  # to_s: objects other than String/nil need not implement =~
  return false if data.to_s =~ /\A(http|https|ftp)\:\/\//i
  true
end
487
+
488
# Normalizes +uri+ and removes "dir/.." and "/./" segments from its path.
#
# +uri+ may be a URI or anything whose +to_s+ parses as one; a URI argument
# is normalized in place. Leading ".." segments that have no directory left
# to cancel are kept as they are.
#
# from http://www.ruby-forum.com/topic/140101
#
# Returns the canonical URI as a String.
def self.canonicalize(uri) # :nodoc:
  u = uri.kind_of?(URI) ? uri : URI.parse(uri.to_s)
  u.normalize!
  newpath = u.path

  # Collapse "dir/../" pairs until the path stops changing. A "../../" run
  # is put back unchanged, so termination has to be decided by comparing
  # the result, not by whether a match was found.
  loop do
    collapsed = newpath.gsub(%r{([^/]+)/\.\./?}) { |match| $1 == '..' ? match : '' }
    break if collapsed == newpath
    newpath = collapsed
  end

  newpath = newpath.gsub(%r{/\./}, '/').sub(%r{/\.\z}, '/')
  u.path = newpath
  u.to_s
end
500
+
501
# Check <tt>CLIENT_SUPPORT_FILE</tt> for any CSS warnings
#
# The support chart is loaded once and kept in <tt>@client_support</tt>. A
# warning is produced for every CSS property used in an inline
# <tt>style</tt> attribute of the processed document, and for every HTML
# attribute and element of the source document, whose <tt>support</tt>
# value in the chart is at least the <tt>:warn_level</tt> option.
#
# Returns an Array of Hashes with the keys <tt>:message</tt>,
# <tt>:level</tt> and <tt>:clients</tt>.
def check_client_support # :nodoc:
  # load_file opens and closes the chart itself
  @client_support ||= YAML.load_file(CLIENT_SUPPORT_FILE)

  warnings = []

  # Get a list of the CSS properties used in style attributes
  properties = @processed_doc.search("*[@style]").map do |el|
    el.attributes['style'].to_s.scan(/([\w\-]+)[\s]*\:/i)
  end.flatten.uniq

  property_support = @client_support['css_properties']
  properties.each do |prop|
    if property_support.include?(prop) and
        property_support[prop].include?('support') and
        property_support[prop]['support'] >= @options[:warn_level]
      warnings.push({:message => "#{prop} CSS property",
          :level => WARN_LABEL[property_support[prop]['support']],
          :clients => property_support[prop]['unsupported_in'].join(', ')})
    end
  end

  @client_support['attributes'].each do |attribute, data|
    next unless data['support'] >= @options[:warn_level]
    if @doc.search("*[@#{attribute}]").length > 0
      warnings.push({:message => "#{attribute} HTML attribute",
          :level => WARN_LABEL[data['support']],
          :clients => data['unsupported_in'].join(', ')})
    end
  end

  @client_support['elements'].each do |element, data|
    next unless data['support'] >= @options[:warn_level]
    if @doc.search(element).length > 0
      warnings.push({:message => "#{element} HTML element",
          :level => WARN_LABEL[data['support']],
          :clients => data['unsupported_in'].join(', ')})
    end
  end

  return warnings
end
548
+ end
549
+