css_parser 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG ADDED
@@ -0,0 +1,33 @@
1
+ = Premailer CHANGELOG
2
+
3
+ == Version 0.9
4
+ * initial proof-of-concept
5
+ * PHP web version
6
+
7
+ == Version 1.0
8
+ * ported web interface to eRuby
9
+ * incremental parsing improvements
10
+
11
+ == Version 1.1
12
+ * proper calculation of selector specificity per CSS 2.1 spec
13
+ * support for <tt>@import</tt>
14
+ * preliminary support for shorthand CSS properties (<tt>margin</tt>, <tt>padding</tt>)
15
+ * preliminary separation of CSS parser
16
+
17
+ == Version 1.2
18
+ * respect <tt>LINK</tt> media types
19
+ * better style folding
20
+ * incremental parsing improvements
21
+
22
+ == Version 1.3
23
+ * separate CSS parser into its own library
24
+ * handle <tt>background: red url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR42mP4%2F58BAAT%2FAf9jgNErAAAAAElFTkSuQmCC);</tt>
25
+ * preserve <tt>:hover</tt> etc... in head styles
26
+
27
+ == TODO: Future
28
+ * respect <tt>@media</tt> rule (http://www.w3.org/TR/CSS21/media.html#at-media-rule)
29
+ * complete shorthand properties support (<tt>border-width</tt>, <tt>font</tt>, <tt>background</tt>)
30
+ * better quote escaping
31
+ * UTF-8 and other charsets (test page: http://kianga.kcore.de/2004/09/21/utf8_test)
32
+ * make warnings for <tt>border</tt> match <tt>border-left</tt>, etc...
33
+ * correctly parse http://www.webstandards.org/files/acid2/test.html
data/LICENSE ADDED
@@ -0,0 +1,42 @@
1
+ = CSS Parser License
2
+
3
+ Copyright (c) 2007 Alex Dunae
4
+
5
+ Premailer is copyrighted free software by Alex Dunae (http://dunae.ca/).
6
+ You can redistribute it and/or modify it under the conditions below:
7
+
8
+ 1. You may make and give away verbatim copies of the source form of the
9
+ software without restriction, provided that you duplicate all of the
10
+ original copyright notices and associated disclaimers.
11
+
12
+ 2. You may modify your copy of the software in any way, provided that
13
+ you do at least ONE of the following:
14
+
15
+ a) place your modifications in the Public Domain or otherwise
16
+ make them Freely Available, such as by posting said
17
+ modifications to the internet or an equivalent medium, or by
18
+ allowing the author to include your modifications in the software.
19
+
20
+ b) use the modified software only within your corporation or
21
+ organization.
22
+
23
+ c) rename any non-standard executables so the names do not conflict
24
+ with standard executables, which must also be provided.
25
+
26
+ d) make other distribution arrangements with the author.
27
+
28
+ 3. You may modify and include the part of the software into any other
29
+ software (possibly commercial) as long as clear acknowledgement and
30
+ a link back to the original software (http://code.dunae.ca/premailer.web/)
31
+ is provided.
32
+
33
+ 5. The scripts and library files supplied as input to or produced as
34
+ output from the software do not automatically fall under the
35
+ copyright of the software, but belong to whomever generated them,
36
+ and may be sold commercially, and may be aggregated with this
37
+ software.
38
+
39
+ 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
40
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
41
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42
+ PURPOSE.
data/README ADDED
@@ -0,0 +1,60 @@
1
+ = Ruby CSS Parser
2
+
3
+ Load, parse and cascade CSS rule sets in Ruby.
4
+
5
+ === Setup
6
+
7
+ Install the gem from RubyGems.
8
+
9
+ gem install css_parser
10
+
11
+ Done.
12
+
13
+ === An example
14
+ require 'css_parser'
15
+ include CssParser
16
+
17
+ parser = CssParser::Parser.new
18
+ parser.load_uri!('http://example.com/styles/style.css')
19
+
20
+ # lookup a rule by a selector
21
+ parser.find_by_selector('#content')
22
+ #=> 'font-size: 13px; line-height: 1.2;'
23
+
24
+ # lookup a rule by a selector and media type
25
+ parser.find_by_selector('#content', [:screen, :handheld])
26
+
27
+ # iterate through selectors by media type
28
+ parser.each_selector(:screen) do |selector, declarations, specificity|
29
+ ...
30
+ end
31
+
32
+ # add a block of CSS
33
+ css = <<-EOT
34
+ body { margin: 0 1em; }
35
+ EOT
36
+
37
+ parser.add_block!(css)
38
+
39
+ # output all CSS rules in a single stylesheet
40
+ parser.to_s
41
+ => #content { font-size: 13px; line-height: 1.2; }
42
+ body { margin: 0 1em; }
43
+
44
+ === Testing
45
+
46
+ You can run the suite of unit tests using <tt>rake test</tt>.
47
+
48
+ The download/import tests require that WEBrick is installed. The tests set up
49
+ a temporary server on port 12000 and pull down files from the <tt>test/fixtures/</tt>
50
+ directory.
51
+
52
+ === Credits and code
53
+
54
+ * Project page: http://code.dunae.ca/css_parser/
55
+ * Source: http://code.dunae.ca/svn/css_parser/
56
+ * Docs: http://code.dunae.ca/css_parser/doc/
57
+
58
+ By Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2007.
59
+
60
+ Made with love on Vancouver Island.
@@ -0,0 +1,345 @@
1
+ module CssParser
2
# Raised for any error encountered while downloading a remote file.
class RemoteFileError < IOError
end

# Raised when a request is made to load the same CSS file more than once.
class CircularReferenceError < StandardError
end
7
+
8
+
9
+ # == Parser class
10
+ #
11
+ # All CSS is converted to UTF-8.
12
+ #
13
+ # When calling Parser#new there are some configuration options:
14
+ # [<tt>absolute_paths</tt>] Convert relative paths to absolute paths (<tt>href</tt>, <tt>src</tt> and <tt>url('')</tt>). Boolean, default is <tt>false</tt>.
15
+ # [<tt>import</tt>] Follow <tt>@import</tt> rules. Boolean, default is <tt>true</tt>.
16
+ # [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
17
+ class Parser
18
+ USER_AGENT = "Ruby CSS Parser/#{VERSION} (http://code.dunae.ca/css_parser/)"
19
+
20
+ STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m
21
+ STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m
22
+
23
+ # Initial parsing
24
+ RE_AT_IMPORT_RULE = /\@import[\s]+(url\()?["']+(.[^'"]*)["']\)?([\w\s\,]*);?/i
25
+
26
+ #--
27
+ # RE_AT_IMPORT_RULE = Regexp.new('@import[\s]*(' + RE_STRING.to_s + ')([\w\s\,]*)[;]?', Regexp::IGNORECASE) -- should handle url() even though it is not allowed
28
+ #++
29
+
30
+ # Array of CSS files that have been loaded.
31
+ attr_reader :loaded_uris
32
+
33
+ #attr_reader :rules
34
+
35
+ #--
36
+ # Class variable? see http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html
37
+ #++
38
+ @folded_declaration_cache = {}
39
+ class << self; attr_reader :folded_declaration_cache; end
40
+
41
# Create a parser.
#
# +options+ is merged over these defaults:
# <tt>:absolute_paths => false</tt>, <tt>:import => true</tt>,
# <tt>:io_exceptions => true</tt>.
def initialize(options = {})
  defaults = {
    :absolute_paths => false,
    :import         => true,
    :io_exceptions  => true
  }
  @options = defaults.merge(options)

  @rules       = [] # array of RuleSets, each paired with its media types
  @loaded_uris = [] # URIs that have already been requested
  @blocks      = [] # unprocessed blocks of CSS

  reset!
end
56
+
57
# Look up declarations by selector.
#
# +selector+ is compared with each stored selector after surrounding
# whitespace has been stripped from both sides.
#
# +media_types+ is optional and can be a symbol or an array of symbols.
# The default value is <tt>:all</tt>.
#
# ==== Examples
#  find_by_selector('#content')
#  => ['font-size: 13px; line-height: 1.2;']
#
#  find_by_selector('#content', [:screen, :handheld])
#  => ['font-size: 13px; line-height: 1.2;']
#
#  find_by_selector('#content', :print)
#  => ['font-size: 11pt; line-height: 1.2;']
#
# Returns an array of declarations, empty when nothing matches.
def find_by_selector(selector, media_types = :all)
  matches = []
  each_selector(media_types) do |candidate, declarations, _specificity|
    next unless candidate.strip == selector.strip
    matches.push(declarations)
  end
  matches
end
80
+ alias_method :[], :find_by_selector
81
+
82
+
83
# Add a raw block of CSS.
#
# The block is cleaned up (comments stripped) and then parsed into rule sets.
#
# +options+ is merged over the defaults <tt>:base_uri => nil</tt>,
# <tt>:charset => nil</tt> and <tt>:media_types => :all</tt>. When a
# <tt>:base_uri</tt> is given and the parser was created with
# <tt>:absolute_paths</tt>, URIs in the block are converted with
# CssParser.convert_uris before parsing. <tt>:charset</tt> is not used by
# this method itself.
#
# ==== Example
#   css = <<-EOT
#     body { font-size: 10pt }
#     p { margin: 0px; }
#     @media screen, print {
#       body { line-height: 1.2 }
#     }
#   EOT
#
#   parser = CssParser::Parser.new
#   parser.add_block!(css)
#--
# TODO: add media_type
#++
def add_block!(block, options = {})
  defaults = {:base_uri => nil, :charset => nil, :media_types => :all}
  options = defaults.merge(options)

  block = cleanup_block(block)

  base_uri = options[:base_uri]
  block = CssParser.convert_uris(block, base_uri) if base_uri && @options[:absolute_paths]

  parse_block_into_rule_sets!(block, options)
end
111
+
112
# Build a RuleSet from +selectors+ and +declarations+ and add it to the
# parser for the given +media_types+.
#
# +media_types+ can be a symbol or an array of symbols.
def add_rule!(selectors, declarations, media_types = :all)
  add_rule_set!(RuleSet.new(selectors, declarations), media_types)
end
119
+
120
# Add a CssParser RuleSet object.
#
# +media_types+ can be a symbol or an array of symbols; a single symbol is
# wrapped in an array before being stored.
#
# Raises ArgumentError when +ruleset+ is not a CssParser::RuleSet.
def add_rule_set!(ruleset, media_types = :all)
  raise ArgumentError unless ruleset.is_a?(CssParser::RuleSet)

  stored_media_types = media_types.is_a?(Symbol) ? [media_types] : media_types

  @rules.push({:media_types => stored_media_types, :rules => ruleset})
end
130
+
131
# Iterate through RuleSet objects.
#
# +media_types+ can be a symbol or an array of symbols; +nil+ is treated as
# <tt>:all</tt>. A rule set is yielded when <tt>:all</tt> was requested or
# when it was stored with at least one of the requested media types.
def each_rule_set(media_types = :all) # :yields: rule_set
  requested = media_types.nil? ? [:all] : media_types
  requested = [requested] if requested.is_a?(Symbol)

  @rules.each do |entry|
    applies = requested.include?(:all) ||
              entry[:media_types].any? { |media_type| requested.include?(media_type) }
    yield entry[:rules] if applies
  end
end
144
+
145
# Iterate through CSS selectors.
#
# Walks every rule set selected by +media_types+ and re-yields what each
# rule set's own +each_selector+ yields.
#
# +media_types+ can be a symbol or an array of symbols.
# See RuleSet#each_selector for +options+.
def each_selector(media_types = :all, options = {}) # :yields: selectors, declarations, specificity
  each_rule_set(media_types) do |rule_set|
    rule_set.each_selector(options) do |sel, decl, spec|
      yield sel, decl, spec
    end
  end
end
157
+
158
# Output all CSS rules as a single stylesheet.
#
# Each selector is written as <tt>selector {</tt>, its declarations on the
# next line and a closing brace, every part followed by a newline.
#
# +media_types+ can be a symbol or an array of symbols.
def to_s(media_types = :all)
  stylesheet = ''
  each_selector(media_types) do |sel, decl, _specificity|
    stylesheet.concat("#{sel} {\n#{decl}\n}\n")
  end
  stylesheet
end
166
+
167
# Intended to merge declarations with the same selector.
#
# Not implemented yet: no rules are touched and a new empty array is
# returned.
def compact! # :nodoc:
  []
end
173
+
174
# Tokenise a block of CSS and register every rule found via #add_rule!.
#
# +options+ may contain <tt>:media_types</tt> (default <tt>:all</tt>): the
# media types given to rules that are not inside an <tt>@media</tt> block.
# Rules inside <tt>@media a, b { ... }</tt> are registered with the media
# types listed in that at-rule; once the at-rule's closing brace has been
# read the default media types apply again.
#
# <tt>@charset</tt> declarations are skipped. Rules whose declaration block
# is blank are not registered.
def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
  options = {:media_types => :all}.merge(options)
  default_media_types = options[:media_types]
  media_types = default_media_types

  in_declarations = false
  in_charset = false # @charset is ignored for now
  in_string = false
  in_at_media_rule = false

  # number of @media blocks that are currently open
  block_depth = 0

  current_selectors = ''
  current_declarations = ''

  # A token is either a brace, a whitespace character or a double quote
  # (optionally preceded by a backslash), or a run of any other characters.
  block.scan(/([\\]?[{}\s"]|(.[^\s"{}\\]*))/).each do |matches|
    token = matches[0]

    # an un-escaped double quote toggles string mode
    in_string = !in_string if token =~ /\A"/

    if in_declarations
      current_declarations += token

      if token =~ /\}/ && !in_string
        # end of the declaration block: drop the brace and save the rule
        current_declarations.gsub!(/\}[\s]*$/, '')
        in_declarations = false

        unless current_declarations.strip.empty?
          add_rule!(current_selectors, current_declarations, media_types)
        end

        current_selectors = ''
        current_declarations = ''
      end
    elsif token =~ /@media/i
      # found '@media', start collecting a new list of media types
      in_at_media_rule = true
      media_types = []
    elsif in_at_media_rule
      if token =~ /\{/
        block_depth += 1
        in_at_media_rule = false
      else
        token.gsub!(/[,\s]*/, '')
        media_types << token.strip.downcase.to_sym unless token.empty?
      end
    elsif in_charset || token =~ /@charset/i
      # iterate until we are out of the charset declaration
      in_charset = (token =~ /;/ ? false : true)
    elsif token =~ /\}/ && !in_string
      # closing brace of an @media block: rules that follow must not
      # inherit the media types of the block that just ended
      if block_depth > 0
        block_depth -= 1
        media_types = default_media_types if block_depth.zero?
      end
    elsif token =~ /\{/ && !in_string
      current_selectors.gsub!(/^[\s]*/, '')
      current_selectors.gsub!(/[\s]*$/, '')
      in_declarations = true
    else
      current_selectors += token
    end
  end
end
244
+
245
# Load a remote CSS file.
#
# +uri+ is downloaded with #read_remote_file. Unless the parser was created
# with <tt>:import => false</tt>, every <tt>@import</tt> rule in the file is
# resolved against +base_uri+ (which defaults to +uri+) and loaded
# recursively before the file itself is added. An import is registered with
# the media types listed on its <tt>@import</tt> rule, or with
# +media_types+ when it lists none.
#
# The <tt>@import</tt> rules are removed from the source, and the remainder
# is added with #add_block! using +media_types+.
#
# +media_types+ can be a symbol or an array of symbols.
def load_uri!(uri, base_uri = nil, media_types = :all)
  base_uri = uri if base_uri.nil?
  src, _charset = read_remote_file(uri)

  # Load @imported CSS
  if @options[:import]
    src.scan(RE_AT_IMPORT_RULE).each do |import_rule|
      import_path = import_rule[1].to_s.gsub(/['"]*/, '').strip
      import_uri = URI.parse(base_uri.to_s).merge(import_path)

      # Kept separate from +media_types+ so the media types of an import
      # never leak into the stylesheet that imports it.
      import_media_types = import_rule.last.to_s.split(/\s|\,/).
        reject { |media_type| media_type.empty? }.
        map { |media_type| media_type.downcase.to_sym }
      import_media_types = media_types if import_media_types.empty?

      # Recurse
      load_uri!(import_uri, nil, import_media_types)
    end
  end

  # Remove @import declarations
  src = src.gsub(RE_AT_IMPORT_RULE, '')

  # Relative paths need to be converted here
  src = CssParser.convert_uris(src, base_uri) if base_uri && @options[:absolute_paths]

  add_block!(src, {:media_types => media_types})
end
275
+
276
+ protected
277
# Strip comments and clean up blank lines from a block of CSS.
#
# Removes CSS comments, the HTML comment markers <tt><!--</tt> and
# <tt>--></tt> (their content is kept) and the whitespace on lines that
# contain nothing else.
#
# The string passed in is left untouched, so frozen strings are accepted.
#
# Returns a new string.
def cleanup_block(block) # :nodoc:
  # HTML comments shouldn't really be in here but some people are just crazy...
  block.gsub(STRIP_CSS_COMMENTS_RX, '').
        gsub(STRIP_HTML_COMMENTS_RX, '').
        gsub(/^\s+$/, '')
end
293
+
294
# Download a file into a string.
#
# The request advertises gzip support; a gzip-encoded response is inflated
# before the body is converted to UTF-8 with Iconv, using the charset
# reported for the response.
#
# Returns the file's data and character set in an array. If anything goes
# wrong while downloading or converting, RemoteFileError is raised when the
# <tt>:io_exceptions</tt> option is set; otherwise <tt>['', nil]</tt> is
# returned.
#
# Raises CircularReferenceError if +uri+ has already been requested.
def read_remote_file(uri) # :nodoc:
  raise CircularReferenceError, "can't load #{uri.to_s} more than once" if @loaded_uris.include?(uri.to_s)
  @loaded_uris << uri.to_s

  fh = nil
  begin
    fh = open(uri, 'rb', 'User-Agent' => USER_AGENT, 'Accept-Encoding' => 'gzip')
    charset = fh.charset

    if fh.content_encoding.include?('gzip')
      remote_src = Zlib::GzipReader.new(fh).read
    else
      remote_src = fh.read
    end

    src = Iconv.new('UTF-8//IGNORE', charset).iconv(remote_src)

    return src, charset
  rescue => e
    raise RemoteFileError, "could not load #{uri}: #{e.message}" if @options[:io_exceptions]
    return '', nil
  ensure
    # close the handle on the failure paths too
    fh.close if fh && !fh.closed?
  end
end
326
+
327
+ private
328
# Store +folded_declaration+ in the internal cache under +block_hash+.
#
# Returns the stored declaration.
def save_folded_declaration(block_hash, folded_declaration) # :nodoc:
  @folded_declaration_cache.store(block_hash, folded_declaration)
end
332
+
333
# Retrieve a folded declaration block from the internal cache.
#
# Returns +nil+ when nothing is cached under +block_hash+. A lookup never
# adds an entry to the cache.
def get_folded_declaration(block_hash) # :nodoc:
  @folded_declaration_cache[block_hash]
end
337
+
338
# Replace the folded-declaration cache, the CSS source, the CSS rules and
# the CSS warnings with fresh empty values.
def reset! # :nodoc:
  @folded_declaration_cache, @css_source, @css_rules = {}, '', []
  @css_warnings = []
end
344
+ end
345
+ end
@@ -0,0 +1,46 @@
1
# Regular expressions shared by the CSS parser. The base types follow the
# token names used by the CSS 2.1 grammar (nl, nonascii, unicode, escape,
# ident, string, uri).
module CssParser
  # :stopdoc:
  # Base types
  RE_NL = Regexp.new('(\n|\r\n|\r|\f)')
  # Any character outside the ASCII range, i.e. [^\0-\177].
  RE_NON_ASCII = Regexp.new('([^\x00-\x7F])', Regexp::IGNORECASE)
  RE_UNICODE = Regexp.new('(\\\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])*)', Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE)
  # The second alternative has to be a Regexp: Regexp.union escapes String
  # arguments and would only match the pattern's own source text.
  RE_ESCAPE = Regexp.union(RE_UNICODE, Regexp.new('(\\\\[^\n\r\f0-9a-f])', Regexp::IGNORECASE))
  RE_IDENT = Regexp.new("[\-]?([_a-z]|#{RE_NON_ASCII}|#{RE_ESCAPE})([_a-z0-9\-]|#{RE_NON_ASCII}|#{RE_ESCAPE})*", Regexp::IGNORECASE)

  # General strings
  RE_STRING1 = Regexp.new('(\"(.[^\n\r\f\\"]*|\\\\' + RE_NL.to_s + '|' + RE_ESCAPE.to_s + ')*\")')
  RE_STRING2 = Regexp.new('(\'(.[^\n\r\f\\\']*|\\\\' + RE_NL.to_s + '|' + RE_ESCAPE.to_s + ')*\')')
  RE_STRING = Regexp.union(RE_STRING1, RE_STRING2)

  RE_URI = Regexp.new('(url\([\s]*([\s]*' + RE_STRING.to_s + '[\s]*)[\s]*\))|(url\([\s]*([!#$%&*\-~]|' + RE_NON_ASCII.to_s + '|' + RE_ESCAPE.to_s + ')*[\s]*)\)', Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE)
  URI_RX = /url\(("([^"]*)"|'([^']*)'|([^)]*))\)/im

  # Initial parsing
  RE_AT_IMPORT_RULE = /\@import[\s]+(url\()?["']+(.[^'"]*)["']\)?([\w\s\,]*);?/i

  #--
  #RE_AT_MEDIA_RULE = Regexp.new('(\"(.[^\n\r\f\\"]*|\\\\' + RE_NL.to_s + '|' + RE_ESCAPE.to_s + ')*\")')

  #RE_AT_IMPORT_RULE = Regexp.new('@import[\s]*(' + RE_STRING.to_s + ')([\w\s\,]*)[;]?', Regexp::IGNORECASE) -- should handle url() even though it is not allowed
  #++
  IMPORTANT_IN_PROPERTY_RX = /[\s]*\!important[\s]*/i
  STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m
  STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m

  # Special units
  # The point/pica alternative is p[tc]; a '+' inside the character class
  # would also accept "p+" as a unit.
  BOX_MODEL_UNITS_RX = /(auto|inherit|0|([\-]*([0-9]+|[0-9]*\.[0-9]+)(e[mx]+|px|[cm]+m|p[tc]|in|\%)))([\s;]|\Z)/imx
  RE_LENGTH_OR_PERCENTAGE = Regexp.new('([\-]*(([0-9]*\.[0-9]+)|[0-9]+)(e[mx]+|px|[cm]+m|p[tc]|in|\%))', Regexp::IGNORECASE)
  RE_BACKGROUND_POSITION = Regexp.new("((#{RE_LENGTH_OR_PERCENTAGE})|left|center|right|top|bottom)", Regexp::IGNORECASE | Regexp::EXTENDED)
  FONT_UNITS_RX = /(([x]+\-)*small|medium|large[r]*|auto|inherit|([0-9]+|[0-9]*\.[0-9]+)(e[mx]+|px|[cm]+m|p[tc]|in|\%)*)/i

  # Patterns for specificity calculations
  ELEMENTS_AND_PSEUDO_ELEMENTS_RX = /((^|[\s\+\>]+)[\w]+|\:(first\-line|first\-letter|before|after))/i
  NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX = /(\.[\w]+)|(\[[\w]+)|(\:(link|first\-child|lang))/i

  # Colours
  RE_COLOUR_RGB = Regexp.new('(rgb[\s]*\([\s-]*[\d]+(\.[\d]+)?[%\s]*,[\s-]*[\d]+(\.[\d]+)?[%\s]*,[\s-]*[\d]+(\.[\d]+)?[%\s]*\))', Regexp::IGNORECASE)
  RE_COLOUR_HEX = /(#([0-9a-f]{6}|[0-9a-f]{3})([\s;]|$))/i
  RE_COLOUR_NAMED = /([\s]*^)?(aqua|black|blue|fuchsia|gray|green|lime|maroon|navy|olive|orange|purple|red|silver|teal|white|yellow|transparent)([\s]*$)?/i
  RE_COLOUR = Regexp.union(RE_COLOUR_RGB, RE_COLOUR_HEX, RE_COLOUR_NAMED)
  # :startdoc:
end