css_parser 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG ADDED
@@ -0,0 +1,33 @@
1
+ = Premailer CHANGELOG
2
+
3
+ == Version 0.9
4
+ * initial proof-of-concept
5
+ * PHP web version
6
+
7
+ == Version 1.0
8
+ * ported web interface to eRuby
9
+ * incremental parsing improvements
10
+
11
+ == Version 1.1
12
+ * proper calculation of selector specificity per CSS 2.1 spec
13
+ * support for <tt>@import</tt>
14
+ * preliminary support for shorthand CSS properties (<tt>margin</tt>, <tt>padding</tt>)
15
+ * preliminary separation of CSS parser
16
+
17
+ == Version 1.2
18
+ * respect <tt>LINK</tt> media types
19
+ * better style folding
20
+ * incremental parsing improvements
21
+
22
+ == Version 1.3
23
+ * separate CSS parser into its own library
24
+ * handle <tt>background: red url(data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAIAAACQd1PeAAAADElEQVR42mP4%2F58BAAT%2FAf9jgNErAAAAAElFTkSuQmCC);</tt>
25
+ * preserve <tt>:hover</tt> etc... in head styles
26
+
27
+ == TODO: Future
28
+ * respect <tt>@media</tt> rule (http://www.w3.org/TR/CSS21/media.html#at-media-rule)
29
+ * complete shorthand properties support (<tt>border-width</tt>, <tt>font</tt>, <tt>background</tt>)
30
+ * better quote escaping
31
+ * UTF-8 and other charsets (test page: http://kianga.kcore.de/2004/09/21/utf8_test)
32
+ * make warnings for <tt>border</tt> match <tt>border-left</tt>, etc...
33
+ * correctly parse http://www.webstandards.org/files/acid2/test.html
data/LICENSE ADDED
@@ -0,0 +1,42 @@
1
+ = CSS Parser License
2
+
3
+ Copyright (c) 2007 Alex Dunae
4
+
5
+ Premailer is copyrighted free software by Alex Dunae (http://dunae.ca/).
6
+ You can redistribute it and/or modify it under the conditions below:
7
+
8
+ 1. You may make and give away verbatim copies of the source form of the
9
+ software without restriction, provided that you duplicate all of the
10
+ original copyright notices and associated disclaimers.
11
+
12
+ 2. You may modify your copy of the software in any way, provided that
13
+ you do at least ONE of the following:
14
+
15
+ a) place your modifications in the Public Domain or otherwise
16
+ make them Freely Available, such as by posting said
17
+ modifications to the internet or an equivalent medium, or by
18
+ allowing the author to include your modifications in the software.
19
+
20
+ b) use the modified software only within your corporation or
21
+ organization.
22
+
23
+ c) rename any non-standard executables so the names do not conflict
24
+ with standard executables, which must also be provided.
25
+
26
+ d) make other distribution arrangements with the author.
27
+
28
+ 3. You may modify and include the part of the software into any other
29
+ software (possibly commercial) as long as clear acknowledgement and
30
+ a link back to the original software (http://code.dunae.ca/premailer.web/)
31
+ is provided.
32
+
33
+ 5. The scripts and library files supplied as input to or produced as
34
+ output from the software do not automatically fall under the
35
+ copyright of the software, but belong to whomever generated them,
36
+ and may be sold commercially, and may be aggregated with this
37
+ software.
38
+
39
+ 6. THIS SOFTWARE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
40
+ IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
41
+ WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
42
+ PURPOSE.
data/README ADDED
@@ -0,0 +1,60 @@
1
+ = Ruby CSS Parser
2
+
3
+ Load, parse and cascade CSS rule sets in Ruby.
4
+
5
+ === Setup
6
+
7
+ Install the gem from RubyGems.
8
+
9
+ gem install css_parser
10
+
11
+ Done.
12
+
13
+ === An example
14
+ require 'css_parser'
15
+ include CssParser
16
+
17
+ parser = CssParser::Parser.new
18
+ parser.load_uri!('http://example.com/styles/style.css')
19
+
20
+ # lookup a rule by a selector
21
+ parser.find_by_selector('#content')
22
+ #=> 'font-size: 13px; line-height: 1.2;'
23
+
24
+ # lookup a rule by a selector and media type
25
+ parser.find_by_selector('#content', [:screen, :handheld])
26
+
27
+ # iterate through selectors by media type
28
+ parser.each_selector(:screen) do |selector, declarations, specificity|
29
+ ...
30
+ end
31
+
32
+ # add a block of CSS
33
+ css = <<-EOT
34
+ body { margin: 0 1em; }
35
+ EOT
36
+
37
+ parser.add_block!(css)
38
+
39
+ # output all CSS rules in a single stylesheet
40
+ parser.to_s
41
+ => #content { font-size: 13px; line-height: 1.2; }
42
+ body { margin: 0 1em; }
43
+
44
+ === Testing
45
+
46
+ You can run the suite of unit tests using <tt>rake test</tt>.
47
+
48
+ The download/import tests require that WEBrick is installed. The tests set up
49
+ a temporary server on port 12000 and pull down files from the <tt>test/fixtures/</tt>
50
+ directory.
51
+
52
+ === Credits and code
53
+
54
+ * Project page: http://code.dunae.ca/css_parser/
55
+ * Source: http://code.dunae.ca/svn/css_parser/
56
+ * Docs: http://code.dunae.ca/css_parser/doc/
57
+
58
+ By Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2007.
59
+
60
+ Made with love on Vancouver Island.
@@ -0,0 +1,345 @@
1
+ module CssParser
2
+ # Exception class used for any errors encountered while downloading remote files.
3
+ class RemoteFileError < IOError; end
4
+
5
+ # Exception class used if a request is made to load a CSS file more than once.
6
+ class CircularReferenceError < StandardError; end
7
+
8
+
9
+ # == Parser class
10
+ #
11
+ # All CSS is converted to UTF-8.
12
+ #
13
+ # When calling Parser#new there are some configuration options:
14
+ # [<tt>absolute_paths</tt>] Convert relative paths to absolute paths (<tt>href</tt>, <tt>src</tt> and <tt>url('')</tt>). Boolean, default is <tt>false</tt>.
15
+ # [<tt>import</tt>] Follow <tt>@import</tt> rules. Boolean, default is <tt>true</tt>.
16
+ # [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
17
+ class Parser
18
# User-Agent header value sent by read_remote_file.
# VERSION is not defined in this file; it must be defined before this class is loaded.
USER_AGENT = "Ruby CSS Parser/#{VERSION} (http://code.dunae.ca/css_parser/)"

# Matches one <tt>/* ... */</tt> CSS comment (non-greedy, may span lines).
STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m
# Matches only the HTML comment delimiters (<tt><!--</tt> and <tt>--></tt>),
# not the text between them.
STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m

# Initial parsing
#
# Matches a quoted <tt>@import</tt> rule, optionally wrapped in <tt>url(</tt> <tt>)</tt>.
# Capture 1 is the optional <tt>url(</tt>, capture 2 the path and capture 3 the
# trailing media type list.
RE_AT_IMPORT_RULE = /\@import[\s]+(url\()?["']+(.[^'"]*)["']\)?([\w\s\,]*);?/i

#--
# RE_AT_IMPORT_RULE = Regexp.new('@import[\s]*(' + RE_STRING.to_s + ')([\w\s\,]*)[;]?', Regexp::IGNORECASE) -- should handle url() even though it is not allowed
#++

# Array of CSS files that have been loaded.
attr_reader :loaded_uris

#attr_reader :rules

#--
# Class variable? see http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html
#
# NOTE(review): the assignment below creates an instance variable on the Parser
# class object itself. The instance methods reset!, save_folded_declaration and
# get_folded_declaration use a per-instance variable of the same name, so they
# never touch the value exposed by Parser.folded_declaration_cache -- confirm
# which of the two is intended.
#++
@folded_declaration_cache = {}
class << self; attr_reader :folded_declaration_cache; end
40
+
41
def initialize(options = {})
  # Caller-supplied options win over these defaults.
  defaults = {
    :absolute_paths => false,
    :import         => true,
    :io_exceptions  => true
  }
  @options = defaults.merge(options)

  # Each entry is a hash of the form {:media_types => [...], :rules => RuleSet}.
  @rules = []

  # URIs that have already been handed to read_remote_file.
  @loaded_uris = []

  # Unprocessed blocks of CSS.
  @blocks = []

  reset!
end
56
+
57
+ # Get declarations by selector.
58
+ #
59
+ # +media_types+ are optional, and can be a symbol or an array of symbols.
60
+ # The default value is <tt>:all</tt>.
61
+ #
62
+ # ==== Examples
63
+ # find_by_selector('#content')
64
+ # => 'font-size: 13px; line-height: 1.2;'
65
+ #
66
+ # find_by_selector('#content', [:screen, :handheld])
67
+ # => 'font-size: 13px; line-height: 1.2;'
68
+ #
69
+ # find_by_selector('#content', :print)
70
+ # => 'font-size: 11pt; line-height: 1.2;'
71
+ #
72
+ # Returns an array of declarations.
73
def find_by_selector(selector, media_types = :all)
  matches = []
  each_selector(media_types) do |candidate, declarations, _specificity|
    # Surrounding whitespace is ignored on both sides of the comparison.
    next unless candidate.strip == selector.strip

    matches << declarations
  end
  matches
end
80
+ alias_method :[], :find_by_selector
81
+
82
+
83
+ # Add a raw block of CSS.
84
+ #
85
+ # ==== Example
86
+ # css = <<-EOT
87
+ # body { font-size: 10pt }
88
+ # p { margin: 0px; }
89
+ # @media screen, print {
90
+ # body { line-height: 1.2 }
91
+ # }
92
+ # EOT
93
+ #
94
+ # parser = CssParser::Parser.new
95
+ # parser.add_block!(css)
96
+ #--
97
+ # TODO: add media_type
98
+ #++
99
def add_block!(block, options = {})
  defaults = { :base_uri => nil, :charset => nil, :media_types => :all }
  options = defaults.merge(options)

  block = cleanup_block(block)

  # Relative URIs are only rewritten when a base URI was supplied and the
  # parser was created with :absolute_paths enabled.
  base_uri = options[:base_uri]
  block = CssParser.convert_uris(block, base_uri) if base_uri && @options[:absolute_paths]

  parse_block_into_rule_sets!(block, options)
end
111
+
112
+ # Add a CSS rule by setting the +selectors+, +declarations+ and +media_types+.
113
+ #
114
+ # +media_types+ can be a symbol or an array of symbols.
115
def add_rule!(selectors, declarations, media_types = :all)
  # Wrap the raw selector/declaration strings in a RuleSet and register it.
  add_rule_set!(RuleSet.new(selectors, declarations), media_types)
end
119
+
120
+ # Add a CssParser RuleSet object.
121
+ #
122
+ # +media_types+ can be a symbol or an array of symbols.
123
def add_rule_set!(ruleset, media_types = :all)
  unless ruleset.kind_of?(CssParser::RuleSet)
    raise ArgumentError, "expected a CssParser::RuleSet, got #{ruleset.class}"
  end

  # Normalise the same way each_rule_set does: nil means "all media" and a
  # lone symbol becomes a one-element array. Storing nil here would make
  # each_rule_set fail later when it calls #any? on the stored value.
  media_types = [:all] if media_types.nil?
  media_types = [media_types] if media_types.kind_of?(Symbol)

  @rules << { :media_types => media_types, :rules => ruleset }
end
130
+
131
+ # Iterate through RuleSet objects.
132
+ #
133
+ # +media_types+ can be a symbol or an array of symbols.
134
def each_rule_set(media_types = :all) # :yields: rule_set
  # Without a block, hand back an Enumerator instead of failing on yield.
  return enum_for(:each_rule_set, media_types) unless block_given?

  media_types = [:all] if media_types.nil?
  media_types = [media_types] if media_types.kind_of?(Symbol)

  # Requesting :all selects every stored rule set regardless of its media types.
  wants_all = media_types.include?(:all)

  @rules.each do |entry|
    if wants_all || entry[:media_types].any? { |mt| media_types.include?(mt) }
      yield entry[:rules]
    end
  end
end
144
+
145
+ # Iterate through CSS selectors.
146
+ #
147
+ # +media_types+ can be a symbol or an array of symbols.
148
+ # See RuleSet#each_selector for +options+.
149
def each_selector(media_types = :all, options = {}) # :yields: selectors, declarations, specificity
  # Without a block, hand back an Enumerator instead of failing on yield.
  return enum_for(:each_selector, media_types, options) unless block_given?

  each_rule_set(media_types) do |rule_set|
    rule_set.each_selector(options) do |selectors, declarations, specificity|
      yield selectors, declarations, specificity
    end
  end
end
157
+
158
+ # Output all CSS rules as a single stylesheet.
159
def to_s(media_types = :all)
  stylesheet = ''
  each_selector(media_types) do |selectors, declarations, _specificity|
    # One rule per selector: selector line, declarations, closing brace.
    rendered = "#{selectors} {\n#{declarations}\n}\n"
    stylesheet << rendered
  end
  stylesheet
end
166
+
167
+ # Merge declarations with the same selector.
168
def compact! # :nodoc:
  # No merging is performed; an empty array is returned unconditionally.
  []
end
173
+
174
# Tokenize a block of CSS and register every rule found via add_rule!.
#
# +options+ accepts <tt>:media_types</tt> (default <tt>:all</tt>), the media
# types given to rules that are not inside an <tt>@media</tt> block.
#
# Rules inside an <tt>@media</tt> block receive the media types listed on that
# block. When the block is closed the media types from +options+ apply again.
def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
  options = { :media_types => :all }.merge(options)
  default_media_types = options[:media_types]
  media_types = default_media_types

  in_declarations = false
  in_string = false
  in_charset = false       # @charset is ignored for now
  in_at_media_rule = false # between '@media' and its opening brace
  in_media_block = false   # between the braces of an @media block

  current_selectors = ''.dup
  current_declarations = ''.dup

  # Tokens are: a brace, whitespace character or double quote (each optionally
  # preceded by a backslash), or a run of any other characters.
  block.scan(/([\\]?[{}\s"]|(.[^\s"{}\\]*))/).each do |matches|
    token = matches[0]

    # An un-escaped double quote toggles string mode.
    in_string = !in_string if token =~ /\A"/

    if in_declarations
      current_declarations << token

      if token =~ /\}/ && !in_string
        # End of the declaration block: drop the closing brace and save the rule.
        current_declarations.gsub!(/\}[\s]*$/, '')
        in_declarations = false

        unless current_declarations.strip.empty?
          add_rule!(current_selectors, current_declarations, media_types)
        end

        current_selectors = ''.dup
        current_declarations = ''.dup
      end
    elsif token =~ /@media/i
      # Found '@media': start collecting a fresh list of media types.
      in_at_media_rule = true
      media_types = []
    elsif in_at_media_rule
      if token =~ /\{/
        in_at_media_rule = false
        in_media_block = true
      else
        media_type = token.gsub(/[,\s]*/, '')
        media_types << media_type.downcase.to_sym unless media_type.empty?
      end
    elsif in_charset || token =~ /@charset/i
      # Skip tokens until the terminating semicolon of the @charset rule.
      in_charset = (token !~ /;/)
    elsif token =~ /\}/ && !in_string
      # A closing brace outside of declarations ends the current @media block,
      # so rules that follow must not keep that block's media types.
      if in_media_block
        media_types = default_media_types
        in_media_block = false
      end
    elsif token =~ /\{/ && !in_string
      current_selectors.gsub!(/^[\s]*/, '')
      current_selectors.gsub!(/[\s]*$/, '')
      in_declarations = true
    else
      current_selectors << token
    end
  end
end
244
+
245
+ # Load a remote CSS file.
246
# Load a remote CSS file, following its <tt>@import</tt> rules first.
#
# +base_uri+ defaults to +uri+ and is used to resolve relative import paths.
# +media_types+ is applied to the rules of the file itself; an imported file
# gets the media types listed on its <tt>@import</tt> rule, or +media_types+
# when the rule lists none.
#
# Imports are only followed when the parser's <tt>:import</tt> option is set;
# the <tt>@import</tt> rules are removed from the source either way.
def load_uri!(uri, base_uri = nil, media_types = :all)
  base_uri = uri if base_uri.nil?
  src, _charset = read_remote_file(uri)

  if @options[:import]
    src.scan(RE_AT_IMPORT_RULE).each do |import_rule|
      import_path = import_rule[1].to_s.gsub(/['"]*/, '').strip
      import_uri = URI.parse(base_uri.to_s).merge(import_path)

      # Use a separate variable so the media types of the file being loaded
      # are not overwritten by those of its imports.
      import_media_types = import_rule.last.to_s.split(/\s|\,/).
        reject { |t| t.empty? }.
        map { |t| t.downcase.to_sym }
      import_media_types = media_types if import_media_types.empty?

      # Recurse
      load_uri!(import_uri, nil, import_media_types)
    end
  end

  # Remove @import declarations
  src = src.gsub(RE_AT_IMPORT_RULE, '')

  # Relative paths need to be converted here
  src = CssParser.convert_uris(src, base_uri) if base_uri && @options[:absolute_paths]

  add_block!(src, { :media_types => media_types })
end
275
+
276
+ protected
277
+ # Strip comments and clean up blank lines from a block of CSS.
278
+ #
279
+ # Returns a string.
280
def cleanup_block(block) # :nodoc:
  # Non-destructive gsub is used throughout so the caller's string is left
  # untouched and frozen strings are accepted; a new string is returned.

  # Strip CSS comments
  cleaned = block.gsub(STRIP_CSS_COMMENTS_RX, '')

  # Strip HTML comments - they shouldn't really be in here but
  # some people are just crazy...
  cleaned = cleaned.gsub(STRIP_HTML_COMMENTS_RX, '')

  # Strip lines containing just whitespace
  cleaned.gsub(/^\s+$/, '')
end
293
+
294
+ # Download a file into a string.
295
+ #
296
+ # Returns the file's data and character set in an array.
297
+ #--
298
+ # TODO: add option to fail silently or throw an exception on a 404
299
+ #++
300
def read_remote_file(uri) # :nodoc:
  # Each URI may only be loaded once per parser; this also stops import loops.
  raise CircularReferenceError, "can't load #{uri.to_s} more than once" if @loaded_uris.include?(uri.to_s)
  @loaded_uris << uri.to_s

  fh = nil
  begin
    fh = open(uri, 'rb', 'User-Agent' => USER_AGENT, 'Accept-Encoding' => 'gzip')

    if fh.content_encoding.include?('gzip')
      remote_src = Zlib::GzipReader.new(fh).read
    else
      remote_src = fh.read
    end

    # Convert to UTF-8 from whatever charset the response reported.
    charset = fh.charset
    src = Iconv.new('UTF-8//IGNORE', charset).iconv(remote_src)

    return src, charset
  rescue => e
    # With :io_exceptions enabled every failure is reported as a
    # RemoteFileError that names the URI and the underlying cause;
    # otherwise an empty source is returned.
    raise RemoteFileError, "could not load #{uri}: #{e.message}" if @options[:io_exceptions]
    return '', nil
  ensure
    # Close the handle on every path, including failed reads or conversions.
    fh.close if fh && !fh.closed?
  end
end
326
+
327
+ private
328
# Save a folded declaration block to the internal cache.
def save_folded_declaration(block_hash, folded_declaration) # :nodoc:
  @folded_declaration_cache[block_hash] = folded_declaration
end

# Retrieve a folded declaration block from the internal cache.
#
# Returns +nil+ when nothing is cached for +block_hash+. A miss does not
# add a placeholder entry to the cache.
def get_folded_declaration(block_hash) # :nodoc:
  @folded_declaration_cache[block_hash]
end

# Reset the per-instance folded declaration cache along with the source,
# rules and warnings instance variables.
def reset! # :nodoc:
  @folded_declaration_cache = {}
  @css_source = ''
  @css_rules = []
  @css_warnings = []
end
344
+ end
345
+ end
@@ -0,0 +1,46 @@
1
+ module CssParser
2
+ # :stopdoc:
3
+ # Base types
4
+ RE_NL = Regexp.new('(\n|\r\n|\r|\f)')
5
+ RE_NON_ASCII = Regexp.new('([\x00-\xFF])', Regexp::IGNORECASE) #[^\0-\177]
6
+ RE_UNICODE = Regexp.new('(\\\\[0-9a-f]{1,6}(\r\n|[ \n\r\t\f])*)', Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE)
7
+ RE_ESCAPE = Regexp.union(RE_UNICODE, '|(\\\\[^\n\r\f0-9a-f])')
8
+ RE_IDENT = Regexp.new("[\-]?([_a-z]|#{RE_NON_ASCII}|#{RE_ESCAPE})([_a-z0-9\-]|#{RE_NON_ASCII}|#{RE_ESCAPE})*", Regexp::IGNORECASE)
9
+
10
+ # General strings
11
+ RE_STRING1 = Regexp.new('(\"(.[^\n\r\f\\"]*|\\\\' + RE_NL.to_s + '|' + RE_ESCAPE.to_s + ')*\")')
12
+ RE_STRING2 = Regexp.new('(\'(.[^\n\r\f\\\']*|\\\\' + RE_NL.to_s + '|' + RE_ESCAPE.to_s + ')*\')')
13
+ RE_STRING = Regexp.union(RE_STRING1, RE_STRING2)
14
+
15
+ RE_URI = Regexp.new('(url\([\s]*([\s]*' + RE_STRING.to_s + '[\s]*)[\s]*\))|(url\([\s]*([!#$%&*\-~]|' + RE_NON_ASCII.to_s + '|' + RE_ESCAPE.to_s + ')*[\s]*)\)', Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE)
16
+ URI_RX = /url\(("([^"]*)"|'([^']*)'|([^)]*))\)/im
17
+
18
+ # Initial parsing
19
+ RE_AT_IMPORT_RULE = /\@import[\s]+(url\()?["']+(.[^'"]*)["']\)?([\w\s\,]*);?/i
20
+
21
+ #--
22
+ #RE_AT_MEDIA_RULE = Regexp.new('(\"(.[^\n\r\f\\"]*|\\\\' + RE_NL.to_s + '|' + RE_ESCAPE.to_s + ')*\")')
23
+
24
+ #RE_AT_IMPORT_RULE = Regexp.new('@import[\s]*(' + RE_STRING.to_s + ')([\w\s\,]*)[;]?', Regexp::IGNORECASE) -- should handle url() even though it is not allowed
25
+ #++
26
+ IMPORTANT_IN_PROPERTY_RX = /[\s]*\!important[\s]*/i
27
+ STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m
28
+ STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m
29
+
30
+ # Special units
31
+ BOX_MODEL_UNITS_RX = /(auto|inherit|0|([\-]*([0-9]+|[0-9]*\.[0-9]+)(e[mx]+|px|[cm]+m|p[tc+]|in|\%)))([\s;]|\Z)/imx
32
+ RE_LENGTH_OR_PERCENTAGE = Regexp.new('([\-]*(([0-9]*\.[0-9]+)|[0-9]+)(e[mx]+|px|[cm]+m|p[tc+]|in|\%))', Regexp::IGNORECASE)
33
+ RE_BACKGROUND_POSITION = Regexp.new("((#{RE_LENGTH_OR_PERCENTAGE})|left|center|right|top|bottom)", Regexp::IGNORECASE | Regexp::EXTENDED)
34
+ FONT_UNITS_RX = /(([x]+\-)*small|medium|large[r]*|auto|inherit|([0-9]+|[0-9]*\.[0-9]+)(e[mx]+|px|[cm]+m|p[tc+]|in|\%)*)/i
35
+
36
+ # Patterns for specificity calculations
37
+ ELEMENTS_AND_PSEUDO_ELEMENTS_RX = /((^|[\s\+\>]+)[\w]+|\:(first\-line|first\-letter|before|after))/i
38
+ NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX = /(\.[\w]+)|(\[[\w]+)|(\:(link|first\-child|lang))/i
39
+
40
+ # Colours
41
+ RE_COLOUR_RGB = Regexp.new('(rgb[\s]*\([\s-]*[\d]+(\.[\d]+)?[%\s]*,[\s-]*[\d]+(\.[\d]+)?[%\s]*,[\s-]*[\d]+(\.[\d]+)?[%\s]*\))', Regexp::IGNORECASE)
42
+ RE_COLOUR_HEX = /(#([0-9a-f]{6}|[0-9a-f]{3})([\s;]|$))/i
43
+ RE_COLOUR_NAMED = /([\s]*^)?(aqua|black|blue|fuchsia|gray|green|lime|maroon|navy|olive|orange|purple|red|silver|teal|white|yellow|transparent)([\s]*$)?/i
44
+ RE_COLOUR = Regexp.union(RE_COLOUR_RGB, RE_COLOUR_HEX, RE_COLOUR_NAMED)
45
+ # :startdoc:
46
+ end