css_parser_1.1.0 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/css_parser.rb ADDED
@@ -0,0 +1,151 @@
1
+ require 'uri'
2
+ require 'digest/md5'
3
+ require 'zlib'
4
+ require 'iconv'
5
+
6
+ module CssParser
7
+ VERSION = '1.1.0'
8
+
9
+ # Merge multiple CSS RuleSets by cascading according to the CSS 2.1 cascading rules
10
+ # (http://www.w3.org/TR/REC-CSS2/cascade.html#cascading-order).
11
+ #
12
+ # Takes one or more RuleSet objects.
13
+ #
14
+ # Returns a RuleSet.
15
+ #
16
+ # ==== Cascading
17
+ # If a RuleSet object has its +specificity+ defined, that specificity is
18
+ # used in the cascade calculations.
19
+ #
20
+ # If no specificity is explicitly set and the RuleSet has *one* selector,
21
+ # the specificity is calculated using that selector.
22
+ #
23
+ # If no selectors or multiple selectors are present, the specificity is
24
+ # treated as 0.
25
+ #
26
+ # ==== Example #1
27
+ # rs1 = RuleSet.new(nil, 'color: black;')
28
+ # rs2 = RuleSet.new(nil, 'margin: 0px;')
29
+ #
30
+ # merged = CssParser.merge(rs1, rs2)
31
+ #
32
+ # puts merged
33
+ # => "{ margin: 0px; color: black; }"
34
+ #
35
+ # ==== Example #2
36
+ # rs1 = RuleSet.new(nil, 'background-color: black;')
37
+ # rs2 = RuleSet.new(nil, 'background-image: none;')
38
+ #
39
+ # merged = CssParser.merge(rs1, rs2)
40
+ #
41
+ # puts merged
42
+ # => "{ background: none black; }"
43
+ #--
44
+ # TODO: declaration_hashes should be able to contain a RuleSet
45
+ # this should be a Class method
46
def self.merge(*rule_sets)
  # Kept from the original implementation: resets the module-level cache on every merge.
  @folded_declaration_cache = {}

  # in case called like CssParser.merge([rule_set, rule_set])
  rule_sets.flatten! if rule_sets[0].kind_of?(Array)

  unless rule_sets.all? { |rs| rs.kind_of?(CssParser::RuleSet) }
    raise ArgumentError, "all parameters must be CssParser::RuleSets."
  end

  # A single rule set is returned as-is (not copied).
  return rule_sets[0] if rule_sets.length == 1

  # Winning declaration per property:
  #   property => {:value, :specificity, :is_important}
  properties = {}

  rule_sets.each do |rule_set|
    # NOTE: this expands shorthand declarations on the caller's RuleSet in place.
    rule_set.expand_shorthand!

    specificity = rule_set.specificity
    unless specificity
      if rule_set.selectors.length == 1
        specificity = calculate_specificity(rule_set.selectors[0])
      else
        specificity = 0
      end
    end

    rule_set.each_declaration do |property, value, is_important|
      current = properties[property]

      # Cascade per http://www.w3.org/TR/CSS21/cascade.html#cascading-order
      wins =
        if current.nil?
          true
        elsif (is_important ? true : false) != (current[:is_important] ? true : false)
          # Importance is compared first: an !important declaration beats a
          # normal one regardless of specificity or order.
          is_important ? true : false
        else
          # Same importance: higher specificity wins, later declaration wins ties.
          current[:specificity] <= specificity
        end

      if wins
        properties[property] = {:value => value, :specificity => specificity, :is_important => is_important}
      end
    end
  end

  merged = RuleSet.new(nil, nil)

  # TODO: the !important flag itself is not carried over to the merged RuleSet
  properties.each do |property, details|
    merged[property.strip] = details[:value].strip
  end

  merged.create_shorthand!
  merged
end
94
+
95
+ # Calculates the specificity of a CSS selector
96
+ # per http://www.w3.org/TR/CSS21/cascade.html#specificity
97
+ #
98
+ # Returns an integer.
99
+ #
100
+ # ==== Example
101
+ # CssParser.calculate_specificity('#content div p:first-line a:link')
102
+ # => 114
103
+ #--
104
+ # Thanks to Rafael Salazar and Nick Fitzsimons on the css-discuss list for their help.
105
+ #++
106
def self.calculate_specificity(selector)
  # One counter per specificity digit, most significant first:
  # inline style (always 0 here), ids, classes/attributes/pseudo-classes,
  # elements/pseudo-elements.
  digits = [
    0,
    selector.scan(/\#/).length,
    selector.scan(NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX).length,
    selector.scan(ELEMENTS_AND_PSEUDO_ELEMENTS_RX).length
  ]

  # The counters are concatenated as decimal strings, then read back as an integer.
  digits.join.to_i
rescue
  # Any failure (e.g. a nil selector) yields the lowest specificity.
  0
end
116
+
117
+ # Make <tt>url()</tt> links absolute.
118
+ #
119
+ # Takes a block of CSS and returns it with all relative URIs converted to absolute URIs.
120
+ #
121
+ # "For CSS style sheets, the base URI is that of the style sheet, not that of the source document."
122
+ # per http://www.w3.org/TR/CSS21/syndata.html#uri
123
+ #
124
+ # Returns a string.
125
+ #
126
+ # ==== Example
127
+ # CssParser.convert_uris("body { background: url('../style/yellow.png?abc=123') };",
128
+ # "http://example.org/style/basic.css").inspect
129
+ # => "body { background: url('http://example.org/style/yellow.png?abc=123') };"
130
def self.convert_uris(css, base_uri)
  base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

  # Rewrite every url(...) occurrence; the first capture of URI_RX holds the
  # (possibly quoted) target.
  css.gsub(URI_RX) do |_match|
    target = $1.to_s
    target.gsub!(/["']+/, '')

    # Targets that already start with "scheme://" are left alone.
    if target !~ /^[a-z]+\:\/\//i
      begin
        target = base_uri.merge(target)
      rescue
        # Best effort: a target that cannot be resolved is emitted unchanged.
      end
    end

    "url('" + target.to_s + "')"
  end
end
147
+ end
148
+
149
+ require File.dirname(__FILE__) + '/css_parser/rule_set'
150
+ require File.dirname(__FILE__) + '/css_parser/regexps'
151
+ require File.dirname(__FILE__) + '/css_parser/parser'
@@ -0,0 +1,362 @@
1
+ module CssParser
2
+ # Exception class used for any errors encountered while downloading remote files.
3
+ class RemoteFileError < IOError; end
4
+
5
+ # Exception class used if a request is made to load a CSS file more than once.
6
+ class CircularReferenceError < StandardError; end
7
+
8
+
9
+ # == Parser class
10
+ #
11
+ # All CSS is converted to UTF-8.
12
+ #
13
+ # When calling Parser#new there are some configuration options:
14
+ # [<tt>absolute_paths</tt>] Convert relative paths to absolute paths (<tt>href</tt>, <tt>src</tt> and <tt>url('')</tt>). Boolean, default is <tt>false</tt>.
15
+ # [<tt>import</tt>] Follow <tt>@import</tt> rules. Boolean, default is <tt>true</tt>.
16
+ # [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
17
+ class Parser
18
+ USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (http://code.dunae.ca/css_parser/)"
19
+
20
+ STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m
21
+ STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m
22
+
23
+ # Initial parsing
24
+ RE_AT_IMPORT_RULE = /\@import[\s]+(url\()?["']+(.[^'"]*)["']\)?([\w\s\,]*);?/i
25
+
26
+ #--
27
+ # RE_AT_IMPORT_RULE = Regexp.new('@import[\s]*(' + RE_STRING.to_s + ')([\w\s\,]*)[;]?', Regexp::IGNORECASE) -- should handle url() even though it is not allowed
28
+ #++
29
+
30
+ # Array of CSS files that have been loaded.
31
+ attr_reader :loaded_uris
32
+
33
+ #attr_reader :rules
34
+
35
+ #--
36
+ # Class variable? see http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html
37
+ #++
38
+ @folded_declaration_cache = {}
39
+ class << self; attr_reader :folded_declaration_cache; end
40
+
41
def initialize(options = {})
  # Caller-supplied options take precedence over these defaults.
  defaults = {:absolute_paths => false,
              :import => true,
              :io_exceptions => true}
  @options = defaults.merge(options)

  # array of RuleSets (each stored together with its media types)
  @rules = []

  # URIs that have already been requested
  @loaded_uris = []

  # unprocessed blocks of CSS
  @blocks = []

  reset!
end
56
+
57
+ # Get declarations by selector.
58
+ #
59
+ # +media_types+ are optional, and can be a symbol or an array of symbols.
60
+ # The default value is <tt>:all</tt>.
61
+ #
62
+ # ==== Examples
63
+ # find_by_selector('#content')
64
+ # => 'font-size: 13px; line-height: 1.2;'
65
+ #
66
+ # find_by_selector('#content', [:screen, :handheld])
67
+ # => 'font-size: 13px; line-height: 1.2;'
68
+ #
69
+ # find_by_selector('#content', :print)
70
+ # => 'font-size: 11pt; line-height: 1.2;'
71
+ #
72
+ # Returns an array of declarations.
73
def find_by_selector(selector, media_types = :all)
  matches = []
  each_selector(media_types) do |candidate, declarations, _specificity|
    # Selectors are compared ignoring surrounding whitespace.
    next unless candidate.strip == selector.strip
    matches << declarations
  end
  matches
end
80
+ alias_method :[], :find_by_selector
81
+
82
+
83
+ # Add a raw block of CSS.
84
+ #
85
+ # In order to follow +@import+ rules you must supply either a
86
+ # +:base_dir+ or +:base_uri+ option.
87
+ #
88
+ # ==== Example
89
+ # css = <<-EOT
90
+ # body { font-size: 10pt }
91
+ # p { margin: 0px; }
92
+ # @media screen, print {
93
+ # body { line-height: 1.2 }
94
+ # }
95
+ # EOT
96
+ #
97
+ # parser = CssParser::Parser.new
98
+ # parser.add_block!(css)
99
+ #--
100
+ # TODO: add media_type
101
+ #++
102
def add_block!(block, options = {})
  options = {:base_uri => nil, :base_dir => nil, :charset => nil, :media_types => :all}.merge(options)

  # Work on a private copy so the caller's string is never modified
  # (and frozen strings are accepted).
  block = cleanup_block(block.dup)

  if options[:base_uri] and @options[:absolute_paths]
    block = CssParser.convert_uris(block, options[:base_uri])
  end

  # Load @imported CSS, unless the parser was created with :import => false
  if @options[:import]
    block.scan(RE_AT_IMPORT_RULE).each do |import_rule|
      # The last capture holds the optional media list that follows the path.
      media_types = []
      if media_string = import_rule.last
        media_string.split(/\s|\,/).each do |t|
          media_types << t.to_sym unless t.empty?
        end
      end

      import_path = import_rule[1].to_s.gsub(/['"]*/, '').strip

      # Without a :base_uri or :base_dir there is nothing to resolve the
      # import against, so it is skipped.
      if options[:base_uri]
        import_uri = URI.parse(options[:base_uri].to_s).merge(import_path)
        load_uri!(import_uri, options[:base_uri], media_types)
      elsif options[:base_dir]
        load_file!(import_path, options[:base_dir], media_types)
      end
    end
  end

  # Remove @import declarations (always, so they are never parsed as rules)
  block = block.gsub(RE_AT_IMPORT_RULE, '')

  parse_block_into_rule_sets!(block, options)
end
138
+
139
+ # Add a CSS rule by setting the +selectors+, +declarations+ and +media_types+.
140
+ #
141
+ # +media_types+ can be a symbol or an array of symbols.
142
def add_rule!(selectors, declarations, media_types = :all)
  # Wrap the raw selectors/declarations in a RuleSet and register it.
  add_rule_set!(RuleSet.new(selectors, declarations), media_types)
end
146
+
147
+ # Add a CssParser RuleSet object.
148
+ #
149
+ # +media_types+ can be a symbol or an array of symbols.
150
def add_rule_set!(ruleset, media_types = :all)
  unless ruleset.kind_of?(CssParser::RuleSet)
    raise ArgumentError, "ruleset must be a CssParser::RuleSet"
  end

  # Treat nil like :all (as each_rule_set does) so that a stored entry always
  # has an enumerable list of media types.
  media_types = :all if media_types.nil?
  media_types = [media_types] if media_types.kind_of?(Symbol)

  @rules << {:media_types => media_types, :rules => ruleset}
end
157
+
158
+ # Iterate through RuleSet objects.
159
+ #
160
+ # +media_types+ can be a symbol or an array of symbols.
161
def each_rule_set(media_types = :all) # :yields: rule_set
  # Normalise the requested media types to an array; nil means :all.
  wanted = case media_types
           when nil then [:all]
           when Symbol then [media_types]
           else media_types
           end

  @rules.each do |entry|
    # Requesting :all yields every rule set; otherwise the entry must share
    # at least one media type with the request.
    next unless wanted.include?(:all) || entry[:media_types].any? { |mt| wanted.include?(mt) }
    yield entry[:rules]
  end
end
171
+
172
+ # Iterate through CSS selectors.
173
+ #
174
+ # +media_types+ can be a symbol or an array of symbols.
175
+ # See RuleSet#each_selector for +options+.
176
def each_selector(media_types = :all, options = {}) # :yields: selectors, declarations, specificity
  # Walk every matching rule set and re-yield each of its selectors.
  each_rule_set(media_types) do |rule_set|
    rule_set.each_selector(options) { |sel, decl, spec| yield sel, decl, spec }
  end
end
184
+
185
+ # Output all CSS rules as a single stylesheet.
186
def to_s(media_types = :all)
  # One "selector { declarations }" chunk per selector, in iteration order.
  chunks = []
  each_selector(media_types) do |selectors, declarations, _specificity|
    chunks << "#{selectors} {\n#{declarations}\n}\n"
  end
  chunks.join
end
193
+
194
+ # Merge declarations with the same selector.
195
def compact! # :nodoc:
  # Placeholder: no merging is performed yet; an empty array is always returned.
  []
end
200
+
201
def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
  options = {:media_types => :all}.merge(options)

  # Media types that apply to rules outside of any @media block.
  default_media_types = options[:media_types]
  media_types = default_media_types

  in_declarations = false

  # Number of currently open @media blocks.
  block_depth = 0

  # @charset is ignored for now
  in_charset = false
  in_string = false
  in_at_media_rule = false

  current_selectors = ''
  current_declarations = ''

  # Tokens are: a single brace, whitespace character or double quote
  # (optionally preceded by a backslash), or a run of other characters.
  block.scan(/([\\]?[{}\s"]|(.[^\s"{}\\]*))/).each do |matches|
    token = matches[0]
    if token =~ /\A"/ # found un-escaped double quote
      in_string = !in_string
    end

    if in_declarations
      current_declarations += token

      if token =~ /\}/ and not in_string
        # End of the declaration block: drop the closing brace and store the rule.
        current_declarations.gsub!(/\}[\s]*$/, '')

        in_declarations = false

        unless current_declarations.strip.empty?
          add_rule!(current_selectors, current_declarations, media_types)
        end

        current_selectors = ''
        current_declarations = ''
      end
    elsif token =~ /@media/i
      # found '@media', reset current media_types
      in_at_media_rule = true
      media_types = []
    elsif in_at_media_rule
      if token =~ /\{/
        block_depth = block_depth + 1
        in_at_media_rule = false
      else
        # A token may hold several comma-separated media types
        # (e.g. "screen,print"); whitespace-only tokens contribute nothing.
        token.split(',').each do |name|
          name = name.strip.downcase
          media_types << name.to_sym unless name.empty?
        end
      end
    elsif in_charset or token =~ /@charset/i
      # iterate until we are out of the charset declaration
      in_charset = (token =~ /;/ ? false : true)
    else
      if token =~ /\}/ and not in_string
        # Closing brace of an @media block: rules that follow are no longer
        # restricted to that block's media types.
        block_depth = block_depth - 1
        media_types = default_media_types if block_depth <= 0
      elsif token =~ /\{/ and not in_string
        current_selectors.gsub!(/^[\s]*/, '')
        current_selectors.gsub!(/[\s]*$/, '')
        in_declarations = true
      else
        current_selectors += token
      end
    end
  end
end
271
+
272
+ # Load a remote CSS file.
273
def load_uri!(uri, base_uri = nil, media_types = :all)
  # With no explicit base, the stylesheet's own URI is used as the base.
  base_uri = base_uri.nil? ? uri : base_uri

  # The charset reported by read_remote_file is currently not used.
  source, _charset = read_remote_file(uri)

  add_block!(source, {:media_types => media_types, :base_uri => base_uri})
end
279
+
280
+ # Load a local CSS file.
281
def load_file!(file_name, base_dir = nil, media_types = :all)
  path = File.expand_path(file_name, base_dir)

  # Unreadable or missing files are skipped silently.
  return unless File.readable?(path)

  # The file's own directory becomes the base directory for the block.
  add_block!(IO.read(path), {:media_types => media_types, :base_dir => File.dirname(path)})
end
290
+
291
+
292
+
293
+ protected
294
+ # Strip comments and clean up blank lines from a block of CSS.
295
+ #
296
+ # Returns a string.
297
def cleanup_block(block) # :nodoc:
  # Non-destructive substitutions: the caller's string is left untouched and
  # frozen strings are accepted. A new string is always returned.
  block.
    gsub(STRIP_CSS_COMMENTS_RX, '').  # strip CSS comments
    gsub(STRIP_HTML_COMMENTS_RX, ''). # strip HTML comment markers (<!-- and -->)
    gsub(/^\s+$/, "")                 # blank out lines containing just whitespace
end
310
+
311
+ # Download a file into a string.
312
+ #
313
+ # Returns the file's data and character set in an array.
314
+ #--
315
+ # TODO: add option to fail silently or throw an exception on a 404
316
+ #++
317
def read_remote_file(uri) # :nodoc:
  # Each URI may only be requested once per parser; this also guards against
  # stylesheets that @import each other.
  raise CircularReferenceError, "can't load #{uri.to_s} more than once" if @loaded_uris.include?(uri.to_s)
  @loaded_uris << uri.to_s

  fh = nil
  begin
    fh = open(uri, 'rb', 'User-Agent' => USER_AGENT, 'Accept-Encoding' => 'gzip')

    # The request advertises gzip support, so the body may arrive compressed.
    if fh.content_encoding.include?('gzip')
      remote_src = Zlib::GzipReader.new(fh).read
    else
      remote_src = fh.read
    end

    # Convert to UTF-8, dropping characters that cannot be converted.
    charset = fh.charset
    src = Iconv.new('UTF-8//IGNORE', charset).iconv(remote_src)

    return src, charset
  rescue => e
    # Any failure while fetching or decoding is reported as a RemoteFileError,
    # or swallowed when :io_exceptions is disabled.
    raise RemoteFileError, "could not load #{uri}: #{e.message}" if @options[:io_exceptions]
    return '', nil
  ensure
    # Always release the handle, including on the error path.
    fh.close if fh and not fh.closed?
  end
end
343
+
344
+ private
345
+ # Save a folded declaration block to the internal cache.
346
def save_folded_declaration(block_hash, folded_declaration) # :nodoc:
  # Stores (or overwrites) the entry and returns the stored declaration.
  @folded_declaration_cache.store(block_hash, folded_declaration)
end
349
+
350
+ # Retrieve a folded declaration block from the internal cache.
351
def get_folded_declaration(block_hash) # :nodoc:
  # Pure lookup: a miss returns nil without adding an entry to the cache.
  @folded_declaration_cache[block_hash] || nil
end
354
+
355
def reset! # :nodoc:
  # Start from a clean slate: empty cache, source, rules and warnings.
  @css_source = ''
  @folded_declaration_cache = {}
  @css_rules = []
  @css_warnings = []
end
361
+ end
362
+ end