marcosinger-css_parser 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,162 @@
1
+ require 'uri'
2
+ require 'net/https'
3
+ require 'open-uri'
4
+ require 'digest/md5'
5
+ require 'zlib'
6
+ require 'stringio'
7
+ require 'iconv'
8
+
9
+ module CssParser
10
+ VERSION = '1.2.0'
11
+
12
# Merge multiple CSS RuleSets by cascading according to the CSS 2.1 cascading rules
# (http://www.w3.org/TR/REC-CSS2/cascade.html#cascading-order).
#
# Takes one or more RuleSet objects, or a single array of RuleSet objects.
#
# Returns a RuleSet. When exactly one RuleSet is given it is returned unchanged.
#
# Raises ArgumentError if any argument is not a CssParser::RuleSet.
#
# ==== Cascading
# If a RuleSet object has its +specificity+ defined, that specificity is
# used in the cascade calculations.
#
# If no specificity is explicitly set and the RuleSet has *one* selector,
# the specificity is calculated using that selector.
#
# If no selectors or multiple selectors are present, the specificity is
# treated as 0.
#
# For each property, a later declaration replaces the one kept so far when:
# * both are normal and the later specificity is greater or equal;
# * the later one is <tt>!important</tt> and the kept one is not;
# * both are <tt>!important</tt> and the later specificity is greater or equal.
# A normal declaration never replaces an <tt>!important</tt> one.
#
# ==== Example #1
#   rs1 = RuleSet.new(nil, 'color: black;')
#   rs2 = RuleSet.new(nil, 'margin: 0px;')
#
#   merged = CssParser.merge(rs1, rs2)
#
#   puts merged
#   => "{ margin: 0px; color: black; }"
#
# ==== Example #2
#   rs1 = RuleSet.new(nil, 'background-color: black;')
#   rs2 = RuleSet.new(nil, 'background-image: none;')
#
#   merged = CssParser.merge(rs1, rs2)
#
#   puts merged
#   => "{ background: none black; }"
#--
# TODO: declaration_hashes should be able to contain a RuleSet
#       this should be a Class method
def self.merge(*rule_sets)
  @folded_declaration_cache = {}

  # in case called like CssParser.merge([rule_set, rule_set])
  rule_sets.flatten! if rule_sets[0].kind_of?(Array)

  unless rule_sets.all? { |rs| rs.kind_of?(CssParser::RuleSet) }
    raise ArgumentError, "all parameters must be CssParser::RuleSets."
  end

  return rule_sets[0] if rule_sets.length == 1

  # Internal storage of CSS properties that we will keep
  properties = {}

  rule_sets.each do |rule_set|
    rule_set.expand_shorthand!

    specificity = rule_set.specificity
    unless specificity
      if rule_set.selectors.length == 1
        specificity = calculate_specificity(rule_set.selectors[0])
      else
        specificity = 0
      end
    end

    rule_set.each_declaration do |property, value, is_important|
      # Fold per http://www.w3.org/TR/CSS21/cascade.html#cascading-order
      candidate = {:value => value, :specificity => specificity, :is_important => is_important}
      kept = properties[property]

      if kept.nil?
        properties[property] = candidate
      elsif is_important
        # An !important declaration only loses to an earlier !important
        # declaration that has a strictly higher specificity.
        if !kept[:is_important] || kept[:specificity] <= specificity
          properties[property] = candidate
        end
      elsif !kept[:is_important] && kept[:specificity] <= specificity
        properties[property] = candidate
      end
    end
  end

  merged = RuleSet.new(nil, nil)

  properties.each do |property, details|
    if details[:is_important]
      merged[property.strip] = details[:value].strip.gsub(/\;\Z/, '') + '!important'
    else
      merged[property.strip] = details[:value].strip
    end
  end

  merged.create_shorthand!
  merged
end
105
+
106
# Calculates the specificity of a CSS selector
# per http://www.w3.org/TR/CSS21/cascade.html#specificity
#
# The result is built by writing the id count, the attribute/pseudo-class
# count and the element/pseudo-element count next to each other as decimal
# digits and reading the digits back as one integer.
#
# Returns an integer; 0 is returned if anything goes wrong while scanning.
#
# ==== Example
#  CssParser.calculate_specificity('#content div p:first-line a:link')
#  => 114
#--
# Thanks to Rafael Salazar and Nick Fitzsimons on the css-discuss list for their help.
#++
def self.calculate_specificity(selector)
  id_count = selector.scan(/\#/).length
  class_count = selector.scan(NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX).length
  element_count = selector.scan(ELEMENTS_AND_PSEUDO_ELEMENTS_RX).length

  # The leading "0" is the inline-style component, which is always zero here.
  "0#{id_count}#{class_count}#{element_count}".to_i
rescue
  0
end
127
+
128
# Make <tt>url()</tt> links absolute.
#
# Takes a block of CSS and a base URI (a URI object or anything URI.parse
# accepts) and returns the CSS with every relative <tt>url()</tt> reference
# resolved against the base URI.
#
# "For CSS style sheets, the base URI is that of the style sheet, not that of the source document."
# per http://www.w3.org/TR/CSS21/syndata.html#uri
#
# Returns a string. Every <tt>url()</tt> is rewritten with single quotes.
#
# ==== Example
#  CssParser.convert_uris("body { background: url('../style/yellow.png?abc=123') };",
#               "http://example.org/style/basic.css").inspect
#  => "body { background: url('http://example.org/style/yellow.png?abc=123') };"
def self.convert_uris(css, base_uri)
  base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

  css.gsub(URI_RX) do |_match|
    target = Regexp.last_match(1).to_s
    target.gsub!(/["']+/, '')

    # Only references without a scheme are resolved against the base
    if target !~ /^[a-z]+\:\/\//i
      begin
        target = base_uri.merge(target)
      rescue
        # unresolvable references are emitted as they were written
      end
    end

    "url('#{target}')"
  end
end
158
+ end
159
+
160
+ require File.dirname(__FILE__) + '/css_parser/rule_set'
161
+ require File.dirname(__FILE__) + '/css_parser/regexps'
162
+ require File.dirname(__FILE__) + '/css_parser/parser'
@@ -0,0 +1,454 @@
1
+ module CssParser
2
# Raised for any error encountered while downloading a remote file.
class RemoteFileError < IOError
end
4
+
5
# Raised when a request is made to load the same CSS file more than once.
class CircularReferenceError < StandardError
end
7
+
8
+
9
+ # == Parser class
10
+ #
11
+ # All CSS is converted to UTF-8.
12
+ #
13
+ # When calling Parser#new there are some configuration options:
14
+ # [<tt>absolute_paths</tt>] Convert relative paths to absolute paths (<tt>href</tt>, <tt>src</tt> and <tt>url('')</tt>). Boolean, default is <tt>false</tt>.
15
+ # [<tt>import</tt>] Follow <tt>@import</tt> rules. Boolean, default is <tt>true</tt>.
16
+ # [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
17
+ class Parser
18
# Value sent in the User-Agent header when a stylesheet is fetched over HTTP.
USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (http://github.com/alexdunae/css_parser)"

# Matches one <tt>/* ... */</tt> comment, including comments that span lines.
STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m

# Matches the HTML comment delimiters <tt><!--</tt> and <tt>--></tt>.
STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m

# Initial parsing: captures the target of an <tt>@import</tt> rule and the
# media list that follows it.
RE_AT_IMPORT_RULE = /\@import\s*(?:url\s*)?(?:\()?(?:\s*)["']?([^'"\s\)]*)["']?\)?([\w\s\,^\])]*)\)?[;\n]?/

# Array of CSS files that have been loaded.
attr_reader :loaded_uris

#attr_reader :rules

#--
# Class variable? see http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html
#++
@folded_declaration_cache = {}

class << self
  attr_reader :folded_declaration_cache
end
36
+
37
# Create a parser.
#
# +options+ is merged over the defaults <tt>:absolute_paths => false</tt>,
# <tt>:import => true</tt> and <tt>:io_exceptions => true</tt>.
def initialize(options = {})
  defaults = {:absolute_paths => false,
              :import => true,
              :io_exceptions => true}
  @options = defaults.merge(options)

  @rules = []        # array of RuleSets
  @loaded_uris = []
  @blocks = []       # unprocessed blocks of CSS

  reset!
end
52
+
53
# Get declarations by selector.
#
# +selector+ is either a String, compared for equality after stripping
# surrounding whitespace, or a Regexp, matched against each stripped selector.
#
# +media_types+ are optional, and can be a symbol or an array of symbols.
# The default value is <tt>:all</tt>.
#
# ==== Examples (String)
#  find_by_selector('#content')
#  => ['font-size: 13px; line-height: 1.2;']
#
#  find_by_selector('#content', [:screen, :handheld])
#  => ['font-size: 13px; line-height: 1.2;']
#
#  find_by_selector('#content', :print)
#  => ['font-size: 11pt; line-height: 1.2;']
#
# ==== Examples (Regexp)
#  find_by_selector(/#content/)
#  => [{'font-size' => '13px', 'line-height' => '1.2'}]
#
#  find_by_selector(/\.content/) # remember to escape the dot
#  => [{'float' => 'left'}]
#
#  find_by_selector(/content/)
#  => {'#content' => {'font-size' => '13px', 'line-height' => '1.2'}, '.content' => {'float' => 'left'}}
#
# Returns an array of declaration strings for a String selector. For a Regexp
# it returns a hash of selector => declaration hash when two or more selectors
# match, and otherwise an array holding the (at most one) declaration hash.
# Any other kind of +selector+ yields an empty array.
def find_by_selector(selector, media_types = :all)
  out = []
  out_hash = {}

  each_selector(media_types) do |sel, dec, spec|
    if selector.is_a?(Regexp)
      next unless sel.strip =~ selector

      dec_hash = {}
      dec.split(';').each do |declaration|
        # skip the blank segments left by trailing or doubled semicolons
        next if declaration.strip.empty?

        # limit of 2 so values that contain ':' (e.g. url(http://...)) stay intact
        parts = declaration.split(':', 2)
        dec_hash[parts.first.lstrip] = parts.last.lstrip
      end

      # the same selector may occur in several rule sets: combine their declarations
      out_hash.merge!({sel => dec_hash}) { |_key, existing, added| existing.merge(added) }
    elsif selector.is_a?(String)
      out << dec if sel.strip == selector.strip
    end
  end

  selector.is_a?(Regexp) ? (out_hash.size < 2 ? out_hash.values : out_hash) : out
end
96
+ alias_method :[], :find_by_selector
97
+
98
+
99
# Add a raw block of CSS.
#
# In order to follow +@import+ rules you must supply either a
# +:base_dir+ or +:base_uri+ option. +@import+ rules are only followed when
# the parser was created with <tt>:import => true</tt> (the default); they are
# always removed from the block before it is parsed.
#
# Use the +:media_types+ option to set the media type(s) for this block.  Takes an array of symbols.
#
# Use the +:only_media_types+ option to selectively follow +@import+ rules.  Takes an array of symbols.
#
# When +:base_uri+ is given and the parser was created with
# <tt>:absolute_paths => true</tt>, <tt>url()</tt> references are made absolute first.
#
# ==== Example
#   css = <<-EOT
#     body { font-size: 10pt }
#     p { margin: 0px; }
#     @media screen, print {
#       body { line-height: 1.2 }
#     }
#   EOT
#
#   parser = CssParser::Parser.new
#   parser.add_block!(css)
def add_block!(block, options = {})
  options = {:base_uri => nil, :base_dir => nil, :charset => nil, :media_types => :all, :only_media_types => :all}.merge(options)
  options[:media_types] = [options[:media_types]].flatten
  options[:only_media_types] = [options[:only_media_types]].flatten

  block = cleanup_block(block)

  if options[:base_uri] && @options[:absolute_paths]
    block = CssParser.convert_uris(block, options[:base_uri])
  end

  # Load @imported CSS, unless importing was switched off for this parser
  if @options[:import]
    block.scan(RE_AT_IMPORT_RULE).each do |import_rule|
      # the last capture holds the media list that follows the import target
      media_types = []
      if media_string = import_rule[-1]
        media_string.split(/\s|\,/).each do |t|
          media_types << t.to_sym unless t.empty?
        end
      end

      # follow the import when no filter is active, when the rule names no
      # media type, or when it names at least one of the wanted media types
      wanted = options[:only_media_types]
      next unless wanted.include?(:all) || media_types.empty? || !(media_types & wanted).empty?

      import_path = import_rule[0].to_s.gsub(/['"]*/, '').strip

      if options[:base_uri]
        import_uri = URI.parse(options[:base_uri].to_s).merge(import_path)
        load_uri!(import_uri, options[:base_uri], media_types)
      elsif options[:base_dir]
        load_file!(import_path, options[:base_dir], media_types)
      end
    end
  end

  # Remove @import declarations (non-destructively, so the string handed to
  # us is never modified here)
  block = block.gsub(RE_AT_IMPORT_RULE, '')

  parse_block_into_rule_sets!(block, options)
end
156
+
157
# Add a CSS rule by setting the +selectors+, +declarations+ and +media_types+.
#
# A RuleSet is built from +selectors+ and +declarations+ and handed to
# add_rule_set!.
#
# +media_types+ can be a symbol or an array of symbols.
def add_rule!(selectors, declarations, media_types = :all)
  add_rule_set!(RuleSet.new(selectors, declarations), media_types)
end
164
+
165
# Add a CssParser RuleSet object.
#
# +media_types+ can be a symbol or an array of symbols; a lone symbol is
# wrapped in an array before it is stored.
#
# Raises ArgumentError unless +ruleset+ is a CssParser::RuleSet.
def add_rule_set!(ruleset, media_types = :all)
  raise ArgumentError if !ruleset.kind_of?(CssParser::RuleSet)

  media_list = media_types.kind_of?(Symbol) ? [media_types] : media_types

  @rules.push({:media_types => media_list, :rules => ruleset})
end
175
+
176
# Iterate through RuleSet objects.
#
# +media_types+ can be a symbol or an array of symbols; +nil+ is treated as
# <tt>:all</tt>. A rule set is yielded when <tt>:all</tt> was requested or
# when it was stored with at least one of the requested media types.
def each_rule_set(media_types = :all) # :yields: rule_set
  wanted = media_types.nil? ? [:all] : media_types
  wanted = [wanted] if wanted.kind_of?(Symbol)

  @rules.each do |entry|
    next unless wanted.include?(:all) || entry[:media_types].any? { |mt| wanted.include?(mt) }

    yield entry[:rules]
  end
end
189
+
190
# Iterate through CSS selectors.
#
# Walks every rule set selected by +media_types+ and re-yields whatever the
# rule set's own +each_selector+ yields.
#
# +media_types+ can be a symbol or an array of symbols.
# See RuleSet#each_selector for +options+.
def each_selector(media_types = :all, options = {}) # :yields: selectors, declarations, specificity
  each_rule_set(media_types) do |rule_set|
    rule_set.each_selector(options) { |sel, decl, spec| yield sel, decl, spec }
  end
end
202
+
203
# Output all CSS rules as a single stylesheet.
#
# Each selector yielded by each_selector for +media_types+ is written as
# <tt>selector {</tt>, its declarations and <tt>}</tt>, each on its own line.
#
# Returns a string (empty when there are no rules).
def to_s(media_types = :all)
  chunks = []
  each_selector(media_types) do |selectors, declarations, _specificity|
    chunks << "#{selectors} {\n#{declarations}\n}\n"
  end
  chunks.join
end
211
+
212
# Merge declarations with the same selector.
#
# Not implemented: nothing is merged and an empty array is returned.
def compact! # :nodoc:
  []
end
218
+
219
# Tokenize a cleaned block of CSS and register every rule found via add_rule!.
#
# +options[:media_types]+ (default <tt>:all</tt>) is the media type list given
# to rules outside any <tt>@media</tt> block. Rules inside an <tt>@media</tt>
# block get the media types named by that block; the initial list is restored
# once the block is closed.
#
# A rule whose declaration block is still open at the end of the input is
# registered as well.
def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
  options = {:media_types => :all}.merge(options)
  default_media_types = options[:media_types]
  media_types = default_media_types

  # nesting depth inside "{ ... }" of a rule; anything above 1 is malformed
  in_declarations = 0

  block_depth = 0

  # @charset is ignored for now
  in_charset = false
  in_string = false
  in_at_media_rule = false

  current_selectors = ''
  current_declarations = ''

  block.scan(/([\\]?[{}\s"]|(.[^\s"{}\\]*))/).each do |matches|
    token = matches[0]

    if token =~ /\A"/ # found un-escaped double quote
      in_string = !in_string
    end

    if in_declarations > 0
      # too deep, malformed declaration block: skip until we are back at depth 1
      if in_declarations > 1
        in_declarations -= 1 if token =~ /\}/
        next
      end

      if token =~ /\{/
        in_declarations += 1
        next
      end

      current_declarations += token

      if token =~ /\}/ && !in_string
        current_declarations.gsub!(/\}[\s]*$/, '')

        in_declarations -= 1

        unless current_declarations.strip.empty?
          add_rule!(current_selectors, current_declarations, media_types)
        end

        current_selectors = ''
        current_declarations = ''
      end
    elsif token =~ /@media/i
      # found '@media', start collecting a fresh list of media types
      in_at_media_rule = true
      media_types = []
    elsif in_at_media_rule
      if token =~ /\{/
        block_depth = block_depth + 1
        in_at_media_rule = false
      else
        token.gsub!(/[,\s]*/, '')
        media_types << token.strip.downcase.to_sym unless token.empty?
      end
    elsif in_charset || token =~ /@charset/i
      # iterate until we are out of the charset declaration
      in_charset = token !~ /;/
    elsif token =~ /\}/ && !in_string
      # a closing brace outside of a rule ends the enclosing @media block, so
      # the rules that follow go back to the media types we started with
      block_depth = block_depth - 1
      media_types = default_media_types
    elsif token =~ /\{/ && !in_string
      current_selectors.gsub!(/^[\s]*/, '')
      current_selectors.gsub!(/[\s]*$/, '')
      in_declarations += 1
    else
      current_selectors += token
    end
  end

  # check for unclosed braces
  if in_declarations > 0
    add_rule!(current_selectors, current_declarations, media_types)
  end
end
307
+
308
# Load a remote CSS file.
#
# You can also pass in file://test.css
#
# +options+ is a hash of add_block! options. When no +:base_uri+ is supplied
# the URI being loaded is used as the base.
#
# Deprecated: originally accepted three params: `uri`, `base_uri` and `media_types`.
# That form is still understood: a String second argument is taken as the
# base URI and a non-nil third argument as the media types.
def load_uri!(uri, options = {}, deprecated = nil)
  uri = URI.parse(uri) unless uri.respond_to? :scheme

  opts = {:base_uri => nil, :media_types => :all}

  case options
  when Hash
    opts.merge!(options)
  else
    opts[:base_uri] = options if options.is_a? String
    opts[:media_types] = deprecated if deprecated
  end

  # scheme-less URIs are treated as local files
  if uri.scheme == 'file' || uri.scheme.nil?
    uri.path = File.expand_path(uri.path)
    uri.scheme = 'file'
  end

  opts[:base_uri] = uri if opts[:base_uri].nil?

  src, = read_remote_file(uri)

  add_block!(src, opts) if src
end
342
+
343
# Load a local CSS file.
#
# +file_name+ is resolved relative to +base_dir+. Nothing happens (and +nil+
# is returned) when the resulting path is not readable. Otherwise the file's
# contents are passed to add_block! with +media_types+ and with the file's
# own directory as +:base_dir+.
def load_file!(file_name, base_dir = nil, media_types = :all)
  full_path = File.expand_path(file_name, base_dir)
  return unless File.readable?(full_path)

  add_block!(IO.read(full_path), {:media_types => media_types, :base_dir => File.dirname(full_path)})
end
353
+
354
+
355
+
356
+ protected
357
# Strip comments and clean up blank lines from a block of CSS.
#
# Removes CSS comments, the HTML comment delimiters (they shouldn't really be
# in a stylesheet, but some are found in the wild) and the contents of lines
# that contain only whitespace.
#
# The string passed in is left untouched, so frozen strings are accepted.
#
# Returns a new string.
def cleanup_block(block) # :nodoc:
  cleaned = block.gsub(STRIP_CSS_COMMENTS_RX, '')
  cleaned = cleaned.gsub(STRIP_HTML_COMMENTS_RX, '')

  # Strip lines containing just whitespace
  cleaned.gsub(/^\s+$/, "")
end
373
+
374
# Download a file into a string.
#
# +uri+ may use the +file+, +http+ or +https+ scheme. Every URI is recorded in
# +@loaded_uris+ and may be loaded only once.
#
# Returns the file's data and character set in an array:
# * <tt>[data, nil]</tt> for local files (no character set conversion is done);
# * <tt>[data, charset]</tt> for remote files, with the data converted to UTF-8;
# * <tt>['', nil]</tt> when the URI was already loaded or the server answered
#   with a status of 400 or above, and <tt>:io_exceptions</tt> is off;
# * <tt>[nil, nil]</tt> when reading failed and <tt>:io_exceptions</tt> is off.
#
# Raises CircularReferenceError (URI already loaded) or RemoteFileError (any
# other failure) when the <tt>:io_exceptions</tt> option is on.
#--
# TODO: add option to fail silently or throw an exception on a 404
#++
def read_remote_file(uri) # :nodoc:
  if @loaded_uris.include?(uri.to_s)
    raise CircularReferenceError, "can't load #{uri.to_s} more than once" if @options[:io_exceptions]
    return '', nil
  end

  @loaded_uris << uri.to_s

  src = ''
  charset = nil

  begin
    uri = URI.parse(uri.to_s)

    if uri.scheme == 'file'
      # local file; File.open with a block always closes the handle and,
      # unlike Kernel#open, never treats the path as a URL or a command
      src = File.open(uri.path, 'rb') { |fh| fh.read }
    else
      # remote file
      http = Net::HTTP.new(uri.host, uri.port)

      if uri.scheme == 'https'
        http.use_ssl = true
        # NOTE(review): certificate verification is switched off, so the
        # server's identity is not checked. Kept for compatibility.
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end

      # request_uri keeps the query string and is never empty
      res = http.get(uri.request_uri, {'User-Agent' => USER_AGENT, 'Accept-Encoding' => 'gzip'})

      if res.code.to_i >= 400
        raise RemoteFileError if @options[:io_exceptions]
        return '', nil
      end

      # take the charset announced in the Content-Type header, if any
      charset = res['content-type'].to_s[/charset\s*=\s*["']?([^\s;"']+)/i, 1] || 'utf-8'

      case res['content-encoding']
      when 'gzip'
        src = Zlib::GzipReader.new(StringIO.new(res.body)).read
      when 'deflate'
        src = Zlib::Inflate.new.inflate(res.body)
      else
        src = res.body
      end
    end

    if charset
      ic = Iconv.new('UTF-8//IGNORE', charset)
      src = ic.iconv(src)
    end
  rescue
    raise RemoteFileError if @options[:io_exceptions]
    return nil, nil
  end

  return src, charset
end
435
+
436
+ private
437
# Save a folded declaration block to the internal cache, keyed by +block_hash+.
#
# Returns the stored declaration.
def save_folded_declaration(block_hash, folded_declaration) # :nodoc:
  @folded_declaration_cache.store(block_hash, folded_declaration)
end
441
+
442
# Retrieve a folded declaration block from the internal cache.
#
# Returns the cached declaration, or +nil+ when nothing usable is stored under
# +block_hash+. Looking up a key never adds an entry to the cache.
def get_folded_declaration(block_hash) # :nodoc:
  @folded_declaration_cache[block_hash] || nil
end
446
+
447
# Start over with an empty folded-declaration cache, an empty CSS source
# string and empty rule and warning lists.
def reset! # :nodoc:
  @css_source = ''
  @css_rules = Array.new
  @folded_declaration_cache = Hash.new
  @css_warnings = Array.new
end
453
+ end
454
+ end