marcosinger-css_parser 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,162 @@
1
+ require 'uri'
2
+ require 'net/https'
3
+ require 'open-uri'
4
+ require 'digest/md5'
5
+ require 'zlib'
6
+ require 'stringio'
7
+ require 'iconv'
8
+
9
+ module CssParser
10
+ VERSION = '1.2.0'
11
+
12
+ # Merge multiple CSS RuleSets by cascading according to the CSS 2.1 cascading rules
13
+ # (http://www.w3.org/TR/REC-CSS2/cascade.html#cascading-order).
14
+ #
15
+ # Takes one or more RuleSet objects.
16
+ #
17
+ # Returns a RuleSet.
18
+ #
19
+ # ==== Cascading
20
+ # If a RuleSet object has its +specificity+ defined, that specificity is
21
+ # used in the cascade calculations.
22
+ #
23
+ # If no specificity is explicitly set and the RuleSet has *one* selector,
24
+ # the specificity is calculated using that selector.
25
+ #
26
+ # If no selectors or multiple selectors are present, the specificity is
27
+ # treated as 0.
28
+ #
29
+ # ==== Example #1
30
+ # rs1 = RuleSet.new(nil, 'color: black;')
31
+ # rs2 = RuleSet.new(nil, 'margin: 0px;')
32
+ #
33
+ # merged = CssParser.merge(rs1, rs2)
34
+ #
35
+ # puts merged
36
+ # => "{ margin: 0px; color: black; }"
37
+ #
38
+ # ==== Example #2
39
+ # rs1 = RuleSet.new(nil, 'background-color: black;')
40
+ # rs2 = RuleSet.new(nil, 'background-image: none;')
41
+ #
42
+ # merged = CssParser.merge(rs1, rs2)
43
+ #
44
+ # puts merged
45
+ # => "{ background: none black; }"
46
+ #--
47
+ # TODO: declaration_hashes should be able to contain a RuleSet
48
+ # this should be a Class method
49
def CssParser.merge(*rule_sets)
  # Cascades the given RuleSets into a single RuleSet.
  #
  # Accepts RuleSets as a splat or as a single Array. Raises ArgumentError
  # if any argument is not a CssParser::RuleSet. When exactly one RuleSet is
  # given it is returned as-is (not copied, shorthand not expanded).
  @folded_declaration_cache = {}

  # in case called like CssParser.merge([rule_set, rule_set])
  rule_sets.flatten! if rule_sets[0].kind_of?(Array)

  unless rule_sets.all? { |rs| rs.kind_of?(CssParser::RuleSet) }
    raise ArgumentError, "all parameters must be CssParser::RuleSets."
  end

  return rule_sets[0] if rule_sets.length == 1

  # property name => {:value, :specificity, :is_important} of the declaration
  # currently winning the cascade for that property
  properties = {}

  rule_sets.each do |rule_set|
    rule_set.expand_shorthand!

    # An explicit specificity wins; otherwise derive it from a lone selector,
    # and fall back to 0 when there are no selectors or several of them.
    specificity = rule_set.specificity
    unless specificity
      if rule_set.selectors.length == 1
        specificity = calculate_specificity(rule_set.selectors[0])
      else
        specificity = 0
      end
    end

    rule_set.each_declaration do |property, value, is_important|
      candidate = {:value => value, :specificity => specificity, :is_important => is_important}
      current = properties[property]

      # Cascade per http://www.w3.org/TR/CSS21/cascade.html#cascading-order
      if current.nil?
        properties[property] = candidate
      elsif is_important
        # An important declaration beats any normal one, but among important
        # declarations specificity (then source order) still decides.
        if !current[:is_important] || current[:specificity] <= specificity
          properties[property] = candidate
        end
      elsif !current[:is_important] && current[:specificity] <= specificity
        # Normal declarations never displace an important one; otherwise the
        # later declaration wins on equal or higher specificity.
        properties[property] = candidate
      end
    end
  end

  merged = RuleSet.new(nil, nil)

  properties.each do |property, details|
    if details[:is_important]
      # Drop a trailing semicolon so the flag is appended to the value itself.
      merged[property.strip] = details[:value].strip.gsub(/\;\Z/, '') + '!important'
    else
      merged[property.strip] = details[:value].strip
    end
  end

  merged.create_shorthand!
  merged
end
105
+
106
+ # Calculates the specificity of a CSS selector
107
+ # per http://www.w3.org/TR/CSS21/cascade.html#specificity
108
+ #
109
+ # Returns an integer.
110
+ #
111
+ # ==== Example
112
+ # CssParser.calculate_specificity('#content div p:first-line a:link')
113
+ # => 114
114
+ #--
115
+ # Thanks to Rafael Salazar and Nick Fitzsimons on the css-discuss list for their help.
116
+ #++
117
def CssParser.calculate_specificity(selector)
  # The four specificity components, most significant first. The leading
  # component is always 0 here. The digits are concatenated (not summed)
  # and the resulting string is read back as an integer.
  components = [
    0,
    selector.scan(/\#/).length,
    selector.scan(NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX).length,
    selector.scan(ELEMENTS_AND_PSEUDO_ELEMENTS_RX).length
  ]

  components.map { |count| count.to_s }.join.to_i
rescue
  # Any failure while scanning (e.g. a non-String selector) yields 0.
  0
end
127
+
128
+ # Make <tt>url()</tt> links absolute.
129
+ #
130
+ # Takes a block of CSS and returns it with all relative URIs converted to absolute URIs.
131
+ #
132
+ # "For CSS style sheets, the base URI is that of the style sheet, not that of the source document."
133
+ # per http://www.w3.org/TR/CSS21/syndata.html#uri
134
+ #
135
+ # Returns a string.
136
+ #
137
+ # ==== Example
138
+ # CssParser.convert_uris("body { background: url('../style/yellow.png?abc=123') };",
139
+ # "http://example.org/style/basic.css").inspect
140
+ # => "body { background: url('http://example.org/style/yellow.png?abc=123') };"
141
def self.convert_uris(css, base_uri)
  # Rewrites every url(...) matched by URI_RX so that relative references
  # are resolved against +base_uri+ (a String or URI). Returns a new String.
  base = base_uri.kind_of?(URI) ? base_uri : URI.parse(base_uri)

  css.gsub(URI_RX) do |_matched|
    # First capture group is the URL, possibly still wrapped in quotes.
    target = Regexp.last_match(1).to_s.gsub(/["']+/, '')

    # Only references without a scheme:// prefix are resolved.
    if target !~ /^[a-z]+\:\/\//i
      begin
        target = base.merge(target)
      rescue
        # Unresolvable reference: emit it unchanged.
      end
    end

    "url('" + target.to_s + "')"
  end
end
158
+ end
159
+
160
+ require File.dirname(__FILE__) + '/css_parser/rule_set'
161
+ require File.dirname(__FILE__) + '/css_parser/regexps'
162
+ require File.dirname(__FILE__) + '/css_parser/parser'
@@ -0,0 +1,454 @@
1
+ module CssParser
2
+ # Exception class used for any errors encountered while downloading remote files.
3
class RemoteFileError < IOError
end
4
+
5
+ # Exception class used if a request is made to load a CSS file more than once.
6
class CircularReferenceError < StandardError
end
7
+
8
+
9
+ # == Parser class
10
+ #
11
+ # All CSS is converted to UTF-8.
12
+ #
13
+ # When calling Parser#new there are some configuration options:
14
+ # [<tt>absolute_paths</tt>] Convert relative paths to absolute paths (<tt>href</tt>, <tt>src</tt> and <tt>url('')</tt>). Boolean, default is <tt>false</tt>.
15
+ # [<tt>import</tt>] Follow <tt>@import</tt> rules. Boolean, default is <tt>true</tt>.
16
+ # [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
17
+ class Parser
18
+ USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (http://github.com/alexdunae/css_parser)"
19
+
20
+ STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m
21
+ STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m
22
+
23
+ # Initial parsing
24
+ RE_AT_IMPORT_RULE = /\@import\s*(?:url\s*)?(?:\()?(?:\s*)["']?([^'"\s\)]*)["']?\)?([\w\s\,^\])]*)\)?[;\n]?/
25
+
26
+ # Array of CSS files that have been loaded.
27
+ attr_reader :loaded_uris
28
+
29
+ #attr_reader :rules
30
+
31
+ #--
32
+ # Class variable? see http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html
33
+ #++
34
+ @folded_declaration_cache = {}
35
+ class << self; attr_reader :folded_declaration_cache; end
36
+
37
def initialize(options = {})
  # Caller-supplied options override these defaults key by key.
  defaults = {:absolute_paths => false, :import => true, :io_exceptions => true}
  @options = defaults.merge(options)

  @rules       = [] # array of {:media_types, :rules} entries
  @loaded_uris = [] # URIs already fetched
  @blocks      = [] # unprocessed blocks of CSS

  reset!
end
52
+
53
+ # Get declarations by selector.
54
+ #
55
+ # +media_types+ are optional, and can be a symbol or an array of symbols.
56
+ # The default value is <tt>:all</tt>.
57
+ #
58
+ # ==== Examples
59
+ # find_by_selector('#content')
60
+ # => 'font-size: 13px; line-height: 1.2;'
61
+ #
62
+ # find_by_selector('#content', [:screen, :handheld])
63
+ # => 'font-size: 13px; line-height: 1.2;'
64
+ #
65
+ # find_by_selector('#content', :print)
66
+ # => 'font-size: 11pt; line-height: 1.2;'
67
+ #
68
+ # Now you can use Regexp too
69
+ #
70
+ # ==== Examples
71
+ # find_by_selector('/#content/')
72
+ # => 'font-size: 13px; line-height: 1.2;'
73
+ #
74
+ # find_by_selector('/\.content/') # Don't forget use escape with dot(\)
75
+ # => 'float:left;'
76
+ #
77
+ # find_by_selector('/content/')
78
+ # => {'#content' => {'font-size' => '11pt;', 'line-height' => '1.2;'}, '.content' => {'float' => 'left'}}
79
+ #
80
+ # Returns an array or hash of declarations.
81
def find_by_selector(selector, media_types = :all)
  # String selector: returns an Array of the raw declaration strings of every
  #   rule whose selector equals +selector+ (surrounding whitespace ignored).
  # Regexp selector: declarations are parsed into {property => value} hashes.
  #   Returns an Array of those hashes when fewer than two selectors matched,
  #   otherwise a Hash keyed by selector.
  # Any other selector type returns an empty Array.
  out = []
  out_hash = {}

  each_selector(media_types) do |sel, dec, spec|
    if selector.is_a?(Regexp)
      next unless sel.strip =~ selector

      dec_hash = {}
      dec.split(';').each do |declaration|
        # Split on the first colon only so values such as
        # url(http://example.org/a.png) are kept intact.
        property, value = declaration.split(':', 2)
        # Blank segments (e.g. after a trailing ';') carry no declaration.
        next if property.nil? || value.nil?
        dec_hash[property.strip] = value.strip
      end

      # A selector seen more than once accumulates its declarations;
      # later values replace earlier ones for the same property.
      out_hash.merge!({sel => dec_hash}) { |_key, existing, added| existing.merge(added) }
    elsif selector.is_a?(String)
      out << dec if sel.strip == selector.strip
    end
  end

  selector.is_a?(Regexp) ? (out_hash.size < 2 ? out_hash.values : out_hash) : out
end
96
+ alias_method :[], :find_by_selector
97
+
98
+
99
+ # Add a raw block of CSS.
100
+ #
101
+ # In order to follow +@import+ rules you must supply either a
102
+ # +:base_dir+ or +:base_uri+ option.
103
+ #
104
+ # Use the +:media_types+ option to set the media type(s) for this block. Takes an array of symbols.
105
+ #
106
+ # Use the +:only_media_types+ option to selectively follow +@import+ rules. Takes an array of symbols.
107
+ #
108
+ # ==== Example
109
+ # css = <<-EOT
110
+ # body { font-size: 10pt }
111
+ # p { margin: 0px; }
112
+ # @media screen, print {
113
+ # body { line-height: 1.2 }
114
+ # }
115
+ # EOT
116
+ #
117
+ # parser = CssParser::Parser.new
118
+ # parser.add_block!(css)
119
def add_block!(block, options = {})
  # Cleans +block+, optionally absolutizes its url() references, follows its
  # @import rules (unless the parser was created with :import => false),
  # strips the @import statements and parses what is left into rule sets.
  options = {:base_uri => nil, :base_dir => nil, :charset => nil, :media_types => :all, :only_media_types => :all}.merge(options)
  # Accept a single symbol or an array for both media options.
  options[:media_types] = [options[:media_types]].flatten
  options[:only_media_types] = [options[:only_media_types]].flatten

  block = cleanup_block(block)

  if options[:base_uri] && @options[:absolute_paths]
    block = CssParser.convert_uris(block, options[:base_uri])
  end

  # Load @imported CSS, honouring the parser-level :import option.
  if @options[:import]
    block.scan(RE_AT_IMPORT_RULE).each do |import_rule|
      # The last capture holds the optional media list following the URL.
      media_types = []
      if media_string = import_rule[-1]
        media_string.split(/\s|\,/).each do |t|
          media_types << t.to_sym unless t.empty?
        end
      end

      # Follow the import when no media filter is active, when the import
      # names no media, or when it shares a media type with the filter.
      follow = options[:only_media_types].include?(:all) ||
               media_types.length < 1 ||
               (media_types & options[:only_media_types]).length > 0
      next unless follow

      import_path = import_rule[0].to_s.gsub(/['"]*/, '').strip

      if options[:base_uri]
        import_uri = URI.parse(options[:base_uri].to_s).merge(import_path)
        # Uses load_uri!'s legacy (uri, base_uri, media_types) calling form.
        load_uri!(import_uri, options[:base_uri], media_types)
      elsif options[:base_dir]
        load_file!(import_path, options[:base_dir], media_types)
      end
    end
  end

  # Remove @import declarations (non-destructively, so the string handed
  # back by cleanup_block is not modified here).
  block = block.gsub(RE_AT_IMPORT_RULE, '')

  parse_block_into_rule_sets!(block, options)
end
156
+
157
+ # Add a CSS rule by setting the +selectors+, +declarations+ and +media_types+.
158
+ #
159
+ # +media_types+ can be a symbol or an array of symbols.
160
def add_rule!(selectors, declarations, media_types = :all)
  # Wrap the raw selector/declaration strings in a RuleSet and register it.
  add_rule_set!(RuleSet.new(selectors, declarations), media_types)
end
164
+
165
+ # Add a CssParser RuleSet object.
166
+ #
167
+ # +media_types+ can be a symbol or an array of symbols.
168
def add_rule_set!(ruleset, media_types = :all)
  # Registers +ruleset+ under the given media type(s).
  # Raises ArgumentError unless +ruleset+ is a CssParser::RuleSet.
  unless ruleset.kind_of?(CssParser::RuleSet)
    raise ArgumentError, "ruleset must be a CssParser::RuleSet"
  end

  # each_rule_set calls #any? on the stored media types, so a nil would
  # blow up there later; treat it as :all, as each_rule_set does for its
  # own argument.
  media_types = :all if media_types.nil?
  media_types = [media_types] if media_types.kind_of?(Symbol)

  @rules << {:media_types => media_types, :rules => ruleset}
end
175
+
176
+ # Iterate through RuleSet objects.
177
+ #
178
+ # +media_types+ can be a symbol or an array of symbols.
179
def each_rule_set(media_types = :all) # :yields: rule_set
  # Normalise the filter: nil means :all, a lone symbol becomes a one-element array.
  wanted = media_types.nil? ? [:all] : media_types
  wanted = [wanted] if wanted.kind_of?(Symbol)

  @rules.each do |entry|
    # Yield everything for :all, otherwise only entries sharing a media type.
    next unless wanted.include?(:all) || entry[:media_types].any? { |mt| wanted.include?(mt) }
    yield entry[:rules]
  end
end
189
+
190
+ # Iterate through CSS selectors.
191
+ #
192
+ # +media_types+ can be a symbol or an array of symbols.
193
+ # See RuleSet#each_selector for +options+.
194
def each_selector(media_types = :all, options = {}) # :yields: selectors, declarations, specificity
  # Walk the rule sets matching +media_types+ and re-yield each selector
  # triple exactly as the rule set produces it.
  each_rule_set(media_types) do |rs|
    rs.each_selector(options) { |sel, decs, spec| yield sel, decs, spec }
  end
end
202
+
203
+ # Output all CSS rules as a single stylesheet.
204
def to_s(media_types = :all)
  # Serialise every selector matching +media_types+ as "selector {\n...\n}\n".
  stylesheet = ''
  each_selector(media_types) do |sel, decs, _spec|
    stylesheet.concat("#{sel} {\n#{decs}\n}\n")
  end
  stylesheet
end
211
+
212
+ # Merge declarations with the same selector.
213
def compact! # :nodoc:
  # Placeholder: no merging is performed; a fresh empty Array is returned.
  Array.new
end
218
+
219
def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
  # Tokenizes +block+ and feeds every "selectors { declarations }" pair to
  # add_rule!, tagged with the media types in effect at that point:
  # options[:media_types] outside of @media blocks, the block's own list inside.
  options = {:media_types => :all}.merge(options)
  default_media_types = options[:media_types]
  media_types = default_media_types

  # Nesting depth inside a declaration block (values > 1 mean malformed CSS).
  in_declarations = 0

  block_depth = 0

  # @charset is ignored for now
  in_charset = false
  in_string = false
  in_at_media_rule = false # between '@media' and its opening brace
  in_media_block = false   # between an @media block's braces

  current_selectors = ''
  current_declarations = ''

  # Tokens are: a (possibly backslash-escaped) brace, whitespace character or
  # double quote on its own, or a run of any other characters.
  block.scan(/([\\]?[{}\s"]|(.[^\s"{}\\]*))/).each do |matches|
    token = matches[0]

    if token =~ /\A"/ # found un-escaped double quote
      in_string = !in_string
    end

    if in_declarations > 0

      # too deep, malformed declaration block: skip until it closes
      if in_declarations > 1
        in_declarations -= 1 if token =~ /\}/
        next
      end

      if token =~ /\{/
        in_declarations += 1
        next
      end

      current_declarations += token

      if token =~ /\}/ && !in_string
        current_declarations.gsub!(/\}[\s]*$/, '')

        in_declarations -= 1

        unless current_declarations.strip.empty?
          add_rule!(current_selectors, current_declarations, media_types)
        end

        current_selectors = ''
        current_declarations = ''
      end
    elsif token =~ /@media/i
      # found '@media', start collecting a fresh media type list
      in_at_media_rule = true
      media_types = []
    elsif in_at_media_rule
      if token =~ /\{/
        block_depth = block_depth + 1
        in_at_media_rule = false
        in_media_block = true
      else
        # Strip separators; whatever remains is a media type name.
        name = token.gsub(/[,\s]*/, '')
        media_types << name.strip.downcase.to_sym unless name.empty?
      end
    elsif in_charset || token =~ /@charset/i
      # iterate until we are out of the charset declaration
      in_charset = (token =~ /;/ ? false : true)
    else
      if token =~ /\}/ && !in_string
        block_depth = block_depth - 1

        # Leaving an @media block: rules that follow must go back to the
        # media types this block of CSS was added with, not keep the
        # @media block's list.
        if in_media_block
          media_types = default_media_types
          in_media_block = false
        end
      else
        if token =~ /\{/ && !in_string
          current_selectors.gsub!(/^[\s]*/, '')
          current_selectors.gsub!(/[\s]*$/, '')
          in_declarations += 1
        else
          current_selectors += token
        end
      end
    end
  end

  # check for unclosed braces: keep what was collected so far
  if in_declarations > 0
    add_rule!(current_selectors, current_declarations, media_types)
  end
end
307
+
308
+ # Load a remote CSS file.
309
+ #
310
+ # You can also pass in file://test.css
311
+ #
312
+ # See add_block! for options.
313
+ #
314
+ # Deprecated: originally accepted three params: `uri`, `base_uri` and `media_types`
315
def load_uri!(uri, options = {}, deprecated = nil)
  # Fetches +uri+ through read_remote_file and hands the source to add_block!.
  # +options+ is normally a Hash; the legacy form (uri, base_uri, media_types)
  # is still understood.
  uri = URI.parse(uri) unless uri.respond_to?(:scheme)

  opts = {:base_uri => nil, :media_types => :all}

  case options
  when Hash
    opts.merge!(options)
  else
    # Legacy positional arguments: only a String base URI is honoured.
    opts[:base_uri] = options if options.is_a?(String)
    opts[:media_types] = deprecated if deprecated
  end

  # Scheme-less and file URIs are treated as local files with an absolute path.
  if uri.scheme == 'file' || uri.scheme.nil?
    uri.path = File.expand_path(uri.path)
    uri.scheme = 'file'
  end

  # Without an explicit base, the stylesheet's own URI is the base.
  opts[:base_uri] = uri if opts[:base_uri].nil?

  source, _charset = read_remote_file(uri)

  add_block!(source, opts) if source
end
342
+
343
+ # Load a local CSS file.
344
def load_file!(file_name, base_dir = nil, media_types = :all)
  # Resolve +file_name+ against +base_dir+; unreadable files are skipped (nil).
  path = File.expand_path(file_name, base_dir)

  if File.readable?(path)
    # The file's own directory becomes the base for anything it imports.
    add_block!(File.read(path), {:media_types => media_types, :base_dir => File.dirname(path)})
  end
end
353
+
354
+
355
+
356
+ protected
357
+ # Strip comments and clean up blank lines from a block of CSS.
358
+ #
359
+ # Returns a string.
360
def cleanup_block(block) # :nodoc:
  # Returns a cleaned copy of +block+. Non-destructive gsub is used so the
  # caller's string is left untouched and frozen strings are accepted.

  # Strip CSS comments
  cleaned = block.gsub(STRIP_CSS_COMMENTS_RX, '')

  # Strip HTML comment markers - they shouldn't really be in here, but
  # stylesheets copied out of HTML pages sometimes contain them.
  cleaned = cleaned.gsub(STRIP_HTML_COMMENTS_RX, '')

  # Strip lines containing just whitespace
  cleaned.gsub(/^\s+$/, '')
end
373
+
374
+ # Download a file into a string.
375
+ #
376
+ # Returns the file's data and character set in an array.
377
+ #--
378
+ # TODO: add option to fail silently or throw and exception on a 404
379
+ #++
380
def read_remote_file(uri) # :nodoc:
  # Reads +uri+ (file://, http:// or https://) and returns [source, charset].
  #
  # Each URI may be read once per parser. A repeat raises
  # CircularReferenceError, or returns ['', nil] when :io_exceptions is off.
  # Any failure while reading raises RemoteFileError, or returns [nil, nil]
  # when :io_exceptions is off. An HTTP status >= 400 returns ['', nil] when
  # :io_exceptions is off.
  if @loaded_uris.include?(uri.to_s)
    raise CircularReferenceError, "can't load #{uri.to_s} more than once" if @options[:io_exceptions]
    return '', nil
  end

  @loaded_uris << uri.to_s

  src = ''
  charset = nil

  begin
    uri = URI.parse(uri.to_s)

    if uri.scheme == 'file'
      # local file; the block form closes the handle even if read raises
      src = File.open(uri.path, 'rb') { |fh| fh.read }
    else
      # remote file
      http = Net::HTTP.new(uri.host, uri.port)

      if uri.scheme == 'https'
        http.use_ssl = true
        # NOTE(review): certificate verification is disabled, so HTTPS
        # downloads are open to man-in-the-middle tampering. Left unchanged
        # to avoid breaking existing callers; consider VERIFY_PEER.
        http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end

      # request_uri keeps the query string, which uri.path alone drops.
      request_path = uri.respond_to?(:request_uri) ? uri.request_uri : uri.path

      # Ruby 1.8 returns [response, body] from #get while later versions
      # return only the response; taking the first element handles both.
      res, _body = http.get(request_path, {'User-Agent' => USER_AGENT, 'Accept-Encoding' => 'gzip'})

      if res.code.to_i >= 400
        raise RemoteFileError, "#{uri} returned HTTP #{res.code}" if @options[:io_exceptions]
        return '', nil
      end

      # Charset declared in the Content-Type header, defaulting to UTF-8.
      declared = res.type_params['charset'].to_s.delete('"\'').strip
      charset = declared.empty? ? 'utf-8' : declared

      # Accept-Encoding is set by hand above, so decoding is done here.
      case res['content-encoding']
      when 'gzip'
        src = Zlib::GzipReader.new(StringIO.new(res.body)).read
      when 'deflate'
        src = Zlib::Inflate.new.inflate(res.body)
      else
        src = res.body
      end
    end

    if charset
      ic = Iconv.new('UTF-8//IGNORE', charset)
      src = ic.iconv(src)
    end
  rescue RemoteFileError
    # Raised above with a specific message; pass it through untouched.
    raise
  rescue => e
    raise RemoteFileError, "could not load #{uri}: #{e.message}" if @options[:io_exceptions]
    return nil, nil
  end

  return src, charset
end
435
+
436
+ private
437
+ # Save a folded declaration block to the internal cache.
438
def save_folded_declaration(block_hash, folded_declaration) # :nodoc:
  # Remember the folded form under its block hash; returns the stored value.
  @folded_declaration_cache.store(block_hash, folded_declaration)
end
441
+
442
+ # Retrieve a folded declaration block from the internal cache.
443
def get_folded_declaration(block_hash) # :nodoc:
  # Returns the cached folded declaration, or nil when none was saved.
  # A plain lookup is used so that misses do not add nil entries to the cache.
  @folded_declaration_cache[block_hash]
end
446
+
447
def reset! # :nodoc:
  # Start over with an empty cache, empty source and no rules or warnings.
  @folded_declaration_cache = Hash.new
  @css_source = ''
  @css_rules = Array.new
  @css_warnings = Array.new
end
453
+ end
454
+ end