css_parser_1.1.0 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/css_parser.rb ADDED
@@ -0,0 +1,151 @@
1
+ require 'uri'
2
+ require 'digest/md5'
3
+ require 'zlib'
4
+ require 'iconv'
5
+
6
+ module CssParser
7
+ VERSION = '1.1.0'
8
+
9
# Merge multiple CSS RuleSets by cascading according to the CSS 2.1 cascading rules
# (http://www.w3.org/TR/REC-CSS2/cascade.html#cascading-order).
#
# Takes one or more RuleSet objects, either as separate arguments or as a
# single array of RuleSets.
#
# Returns a RuleSet. When exactly one RuleSet is passed, that same object is
# returned untouched.
#
# Raises ArgumentError if any argument is not a CssParser::RuleSet.
#
# Note that +expand_shorthand!+ is called on every RuleSet passed in when
# more than one is given.
#
# ==== Cascading
# If a RuleSet object has its +specificity+ defined, that specificity is
# used in the cascade calculations.
#
# If no specificity is explicitly set and the RuleSet has *one* selector,
# the specificity is calculated using that selector.
#
# If no selectors or multiple selectors are present, the specificity is
# treated as 0.
#
# A declaration flagged as important is only replaced by another important
# declaration of equal or higher specificity.  A declaration that is not
# important is replaced by any important declaration, or by a later
# declaration of equal or higher specificity.
#
# ==== Example #1
#   rs1 = RuleSet.new(nil, 'color: black;')
#   rs2 = RuleSet.new(nil, 'margin: 0px;')
#
#   merged = CssParser.merge(rs1, rs2)
#
#   puts merged
#   => "{ margin: 0px; color: black; }"
#
# ==== Example #2
#   rs1 = RuleSet.new(nil, 'background-color: black;')
#   rs2 = RuleSet.new(nil, 'background-image: none;')
#
#   merged = CssParser.merge(rs1, rs2)
#
#   puts merged
#   => "{ background: none black; }"
#--
# TODO: declaration_hashes should be able to contain a RuleSet
#       this should be a Class method
def CssParser.merge(*rule_sets)
  # NOTE(review): this module-level cache is reset here but not read in this method.
  @folded_declaration_cache = {}

  # in case called like CssParser.merge([rule_set, rule_set])
  rule_sets.flatten! if rule_sets[0].kind_of?(Array)

  unless rule_sets.all? { |rs| rs.kind_of?(CssParser::RuleSet) }
    raise ArgumentError, "all parameters must be CssParser::RuleSets."
  end

  return rule_sets[0] if rule_sets.length == 1

  # Winning declaration for each property seen so far
  properties = {}

  rule_sets.each do |rule_set|
    rule_set.expand_shorthand!

    specificity = rule_set.specificity
    unless specificity
      if rule_set.selectors.length == 1
        specificity = calculate_specificity(rule_set.selectors[0])
      else
        specificity = 0
      end
    end

    rule_set.each_declaration do |property, value, is_important|
      # Fold per http://www.w3.org/TR/CSS21/cascade.html#cascading-order
      current = properties[property]
      candidate = {:value => value, :specificity => specificity, :is_important => is_important}

      if current.nil?
        properties[property] = candidate
      elsif is_important
        # step 2: important beats normal; two important declarations are
        # ordered by specificity (step 3) and then source order (step 4)
        if !current[:is_important] || current[:specificity] <= specificity
          properties[property] = candidate
        end
      elsif !current[:is_important] && current[:specificity] <= specificity
        # steps 3 and 4 between two normal declarations; a normal declaration
        # never displaces an important one
        properties[property] = candidate
      end
    end
  end

  merged = RuleSet.new(nil, nil)

  # TODO: what about important
  properties.each do |property, details|
    merged[property.strip] = details[:value].strip
  end

  merged.create_shorthand!
  merged
end
94
+
95
# Compute the specificity of a CSS selector
# (see http://www.w3.org/TR/CSS21/cascade.html#specificity).
#
# Three counts are taken from the selector: the number of <tt>#</tt>
# characters, the number of matches of NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX
# and the number of matches of ELEMENTS_AND_PSEUDO_ELEMENTS_RX.  The counts
# are written side by side as decimal text behind a leading zero and the
# resulting text is converted to an integer.
#
# Returns an integer.  If anything raises while counting, 0 is returned.
#
# ==== Example
#  CssParser.calculate_specificity('#content div p:first-line a:link')
#  => 114
#--
# Thanks to Rafael Salazar and Nick Fitzsimons on the css-discuss list for their help.
#++
def CssParser.calculate_specificity(selector)
  id_count      = selector.scan(/\#/).length
  class_count   = selector.scan(NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX).length
  element_count = selector.scan(ELEMENTS_AND_PSEUDO_ELEMENTS_RX).length

  # The leading "0" stands for the inline-style component, which is always zero here.
  "0#{id_count}#{class_count}#{element_count}".to_i
rescue
  0
end
116
+
117
# Rewrite <tt>url()</tt> references so that they are absolute.
#
# +css+ is a block of CSS; +base_uri+ is a URI object or a string that
# URI.parse accepts.  Every match of URI_RX has its first capture stripped of
# quote characters, resolved against +base_uri+ unless it already starts with
# <tt>scheme://</tt>, and written back as <tt>url('...')</tt>.
#
# "For CSS style sheets, the base URI is that of the style sheet, not that of the source document."
# per http://www.w3.org/TR/CSS21/syndata.html#uri
#
# Returns a new string; +css+ itself is not modified.
#
# ==== Example
#  CssParser.convert_uris("body { background: url('../style/yellow.png?abc=123') };",
#               "http://example.org/style/basic.css").inspect
#  => "body { background: url('http://example.org/style/yellow.png?abc=123') };"
def self.convert_uris(css, base_uri)
  base = base_uri.kind_of?(URI) ? base_uri : URI.parse(base_uri)

  css.gsub(URI_RX) do
    target = Regexp.last_match(1).to_s.gsub(/["']+/, '')

    # Only references that are not already absolute are resolved
    if target !~ /^[a-z]+\:\/\//i
      begin
        target = base.merge(target)
      rescue
        # best effort: a reference that cannot be merged is emitted as it was found
      end
    end

    "url('#{target}')"
  end
end
147
+ end
148
+
149
+ require File.dirname(__FILE__) + '/css_parser/rule_set'
150
+ require File.dirname(__FILE__) + '/css_parser/regexps'
151
+ require File.dirname(__FILE__) + '/css_parser/parser'
@@ -0,0 +1,362 @@
1
+ module CssParser
2
# Error raised for any problem encountered while downloading remote files.
class RemoteFileError < IOError
end

# Error raised if a request is made to load a CSS file more than once.
class CircularReferenceError < StandardError
end
7
+
8
+
9
+ # == Parser class
10
+ #
11
+ # All CSS is converted to UTF-8.
12
+ #
13
+ # When calling Parser#new there are some configuration options:
14
+ # [<tt>absolute_paths</tt>] Convert relative paths to absolute paths (<tt>href</tt>, <tt>src</tt> and <tt>url('')</tt>). Boolean, default is <tt>false</tt>.
15
+ # [<tt>import</tt>] Follow <tt>@import</tt> rules. Boolean, default is <tt>true</tt>.
16
+ # [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
17
+ class Parser
18
+ USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (http://code.dunae.ca/css_parser/)"
19
+
20
+ STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m
21
+ STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m
22
+
23
+ # Initial parsing
24
+ RE_AT_IMPORT_RULE = /\@import[\s]+(url\()?["']+(.[^'"]*)["']\)?([\w\s\,]*);?/i
25
+
26
+ #--
27
+ # RE_AT_IMPORT_RULE = Regexp.new('@import[\s]*(' + RE_STRING.to_s + ')([\w\s\,]*)[;]?', Regexp::IGNORECASE) -- should handle url() even though it is not allowed
28
+ #++
29
+
30
+ # Array of CSS files that have been loaded.
31
+ attr_reader :loaded_uris
32
+
33
+ #attr_reader :rules
34
+
35
+ #--
36
+ # Class variable? see http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html
37
+ #++
38
+ @folded_declaration_cache = {}
39
+ class << self; attr_reader :folded_declaration_cache; end
40
+
41
# Build a parser.  +options+ is merged over the defaults
# <tt>:absolute_paths => false</tt>, <tt>:import => true</tt> and
# <tt>:io_exceptions => true</tt>.
def initialize(options = {})
  defaults = {:absolute_paths => false,
              :import => true,
              :io_exceptions => true}
  @options = defaults.merge(options)

  # one entry per rule set that has been added
  @rules = []

  @loaded_uris = []

  # unprocessed blocks of CSS
  @blocks = []

  reset!
end
56
+
57
# Look up declarations by selector.
#
# Every selector yielded by each_selector for the given +media_types+ is
# compared with +selector+ after surrounding whitespace has been stripped
# from both; the declarations of each match are collected in order.
#
# +media_types+ are optional, and can be a symbol or an array of symbols.
# The default value is <tt>:all</tt>.
#
# ==== Examples
#  find_by_selector('#content')
#  => ['font-size: 13px; line-height: 1.2;']
#
#  find_by_selector('#content', [:screen, :handheld])
#  => ['font-size: 13px; line-height: 1.2;']
#
#  find_by_selector('#content', :print)
#  => ['font-size: 11pt; line-height: 1.2;']
#
# Returns an array of declarations (empty when nothing matches).
def find_by_selector(selector, media_types = :all)
  matches = []
  each_selector(media_types) do |candidate, declarations, _specificity|
    next unless candidate.strip == selector.strip
    matches << declarations
  end
  matches
end
80
+ alias_method :[], :find_by_selector
81
+
82
+
83
# Add a raw block of CSS.
#
# +block+ is left untouched; the parser works on a copy.
#
# +options+ may contain <tt>:base_uri</tt>, <tt>:base_dir</tt>,
# <tt>:charset</tt> and <tt>:media_types</tt> (default <tt>:all</tt>).
# <tt>:charset</tt> is accepted but not read by this method.
#
# <tt>@import</tt> rules are followed only when the parser was created with
# the <tt>:import</tt> option enabled, and only if either +:base_uri+ or
# +:base_dir+ is supplied.  An import that names no media types of its own
# is loaded with the media types of the importing block.  The
# <tt>@import</tt> rules themselves are always removed before the remaining
# CSS is parsed.
#
# ==== Example
#   css = <<-EOT
#     body { font-size: 10pt }
#     p { margin: 0px; }
#     @media screen, print {
#       body { line-height: 1.2 }
#     }
#   EOT
#
#   parser = CssParser::Parser.new
#   parser.add_block!(css)
#--
# TODO: add media_type
#++
def add_block!(block, options = {})
  options = {:base_uri => nil, :base_dir => nil, :charset => nil, :media_types => :all}.merge(options)

  # Work on a copy so the caller's string is never modified.
  block = cleanup_block(block.dup)

  if options[:base_uri] and @options[:absolute_paths]
    block = CssParser.convert_uris(block, options[:base_uri])
  end

  # Load @imported CSS
  if @options[:import]
    block.scan(RE_AT_IMPORT_RULE).each do |import_rule|
      # The last capture holds the optional media list that follows the path
      media_types = import_rule.last.to_s.split(/\s|\,/).reject { |t| t.empty? }.map { |t| t.to_sym }
      media_types = options[:media_types] if media_types.empty?

      import_path = import_rule[1].to_s.gsub(/['"]*/, '').strip

      if options[:base_uri]
        import_uri = URI.parse(options[:base_uri].to_s).merge(import_path)
        load_uri!(import_uri, options[:base_uri], media_types)
      elsif options[:base_dir]
        load_file!(import_path, options[:base_dir], media_types)
      end
    end
  end

  # Remove @import declarations
  block = block.gsub(RE_AT_IMPORT_RULE, '')

  parse_block_into_rule_sets!(block, options)
end
138
+
139
# Build a RuleSet from +selectors+ and +declarations+ and register it for
# +media_types+ through add_rule_set!.
#
# +media_types+ can be a symbol or an array of symbols.
def add_rule!(selectors, declarations, media_types = :all)
  add_rule_set!(RuleSet.new(selectors, declarations), media_types)
end
146
+
147
# Register an existing CssParser RuleSet object with the parser.
#
# +media_types+ can be a symbol or an array of symbols; a lone symbol is
# wrapped in an array before it is stored.
#
# Raises ArgumentError if +ruleset+ is not a CssParser::RuleSet.
def add_rule_set!(ruleset, media_types = :all)
  raise ArgumentError unless ruleset.kind_of?(CssParser::RuleSet)

  scoped_to = media_types.kind_of?(Symbol) ? [media_types] : media_types

  @rules << {:media_types => scoped_to, :rules => ruleset}
end
157
+
158
# Yield each stored RuleSet that applies to +media_types+.
#
# +media_types+ can be a symbol or an array of symbols; +nil+ is treated
# as <tt>:all</tt>.  A rule set is yielded when <tt>:all</tt> was requested
# or when it was stored under at least one of the requested media types.
def each_rule_set(media_types = :all) # :yields: rule_set
  wanted = media_types.nil? ? [:all] : media_types
  wanted = [wanted] if wanted.kind_of?(Symbol)

  @rules.each do |entry|
    applies = wanted.include?(:all) || entry[:media_types].any? { |mt| wanted.include?(mt) }
    yield entry[:rules] if applies
  end
end
171
+
172
# Walk every selector of every RuleSet that applies to +media_types+.
#
# +media_types+ can be a symbol or an array of symbols.
# See RuleSet#each_selector for +options+.
def each_selector(media_types = :all, options = {}) # :yields: selectors, declarations, specificity
  each_rule_set(media_types) do |rule_set|
    rule_set.each_selector(options) { |sel, decl, spec| yield sel, decl, spec }
  end
end
184
+
185
# Render every rule that applies to +media_types+ as one stylesheet string,
# one <tt>selector { declarations }</tt> entry per selector.
def to_s(media_types = :all)
  stylesheet = ''
  each_selector(media_types) do |sel, decl, _specificity|
    stylesheet << sel.to_s << " {\n" << decl.to_s << "\n}\n"
  end
  stylesheet
end
193
+
194
# Merge declarations with the same selector.
#
# Currently a stub: it returns a new empty array and does not touch the
# parser's rules.
def compact! # :nodoc:
  []
end
200
+
201
# Tokenize +block+ and hand every rule set found to add_rule!.
#
# +options+ may contain <tt>:media_types</tt> (default <tt>:all</tt>), which
# is used for every rule outside an <tt>@media</tt> block.  Rules inside an
# <tt>@media</tt> block get the media types listed on that block; when the
# block closes, the media types fall back to the ones from +options+.
#
# <tt>@charset</tt> declarations are skipped.
def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
  options = {:media_types => :all}.merge(options)
  default_media_types = options[:media_types]
  media_types = default_media_types

  in_declarations = false

  # @charset is ignored for now
  in_charset = false
  in_string = false
  in_at_media_rule = false # between '@media' and the '{' that opens its block
  in_media_block = false   # between that '{' and the matching '}'

  current_selectors = ''
  current_declarations = ''

  # A token is a single brace, whitespace character or double quote
  # (optionally preceded by a backslash), or a run of other characters.
  block.scan(/([\\]?[{}\s"]|(.[^\s"{}\\]*))/).each do |matches|
    token = matches[0]

    if token =~ /\A"/ # found un-escaped double quote
      in_string = !in_string
    end

    if in_declarations
      current_declarations += token

      if token =~ /\}/ && !in_string
        # end of the declaration block: drop the closing brace and store the rule
        current_declarations = current_declarations.gsub(/\}[\s]*$/, '')

        in_declarations = false

        unless current_declarations.strip.empty?
          add_rule!(current_selectors, current_declarations, media_types)
        end

        current_selectors = ''
        current_declarations = ''
      end
    elsif token =~ /@media/i
      # found '@media', reset current media_types
      in_at_media_rule = true
      media_types = []
    elsif in_at_media_rule
      if token =~ /\{/
        in_at_media_rule = false
        in_media_block = true
      else
        media_name = token.gsub(/[,\s]*/, '')
        media_types << media_name.strip.downcase.to_sym unless media_name.empty?
      end
    elsif in_charset or token =~ /@charset/i
      # iterate until we are out of the charset declaration
      in_charset = (token =~ /;/ ? false : true)
    elsif token =~ /\}/ && !in_string
      # A closing brace outside a declaration block ends the @media block,
      # so later rules must not inherit its media types.
      if in_media_block
        media_types = default_media_types
        in_media_block = false
      end
    elsif token =~ /\{/ && !in_string
      current_selectors = current_selectors.gsub(/^[\s]*/, '').gsub(/[\s]*$/, '')
      in_declarations = true
    else
      current_selectors += token
    end
  end
end
271
+
272
# Fetch a remote CSS file and add its contents as a block.
#
# +base_uri+ defaults to +uri+ when it is +nil+.
def load_uri!(uri, base_uri = nil, media_types = :all)
  effective_base = base_uri.nil? ? uri : base_uri
  src, = read_remote_file(uri)

  add_block!(src, {:media_types => media_types, :base_uri => effective_base})
end
279
+
280
# Load a local CSS file.
#
# +file_name+ is expanded relative to +base_dir+.  Nothing happens (and
# +nil+ is returned) when the resulting path is not readable.  The file's
# contents are added as a block whose <tt>:base_dir</tt> is the directory of
# the file.
#
# Raises CircularReferenceError if the file is requested again while it is
# still being loaded, which is what happens when local <tt>@import</tt>
# rules form a cycle.
def load_file!(file_name, base_dir = nil, media_types = :all)
  file_name = File.expand_path(file_name, base_dir)
  return unless File.readable?(file_name)

  # Paths of the files currently being loaded, outermost first.
  @files_being_loaded ||= []
  if @files_being_loaded.include?(file_name)
    raise CircularReferenceError, "can't load #{file_name} while it is already being loaded"
  end

  @files_being_loaded.push(file_name)
  begin
    src = File.read(file_name)
    add_block!(src, {:media_types => media_types, :base_dir => File.dirname(file_name)})
  ensure
    @files_being_loaded.pop
  end
end
290
+
291
+
292
+
293
+ protected
294
# Strip comments and clean up blank lines from a block of CSS.
#
# Removes everything matching STRIP_CSS_COMMENTS_RX and
# STRIP_HTML_COMMENTS_RX, then empties lines that contain only whitespace.
#
# Returns a new string; +block+ itself is left unchanged.
def cleanup_block(block) # :nodoc:
  # Strip CSS comments
  cleaned = block.gsub(STRIP_CSS_COMMENTS_RX, '')

  # Strip HTML comments - they shouldn't really be in here but
  # some people are just crazy...
  cleaned = cleaned.gsub(STRIP_HTML_COMMENTS_RX, '')

  # Strip lines containing just whitespace
  cleaned.gsub(/^\s+$/, "")
end
310
+
311
# Download a file into a string.
#
# The URI is recorded in <tt>@loaded_uris</tt> before the download starts.
# The body is gunzipped when the response's content encoding includes
# <tt>gzip</tt>, then converted to UTF-8 from the charset the response
# reports.
#
# Returns the file's data and character set in an array.  If anything goes
# wrong and the <tt>:io_exceptions</tt> option is off, <tt>['', nil]</tt> is
# returned instead.
#
# Raises CircularReferenceError if +uri+ has already been loaded, and
# RemoteFileError (when <tt>:io_exceptions</tt> is on) for any failure while
# downloading or converting.
#--
# TODO: add option to fail silently or throw an exception on a 404
# NOTE(review): passing a URI to +open+ presumably relies on open-uri having
# been loaded elsewhere - confirm.
#++
def read_remote_file(uri) # :nodoc:
  raise CircularReferenceError, "can't load #{uri.to_s} more than once" if @loaded_uris.include?(uri.to_s)
  @loaded_uris << uri.to_s

  fh = nil
  begin
    fh = open(uri, 'rb', 'User-Agent' => USER_AGENT, 'Accept-Encoding' => 'gzip')

    if fh.content_encoding.include?('gzip')
      remote_src = Zlib::GzipReader.new(fh).read
    else
      remote_src = fh.read
    end

    charset = fh.charset
    ic = Iconv.new('UTF-8//IGNORE', charset)
    src = ic.iconv(remote_src)

    return src, charset
  rescue => e
    # Keep the underlying cause in the message so failures can be diagnosed
    raise RemoteFileError, "#{uri}: #{e.message}" if @options[:io_exceptions]
    return '', nil
  ensure
    # Close the handle on every path, including when reading or converting raised
    fh.close if fh && !fh.closed?
  end
end
343
+
344
+ private
345
# Store +folded_declaration+ in the internal cache under +block_hash+ and
# return it.
def save_folded_declaration(block_hash, folded_declaration) # :nodoc:
  @folded_declaration_cache.store(block_hash, folded_declaration)
end
349
+
350
# Retrieve a folded declaration block from the internal cache.
#
# Returns the cached value, or +nil+ when nothing is stored under
# +block_hash+.  The cache is only read, never written to.
def get_folded_declaration(block_hash) # :nodoc:
  @folded_declaration_cache[block_hash]
end
354
+
355
# Reinitialize the parser's scratch state: the stored CSS source, rules and
# warnings, and the folded declaration cache.
def reset! # :nodoc:
  @css_source = ''
  @css_rules = []
  @folded_declaration_cache = {}
  @css_warnings = []
end
361
+ end
362
+ end