DanaDanger-css_parser 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,9 @@
1
+ === Ruby CSS Parser CHANGELOG
2
+
3
+ ==== Version 0.9
4
+ * Initial version forked from Premailer project
5
+
6
+ ==== TODO: Future
7
+ * border shorthand/folding support
8
+ * re-implement caching on CssParser.merge
9
+ * correctly parse http://www.webstandards.org/files/acid2/test.html
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ === Ruby CSS Parser License
2
+
3
+ Copyright (c) 2007-08 Alex Dunae
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
data/README ADDED
@@ -0,0 +1,58 @@
1
+ === Ruby CSS Parser
2
+
3
+ Load, parse and cascade CSS rule sets in Ruby.
4
+
5
+ ==== Setup
6
+
7
+ Install the gem from RubyGems.
8
+
9
+ gem install css_parser
10
+
11
+ Done.
12
+
13
+ ==== An example
14
+ require 'css_parser'
15
+ include CssParser
16
+
17
+ parser = CssParser::Parser.new
18
+ parser.load_uri!('http://example.com/styles/style.css')
19
+
20
+ # lookup a rule by a selector
21
+ parser.find_by_selector('#content')
22
+ #=> 'font-size: 13px; line-height: 1.2;'
23
+
24
+ # lookup a rule by a selector and media type
25
+ parser.find_by_selector('#content', [:screen, :handheld])
26
+
27
+ # iterate through selectors by media type
28
+ parser.each_selector(:screen) do |selector, declarations, specificity|
29
+ ...
30
+ end
31
+
32
+ # add a block of CSS
33
+ css = <<-EOT
34
+ body { margin: 0 1em; }
35
+ EOT
36
+
37
+ parser.add_block!(css)
38
+
39
+ # output all CSS rules in a single stylesheet
40
+ parser.to_s
41
+ => #content { font-size: 13px; line-height: 1.2; }
42
+ body { margin: 0 1em; }
43
+
44
+ ==== Testing
45
+
46
+ You can run the suite of unit tests using <tt>rake test</tt>.
47
+
48
+ The download/import tests require that WEBrick is installed. The tests set up
49
+ a temporary server on port 12000 and pull down files from the <tt>test/fixtures/</tt>
50
+ directory.
51
+
52
+ ==== Credits and code
53
+
54
+ By Alex Dunae (dunae.ca, e-mail 'code' at the same domain), 2007-08.
55
+
56
+ Project homepage: http://code.google.com/p/ruby-css-parser/
57
+
58
+ Made on Vancouver Island.
@@ -0,0 +1,149 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'uri'
3
+ require 'md5'
4
+ require 'zlib'
5
+ require 'iconv'
6
+ require 'css_parser/rule_set'
7
+ require 'css_parser/regexps'
8
+ require 'css_parser/parser'
9
+
10
+ module CssParser
11
# Merge multiple CSS RuleSets by cascading according to the CSS 2.1 cascading rules
# (http://www.w3.org/TR/CSS21/cascade.html#cascading-order).
#
# Takes one or more RuleSet objects, passed either as separate arguments or
# as a single array.
#
# Returns a RuleSet. When exactly one RuleSet is given, that same object is
# returned untouched. Otherwise +expand_shorthand!+ is called on every
# RuleSet passed in, so the arguments are modified.
#
# Raises ArgumentError if any argument is not a CssParser::RuleSet.
#
# ==== Cascading
# If a RuleSet object has its +specificity+ defined, that specificity is
# used in the cascade calculations.
#
# If no specificity is explicitly set and the RuleSet has *one* selector,
# the specificity is calculated using that selector.
#
# If no selectors or multiple selectors are present, the specificity is
# treated as 0.
#
# For each property the winning declaration is chosen by importance first
# (an important declaration is never replaced by a normal one), then by
# specificity, and finally by order (the later declaration wins a tie).
#
# ==== Example #1
#   rs1 = RuleSet.new(nil, 'color: black;')
#   rs2 = RuleSet.new(nil, 'margin: 0px;')
#
#   merged = CssParser.merge(rs1, rs2)
#
#   puts merged
#   => "{ margin: 0px; color: black; }"
#
# ==== Example #2
#   rs1 = RuleSet.new(nil, 'background-color: black;')
#   rs2 = RuleSet.new(nil, 'background-image: none;')
#
#   merged = CssParser.merge(rs1, rs2)
#
#   puts merged
#   => "{ background: none black; }"
#--
# TODO: declaration_hashes should be able to contain a RuleSet
#       this should be a Class method
#++
def CssParser.merge(*rule_sets)
  # NOTE(review): nothing in this method reads the cache; it is only reset here.
  @folded_declaration_cache = {}

  # in case called like CssParser.merge([rule_set, rule_set])
  rule_sets.flatten! if rule_sets[0].kind_of?(Array)

  unless rule_sets.all? { |rs| rs.kind_of?(CssParser::RuleSet) }
    raise ArgumentError, "all parameters must be CssParser::RuleSets."
  end

  return rule_sets[0] if rule_sets.length == 1

  # Winning declaration per property:
  #   property => {:value, :specificity, :is_important}
  properties = {}

  rule_sets.each do |rule_set|
    rule_set.expand_shorthand!

    specificity = rule_set.specificity
    unless specificity
      if rule_set.selectors.length == 1
        specificity = calculate_specificity(rule_set.selectors[0])
      else
        specificity = 0
      end
    end

    rule_set.each_declaration do |property, value, is_important|
      current = properties[property]

      wins = if current.nil?
        true
      elsif !!is_important != !!current[:is_important]
        # Importance outranks specificity and order (cascade step 2).
        !!is_important
      else
        # Same importance: higher specificity wins (step 3) and, on a
        # tie, the declaration seen last wins (step 4).
        current[:specificity] <= specificity
      end

      if wins
        properties[property] = {:value => value, :specificity => specificity, :is_important => is_important}
      end
    end
  end

  merged = RuleSet.new(nil, nil)

  # TODO: what about important -- only the value is copied below, the
  # :is_important flag is not passed on to the merged RuleSet.
  properties.each do |property, details|
    merged[property.strip] = details[:value].strip
  end

  merged.create_shorthand!
  merged
end
96
+
97
# Calculates the specificity of a CSS selector
# per http://www.w3.org/TR/CSS21/cascade.html#specificity
#
# The result is built by writing four counts next to each other as decimal
# digits: a constant 0, the number of <tt>#</tt> characters, the number of
# matches of NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX and the number of
# matches of ELEMENTS_AND_PSEUDO_ELEMENTS_RX.
#
# Returns an integer, or 0 if anything goes wrong while counting.
#
# ==== Example
#  CssParser.calculate_specificity('#content div p:first-line a:link')
#  => 114
#--
# Thanks to Rafael Salazar and Nick Fitzsimons on the css-discuss list for their help.
#++
def CssParser.calculate_specificity(selector)
  id_count      = selector.scan(/\#/).length
  class_count   = selector.scan(NON_ID_ATTRIBUTES_AND_PSEUDO_CLASSES_RX).length
  element_count = selector.scan(ELEMENTS_AND_PSEUDO_ELEMENTS_RX).length

  # The leading component is always zero in this implementation.
  "0#{id_count}#{class_count}#{element_count}".to_i
rescue
  0
end
118
+
119
# Make <tt>url()</tt> links absolute.
#
# Takes a block of CSS and returns it with all relative URIs converted to absolute URIs.
# +base_uri+ may be a string or a URI object.
#
# "For CSS style sheets, the base URI is that of the style sheet, not that of the source document."
# per http://www.w3.org/TR/CSS21/syndata.html#uri
#
# Quotes and surrounding whitespace inside <tt>url()</tt> are dropped and every
# link is written back as <tt>url('...')</tt>. A link that cannot be resolved
# against +base_uri+ is left as it was.
#
# Returns a new string; +css+ itself is not modified.
#
# ==== Example
#  CssParser.convert_uris("body { background: url('../style/yellow.png?abc=123') };",
#               "http://example.org/style/basic.css").inspect
#  => "body { background: url('http://example.org/style/yellow.png?abc=123') };"
def self.convert_uris(css, base_uri)
  base_uri = URI.parse(base_uri) unless base_uri.kind_of?(URI)

  css.gsub(URI_RX) do
    # Read the capture before running any other regexp in this block.
    uri = Regexp.last_match(1).to_s.gsub(/["']+/, '').strip

    # Don't process URLs that are already absolute
    unless uri =~ /\A[a-z][a-z0-9+.\-]*:\/\//i
      begin
        uri = base_uri.merge(uri)
      rescue StandardError
        # Best effort: keep the link unchanged when it cannot be resolved.
      end
    end

    "url('#{uri}')"
  end
end
149
+ end
@@ -0,0 +1,345 @@
1
+ module CssParser
2
+ # Exception class used for any errors encountered while downloading remote files.
3
+ class RemoteFileError < IOError; end
4
+
5
+ # Exception class used if a request is made to load a CSS file more than once.
6
+ class CircularReferenceError < StandardError; end
7
+
8
+
9
+ # == Parser class
10
+ #
11
+ # All CSS is converted to UTF-8.
12
+ #
13
+ # When calling Parser#new there are some configuration options:
14
+ # [<tt>absolute_paths</tt>] Convert relative paths to absolute paths (<tt>href</tt>, <tt>src</tt> and <tt>url('')</tt>). Boolean, default is <tt>false</tt>.
15
+ # [<tt>import</tt>] Follow <tt>@import</tt> rules. Boolean, default is <tt>true</tt>.
16
+ # [<tt>io_exceptions</tt>] Throw an exception if a link can not be found. Boolean, default is <tt>true</tt>.
17
+ class Parser
18
+ USER_AGENT = "Ruby CSS Parser/#{VERSION} (http://code.dunae.ca/css_parser/)"
19
+
20
+ STRIP_CSS_COMMENTS_RX = /\/\*.*?\*\//m
21
+ STRIP_HTML_COMMENTS_RX = /\<\!\-\-|\-\-\>/m
22
+
23
+ # Initial parsing
24
+ RE_AT_IMPORT_RULE = /\@import[\s]+(url\()?["']+(.[^'"]*)["']\)?([\w\s\,]*);?/i
25
+
26
+ #--
27
+ # RE_AT_IMPORT_RULE = Regexp.new('@import[\s]*(' + RE_STRING.to_s + ')([\w\s\,]*)[;]?', Regexp::IGNORECASE) -- should handle url() even though it is not allowed
28
+ #++
29
+
30
+ # Array of CSS files that have been loaded.
31
+ attr_reader :loaded_uris
32
+
33
+ #attr_reader :rules
34
+
35
+ #--
36
+ # Class variable? see http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html
37
+ #++
38
+ @folded_declaration_cache = {}
39
+ class << self; attr_reader :folded_declaration_cache; end
40
+
41
# Create a parser.
#
# +options+ is merged over these defaults:
# <tt>:absolute_paths => false</tt>, <tt>:import => true</tt> and
# <tt>:io_exceptions => true</tt>.
def initialize(options = {})
  defaults = {
    :absolute_paths => false,
    :import         => true,
    :io_exceptions  => true
  }
  @options = defaults.merge(options)

  # RuleSets added so far, each stored together with its media types
  @rules = []

  # URIs that have already been requested
  @loaded_uris = []

  # unprocessed blocks of CSS
  @blocks = []

  reset!
end
56
+
57
# Get declarations by selector.
#
# +media_types+ are optional, and can be a symbol or an array of symbols.
# The default value is <tt>:all</tt>.
#
# Selectors are compared after stripping leading and trailing whitespace
# from both sides.
#
# ==== Examples
#  find_by_selector('#content')
#  => ['font-size: 13px; line-height: 1.2;']
#
#  find_by_selector('#content', [:screen, :handheld])
#  => ['font-size: 13px; line-height: 1.2;']
#
#  find_by_selector('#content', :print)
#  => ['font-size: 11pt; line-height: 1.2;']
#
# Returns an array of declarations, one entry per matching selector.
def find_by_selector(selector, media_types = :all)
  wanted = nil
  found = []

  each_selector(media_types) do |candidate, declarations, _specificity|
    # Computed lazily so nothing is done with +selector+ when there are no rules.
    wanted ||= selector.strip
    next unless candidate.strip == wanted

    found.push(declarations)
  end

  found
end
80
+ alias_method :[], :find_by_selector
81
+
82
+
83
# Add a raw block of CSS.
#
# +options+ may contain <tt>:base_uri</tt>, <tt>:charset</tt> and
# <tt>:media_types</tt> (default <tt>:all</tt>). URIs in the block are made
# absolute only when a <tt>:base_uri</tt> is given and the parser was created
# with <tt>:absolute_paths</tt> enabled.
#
# ==== Example
#   css = <<-EOT
#     body { font-size: 10pt }
#     p { margin: 0px; }
#     @media screen, print {
#       body { line-height: 1.2 }
#     }
#   EOT
#
#   parser = CssParser::Parser.new
#   parser.add_block!(css)
def add_block!(block, options = {})
  settings = {:base_uri => nil, :charset => nil, :media_types => :all}.merge(options)

  css = cleanup_block(block)

  base = settings[:base_uri]
  css = CssParser.convert_uris(css, base) if base && @options[:absolute_paths]

  parse_block_into_rule_sets!(css, settings)
end
111
+
112
# Add a CSS rule by setting the +selectors+, +declarations+ and +media_types+.
#
# A RuleSet is built from +selectors+ and +declarations+ and handed to
# add_rule_set!.
#
# +media_types+ can be a symbol or an array of symbols.
def add_rule!(selectors, declarations, media_types = :all)
  add_rule_set!(RuleSet.new(selectors, declarations), media_types)
end
119
+
120
# Add a CssParser RuleSet object.
#
# +media_types+ can be a symbol or an array of symbols. +nil+ is treated as
# <tt>:all</tt>, the same way each_rule_set treats it.
#
# Raises ArgumentError if +ruleset+ is not a CssParser::RuleSet.
def add_rule_set!(ruleset, media_types = :all)
  unless ruleset.kind_of?(CssParser::RuleSet)
    raise ArgumentError, "ruleset must be a CssParser::RuleSet"
  end

  # each_rule_set calls any? on the stored value, so it must never be nil.
  media_types = :all if media_types.nil?
  media_types = [media_types] if media_types.kind_of?(Symbol)

  @rules << {:media_types => media_types, :rules => ruleset}
end
130
+
131
# Iterate through RuleSet objects.
#
# +media_types+ can be a symbol or an array of symbols; +nil+ means <tt>:all</tt>.
#
# Every stored RuleSet is yielded when the requested media types include
# <tt>:all</tt>. Otherwise a RuleSet is yielded only if it was stored with at
# least one of the requested media types.
def each_rule_set(media_types = :all) # :yields: rule_set
  wanted = case media_types
           when nil    then [:all]
           when Symbol then [media_types]
           else media_types
           end

  @rules.each do |entry|
    next unless wanted.include?(:all) || entry[:media_types].any? { |mt| wanted.include?(mt) }

    yield entry[:rules]
  end
end
144
+
145
# Iterate through CSS selectors.
#
# Walks the RuleSets selected by each_rule_set and passes on whatever each
# RuleSet's own +each_selector+ yields.
#
# +media_types+ can be a symbol or an array of symbols.
# See RuleSet#each_selector for +options+.
def each_selector(media_types = :all, options = {}) # :yields: selectors, declarations, specificity
  each_rule_set(media_types) do |rule_set|
    rule_set.each_selector(options) { |sel, decs, spec| yield sel, decs, spec }
  end
end
157
+
158
+ # Output all CSS rules as a single stylesheet.
159
+ def to_s(media_types = :all)
160
+ out = ''
161
+ each_selector(media_types) do |selectors, declarations, specificity|
162
+ out << "#{selectors} {\n#{declarations}\n}\n"
163
+ end
164
+ out
165
+ end
166
+
167
# Merge declarations with the same selector.
#
# Not implemented yet: no rules are touched and an empty array is returned.
def compact! # :nodoc:
  []
end
173
+
174
# Split a block of CSS into rules and register each one through add_rule!.
#
# +options+ may contain <tt>:media_types</tt> (default <tt>:all</tt>): the
# media types given to rules that are not inside an <tt>@media</tt> block.
# Rules inside an <tt>@media</tt> block get the media types listed after
# <tt>@media</tt>; once that block is closed the default applies again.
#
# The block is read token by token. A token is either a single brace,
# whitespace character or double quote (optionally preceded by a backslash),
# or a run of other characters. <tt>@charset</tt> declarations are skipped.
# Rules with empty declarations are not added.
def parse_block_into_rule_sets!(block, options = {}) # :nodoc:
  options = {:media_types => :all}.merge(options)
  default_media_types = options[:media_types]
  media_types = default_media_types

  in_declarations = false

  # @charset is ignored for now
  in_charset = false
  in_string = false
  in_at_media_rule = false # between '@media' and its opening brace
  in_media_block = false   # between the braces of an @media block

  current_selectors = ''
  current_declarations = ''

  block.scan(/([\\]?[{}\s"]|(.[^\s"{}\\]*))/).each do |matches|
    token = matches[0]

    # found un-escaped double quote
    in_string = !in_string if token =~ /\A"/

    if in_declarations
      current_declarations += token

      if token =~ /\}/ && !in_string
        current_declarations.gsub!(/\}[\s]*$/, '')

        in_declarations = false

        unless current_declarations.strip.empty?
          add_rule!(current_selectors, current_declarations, media_types)
        end

        current_selectors = ''
        current_declarations = ''
      end
    elsif token =~ /@media/i
      # found '@media', start collecting its media types
      in_at_media_rule = true
      media_types = []
    elsif in_at_media_rule
      if token =~ /\{/
        in_at_media_rule = false
        in_media_block = true
      else
        # A token may hold several comma-separated names ("screen,print").
        token.split(/[,\s]+/).each do |name|
          media_types << name.downcase.to_sym unless name.empty?
        end
      end
    elsif in_charset || token =~ /@charset/i
      # iterate until we are out of the charset declaration
      in_charset = (token =~ /;/ ? false : true)
    elsif token =~ /\}/ && !in_string
      # A closing brace outside of declarations ends the @media block, so
      # the rules that follow go back to the default media types.
      if in_media_block
        media_types = default_media_types
        in_media_block = false
      end
    elsif token =~ /\{/ && !in_string
      current_selectors.gsub!(/^[\s]*/, '')
      current_selectors.gsub!(/[\s]*$/, '')
      in_declarations = true
    else
      current_selectors += token
    end
  end
end
244
+
245
# Load a remote CSS file.
#
# +uri+ is the location of the stylesheet. +base_uri+ is used to resolve
# <tt>@import</tt> paths and relative <tt>url()</tt> links and defaults to
# +uri+. +media_types+ (default <tt>:all</tt>) are given to the rules of
# this stylesheet.
#
# When the parser was created with <tt>:import</tt> enabled, each
# <tt>@import</tt> rule is loaded first (recursively) with the media types
# listed in that rule. <tt>@import</tt> rules are always removed from the
# source before it is parsed.
#
# Errors from read_remote_file (for example CircularReferenceError when a
# stylesheet is requested twice) are not rescued here.
def load_uri!(uri, base_uri = nil, media_types = :all)
  base_uri = uri if base_uri.nil?
  src, _charset = read_remote_file(uri)

  # Load @imported CSS
  if @options[:import]
    src.scan(RE_AT_IMPORT_RULE).each do |import_rule|
      import_path = import_rule[1].to_s.gsub(/['"]*/, '').strip
      import_uri = URI.parse(base_uri.to_s).merge(import_path)

      # Kept separate from +media_types+, which belongs to this stylesheet.
      import_media_types = import_rule.last.to_s.split(/[\s,]+/).reject { |t| t.empty? }.map { |t| t.downcase.to_sym }

      # Recurse
      load_uri!(import_uri, nil, import_media_types)
    end
  end

  # Remove @import declarations
  src = src.gsub(RE_AT_IMPORT_RULE, '')

  # Relative paths need to be converted here
  src = CssParser.convert_uris(src, base_uri) if base_uri && @options[:absolute_paths]

  add_block!(src, {:media_types => media_types})
end
275
+
276
+ protected
277
# Strip comments and clean up blank lines from a block of CSS.
#
# Removes CSS comments, then HTML comment markers (<tt><!--</tt> and
# <tt>--></tt>; the text between them is kept), then whitespace on lines
# that contain nothing else.
#
# Returns a new string; +block+ itself is not modified, so frozen strings
# are accepted.
def cleanup_block(block) # :nodoc:
  # Strip CSS comments
  cleaned = block.gsub(STRIP_CSS_COMMENTS_RX, '')

  # Strip HTML comments - they shouldn't really be in here but
  # some people are just crazy...
  cleaned = cleaned.gsub(STRIP_HTML_COMMENTS_RX, '')

  # Strip lines containing just whitespace
  cleaned.gsub(/^\s+$/, '')
end
293
+
294
# Download a file into a string.
#
# The URI is recorded in <tt>@loaded_uris</tt> before the download starts.
# The response is gunzipped when its content encoding includes gzip and is
# then converted to UTF-8 from the charset reported for the response.
#
# Returns the file's data and character set in an array. If the download
# fails and <tt>:io_exceptions</tt> is off, returns <tt>['', nil]</tt>.
#
# Raises CircularReferenceError if +uri+ was already requested, and
# RemoteFileError if the download fails while <tt>:io_exceptions</tt> is on.
#--
# NOTE(review): open() is given a URI and HTTP headers here, which
# presumably relies on open-uri being loaded somewhere else -- confirm.
#++
def read_remote_file(uri) # :nodoc:
  raise CircularReferenceError, "can't load #{uri.to_s} more than once" if @loaded_uris.include?(uri.to_s)
  @loaded_uris << uri.to_s

  fh = nil
  begin
    fh = open(uri, 'rb', 'User-Agent' => USER_AGENT, 'Accept-Encoding' => 'gzip')

    if fh.content_encoding.include?('gzip')
      remote_src = Zlib::GzipReader.new(fh).read
    else
      remote_src = fh.read
    end

    charset = fh.charset
    src = Iconv.new('UTF-8//IGNORE', charset).iconv(remote_src)

    return src, charset
  rescue => e
    # Keep the cause in the message so a failed download can be diagnosed.
    raise RemoteFileError, "#{uri}: #{e.message}" if @options[:io_exceptions]
    return '', nil
  ensure
    # Close the handle on every path, including failures after open.
    fh.close if fh && !fh.closed?
  end
end
326
+
327
+ private
328
# Save a folded declaration block to the internal cache.
#
# +block_hash+ is the cache key. Returns +folded_declaration+.
def save_folded_declaration(block_hash, folded_declaration) # :nodoc:
  @folded_declaration_cache.store(block_hash, folded_declaration)
end
332
+
333
# Retrieve a folded declaration block from the internal cache.
#
# Returns the cached value for +block_hash+, or +nil+ if there is none.
# A miss does not add anything to the cache.
def get_folded_declaration(block_hash) # :nodoc:
  @folded_declaration_cache[block_hash]
end
337
+
338
# Reset the parser's scratch state: the stored CSS source, the CSS rule and
# warning lists and the folded-declaration cache are replaced by new empty
# values.
def reset! # :nodoc:
  @css_source = ''
  @folded_declaration_cache = {}
  @css_rules = []
  @css_warnings = []
end
344
+ end
345
+ end