css_toolkit 1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,463 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require 'css_properties'
3
+ require 'css_misc'
4
+ require 'css_base'
5
+ require 'css_stylesheet'
6
+ require 'css_rule_set'
7
+ require 'css_media_set'
8
+ require 'css_declaration'
9
+ require 'css_comment'
10
+ require 'css_import'
11
+
12
+ module CssTidy
13
+
14
+ class Parser
15
+ # these are used to poke values in for testing instance methods
16
+ attr_accessor :css, :index, :sheet
17
+
18
+ # setup the class vars used by Tidy
19
# Sets up a parser with empty input and its initial state.
#
# Instance variables assigned here:
# * @stylesheet  - a new CssTidy::StyleSheet
# * @raw_css     - the raw, unprocessed css (starts empty)
# * @css         - the string that is being processed (starts empty)
# * @index       - the current position in @css
# * @context     - stack of parser contexts; the last entry is the
#                  current one (starts as [NONE, IN_SELECTOR])
# * @line_number - the current line number, starting at 1
def initialize
  @raw_css, @css = '', ''
  @index = 0
  @line_number = 1
  @context = [NONE, IN_SELECTOR]
  @stylesheet = CssTidy::StyleSheet.new
end
39
+
40
# Parses +css+ one character at a time and returns @stylesheet.
#
# The parser is a state machine driven by the @context stack; the last
# entry selects the branch taken for the current character:
#
# * IN_AT_BLOCK - accumulates an at-block header (e.g. the text after
#                 '@media') until '{' is seen
# * IN_SELECTOR - accumulates a selector, detects at-rules, strings,
#                 comments and the start/end of a declaration block
# * IN_PROPERTY - accumulates a property name until ':' (or '=')
# * IN_VALUE    - accumulates the value, splitting it into sub values
#                 on whitespace, and emits the declaration
# * IN_STRING   - accumulates a quoted string or a (...) group
# * IN_COMMENT  - accumulates comment text until '*/'
#
# Parsed nodes (rule sets, media sets, imports, comments) are appended to
# @stylesheet with <<. The stylesheet itself is NOT replaced between
# calls, so repeated calls keep appending to the same object.
#
# Fixes relative to the previous implementation:
# * @index, @line_number and @context are reset on entry, so a parser
#   instance can parse more than one string.
# * +quoted_string+ is initialised up front; it used to be read before it
#   was ever assigned, which raised NameError.
# * the string-length check compares current_string.length (not the
#   string itself) with 3, which used to raise ArgumentError.
#
# css - the CSS source text; it is cloned, the caller's string is not
#       modified.
def parse(css)
  css_length = css.length
  @css = css.clone

  # always start from the beginning of the new input
  @index = 0
  @line_number = 1
  @context = [NONE, IN_SELECTOR]

  # vars used in processing of sheets
  current_at_block = ''
  invalid_at = false

  current_selector = ''
  current_property = ''
  current_ruleset = CssTidy::RuleSet.new

  current_value = ''
  sub_value = ''
  sub_value_array = []

  current_string = ''
  string_char = ''
  str_in_str = false
  quoted_string = false

  current_comment = ''

  while @index < css_length

    if is_newline?
      @line_number += 1
    end

    case @context.last
    when IN_AT_BLOCK
      if is_token?
        # current_at_block empty? allows comment inside of selectors to pass
        if is_comment? && current_at_block.strip.empty?
          @context << IN_COMMENT
          @index += 1 # move past '*'
        elsif is_current_char?('{')
          @context << IN_SELECTOR
        elsif is_current_char?(',')
          current_at_block = current_at_block.strip + ','
        elsif is_current_char?(['(', ')', ':', '/', '*', '!', '\\'])
          # catch media queries and escapes
          current_at_block << current_char
        end
      else # not token
        # skip whitespace that directly follows whitespace or a ','
        last_char = current_at_block[-1, 1]
        unless (is_char_ctype?(:space, last_char) || is_char_token?(last_char) && last_char == ',') && is_ctype?(:space)
          current_at_block << current_char
        end
      end

    when IN_SELECTOR
      if is_token?
        # current_selector empty? allows comment inside of selectors to pass
        if is_comment? && current_selector.strip.empty?
          @context << IN_COMMENT
          @index += 1
        elsif is_current_char?('@') && current_selector.strip.empty?
          # Check for at-rule: look ahead for each known name
          invalid_at = true
          AT_RULES.each do |name, type|
            size_of_property = name.length
            property_to_find = (@css[@index + 1, size_of_property]).strip.downcase
            if name == property_to_find
              if type == IN_AT_BLOCK
                current_at_block = '@' + name
              else
                current_selector = '@' + name
              end
              @context << type
              @index += size_of_property
              invalid_at = false
            end
          end

          if invalid_at
            current_selector = '@'
            puts "invalid At rule"
            # TODO: collect the alphabetic name following '@' and log
            # "Invalid @-rule: <name> (removed)" as the PHP original does.
          end
        elsif is_current_char?('"') || is_current_char?("'")
          @context << IN_STRING
          current_string = current_char
          string_char = current_char
        elsif invalid_at && is_current_char?(';')
          invalid_at = false
          @context << IN_SELECTOR
        elsif is_current_char?('{')
          @context << IN_PROPERTY
        elsif is_current_char?('}')
          current_at_block = ''
          @stylesheet.end_at_block
          current_selector = ''
          # when there is a new selector we save the last set
          @stylesheet << current_ruleset unless current_ruleset.empty?
          # and start a new one
          current_ruleset = CssTidy::RuleSet.new
        elsif is_current_char?([',', '\\'])
          current_selector = current_selector.strip + current_char
        else
          current_selector << current_char
        end
      else # not is_token
        # skip whitespace that directly follows whitespace or a ','
        last_position = current_selector.length - 1
        if last_position == -1 || !((is_char_ctype?(:space, current_selector[last_position, 1]) || is_char_token?(current_selector[last_position, 1]) && current_selector[last_position, 1] == ',') && is_ctype?(:space))
          current_selector << current_char
        end
      end

    when IN_PROPERTY
      if is_token?
        if (is_current_char?(':') || is_current_char?('=')) && !current_property.empty?
          @context << IN_VALUE
        elsif is_comment? && current_property.empty?
          @context << IN_COMMENT
          @index += 1 # move past '*'
        elsif is_current_char?('}')
          @context << IN_SELECTOR
          invalid_at = false
          current_property = ''
          current_selector = ''
          # when there is a new selector we save the last set
          @stylesheet << current_ruleset unless current_ruleset.empty?
          # and start a new one
          current_ruleset = CssTidy::RuleSet.new
        elsif is_current_char?(';')
          current_property = ''
        elsif is_current_char?(['*', '\\']) # allow star hack and \ hack for properties
          current_property << current_char
        end
      elsif !is_ctype?(:space)
        current_property << current_char
      end

    when IN_VALUE
      # true when a declaration ends without ';': either a newline followed
      # by a known property name, or the last character of the input
      property_next = is_newline? && property_is_next? || @index == css_length - 1
      if is_token? || property_next
        if is_comment?
          @context << IN_COMMENT
          @index += 1
        elsif is_current_char?('"') || is_current_char?("'") || is_current_char?('(')
          current_string = current_char
          # a '(' group is closed by ')', a quote by the same quote
          string_char = is_current_char?('(') ? ')' : current_char
          @context << IN_STRING
        elsif is_current_char?([',', '\\'])
          sub_value = sub_value.strip + current_char
        elsif is_current_char?(';') || property_next
          if current_selector[0, 1] == '@' && AT_RULES.has_key?(current_selector[1..-1]) && AT_RULES[current_selector[1..-1]] == IN_VALUE
            sub_value_array << sub_value.strip

            @context << IN_SELECTOR
            case current_selector
            when '@charset'
              # NOTE(review): an attribute assignment evaluates to its
              # right-hand side, so this only prints when the value is
              # nil/false - confirm the intended "extra charset" check.
              unless (@stylesheet.charset = sub_value_array[0])
                puts "extra charset"
              end
            when '@namespace'
              # TODO: namespaces are not stored yet
            when '@import'
              @stylesheet << CssTidy::Import.new(sub_value_array.join(' '))
            end

            sub_value_array = []
            sub_value = ''
            current_selector = ''
          else
            @context << IN_PROPERTY
          end
        elsif !is_current_char?('}')
          sub_value << current_char
        end

        # end of a declaration: emit property/value into the current rule set
        if (is_current_char?('}') || is_current_char?(';') || property_next) && !current_selector.empty?
          unless current_at_block.empty?
            @stylesheet << CssTidy::MediaSet.new(current_at_block.strip)
            current_at_block = ''
          end

          unless sub_value.strip.empty?
            sub_value_array << sub_value.strip
            sub_value = ''
          end

          current_value = sub_value_array.join(' ')

          valid = is_property_valid?(current_property)
          if !invalid_at || valid
            current_ruleset.add_rule({:selector => current_selector.strip, :declarations => "#{current_property}:#{current_value}" })
          end

          current_property = ''
          sub_value_array = []
          current_value = ''
        end

        if is_current_char?('}')
          @context << IN_SELECTOR
          invalid_at = false
          current_selector = ''
          # when there is a new selector we save the last set
          @stylesheet << current_ruleset unless current_ruleset.empty?
          # and start a new one
          current_ruleset = CssTidy::RuleSet.new
        end
      elsif !property_next
        sub_value << current_char

        # whitespace terminates the current sub value
        if is_ctype?(:space)
          unless sub_value.strip.empty?
            sub_value_array << sub_value.strip
            sub_value = ''
          end
        end
      end

    when IN_STRING
      # track quotes nested inside a (...) group
      if string_char === ')' && (is_current_char?('"') || is_current_char?("'")) && !str_in_str && !is_escaped?
        str_in_str = true
      elsif string_char === ')' && (is_current_char?('"') || is_current_char?("'")) && str_in_str && !is_escaped?
        str_in_str = false
      end
      temp_add = current_char

      # a raw newline inside a string (with no not-escaped backslash at the
      # previous position) is replaced by the CSS escape "\A "
      if is_newline? && !is_current_char?('\\', -1) && !is_escaped?(-1)
        temp_add = "\\A "
      end

      # drop unquoted whitespace inside a (...) group
      if !(string_char === ')' && is_css_whitespace?(current_char) && !str_in_str)
        current_string << temp_add
      end

      if is_current_char?(string_char) && !is_escaped? && !str_in_str
        @context.pop

        if is_css_whitespace?(current_string) && current_property != 'content'
          if !quoted_string
            if string_char === '"' || string_char === '\''
              # quote-stripping optimisation of the PHP original is not ported
            elsif current_string.length > 3 && (current_string[1, 1] === '"' || current_string[1, 1] === '\'')
              # stripping of inner quotes in (...) groups is not ported
            end
          else
            quoted_string = false
          end
        end

        # hand the finished string back to whichever context opened it
        if @context[-1] === IN_VALUE
          sub_value << current_string
        elsif @context[-1] === IN_SELECTOR
          current_selector << current_string
        end
      end

    when IN_COMMENT
      if is_comment_end?
        @context.pop # go back to previous context
        @index += 1 # skip the '/'
        @stylesheet << CssTidy::Comment.new(current_comment)
        current_comment = ''
      else
        current_comment << current_char
      end

    end
    @index += 1
  end

  @stylesheet
end
322
+
323
# Returns the one-character string of @css at @index. Follows String#slice
# semantics: an empty string when @index equals the length of @css, nil
# when it is beyond that.
def current_char
  @css.slice(@index, 1)
end
326
+
327
# Looks ahead from the character after @index up to the next ':' and
# reports whether the text in between (stripped and lower-cased) is a key
# of PROPERTIES. Returns false when there is no ':' ahead.
def property_is_next?
  start = @index + 1
  colon_at = @css.index(':', start)
  return false unless colon_at

  # everything from just after the current position up to (not including) ':'
  candidate = @css[start...colon_at].strip.downcase
  PROPERTIES.has_key?(candidate) ? true : false
end
348
+
349
# Reports whether +property+ is a key of PROPERTIES.
#
# The exact name is tried first; if that fails the name is stripped and
# lower-cased before the lookup, matching the normalisation that
# property_is_next? applies, so 'COLOR' and ' color ' are accepted as well.
# A nil +property+ is treated as an empty name.
#
# Returns true or false.
def is_property_valid?(property)
  return true if PROPERTIES.has_key?(property)

  PROPERTIES.has_key?(property.to_s.strip.downcase)
end
352
+
353
# Reports whether WHITESPACE includes +char+.
def is_css_whitespace?(char)
  whitespace = WHITESPACE
  whitespace.include?(char)
end
356
+
357
+
358
+ # These functions all test the character at the current index location
359
+
360
# Reports whether the character +offset+ positions away from @index
# (default: the current character) is a token, as decided by is_char_token?.
def is_token?(offset=0)
  char = @css[@index + offset, 1]
  is_char_token?(char)
end
363
+
364
# Reports whether TOKENS includes +char+.
def is_char_token?(char)
  tokens = TOKENS
  tokens.include?(char)
end
367
+
368
# Checks if the character at @index + offset is escaped, i.e. preceded by
# an odd number of consecutive backslashes.
#
# Counting the whole run matters: in +\\"+ the first backslash escapes the
# second one, so the quote is NOT escaped. The look-behind never goes
# before the start of @css, so it cannot wrap around to the end of the
# string when @index + offset is 0 (e.g. is_escaped?(-1) at @index 1).
#
# offset - position relative to @index of the character to test (default 0)
#
# Returns true or false.
def is_escaped?(offset=0)
  position = @index + offset
  backslashes = 0
  look_behind = position - 1
  while look_behind >= 0 && @css[look_behind, 1] == '\\'
    backslashes += 1
    look_behind -= 1
  end
  backslashes.odd?
end
372
+
373
# Reports whether +char+ is a backslash, provided the parser is past the
# first character of the input. Returns true or false.
def is_char_escaped?(char)
  # at index 0 there is no previous character to backtrack to
  # (it would be -1, i.e. the end of the string)
  return false unless @index > 0

  char === '\\' ? true : false
end
382
+
383
+
384
# Reports whether the two characters starting at @index are '/*'.
# Returns false once @index has reached the end of @css.
def is_comment?
  # cannot look beyond the end of the string
  return false unless @index < @css.length

  @css[@index, 2] == '/*'
end
393
+
394
# Reports whether the two characters starting at @index are '*/'.
# Returns false once @index has reached the end of @css.
def is_comment_end?
  # cannot look beyond the end of the string
  return false unless @index < @css.length

  @css[@index, 2] == '*/'
end
403
+
404
# Tests the character at @index against "\n" and "\r".
# Returns the match position (0) for a newline character, nil otherwise.
def is_newline?
  char = @css[@index, 1]
  char =~ /[\n\r]/
end
407
+
408
# Applies is_char_ctype? to the character +offset+ positions away from
# @index (default: the current character).
# Returns nil without testing anything once @index has reached the end
# of @css.
def is_ctype?(ctype, offset=0)
  return nil unless @index < @css.length

  is_char_ctype?(ctype, @css[@index + offset, 1])
end
413
+
414
# Matches +char+ against the character class named by +ctype+:
# * :space  - space, tab, form feed, vertical tab, newline, carriage return
# * :xdigit - hexadecimal digit (case-insensitive)
# * :alpha  - ASCII letter
#
# Returns the position of the first match (0 for a matching single
# character), or nil when nothing matches or +ctype+ is not one of the above.
def is_char_ctype?(ctype, char)
  pattern =
    case ctype
    when :space  then /[ \t\f\v\n\r]/
    when :xdigit then /[0-9a-f]/i
    when :alpha  then /[A-Za-z]/
    end
  pattern && char =~ pattern
end
424
+
425
# Compares the character +offset+ positions away from @index (default: the
# current character) with +char+ - use for readability.
#
# char   - a String to compare for equality, or an Array of candidate
#          strings, any of which may match. Subclasses of String and Array
#          are accepted too (they used to fall through and yield nil,
#          because the dispatch compared the class *name*).
# offset - position relative to @index (default 0)
#
# Returns true/false for a String or Array +char+, nil for anything else.
def is_current_char?(char,offset=0)
  current = @css[@index + offset, 1]
  case char
  when String
    current == char
  when Array
    char.include?(current)
  end
end
434
+
435
# Reports whether +text+ is a value-style at-rule: it starts with '@' and
# the rest of it is a key of AT_RULES mapped to IN_VALUE. This is the same
# test parse performs inline on the current selector, ported from the PHP
# original:
#   $selector{0} == '@' && isset($at_rules[substr($selector,1)]) &&
#   $at_rules[substr($selector,1)] == 'iv'
#
# The method used to be an empty stub that always returned nil.
#
# text - the candidate, e.g. '@import'; nil is treated as an empty string
#
# Returns true or false.
def is_at_rule?(text)
  candidate = text.to_s
  return false unless candidate[0, 1] == '@'

  name = candidate[1..-1]
  AT_RULES.has_key?(name) && AT_RULES[name] == IN_VALUE
end
438
+
439
+ private
440
+
441
# Debugging help: a human-readable label for the parser's current context
# (the last entry of @context). Returns nil for an unrecognised context.
def context_name
  active = @context.last
  labels = [
    [NONE,        'None'],
    [IN_SELECTOR, 'in selector'],
    [IN_PROPERTY, 'in property'],
    [IN_VALUE,    'in value'],
    [IN_STRING,   'in string'],
    [IN_COMMENT,  'in comment'],
    [IN_AT_BLOCK, 'in at block']
  ]
  # first entry whose context matches, compared with === like a case/when
  hit = labels.find { |context, _label| context === active }
  hit && hit.last
end
461
+
462
+ end
463
+ end