crass 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/lib/crass/scanner.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ require 'strscan'
2
3
 
3
4
  module Crass
4
5
 
@@ -17,13 +18,12 @@ module Crass
17
18
  # position, not a byte position, so it accounts for multi-byte characters.
18
19
  attr_accessor :pos
19
20
 
20
- # The string being scanned.
21
- attr_reader :string
22
-
23
21
  # Creates a Scanner instance for the given _input_ string or IO instance.
24
22
  def initialize(input)
25
- @string = input.is_a?(IO) ? input.read : input.to_s
26
- @chars = @string.chars.to_a
23
+ string = input.is_a?(IO) ? input.read : input.to_s
24
+
25
+ @chars = string.chars.to_a
26
+ @scanner = StringScanner.new(string)
27
27
 
28
28
  reset
29
29
  end
@@ -31,20 +31,20 @@ module Crass
31
31
  # Consumes the next character and returns it, advancing the pointer, or
32
32
  # an empty string if the end of the string has been reached.
33
33
  def consume
34
- @current = @chars[@pos] || ''
35
- @pos += 1 if @current
36
- @current
34
+ if @pos == @len
35
+ ''
36
+ else
37
+ @pos += 1
38
+ @current = @scanner.getch
39
+ end
37
40
  end
38
41
 
39
42
  # Consumes the rest of the string and returns it, advancing the pointer to
40
43
  # the end of the string. Returns an empty string if the end of the string
41
44
  # has already been reached.
42
45
  def consume_rest
43
- rest = @string[@pos..@len] || ''
44
- @current = rest[-1] || ''
45
- @pos = @len
46
-
47
- rest
46
+ @pos = @len
47
+ @scanner.rest
48
48
  end
49
49
 
50
50
  # Returns `true` if the end of the string has been reached, `false`
@@ -62,7 +62,7 @@ module Crass
62
62
  # Returns the substring between {#marker} and {#pos}, without altering the
63
63
  # pointer.
64
64
  def marked
65
- if result = @chars[@marker...@pos]
65
+ if result = @chars[@marker, @pos - @marker]
66
66
  result.join('')
67
67
  else
68
68
  ''
@@ -73,24 +73,21 @@ module Crass
73
73
  # doesn't consume them. The number of characters returned may be less than
74
74
  # _length_ if the end of the string is reached.
75
75
  def peek(length = 1)
76
- if result = @chars[@pos, length]
77
- result.join('')
78
- else
79
- ''
80
- end
76
+ @scanner.peek(length)
81
77
  end
82
78
 
83
79
  # Moves the pointer back one character without changing the value of
84
80
  # {#current}. The next call to {#consume} will re-consume the current
85
81
  # character.
86
82
  def reconsume
83
+ @scanner.unscan
87
84
  @pos -= 1 if @pos > 0
88
85
  end
89
86
 
90
87
  # Resets the pointer to the beginning of the string.
91
88
  def reset
92
89
  @current = nil
93
- @len = @string.length
90
+ @len = @chars.size
94
91
  @marker = 0
95
92
  @pos = 0
96
93
  end
@@ -99,29 +96,30 @@ module Crass
99
96
  # matched substring will be returned and the pointer will be advanced.
100
97
  # Otherwise, `nil` will be returned.
101
98
  def scan(pattern)
102
- match = pattern.match(@string, @pos)
103
- return nil if match.nil? || match.begin(0) != @pos
104
-
105
- @pos = match.end(0)
106
- @current = @chars[@pos - 1]
99
+ if match = @scanner.scan(pattern)
100
+ @pos += match.size
101
+ @current = @chars[@pos - 1]
102
+ end
107
103
 
108
- match[0]
104
+ match
109
105
  end
110
106
 
111
107
  # Scans the string until the _pattern_ is matched. Returns the substring up
112
108
  # to and including the end of the match, and advances the pointer. If there
113
109
  # is no match, `nil` is returned and the pointer is not advanced.
114
110
  def scan_until(pattern)
115
- start = @pos
116
- match = pattern.match(@string, @pos)
117
-
118
- return nil if match.nil?
119
-
120
- @pos = match.end(0)
121
- @current = @chars[@pos - 1]
111
+ if match = @scanner.scan_until(pattern)
112
+ @pos += match.size
113
+ @current = @chars[@pos - 1]
114
+ end
122
115
 
123
- @string[start...@pos]
116
+ match
124
117
  end
125
118
  end
126
119
 
120
+ # Returns the string being scanned.
121
+ def string
122
+ @scanner.string
123
+ end
124
+
127
125
  end
@@ -32,7 +32,6 @@ module Crass
32
32
 
33
33
  RE_UNICODE_RANGE_START = /\+(?:[0-9A-Fa-f]|\?)/
34
34
  RE_UNICODE_RANGE_END = /-[0-9A-Fa-f]/
35
- RE_URL_QUOTE = /["']/
36
35
  RE_WHITESPACE = /[\n\u0009\u0020]+/
37
36
 
38
37
  # -- Class Methods ---------------------------------------------------------
@@ -67,47 +66,47 @@ module Crass
67
66
  #
68
67
  # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-token0
69
68
  def consume
70
- return token(:eof) if @s.eos?
69
+ return nil if @s.eos?
71
70
 
72
71
  @s.mark
73
- return token(:whitespace) if @s.scan(RE_WHITESPACE)
72
+ return create_token(:whitespace) if @s.scan(RE_WHITESPACE)
74
73
 
75
- case char = @s.consume
76
- when '"'
74
+ char = @s.consume
75
+
76
+ case char.to_sym
77
+ when :'"'
77
78
  consume_string('"')
78
79
 
79
- when '#'
80
+ when :'#'
80
81
  if @s.peek =~ RE_NAME || valid_escape?
81
- value = consume_name
82
-
83
- token(:hash,
82
+ create_token(:hash,
84
83
  :type => start_identifier? ? :id : :unrestricted,
85
- :value => value)
84
+ :value => consume_name)
86
85
  else
87
- token(:delim, :value => char)
86
+ create_token(:delim, :value => char)
88
87
  end
89
88
 
90
- when '$'
89
+ when :'$'
91
90
  if @s.peek == '='
92
91
  @s.consume
93
- token(:suffix_match)
92
+ create_token(:suffix_match)
94
93
  else
95
- token(:delim, :value => char)
94
+ create_token(:delim, :value => char)
96
95
  end
97
96
 
98
- when "'"
97
+ when :"'"
99
98
  consume_string("'")
100
99
 
101
- when '('
102
- token(:'(')
100
+ when :'('
101
+ create_token(:'(')
103
102
 
104
- when ')'
105
- token(:')')
103
+ when :')'
104
+ create_token(:')')
106
105
 
107
- when '*'
106
+ when :*
108
107
  if @s.peek == '='
109
108
  @s.consume
110
- token(:substring_match)
109
+ create_token(:substring_match)
111
110
 
112
111
  elsif @options[:preserve_hacks] && @s.peek =~ RE_NAME_START
113
112
  # NON-STANDARD: IE * hack
@@ -115,121 +114,117 @@ module Crass
115
114
  consume_ident
116
115
 
117
116
  else
118
- token(:delim, :value => char)
117
+ create_token(:delim, :value => char)
119
118
  end
120
119
 
121
- when '+'
122
- if start_number?
120
+ when :+
121
+ if start_number?(char + @s.peek(2))
123
122
  @s.reconsume
124
123
  consume_numeric
125
124
  else
126
- token(:delim, :value => char)
125
+ create_token(:delim, :value => char)
127
126
  end
128
127
 
129
- when ','
130
- token(:comma)
128
+ when :','
129
+ create_token(:comma)
131
130
 
132
- when '-'
133
- if start_number?
131
+ when :-
132
+ if start_number?(char + @s.peek(2))
134
133
  @s.reconsume
135
134
  consume_numeric
136
- elsif start_identifier?
135
+ elsif start_identifier?(char + @s.peek(2))
137
136
  @s.reconsume
138
137
  consume_ident
139
138
  elsif @s.peek(2) == '->'
140
139
  @s.consume
141
140
  @s.consume
142
- token(:cdc)
141
+ create_token(:cdc)
143
142
  else
144
- token(:delim, :value => char)
143
+ create_token(:delim, :value => char)
145
144
  end
146
145
 
147
- when '.'
148
- if start_number?
146
+ when :'.'
147
+ if start_number?(char + @s.peek(2))
149
148
  @s.reconsume
150
149
  consume_numeric
151
150
  else
152
- token(:delim, :value => char)
151
+ create_token(:delim, :value => char)
153
152
  end
154
153
 
155
- when '/'
154
+ when :/
156
155
  if @s.peek == '*'
157
156
  @s.consume
158
157
 
159
158
  if text = @s.scan_until(RE_COMMENT_CLOSE)
160
159
  text.slice!(-2, 2)
161
160
  else
162
- text = @s.rest
161
+ text = @s.consume_rest
163
162
  end
164
163
 
165
164
  if @options[:preserve_comments]
166
- token(:comment, :value => text)
165
+ create_token(:comment, :value => text)
167
166
  else
168
167
  consume
169
168
  end
170
169
  else
171
- token(:delim, :value => char)
170
+ create_token(:delim, :value => char)
172
171
  end
173
172
 
174
- when ':'
175
- token(:colon)
173
+ when :':'
174
+ create_token(:colon)
176
175
 
177
- when ';'
178
- token(:semicolon)
176
+ when :';'
177
+ create_token(:semicolon)
179
178
 
180
- when '<'
179
+ when :<
181
180
  if @s.peek(3) == '!--'
182
181
  @s.consume
183
182
  @s.consume
184
183
  @s.consume
185
184
 
186
- token(:cdo)
185
+ create_token(:cdo)
187
186
  else
188
- token(:delim, :value => char)
187
+ create_token(:delim, :value => char)
189
188
  end
190
189
 
191
- when '@'
190
+ when :'@'
192
191
  if start_identifier?
193
- token(:at_keyword, :value => consume_name)
192
+ create_token(:at_keyword, :value => consume_name)
194
193
  else
195
- token(:delim, :value => char)
194
+ create_token(:delim, :value => char)
196
195
  end
197
196
 
198
- when '['
199
- token(:'[')
197
+ when :'['
198
+ create_token(:'[')
200
199
 
201
- when '\\'
200
+ when :'\\'
202
201
  if valid_escape?(char + @s.peek)
203
202
  @s.reconsume
204
203
  consume_ident
205
204
  else
206
- token(:delim,
205
+ create_token(:delim,
207
206
  :error => true,
208
207
  :value => char)
209
208
  end
210
209
 
211
- when ']'
212
- token(:']')
210
+ when :']'
211
+ create_token(:']')
213
212
 
214
- when '^'
213
+ when :'^'
215
214
  if @s.peek == '='
216
215
  @s.consume
217
- token(:prefix_match)
216
+ create_token(:prefix_match)
218
217
  else
219
- token(:delim, :value => char)
218
+ create_token(:delim, :value => char)
220
219
  end
221
220
 
222
- when '{'
223
- token(:'{')
221
+ when :'{'
222
+ create_token(:'{')
224
223
 
225
- when '}'
226
- token(:'}')
227
-
228
- when RE_DIGIT
229
- @s.reconsume
230
- consume_numeric
224
+ when :'}'
225
+ create_token(:'}')
231
226
 
232
- when 'U', 'u'
227
+ when :U, :u
233
228
  if @s.peek(2) =~ RE_UNICODE_RANGE_START
234
229
  @s.consume
235
230
  consume_unicode_range
@@ -238,58 +233,68 @@ module Crass
238
233
  consume_ident
239
234
  end
240
235
 
241
- when RE_NAME_START
242
- @s.reconsume
243
- consume_ident
244
-
245
- when '|'
236
+ when :|
246
237
  case @s.peek
247
238
  when '='
248
239
  @s.consume
249
- token(:dash_match)
240
+ create_token(:dash_match)
250
241
 
251
242
  when '|'
252
243
  @s.consume
253
- token(:column)
244
+ create_token(:column)
254
245
 
255
246
  else
256
- token(:delim, :value => char)
247
+ create_token(:delim, :value => char)
257
248
  end
258
249
 
259
- when '~'
250
+ when :~
260
251
  if @s.peek == '='
261
252
  @s.consume
262
- token(:include_match)
253
+ create_token(:include_match)
263
254
  else
264
- token(:delim, :value => char)
255
+ create_token(:delim, :value => char)
265
256
  end
266
257
 
267
258
  else
268
- token(:delim, :value => char)
259
+ case char
260
+ when RE_DIGIT
261
+ @s.reconsume
262
+ consume_numeric
263
+
264
+ when RE_NAME_START
265
+ @s.reconsume
266
+ consume_ident
267
+
268
+ else
269
+ create_token(:delim, :value => char)
270
+ end
269
271
  end
270
272
  end
271
273
 
272
274
  # Consumes the remnants of a bad URL and returns the consumed text.
273
275
  #
274
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-the-remnants-of-a-bad-url0
276
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-the-remnants-of-a-bad-url
275
277
  def consume_bad_url
276
278
  text = ''
277
279
 
278
- while true
279
- return text if @s.eos?
280
-
281
- if valid_escape?
280
+ until @s.eos?
281
+ if valid_escape?(@s.current + @s.peek)
282
+ text << consume_escaped
283
+ elsif valid_escape?
284
+ @s.consume
282
285
  text << consume_escaped
283
286
  else
284
287
  char = @s.consume
285
288
 
286
289
  if char == ')'
287
- return text
290
+ break
288
291
  else
289
292
  text << char
290
293
  end
291
294
  end
292
295
  end
296
+
297
+ text
293
298
  end
294
299
 
295
300
  # Consumes an escaped code point and returns its unescaped value.
@@ -325,33 +330,39 @@ module Crass
325
330
 
326
331
  # Consumes an ident-like token and returns it.
327
332
  #
328
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-an-ident-like-token0
333
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-an-ident-like-token
329
334
  def consume_ident
330
335
  value = consume_name
331
336
 
332
- if value.downcase == 'url' && @s.peek == '('
337
+ if @s.peek == '('
333
338
  @s.consume
334
- consume_url
335
- elsif @s.peek == '('
336
- @s.consume
337
- token(:function, :value => value)
339
+
340
+ if value.downcase == 'url'
341
+ consume_url
342
+ else
343
+ create_token(:function, :value => value)
344
+ end
338
345
  else
339
- token(:ident, :value => value)
346
+ create_token(:ident, :value => value)
340
347
  end
341
348
  end
342
349
 
343
350
  # Consumes a name and returns it.
344
351
  #
345
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-name0
352
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-name
346
353
  def consume_name
347
354
  result = ''
348
355
 
349
- while char = @s.peek
350
- if char =~ RE_NAME
351
- result << @s.consume
356
+ while true
357
+ if match = @s.scan(RE_NAME)
358
+ result << match
359
+ next
360
+ end
352
361
 
353
- elsif char == '\\' && valid_escape?
354
- result << @s.consume
362
+ char = @s.peek
363
+
364
+ if char == '\\' && valid_escape?
365
+ @s.consume
355
366
  result << consume_escaped
356
367
 
357
368
  # NON-STANDARD: IE * hack
@@ -391,12 +402,12 @@ module Crass
391
402
 
392
403
  # Consumes a numeric token and returns it.
393
404
  #
394
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-numeric-token0
405
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-numeric-token
395
406
  def consume_numeric
396
407
  number = consume_number
397
408
 
398
409
  if start_identifier?
399
- token(:dimension,
410
+ create_token(:dimension,
400
411
  :repr => number[0],
401
412
  :type => number[2],
402
413
  :unit => consume_name,
@@ -405,12 +416,13 @@ module Crass
405
416
  elsif @s.peek == '%'
406
417
  @s.consume
407
418
 
408
- token(:percentage,
419
+ create_token(:percentage,
409
420
  :repr => number[0],
421
+ :type => number[2],
410
422
  :value => number[1])
411
423
 
412
424
  else
413
- token(:number,
425
+ create_token(:number,
414
426
  :repr => number[0],
415
427
  :type => number[2],
416
428
  :value => number[1])
@@ -420,16 +432,18 @@ module Crass
420
432
  # Consumes a string token that ends at the given character, and returns the
421
433
  # token.
422
434
  #
423
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-string-token0
435
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-string-token
424
436
  def consume_string(ending)
425
437
  value = ''
426
438
 
427
- while char = @s.consume
428
- case char
429
- when ending then break
439
+ until @s.eos?
440
+ case char = @s.consume
441
+ when ending
442
+ break
430
443
 
431
444
  when "\n"
432
- return token(:bad_string,
445
+ @s.reconsume
446
+ return create_token(:bad_string,
433
447
  :error => true,
434
448
  :value => value)
435
449
 
@@ -443,7 +457,7 @@ module Crass
443
457
  @s.consume
444
458
 
445
459
  else
446
- value += consume_escaped
460
+ value << consume_escaped
447
461
  end
448
462
 
449
463
  else
@@ -451,15 +465,15 @@ module Crass
451
465
  end
452
466
  end
453
467
 
454
- token(:string, :value => value)
468
+ create_token(:string, :value => value)
455
469
  end
456
470
 
457
471
  # Consumes a Unicode range token and returns it. Assumes the initial "u+" or
458
472
  # "U+" has already been consumed.
459
473
  #
460
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-unicode-range-token0
474
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-unicode-range-token
461
475
  def consume_unicode_range
462
- value = @s.scan(RE_HEX)
476
+ value = @s.scan(RE_HEX) || ''
463
477
 
464
478
  while value.length < 6
465
479
  break unless @s.peek == '?'
@@ -471,37 +485,43 @@ module Crass
471
485
  if value.include?('?')
472
486
  range[:start] = value.gsub('?', '0').hex
473
487
  range[:end] = value.gsub('?', 'F').hex
474
- return token(:unicode_range, range)
488
+ return create_token(:unicode_range, range)
475
489
  end
476
490
 
477
491
  range[:start] = value.hex
478
492
 
479
493
  if @s.peek(2) =~ RE_UNICODE_RANGE_END
480
- range[:value] << @s.consume << end_value = @s.scan(RE_HEX)
481
- range[:end] = end_value.hex
494
+ @s.consume
495
+ range[:end] = (@s.scan(RE_HEX) || '').hex
482
496
  else
483
497
  range[:end] = range[:start]
484
498
  end
485
499
 
486
- token(:unicode_range, range)
500
+ create_token(:unicode_range, range)
487
501
  end
488
502
 
489
503
  # Consumes a URL token and returns it. Assumes the original "url(" has
490
504
  # already been consumed.
491
505
  #
492
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-url-token0
506
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-url-token
493
507
  def consume_url
494
508
  value = ''
495
509
 
496
510
  @s.scan(RE_WHITESPACE)
497
- return token(:url, :value => value) if @s.eos?
511
+
512
+ if @s.eos?
513
+ return create_token(:url, :value => value)
514
+ end
498
515
 
499
516
  # Quoted URL.
500
- if @s.peek =~ RE_URL_QUOTE
517
+ next_char = @s.peek
518
+
519
+ if next_char == "'" || next_char == '"'
501
520
  string = consume_string(@s.consume)
502
521
 
503
522
  if string[:node] == :bad_string
504
- return token(:bad_url, :value => string[:value] + consume_bad_url)
523
+ return create_token(:bad_url,
524
+ :value => string[:value] + consume_bad_url)
505
525
  end
506
526
 
507
527
  value = string[:value]
@@ -509,16 +529,17 @@ module Crass
509
529
 
510
530
  if @s.eos? || @s.peek == ')'
511
531
  @s.consume
512
- return token(:url, :value => value)
532
+ return create_token(:url, :value => value)
513
533
  else
514
- return token(:bad_url, :value => value + consume_bad_url)
534
+ return create_token(:bad_url, :value => value + consume_bad_url)
515
535
  end
516
536
  end
517
537
 
518
538
  # Unquoted URL.
519
- while !@s.eos?
539
+ until @s.eos?
520
540
  case char = @s.consume
521
- when ')' then break
541
+ when ')'
542
+ break
522
543
 
523
544
  when RE_WHITESPACE
524
545
  @s.scan(RE_WHITESPACE)
@@ -527,19 +548,19 @@ module Crass
527
548
  @s.consume
528
549
  break
529
550
  else
530
- return token(:bad_url, :value => value + consume_bad_url)
551
+ return create_token(:bad_url, :value => value + consume_bad_url)
531
552
  end
532
553
 
533
554
  when '"', "'", '(', RE_NON_PRINTABLE
534
- return token(:bad_url,
555
+ return create_token(:bad_url,
535
556
  :error => true,
536
557
  :value => value + consume_bad_url)
537
558
 
538
559
  when '\\'
539
- if valid_escape?
560
+ if valid_escape?(char + @s.peek)
540
561
  value << consume_escaped
541
562
  else
542
- return token(:bad_url,
563
+ return create_token(:bad_url,
543
564
  :error => true,
544
565
  :value => value + consume_bad_url
545
566
  )
@@ -550,7 +571,7 @@ module Crass
550
571
  end
551
572
  end
552
573
 
553
- token(:url, :value => value)
574
+ create_token(:url, :value => value)
554
575
  end
555
576
 
556
577
  # Converts a valid CSS number string into a number and returns the number.
@@ -571,6 +592,15 @@ module Crass
571
592
  s * (i + f * 10**-d) * 10**(t * e)
572
593
  end
573
594
 
595
+ # Creates and returns a new token with the given _properties_.
596
+ def create_token(type, properties = {})
597
+ {
598
+ :node => type,
599
+ :pos => @s.marker,
600
+ :raw => @s.marked
601
+ }.merge!(properties)
602
+ end
603
+
574
604
  # Preprocesses _input_ to prepare it for the tokenizer.
575
605
  #
576
606
  # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#input-preprocessing
@@ -630,25 +660,14 @@ module Crass
630
660
  end
631
661
  end
632
662
 
633
- # Creates and returns a new token with the given _properties_.
634
- def token(type, properties = {})
635
- {
636
- :node => type,
637
- :pos => @s.marker,
638
- :raw => @s.marked
639
- }.merge!(properties)
640
- end
641
-
642
663
  # Tokenizes the input stream and returns an array of tokens.
643
664
  def tokenize
644
665
  @s.reset
645
666
 
646
667
  tokens = []
647
- token = consume
648
668
 
649
- while token && token[:node] != :eof
669
+ while token = consume
650
670
  tokens << token
651
- token = consume
652
671
  end
653
672
 
654
673
  tokens