crass 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/crass/scanner.rb CHANGED
@@ -1,4 +1,5 @@
1
1
  # encoding: utf-8
2
+ require 'strscan'
2
3
 
3
4
  module Crass
4
5
 
@@ -17,13 +18,12 @@ module Crass
17
18
  # position, not a byte position, so it accounts for multi-byte characters.
18
19
  attr_accessor :pos
19
20
 
20
- # The string being scanned.
21
- attr_reader :string
22
-
23
21
  # Creates a Scanner instance for the given _input_ string or IO instance.
24
22
  def initialize(input)
25
- @string = input.is_a?(IO) ? input.read : input.to_s
26
- @chars = @string.chars.to_a
23
+ string = input.is_a?(IO) ? input.read : input.to_s
24
+
25
+ @chars = string.chars.to_a
26
+ @scanner = StringScanner.new(string)
27
27
 
28
28
  reset
29
29
  end
@@ -31,20 +31,20 @@ module Crass
31
31
  # Consumes the next character and returns it, advancing the pointer, or
32
32
  # an empty string if the end of the string has been reached.
33
33
  def consume
34
- @current = @chars[@pos] || ''
35
- @pos += 1 if @current
36
- @current
34
+ if @pos == @len
35
+ ''
36
+ else
37
+ @pos += 1
38
+ @current = @scanner.getch
39
+ end
37
40
  end
38
41
 
39
42
  # Consumes the rest of the string and returns it, advancing the pointer to
40
43
  # the end of the string. Returns an empty string if the end of the string
41
44
  # has already been reached.
42
45
  def consume_rest
43
- rest = @string[@pos..@len] || ''
44
- @current = rest[-1] || ''
45
- @pos = @len
46
-
47
- rest
46
+ @pos = @len
47
+ @scanner.rest
48
48
  end
49
49
 
50
50
  # Returns `true` if the end of the string has been reached, `false`
@@ -62,7 +62,7 @@ module Crass
62
62
  # Returns the substring between {#marker} and {#pos}, without altering the
63
63
  # pointer.
64
64
  def marked
65
- if result = @chars[@marker...@pos]
65
+ if result = @chars[@marker, @pos - @marker]
66
66
  result.join('')
67
67
  else
68
68
  ''
@@ -73,24 +73,21 @@ module Crass
73
73
  # doesn't consume them. The number of characters returned may be less than
74
74
  # _length_ if the end of the string is reached.
75
75
  def peek(length = 1)
76
- if result = @chars[@pos, length]
77
- result.join('')
78
- else
79
- ''
80
- end
76
+ @scanner.peek(length)
81
77
  end
82
78
 
83
79
  # Moves the pointer back one character without changing the value of
84
80
  # {#current}. The next call to {#consume} will re-consume the current
85
81
  # character.
86
82
  def reconsume
83
+ @scanner.unscan
87
84
  @pos -= 1 if @pos > 0
88
85
  end
89
86
 
90
87
  # Resets the pointer to the beginning of the string.
91
88
  def reset
92
89
  @current = nil
93
- @len = @string.length
90
+ @len = @chars.size
94
91
  @marker = 0
95
92
  @pos = 0
96
93
  end
@@ -99,29 +96,30 @@ module Crass
99
96
  # matched substring will be returned and the pointer will be advanced.
100
97
  # Otherwise, `nil` will be returned.
101
98
  def scan(pattern)
102
- match = pattern.match(@string, @pos)
103
- return nil if match.nil? || match.begin(0) != @pos
104
-
105
- @pos = match.end(0)
106
- @current = @chars[@pos - 1]
99
+ if match = @scanner.scan(pattern)
100
+ @pos += match.size
101
+ @current = @chars[@pos - 1]
102
+ end
107
103
 
108
- match[0]
104
+ match
109
105
  end
110
106
 
111
107
  # Scans the string until the _pattern_ is matched. Returns the substring up
112
108
  # to and including the end of the match, and advances the pointer. If there
113
109
  # is no match, `nil` is returned and the pointer is not advanced.
114
110
  def scan_until(pattern)
115
- start = @pos
116
- match = pattern.match(@string, @pos)
117
-
118
- return nil if match.nil?
119
-
120
- @pos = match.end(0)
121
- @current = @chars[@pos - 1]
111
+ if match = @scanner.scan_until(pattern)
112
+ @pos += match.size
113
+ @current = @chars[@pos - 1]
114
+ end
122
115
 
123
- @string[start...@pos]
116
+ match
124
117
  end
125
118
  end
126
119
 
120
+ # Returns the string being scanned.
121
+ def string
122
+ @scanner.string
123
+ end
124
+
127
125
  end
@@ -32,7 +32,6 @@ module Crass
32
32
 
33
33
  RE_UNICODE_RANGE_START = /\+(?:[0-9A-Fa-f]|\?)/
34
34
  RE_UNICODE_RANGE_END = /-[0-9A-Fa-f]/
35
- RE_URL_QUOTE = /["']/
36
35
  RE_WHITESPACE = /[\n\u0009\u0020]+/
37
36
 
38
37
  # -- Class Methods ---------------------------------------------------------
@@ -67,47 +66,47 @@ module Crass
67
66
  #
68
67
  # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-token0
69
68
  def consume
70
- return token(:eof) if @s.eos?
69
+ return nil if @s.eos?
71
70
 
72
71
  @s.mark
73
- return token(:whitespace) if @s.scan(RE_WHITESPACE)
72
+ return create_token(:whitespace) if @s.scan(RE_WHITESPACE)
74
73
 
75
- case char = @s.consume
76
- when '"'
74
+ char = @s.consume
75
+
76
+ case char.to_sym
77
+ when :'"'
77
78
  consume_string('"')
78
79
 
79
- when '#'
80
+ when :'#'
80
81
  if @s.peek =~ RE_NAME || valid_escape?
81
- value = consume_name
82
-
83
- token(:hash,
82
+ create_token(:hash,
84
83
  :type => start_identifier? ? :id : :unrestricted,
85
- :value => value)
84
+ :value => consume_name)
86
85
  else
87
- token(:delim, :value => char)
86
+ create_token(:delim, :value => char)
88
87
  end
89
88
 
90
- when '$'
89
+ when :'$'
91
90
  if @s.peek == '='
92
91
  @s.consume
93
- token(:suffix_match)
92
+ create_token(:suffix_match)
94
93
  else
95
- token(:delim, :value => char)
94
+ create_token(:delim, :value => char)
96
95
  end
97
96
 
98
- when "'"
97
+ when :"'"
99
98
  consume_string("'")
100
99
 
101
- when '('
102
- token(:'(')
100
+ when :'('
101
+ create_token(:'(')
103
102
 
104
- when ')'
105
- token(:')')
103
+ when :')'
104
+ create_token(:')')
106
105
 
107
- when '*'
106
+ when :*
108
107
  if @s.peek == '='
109
108
  @s.consume
110
- token(:substring_match)
109
+ create_token(:substring_match)
111
110
 
112
111
  elsif @options[:preserve_hacks] && @s.peek =~ RE_NAME_START
113
112
  # NON-STANDARD: IE * hack
@@ -115,121 +114,117 @@ module Crass
115
114
  consume_ident
116
115
 
117
116
  else
118
- token(:delim, :value => char)
117
+ create_token(:delim, :value => char)
119
118
  end
120
119
 
121
- when '+'
122
- if start_number?
120
+ when :+
121
+ if start_number?(char + @s.peek(2))
123
122
  @s.reconsume
124
123
  consume_numeric
125
124
  else
126
- token(:delim, :value => char)
125
+ create_token(:delim, :value => char)
127
126
  end
128
127
 
129
- when ','
130
- token(:comma)
128
+ when :','
129
+ create_token(:comma)
131
130
 
132
- when '-'
133
- if start_number?
131
+ when :-
132
+ if start_number?(char + @s.peek(2))
134
133
  @s.reconsume
135
134
  consume_numeric
136
- elsif start_identifier?
135
+ elsif start_identifier?(char + @s.peek(2))
137
136
  @s.reconsume
138
137
  consume_ident
139
138
  elsif @s.peek(2) == '->'
140
139
  @s.consume
141
140
  @s.consume
142
- token(:cdc)
141
+ create_token(:cdc)
143
142
  else
144
- token(:delim, :value => char)
143
+ create_token(:delim, :value => char)
145
144
  end
146
145
 
147
- when '.'
148
- if start_number?
146
+ when :'.'
147
+ if start_number?(char + @s.peek(2))
149
148
  @s.reconsume
150
149
  consume_numeric
151
150
  else
152
- token(:delim, :value => char)
151
+ create_token(:delim, :value => char)
153
152
  end
154
153
 
155
- when '/'
154
+ when :/
156
155
  if @s.peek == '*'
157
156
  @s.consume
158
157
 
159
158
  if text = @s.scan_until(RE_COMMENT_CLOSE)
160
159
  text.slice!(-2, 2)
161
160
  else
162
- text = @s.rest
161
+ text = @s.consume_rest
163
162
  end
164
163
 
165
164
  if @options[:preserve_comments]
166
- token(:comment, :value => text)
165
+ create_token(:comment, :value => text)
167
166
  else
168
167
  consume
169
168
  end
170
169
  else
171
- token(:delim, :value => char)
170
+ create_token(:delim, :value => char)
172
171
  end
173
172
 
174
- when ':'
175
- token(:colon)
173
+ when :':'
174
+ create_token(:colon)
176
175
 
177
- when ';'
178
- token(:semicolon)
176
+ when :';'
177
+ create_token(:semicolon)
179
178
 
180
- when '<'
179
+ when :<
181
180
  if @s.peek(3) == '!--'
182
181
  @s.consume
183
182
  @s.consume
184
183
  @s.consume
185
184
 
186
- token(:cdo)
185
+ create_token(:cdo)
187
186
  else
188
- token(:delim, :value => char)
187
+ create_token(:delim, :value => char)
189
188
  end
190
189
 
191
- when '@'
190
+ when :'@'
192
191
  if start_identifier?
193
- token(:at_keyword, :value => consume_name)
192
+ create_token(:at_keyword, :value => consume_name)
194
193
  else
195
- token(:delim, :value => char)
194
+ create_token(:delim, :value => char)
196
195
  end
197
196
 
198
- when '['
199
- token(:'[')
197
+ when :'['
198
+ create_token(:'[')
200
199
 
201
- when '\\'
200
+ when :'\\'
202
201
  if valid_escape?(char + @s.peek)
203
202
  @s.reconsume
204
203
  consume_ident
205
204
  else
206
- token(:delim,
205
+ create_token(:delim,
207
206
  :error => true,
208
207
  :value => char)
209
208
  end
210
209
 
211
- when ']'
212
- token(:']')
210
+ when :']'
211
+ create_token(:']')
213
212
 
214
- when '^'
213
+ when :'^'
215
214
  if @s.peek == '='
216
215
  @s.consume
217
- token(:prefix_match)
216
+ create_token(:prefix_match)
218
217
  else
219
- token(:delim, :value => char)
218
+ create_token(:delim, :value => char)
220
219
  end
221
220
 
222
- when '{'
223
- token(:'{')
221
+ when :'{'
222
+ create_token(:'{')
224
223
 
225
- when '}'
226
- token(:'}')
227
-
228
- when RE_DIGIT
229
- @s.reconsume
230
- consume_numeric
224
+ when :'}'
225
+ create_token(:'}')
231
226
 
232
- when 'U', 'u'
227
+ when :U, :u
233
228
  if @s.peek(2) =~ RE_UNICODE_RANGE_START
234
229
  @s.consume
235
230
  consume_unicode_range
@@ -238,58 +233,68 @@ module Crass
238
233
  consume_ident
239
234
  end
240
235
 
241
- when RE_NAME_START
242
- @s.reconsume
243
- consume_ident
244
-
245
- when '|'
236
+ when :|
246
237
  case @s.peek
247
238
  when '='
248
239
  @s.consume
249
- token(:dash_match)
240
+ create_token(:dash_match)
250
241
 
251
242
  when '|'
252
243
  @s.consume
253
- token(:column)
244
+ create_token(:column)
254
245
 
255
246
  else
256
- token(:delim, :value => char)
247
+ create_token(:delim, :value => char)
257
248
  end
258
249
 
259
- when '~'
250
+ when :~
260
251
  if @s.peek == '='
261
252
  @s.consume
262
- token(:include_match)
253
+ create_token(:include_match)
263
254
  else
264
- token(:delim, :value => char)
255
+ create_token(:delim, :value => char)
265
256
  end
266
257
 
267
258
  else
268
- token(:delim, :value => char)
259
+ case char
260
+ when RE_DIGIT
261
+ @s.reconsume
262
+ consume_numeric
263
+
264
+ when RE_NAME_START
265
+ @s.reconsume
266
+ consume_ident
267
+
268
+ else
269
+ create_token(:delim, :value => char)
270
+ end
269
271
  end
270
272
  end
271
273
 
272
274
  # Consumes the remnants of a bad URL and returns the consumed text.
273
275
  #
274
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-the-remnants-of-a-bad-url0
276
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-the-remnants-of-a-bad-url
275
277
  def consume_bad_url
276
278
  text = ''
277
279
 
278
- while true
279
- return text if @s.eos?
280
-
281
- if valid_escape?
280
+ until @s.eos?
281
+ if valid_escape?(@s.current + @s.peek)
282
+ text << consume_escaped
283
+ elsif valid_escape?
284
+ @s.consume
282
285
  text << consume_escaped
283
286
  else
284
287
  char = @s.consume
285
288
 
286
289
  if char == ')'
287
- return text
290
+ break
288
291
  else
289
292
  text << char
290
293
  end
291
294
  end
292
295
  end
296
+
297
+ text
293
298
  end
294
299
 
295
300
  # Consumes an escaped code point and returns its unescaped value.
@@ -325,33 +330,39 @@ module Crass
325
330
 
326
331
  # Consumes an ident-like token and returns it.
327
332
  #
328
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-an-ident-like-token0
333
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-an-ident-like-token
329
334
  def consume_ident
330
335
  value = consume_name
331
336
 
332
- if value.downcase == 'url' && @s.peek == '('
337
+ if @s.peek == '('
333
338
  @s.consume
334
- consume_url
335
- elsif @s.peek == '('
336
- @s.consume
337
- token(:function, :value => value)
339
+
340
+ if value.downcase == 'url'
341
+ consume_url
342
+ else
343
+ create_token(:function, :value => value)
344
+ end
338
345
  else
339
- token(:ident, :value => value)
346
+ create_token(:ident, :value => value)
340
347
  end
341
348
  end
342
349
 
343
350
  # Consumes a name and returns it.
344
351
  #
345
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-name0
352
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-name
346
353
  def consume_name
347
354
  result = ''
348
355
 
349
- while char = @s.peek
350
- if char =~ RE_NAME
351
- result << @s.consume
356
+ while true
357
+ if match = @s.scan(RE_NAME)
358
+ result << match
359
+ next
360
+ end
352
361
 
353
- elsif char == '\\' && valid_escape?
354
- result << @s.consume
362
+ char = @s.peek
363
+
364
+ if char == '\\' && valid_escape?
365
+ @s.consume
355
366
  result << consume_escaped
356
367
 
357
368
  # NON-STANDARD: IE * hack
@@ -391,12 +402,12 @@ module Crass
391
402
 
392
403
  # Consumes a numeric token and returns it.
393
404
  #
394
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-numeric-token0
405
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-numeric-token
395
406
  def consume_numeric
396
407
  number = consume_number
397
408
 
398
409
  if start_identifier?
399
- token(:dimension,
410
+ create_token(:dimension,
400
411
  :repr => number[0],
401
412
  :type => number[2],
402
413
  :unit => consume_name,
@@ -405,12 +416,13 @@ module Crass
405
416
  elsif @s.peek == '%'
406
417
  @s.consume
407
418
 
408
- token(:percentage,
419
+ create_token(:percentage,
409
420
  :repr => number[0],
421
+ :type => number[2],
410
422
  :value => number[1])
411
423
 
412
424
  else
413
- token(:number,
425
+ create_token(:number,
414
426
  :repr => number[0],
415
427
  :type => number[2],
416
428
  :value => number[1])
@@ -420,16 +432,18 @@ module Crass
420
432
  # Consumes a string token that ends at the given character, and returns the
421
433
  # token.
422
434
  #
423
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-string-token0
435
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-string-token
424
436
  def consume_string(ending)
425
437
  value = ''
426
438
 
427
- while char = @s.consume
428
- case char
429
- when ending then break
439
+ until @s.eos?
440
+ case char = @s.consume
441
+ when ending
442
+ break
430
443
 
431
444
  when "\n"
432
- return token(:bad_string,
445
+ @s.reconsume
446
+ return create_token(:bad_string,
433
447
  :error => true,
434
448
  :value => value)
435
449
 
@@ -443,7 +457,7 @@ module Crass
443
457
  @s.consume
444
458
 
445
459
  else
446
- value += consume_escaped
460
+ value << consume_escaped
447
461
  end
448
462
 
449
463
  else
@@ -451,15 +465,15 @@ module Crass
451
465
  end
452
466
  end
453
467
 
454
- token(:string, :value => value)
468
+ create_token(:string, :value => value)
455
469
  end
456
470
 
457
471
  # Consumes a Unicode range token and returns it. Assumes the initial "u+" or
458
472
  # "U+" has already been consumed.
459
473
  #
460
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-unicode-range-token0
474
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-unicode-range-token
461
475
  def consume_unicode_range
462
- value = @s.scan(RE_HEX)
476
+ value = @s.scan(RE_HEX) || ''
463
477
 
464
478
  while value.length < 6
465
479
  break unless @s.peek == '?'
@@ -471,37 +485,43 @@ module Crass
471
485
  if value.include?('?')
472
486
  range[:start] = value.gsub('?', '0').hex
473
487
  range[:end] = value.gsub('?', 'F').hex
474
- return token(:unicode_range, range)
488
+ return create_token(:unicode_range, range)
475
489
  end
476
490
 
477
491
  range[:start] = value.hex
478
492
 
479
493
  if @s.peek(2) =~ RE_UNICODE_RANGE_END
480
- range[:value] << @s.consume << end_value = @s.scan(RE_HEX)
481
- range[:end] = end_value.hex
494
+ @s.consume
495
+ range[:end] = (@s.scan(RE_HEX) || '').hex
482
496
  else
483
497
  range[:end] = range[:start]
484
498
  end
485
499
 
486
- token(:unicode_range, range)
500
+ create_token(:unicode_range, range)
487
501
  end
488
502
 
489
503
  # Consumes a URL token and returns it. Assumes the original "url(" has
490
504
  # already been consumed.
491
505
  #
492
- # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-url-token0
506
+ # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#consume-a-url-token
493
507
  def consume_url
494
508
  value = ''
495
509
 
496
510
  @s.scan(RE_WHITESPACE)
497
- return token(:url, :value => value) if @s.eos?
511
+
512
+ if @s.eos?
513
+ return create_token(:url, :value => value)
514
+ end
498
515
 
499
516
  # Quoted URL.
500
- if @s.peek =~ RE_URL_QUOTE
517
+ next_char = @s.peek
518
+
519
+ if next_char == "'" || next_char == '"'
501
520
  string = consume_string(@s.consume)
502
521
 
503
522
  if string[:node] == :bad_string
504
- return token(:bad_url, :value => string[:value] + consume_bad_url)
523
+ return create_token(:bad_url,
524
+ :value => string[:value] + consume_bad_url)
505
525
  end
506
526
 
507
527
  value = string[:value]
@@ -509,16 +529,17 @@ module Crass
509
529
 
510
530
  if @s.eos? || @s.peek == ')'
511
531
  @s.consume
512
- return token(:url, :value => value)
532
+ return create_token(:url, :value => value)
513
533
  else
514
- return token(:bad_url, :value => value + consume_bad_url)
534
+ return create_token(:bad_url, :value => value + consume_bad_url)
515
535
  end
516
536
  end
517
537
 
518
538
  # Unquoted URL.
519
- while !@s.eos?
539
+ until @s.eos?
520
540
  case char = @s.consume
521
- when ')' then break
541
+ when ')'
542
+ break
522
543
 
523
544
  when RE_WHITESPACE
524
545
  @s.scan(RE_WHITESPACE)
@@ -527,19 +548,19 @@ module Crass
527
548
  @s.consume
528
549
  break
529
550
  else
530
- return token(:bad_url, :value => value + consume_bad_url)
551
+ return create_token(:bad_url, :value => value + consume_bad_url)
531
552
  end
532
553
 
533
554
  when '"', "'", '(', RE_NON_PRINTABLE
534
- return token(:bad_url,
555
+ return create_token(:bad_url,
535
556
  :error => true,
536
557
  :value => value + consume_bad_url)
537
558
 
538
559
  when '\\'
539
- if valid_escape?
560
+ if valid_escape?(char + @s.peek)
540
561
  value << consume_escaped
541
562
  else
542
- return token(:bad_url,
563
+ return create_token(:bad_url,
543
564
  :error => true,
544
565
  :value => value + consume_bad_url
545
566
  )
@@ -550,7 +571,7 @@ module Crass
550
571
  end
551
572
  end
552
573
 
553
- token(:url, :value => value)
574
+ create_token(:url, :value => value)
554
575
  end
555
576
 
556
577
  # Converts a valid CSS number string into a number and returns the number.
@@ -571,6 +592,15 @@ module Crass
571
592
  s * (i + f * 10**-d) * 10**(t * e)
572
593
  end
573
594
 
595
+ # Creates and returns a new token with the given _properties_.
596
+ def create_token(type, properties = {})
597
+ {
598
+ :node => type,
599
+ :pos => @s.marker,
600
+ :raw => @s.marked
601
+ }.merge!(properties)
602
+ end
603
+
574
604
  # Preprocesses _input_ to prepare it for the tokenizer.
575
605
  #
576
606
  # http://www.w3.org/TR/2013/WD-css-syntax-3-20130919/#input-preprocessing
@@ -630,25 +660,14 @@ module Crass
630
660
  end
631
661
  end
632
662
 
633
- # Creates and returns a new token with the given _properties_.
634
- def token(type, properties = {})
635
- {
636
- :node => type,
637
- :pos => @s.marker,
638
- :raw => @s.marked
639
- }.merge!(properties)
640
- end
641
-
642
663
  # Tokenizes the input stream and returns an array of tokens.
643
664
  def tokenize
644
665
  @s.reset
645
666
 
646
667
  tokens = []
647
- token = consume
648
668
 
649
- while token && token[:node] != :eof
669
+ while token = consume
650
670
  tokens << token
651
- token = consume
652
671
  end
653
672
 
654
673
  tokens