coderay 0.9.8 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (87) hide show
  1. data/{lib/README → README_INDEX.rdoc} +10 -21
  2. data/Rakefile +6 -6
  3. data/bin/coderay +193 -64
  4. data/lib/coderay.rb +61 -105
  5. data/lib/coderay/duo.rb +17 -21
  6. data/lib/coderay/encoder.rb +100 -112
  7. data/lib/coderay/encoders/_map.rb +12 -7
  8. data/lib/coderay/encoders/comment_filter.rb +12 -30
  9. data/lib/coderay/encoders/count.rb +29 -11
  10. data/lib/coderay/encoders/debug.rb +32 -20
  11. data/lib/coderay/encoders/div.rb +13 -9
  12. data/lib/coderay/encoders/filter.rb +34 -51
  13. data/lib/coderay/encoders/html.rb +155 -161
  14. data/lib/coderay/encoders/html/css.rb +4 -9
  15. data/lib/coderay/encoders/html/numbering.rb +115 -0
  16. data/lib/coderay/encoders/html/output.rb +22 -70
  17. data/lib/coderay/encoders/json.rb +59 -45
  18. data/lib/coderay/encoders/lines_of_code.rb +12 -57
  19. data/lib/coderay/encoders/null.rb +6 -14
  20. data/lib/coderay/encoders/page.rb +13 -9
  21. data/lib/coderay/encoders/span.rb +13 -9
  22. data/lib/coderay/encoders/statistic.rb +58 -39
  23. data/lib/coderay/encoders/terminal.rb +179 -0
  24. data/lib/coderay/encoders/text.rb +31 -17
  25. data/lib/coderay/encoders/token_kind_filter.rb +111 -0
  26. data/lib/coderay/encoders/xml.rb +19 -18
  27. data/lib/coderay/encoders/yaml.rb +37 -9
  28. data/lib/coderay/for_redcloth.rb +4 -4
  29. data/lib/coderay/helpers/file_type.rb +127 -246
  30. data/lib/coderay/helpers/gzip.rb +41 -0
  31. data/lib/coderay/helpers/plugin.rb +241 -306
  32. data/lib/coderay/helpers/word_list.rb +65 -126
  33. data/lib/coderay/scanner.rb +173 -156
  34. data/lib/coderay/scanners/_map.rb +18 -17
  35. data/lib/coderay/scanners/c.rb +63 -77
  36. data/lib/coderay/scanners/clojure.rb +217 -0
  37. data/lib/coderay/scanners/cpp.rb +71 -84
  38. data/lib/coderay/scanners/css.rb +103 -120
  39. data/lib/coderay/scanners/debug.rb +47 -44
  40. data/lib/coderay/scanners/delphi.rb +70 -76
  41. data/lib/coderay/scanners/diff.rb +141 -50
  42. data/lib/coderay/scanners/erb.rb +81 -0
  43. data/lib/coderay/scanners/groovy.rb +104 -113
  44. data/lib/coderay/scanners/haml.rb +168 -0
  45. data/lib/coderay/scanners/html.rb +181 -110
  46. data/lib/coderay/scanners/java.rb +73 -75
  47. data/lib/coderay/scanners/java/builtin_types.rb +2 -0
  48. data/lib/coderay/scanners/java_script.rb +90 -101
  49. data/lib/coderay/scanners/json.rb +40 -53
  50. data/lib/coderay/scanners/php.rb +123 -147
  51. data/lib/coderay/scanners/python.rb +93 -91
  52. data/lib/coderay/scanners/raydebug.rb +66 -0
  53. data/lib/coderay/scanners/ruby.rb +343 -326
  54. data/lib/coderay/scanners/ruby/patterns.rb +40 -106
  55. data/lib/coderay/scanners/ruby/string_state.rb +71 -0
  56. data/lib/coderay/scanners/sql.rb +80 -66
  57. data/lib/coderay/scanners/text.rb +26 -0
  58. data/lib/coderay/scanners/xml.rb +1 -1
  59. data/lib/coderay/scanners/yaml.rb +74 -73
  60. data/lib/coderay/style.rb +10 -7
  61. data/lib/coderay/styles/_map.rb +3 -3
  62. data/lib/coderay/styles/alpha.rb +143 -0
  63. data/lib/coderay/token_kinds.rb +90 -0
  64. data/lib/coderay/tokens.rb +102 -277
  65. data/lib/coderay/tokens_proxy.rb +55 -0
  66. data/lib/coderay/version.rb +3 -0
  67. data/test/functional/basic.rb +200 -18
  68. data/test/functional/examples.rb +130 -0
  69. data/test/functional/for_redcloth.rb +15 -8
  70. data/test/functional/suite.rb +9 -6
  71. metadata +103 -123
  72. data/FOLDERS +0 -53
  73. data/bin/coderay_stylesheet +0 -4
  74. data/lib/coderay/encoders/html/numerization.rb +0 -133
  75. data/lib/coderay/encoders/term.rb +0 -158
  76. data/lib/coderay/encoders/token_class_filter.rb +0 -84
  77. data/lib/coderay/helpers/gzip_simple.rb +0 -123
  78. data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
  79. data/lib/coderay/scanners/plaintext.rb +0 -20
  80. data/lib/coderay/scanners/rhtml.rb +0 -78
  81. data/lib/coderay/scanners/scheme.rb +0 -145
  82. data/lib/coderay/styles/cycnus.rb +0 -152
  83. data/lib/coderay/styles/murphy.rb +0 -134
  84. data/lib/coderay/token_classes.rb +0 -86
  85. data/test/functional/load_plugin_scanner.rb +0 -11
  86. data/test/functional/vhdl.rb +0 -126
  87. data/test/functional/word_list.rb +0 -79
@@ -3,14 +3,19 @@ module Scanners
3
3
 
4
4
  load :html
5
5
 
6
+ # Scanner for PHP.
7
+ #
6
8
  # Original by Stefan Walk.
7
9
  class PHP < Scanner
8
10
 
9
11
  register_for :php
10
12
  file_extension 'php'
13
+ encoding 'BINARY'
11
14
 
12
15
  KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
13
16
 
17
+ protected
18
+
14
19
  def setup
15
20
  @html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
16
21
  end
@@ -20,7 +25,7 @@ module Scanners
20
25
  @html_scanner.reset
21
26
  end
22
27
 
23
- module Words
28
+ module Words # :nodoc:
24
29
 
25
30
  # according to http://www.php.net/manual/en/reserved.keywords.php
26
31
  KEYWORDS = %w[
@@ -176,20 +181,20 @@ module Scanners
176
181
  $argc $argv
177
182
  ]
178
183
 
179
- IDENT_KIND = CaseIgnoringWordList.new(:ident).
180
- add(KEYWORDS, :reserved).
181
- add(TYPES, :pre_type).
182
- add(LANGUAGE_CONSTRUCTS, :reserved).
184
+ IDENT_KIND = WordList::CaseIgnoring.new(:ident).
185
+ add(KEYWORDS, :keyword).
186
+ add(TYPES, :predefined_type).
187
+ add(LANGUAGE_CONSTRUCTS, :keyword).
183
188
  add(BUILTIN_FUNCTIONS, :predefined).
184
- add(CLASSES, :pre_constant).
189
+ add(CLASSES, :predefined_constant).
185
190
  add(EXCEPTIONS, :exception).
186
- add(CONSTANTS, :pre_constant)
191
+ add(CONSTANTS, :predefined_constant)
187
192
 
188
193
  VARIABLE_KIND = WordList.new(:local_variable).
189
194
  add(PREDEFINED, :predefined)
190
195
  end
191
196
 
192
- module RE
197
+ module RE # :nodoc:
193
198
 
194
199
  PHP_START = /
195
200
  <script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
@@ -224,17 +229,13 @@ module Scanners
224
229
 
225
230
  end
226
231
 
227
- def scan_tokens tokens, options
228
- if string.respond_to?(:encoding)
229
- unless string.encoding == Encoding::ASCII_8BIT
230
- self.string = string.encode Encoding::ASCII_8BIT,
231
- :invalid => :replace, :undef => :replace, :replace => '?'
232
- end
233
- end
232
+ protected
233
+
234
+ def scan_tokens encoder, options
234
235
 
235
236
  if check(RE::PHP_START) || # starts with <?
236
- (match?(/\s*<\S/) && exist?(RE::PHP_START)) || # starts with tag and contains <?
237
- exist?(RE::HTML_INDICATOR) ||
237
+ (match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
238
+ check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
238
239
  check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
239
240
  # is HTML with embedded PHP, so start with HTML
240
241
  states = [:initial]
@@ -252,29 +253,24 @@ module Scanners
252
253
 
253
254
  until eos?
254
255
 
255
- match = nil
256
- kind = nil
257
-
258
256
  case states.last
259
257
 
260
258
  when :initial # HTML
261
- if scan RE::PHP_START
262
- kind = :inline_delimiter
259
+ if match = scan(RE::PHP_START)
260
+ encoder.text_token match, :inline_delimiter
263
261
  label_expected = true
264
262
  states << :php
265
263
  else
266
264
  match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
267
265
  @html_scanner.tokenize match unless match.empty?
268
- next
269
266
  end
270
267
 
271
268
  when :php
272
269
  if match = scan(/\s+/)
273
- tokens << [match, :space]
274
- next
270
+ encoder.text_token match, :space
275
271
 
276
- elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
277
- kind = :comment
272
+ elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
273
+ encoder.text_token match, :comment
278
274
 
279
275
  elsif match = scan(RE::IDENTIFIER)
280
276
  kind = Words::IDENT_KIND[match]
@@ -285,7 +281,7 @@ module Scanners
285
281
  label_expected = false
286
282
  if kind == :ident && match =~ /^[A-Z]/
287
283
  kind = :constant
288
- elsif kind == :reserved
284
+ elsif kind == :keyword
289
285
  case match
290
286
  when 'class'
291
287
  states << :class_expected
@@ -299,77 +295,68 @@ module Scanners
299
295
  next
300
296
  end
301
297
  end
298
+ encoder.text_token match, kind
302
299
 
303
- elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
300
+ elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
304
301
  label_expected = false
305
- kind = :float
302
+ encoder.text_token match, :float
306
303
 
307
- elsif scan(/0x[0-9a-fA-F]+/)
304
+ elsif match = scan(/0x[0-9a-fA-F]+/)
308
305
  label_expected = false
309
- kind = :hex
306
+ encoder.text_token match, :hex
310
307
 
311
- elsif scan(/\d+/)
308
+ elsif match = scan(/\d+/)
312
309
  label_expected = false
313
- kind = :integer
314
-
315
- elsif scan(/'/)
316
- tokens << [:open, :string]
317
- if modifier
318
- tokens << [modifier, :modifier]
319
- modifier = nil
320
- end
321
- kind = :delimiter
322
- states.push :sqstring
310
+ encoder.text_token match, :integer
323
311
 
324
- elsif match = scan(/["`]/)
325
- tokens << [:open, :string]
312
+ elsif match = scan(/['"`]/)
313
+ encoder.begin_group :string
326
314
  if modifier
327
- tokens << [modifier, :modifier]
315
+ encoder.text_token modifier, :modifier
328
316
  modifier = nil
329
317
  end
330
318
  delimiter = match
331
- kind = :delimiter
332
- states.push :dqstring
319
+ encoder.text_token match, :delimiter
320
+ states.push match == "'" ? :sqstring : :dqstring
333
321
 
334
322
  elsif match = scan(RE::VARIABLE)
335
323
  label_expected = false
336
- kind = Words::VARIABLE_KIND[match]
324
+ encoder.text_token match, Words::VARIABLE_KIND[match]
337
325
 
338
- elsif scan(/\{/)
339
- kind = :operator
326
+ elsif match = scan(/\{/)
327
+ encoder.text_token match, :operator
340
328
  label_expected = true
341
329
  states.push :php
342
330
 
343
- elsif scan(/\}/)
331
+ elsif match = scan(/\}/)
344
332
  if states.size == 1
345
- kind = :error
333
+ encoder.text_token match, :error
346
334
  else
347
335
  states.pop
348
336
  if states.last.is_a?(::Array)
349
337
  delimiter = states.last[1]
350
338
  states[-1] = states.last[0]
351
- tokens << [matched, :delimiter]
352
- tokens << [:close, :inline]
353
- next
339
+ encoder.text_token match, :delimiter
340
+ encoder.end_group :inline
354
341
  else
355
- kind = :operator
342
+ encoder.text_token match, :operator
356
343
  label_expected = true
357
344
  end
358
345
  end
359
346
 
360
- elsif scan(/@/)
347
+ elsif match = scan(/@/)
361
348
  label_expected = false
362
- kind = :exception
349
+ encoder.text_token match, :exception
363
350
 
364
- elsif scan RE::PHP_END
365
- kind = :inline_delimiter
351
+ elsif match = scan(RE::PHP_END)
352
+ encoder.text_token match, :inline_delimiter
366
353
  states = [:initial]
367
354
 
368
355
  elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
369
- tokens << [:open, :string]
370
- warn 'heredoc in heredoc?' if heredoc_delimiter
356
+ encoder.begin_group :string
357
+ # warn 'heredoc in heredoc?' if heredoc_delimiter
371
358
  heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
372
- kind = :delimiter
359
+ encoder.text_token match, :delimiter
373
360
  states.push self[3] ? :sqstring : :dqstring
374
361
  heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
375
362
 
@@ -379,152 +366,141 @@ module Scanners
379
366
  label_expected = true if match == ':'
380
367
  case_expected = false
381
368
  end
382
- kind = :operator
369
+ encoder.text_token match, :operator
383
370
 
384
371
  else
385
- getch
386
- kind = :error
372
+ encoder.text_token getch, :error
387
373
 
388
374
  end
389
375
 
390
376
  when :sqstring
391
- if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
392
- kind = :content
393
- elsif !heredoc_delimiter && scan(/'/)
394
- tokens << [matched, :delimiter]
395
- tokens << [:close, :string]
377
+ if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
378
+ encoder.text_token match, :content
379
+ elsif !heredoc_delimiter && match = scan(/'/)
380
+ encoder.text_token match, :delimiter
381
+ encoder.end_group :string
396
382
  delimiter = nil
397
383
  label_expected = false
398
384
  states.pop
399
- next
400
385
  elsif heredoc_delimiter && match = scan(/\n/)
401
- kind = :content
402
386
  if scan heredoc_delimiter
403
- tokens << ["\n", :content]
404
- tokens << [matched, :delimiter]
405
- tokens << [:close, :string]
387
+ encoder.text_token "\n", :content
388
+ encoder.text_token matched, :delimiter
389
+ encoder.end_group :string
406
390
  heredoc_delimiter = nil
407
391
  label_expected = false
408
392
  states.pop
409
- next
393
+ else
394
+ encoder.text_token match, :content
410
395
  end
411
- elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
412
- kind = :char
413
- elsif scan(/\\./m)
414
- kind = :content
415
- elsif scan(/\\/)
416
- kind = :error
396
+ elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
397
+ encoder.text_token match, :char
398
+ elsif match = scan(/\\./m)
399
+ encoder.text_token match, :content
400
+ elsif match = scan(/\\/)
401
+ encoder.text_token match, :error
402
+ else
403
+ states.pop
417
404
  end
418
405
 
419
406
  when :dqstring
420
- if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
421
- kind = :content
422
- elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
423
- tokens << [matched, :delimiter]
424
- tokens << [:close, :string]
407
+ if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
408
+ encoder.text_token match, :content
409
+ elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
410
+ encoder.text_token match, :delimiter
411
+ encoder.end_group :string
425
412
  delimiter = nil
426
413
  label_expected = false
427
414
  states.pop
428
- next
429
415
  elsif heredoc_delimiter && match = scan(/\n/)
430
- kind = :content
431
416
  if scan heredoc_delimiter
432
- tokens << ["\n", :content]
433
- tokens << [matched, :delimiter]
434
- tokens << [:close, :string]
417
+ encoder.text_token "\n", :content
418
+ encoder.text_token matched, :delimiter
419
+ encoder.end_group :string
435
420
  heredoc_delimiter = nil
436
421
  label_expected = false
437
422
  states.pop
438
- next
423
+ else
424
+ encoder.text_token match, :content
439
425
  end
440
- elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
441
- kind = :char
442
- elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
443
- kind = :char
444
- elsif scan(/\\./m)
445
- kind = :content
446
- elsif scan(/\\/)
447
- kind = :error
426
+ elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
427
+ encoder.text_token match, :char
428
+ elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
429
+ encoder.text_token match, :char
430
+ elsif match = scan(/\\./m)
431
+ encoder.text_token match, :content
432
+ elsif match = scan(/\\/)
433
+ encoder.text_token match, :error
448
434
  elsif match = scan(/#{RE::VARIABLE}/o)
449
- kind = :local_variable
450
435
  if check(/\[#{RE::IDENTIFIER}\]/o)
451
- tokens << [:open, :inline]
452
- tokens << [match, :local_variable]
453
- tokens << [scan(/\[/), :operator]
454
- tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
455
- tokens << [scan(/\]/), :operator]
456
- tokens << [:close, :inline]
457
- next
436
+ encoder.begin_group :inline
437
+ encoder.text_token match, :local_variable
438
+ encoder.text_token scan(/\[/), :operator
439
+ encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
440
+ encoder.text_token scan(/\]/), :operator
441
+ encoder.end_group :inline
458
442
  elsif check(/\[/)
459
443
  match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
460
- kind = :error
444
+ encoder.text_token match, :error
461
445
  elsif check(/->#{RE::IDENTIFIER}/o)
462
- tokens << [:open, :inline]
463
- tokens << [match, :local_variable]
464
- tokens << [scan(/->/), :operator]
465
- tokens << [scan(/#{RE::IDENTIFIER}/o), :ident]
466
- tokens << [:close, :inline]
467
- next
446
+ encoder.begin_group :inline
447
+ encoder.text_token match, :local_variable
448
+ encoder.text_token scan(/->/), :operator
449
+ encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
450
+ encoder.end_group :inline
468
451
  elsif check(/->/)
469
452
  match << scan(/->/)
470
- kind = :error
453
+ encoder.text_token match, :error
454
+ else
455
+ encoder.text_token match, :local_variable
471
456
  end
472
457
  elsif match = scan(/\{/)
473
458
  if check(/\$/)
474
- kind = :delimiter
459
+ encoder.begin_group :inline
475
460
  states[-1] = [states.last, delimiter]
476
461
  delimiter = nil
477
462
  states.push :php
478
- tokens << [:open, :inline]
463
+ encoder.text_token match, :delimiter
479
464
  else
480
- kind = :string
465
+ encoder.text_token match, :content
481
466
  end
482
- elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
483
- kind = :local_variable
484
- elsif scan(/\$/)
485
- kind = :content
467
+ elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
468
+ encoder.text_token match, :local_variable
469
+ elsif match = scan(/\$/)
470
+ encoder.text_token match, :content
471
+ else
472
+ states.pop
486
473
  end
487
474
 
488
475
  when :class_expected
489
- if scan(/\s+/)
490
- kind = :space
476
+ if match = scan(/\s+/)
477
+ encoder.text_token match, :space
491
478
  elsif match = scan(/#{RE::IDENTIFIER}/o)
492
- kind = :class
479
+ encoder.text_token match, :class
493
480
  states.pop
494
481
  else
495
482
  states.pop
496
- next
497
483
  end
498
484
 
499
485
  when :function_expected
500
- if scan(/\s+/)
501
- kind = :space
502
- elsif scan(/&/)
503
- kind = :operator
486
+ if match = scan(/\s+/)
487
+ encoder.text_token match, :space
488
+ elsif match = scan(/&/)
489
+ encoder.text_token match, :operator
504
490
  elsif match = scan(/#{RE::IDENTIFIER}/o)
505
- kind = :function
491
+ encoder.text_token match, :function
506
492
  states.pop
507
493
  else
508
494
  states.pop
509
- next
510
495
  end
511
496
 
512
497
  else
513
- raise_inspect 'Unknown state!', tokens, states
498
+ raise_inspect 'Unknown state!', encoder, states
514
499
  end
515
500
 
516
- match ||= matched
517
- if $CODERAY_DEBUG and not kind
518
- raise_inspect 'Error token %p in line %d' %
519
- [[match, kind], line], tokens, states
520
- end
521
- raise_inspect 'Empty token', tokens, states unless match
522
-
523
- tokens << [match, kind]
524
-
525
501
  end
526
502
 
527
- tokens
503
+ encoder
528
504
  end
529
505
 
530
506
  end
@@ -1,12 +1,12 @@
1
1
  module CodeRay
2
2
  module Scanners
3
3
 
4
- # Bases on pygments' PythonLexer, see
4
+ # Scanner for Python. Supports Python 3.
5
+ #
6
+ # Based on pygments' PythonLexer, see
5
7
  # http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
6
8
  class Python < Scanner
7
9
 
8
- include Streamable
9
-
10
10
  register_for :python
11
11
  file_extension 'py'
12
12
 
@@ -16,11 +16,11 @@ module Scanners
16
16
  'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
17
17
  'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
18
18
  'nonlocal', # new in Python 3
19
- ]
19
+ ] # :nodoc:
20
20
 
21
21
  OLD_KEYWORDS = [
22
22
  'exec', 'print', # gone in Python 3
23
- ]
23
+ ] # :nodoc:
24
24
 
25
25
  PREDEFINED_METHODS_AND_TYPES = %w[
26
26
  __import__ abs all any apply basestring bin bool buffer
@@ -32,7 +32,7 @@ module Scanners
32
32
  raw_input reduce reload repr reversed round set setattr slice
33
33
  sorted staticmethod str sum super tuple type unichr unicode
34
34
  vars xrange zip
35
- ]
35
+ ] # :nodoc:
36
36
 
37
37
  PREDEFINED_EXCEPTIONS = %w[
38
38
  ArithmeticError AssertionError AttributeError
@@ -47,23 +47,23 @@ module Scanners
47
47
  TypeError UnboundLocalError UnicodeDecodeError
48
48
  UnicodeEncodeError UnicodeError UnicodeTranslateError
49
49
  UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
50
- ]
50
+ ] # :nodoc:
51
51
 
52
52
  PREDEFINED_VARIABLES_AND_CONSTANTS = [
53
- 'False', 'True', 'None', # "keywords" since Python 3
53
+ 'False', 'True', 'None', # "keywords" since Python 3
54
54
  'self', 'Ellipsis', 'NotImplemented',
55
- ]
55
+ ] # :nodoc:
56
56
 
57
57
  IDENT_KIND = WordList.new(:ident).
58
58
  add(KEYWORDS, :keyword).
59
59
  add(OLD_KEYWORDS, :old_keyword).
60
60
  add(PREDEFINED_METHODS_AND_TYPES, :predefined).
61
- add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
62
- add(PREDEFINED_EXCEPTIONS, :exception)
61
+ add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
62
+ add(PREDEFINED_EXCEPTIONS, :exception) # :nodoc:
63
63
 
64
- NAME = / [^\W\d] \w* /x
65
- ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
66
- UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x
64
+ NAME = / [^\W\d] \w* /x # :nodoc:
65
+ ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
66
+ UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x # :nodoc:
67
67
 
68
68
  OPERATOR = /
69
69
  \.\.\. | # ellipsis
@@ -73,95 +73,103 @@ module Scanners
73
73
  [-+*\/%&|^]=? | # ordinary math and binary logic
74
74
  [~`] | # binary complement and inspection
75
75
  <<=? | >>=? | [<>=]=? | != # comparison and assignment
76
- /x
76
+ /x # :nodoc:
77
77
 
78
- STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
79
- h[delimiter] = Regexp.union delimiter
80
- end
78
+ STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
79
+ h[delimiter] = Regexp.union delimiter # :nodoc:
80
+ }
81
81
 
82
- STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
83
- h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
84
- end
82
+ STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
83
+ h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc:
84
+ }
85
85
 
86
86
  DEF_NEW_STATE = WordList.new(:initial).
87
87
  add(%w(def), :def_expected).
88
88
  add(%w(import from), :include_expected).
89
- add(%w(class), :class_expected)
89
+ add(%w(class), :class_expected) # :nodoc:
90
90
 
91
91
  DESCRIPTOR = /
92
92
  #{NAME}
93
93
  (?: \. #{NAME} )*
94
94
  | \*
95
- /x
95
+ /x # :nodoc:
96
+
97
+ DOCSTRING_COMING = /
98
+ [ \t]* u?r? ("""|''')
99
+ /x # :nodoc:
96
100
 
97
- def scan_tokens tokens, options
101
+ protected
102
+
103
+ def scan_tokens encoder, options
98
104
 
99
105
  state = :initial
100
106
  string_delimiter = nil
101
107
  string_raw = false
108
+ string_type = nil
109
+ docstring_coming = match?(/#{DOCSTRING_COMING}/o)
102
110
  last_token_dot = false
103
111
  unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
104
112
  from_import_state = []
105
113
 
106
114
  until eos?
107
115
 
108
- kind = nil
109
- match = nil
110
-
111
116
  if state == :string
112
- if scan(STRING_DELIMITER_REGEXP[string_delimiter])
113
- tokens << [matched, :delimiter]
114
- tokens << [:close, :string]
117
+ if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
118
+ encoder.text_token match, :delimiter
119
+ encoder.end_group string_type
120
+ string_type = nil
115
121
  state = :initial
116
122
  next
117
- elsif string_delimiter.size == 3 && scan(/\n/)
118
- kind = :content
119
- elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
120
- kind = :content
121
- elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
122
- kind = :char
123
- elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
124
- kind = :char
125
- elsif scan(/ \\ . /x)
126
- kind = :content
127
- elsif scan(/ \\ | $ /x)
128
- tokens << [:close, :string]
129
- kind = :error
123
+ elsif string_delimiter.size == 3 && match = scan(/\n/)
124
+ encoder.text_token match, :content
125
+ elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
126
+ encoder.text_token match, :content
127
+ elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
128
+ encoder.text_token match, :char
129
+ elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
130
+ encoder.text_token match, :char
131
+ elsif match = scan(/ \\ . /x)
132
+ encoder.text_token match, :content
133
+ elsif match = scan(/ \\ | $ /x)
134
+ encoder.end_group string_type
135
+ string_type = nil
136
+ encoder.text_token match, :error
130
137
  state = :initial
131
138
  else
132
- raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
139
+ raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
133
140
  end
134
141
 
135
- elsif match = scan(/ [ \t]+ | \\\n /x)
136
- tokens << [match, :space]
137
- next
138
-
139
- elsif match = scan(/\n/)
140
- tokens << [match, :space]
141
- state = :initial if state == :include_expected
142
+ elsif match = scan(/ [ \t]+ | \\?\n /x)
143
+ encoder.text_token match, :space
144
+ if match == "\n"
145
+ state = :initial if state == :include_expected
146
+ docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
147
+ end
142
148
  next
143
149
 
144
150
  elsif match = scan(/ \# [^\n]* /mx)
145
- tokens << [match, :comment]
151
+ encoder.text_token match, :comment
146
152
  next
147
153
 
148
154
  elsif state == :initial
149
155
 
150
- if scan(/#{OPERATOR}/o)
151
- kind = :operator
156
+ if match = scan(/#{OPERATOR}/o)
157
+ encoder.text_token match, :operator
152
158
 
153
159
  elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
154
- tokens << [:open, :string]
155
160
  string_delimiter = self[2]
161
+ string_type = docstring_coming ? :docstring : :string
162
+ docstring_coming = false if docstring_coming
163
+ encoder.begin_group string_type
156
164
  string_raw = false
157
165
  modifiers = self[1]
158
166
  unless modifiers.empty?
159
167
  string_raw = !!modifiers.index(?r)
160
- tokens << [modifiers, :modifier]
168
+ encoder.text_token modifiers, :modifier
161
169
  match = string_delimiter
162
170
  end
163
171
  state = :string
164
- kind = :delimiter
172
+ encoder.text_token match, :delimiter
165
173
 
166
174
  # TODO: backticks
167
175
 
@@ -177,43 +185,45 @@ module Scanners
177
185
  state = DEF_NEW_STATE[match]
178
186
  from_import_state << match.to_sym if state == :include_expected
179
187
  end
188
+ encoder.text_token match, kind
180
189
 
181
- elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
182
- kind = :decorator
190
+ elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
191
+ encoder.text_token match, :decorator
183
192
 
184
- elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
185
- kind = :hex
193
+ elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
194
+ encoder.text_token match, :hex
186
195
 
187
- elsif scan(/0[bB][01]+[lL]?/)
188
- kind = :bin
196
+ elsif match = scan(/0[bB][01]+[lL]?/)
197
+ encoder.text_token match, :binary
189
198
 
190
199
  elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
191
- kind = :float
192
200
  if scan(/[jJ]/)
193
201
  match << matched
194
- kind = :imaginary
202
+ encoder.text_token match, :imaginary
203
+ else
204
+ encoder.text_token match, :float
195
205
  end
196
206
 
197
- elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
198
- kind = :oct
207
+ elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
208
+ encoder.text_token match, :octal
199
209
 
200
210
  elsif match = scan(/\d+([lL])?/)
201
- kind = :integer
202
211
  if self[1] == nil && scan(/[jJ]/)
203
212
  match << matched
204
- kind = :imaginary
213
+ encoder.text_token match, :imaginary
214
+ else
215
+ encoder.text_token match, :integer
205
216
  end
206
217
 
207
218
  else
208
- getch
209
- kind = :error
219
+ encoder.text_token getch, :error
210
220
 
211
221
  end
212
222
 
213
223
  elsif state == :def_expected
214
224
  state = :initial
215
225
  if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
216
- kind = :method
226
+ encoder.text_token match, :method
217
227
  else
218
228
  next
219
229
  end
@@ -221,33 +231,34 @@ module Scanners
221
231
  elsif state == :class_expected
222
232
  state = :initial
223
233
  if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
224
- kind = :class
234
+ encoder.text_token match, :class
225
235
  else
226
236
  next
227
237
  end
228
238
 
229
239
  elsif state == :include_expected
230
240
  if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
231
- kind = :include
232
241
  if match == 'as'
233
- kind = :keyword
242
+ encoder.text_token match, :keyword
234
243
  from_import_state << :as
235
244
  elsif from_import_state.first == :from && match == 'import'
236
- kind = :keyword
245
+ encoder.text_token match, :keyword
237
246
  from_import_state << :import
238
247
  elsif from_import_state.last == :as
239
- # kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
240
- kind = :ident
248
+ # encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
249
+ encoder.text_token match, :ident
241
250
  from_import_state.pop
242
251
  elsif IDENT_KIND[match] == :keyword
243
252
  unscan
244
253
  match = nil
245
254
  state = :initial
246
255
  next
256
+ else
257
+ encoder.text_token match, :include
247
258
  end
248
259
  elsif match = scan(/,/)
249
260
  from_import_state.pop if from_import_state.last == :as
250
- kind = :operator
261
+ encoder.text_token match, :operator
251
262
  else
252
263
  from_import_state = []
253
264
  state = :initial
@@ -255,28 +266,19 @@ module Scanners
255
266
  end
256
267
 
257
268
  else
258
- raise_inspect 'Unknown state', tokens, state
269
+ raise_inspect 'Unknown state', encoder, state
259
270
 
260
271
  end
261
272
 
262
- match ||= matched
263
- if $CODERAY_DEBUG and not kind
264
- raise_inspect 'Error token %p in line %d' %
265
- [[match, kind], line], tokens, state
266
- end
267
- raise_inspect 'Empty token', tokens, state unless match
268
-
269
273
  last_token_dot = match == '.'
270
274
 
271
- tokens << [match, kind]
272
-
273
275
  end
274
276
 
275
277
  if state == :string
276
- tokens << [:close, :string]
278
+ encoder.end_group string_type
277
279
  end
278
280
 
279
- tokens
281
+ encoder
280
282
  end
281
283
 
282
284
  end