coderay 0.9.8 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
data/lib/coderay/scanners/php.rb
CHANGED
@@ -3,14 +3,19 @@ module Scanners
|
|
3
3
|
|
4
4
|
load :html
|
5
5
|
|
6
|
+
# Scanner for PHP.
|
7
|
+
#
|
6
8
|
# Original by Stefan Walk.
|
7
9
|
class PHP < Scanner
|
8
10
|
|
9
11
|
register_for :php
|
10
12
|
file_extension 'php'
|
13
|
+
encoding 'BINARY'
|
11
14
|
|
12
15
|
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
|
13
16
|
|
17
|
+
protected
|
18
|
+
|
14
19
|
def setup
|
15
20
|
@html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
|
16
21
|
end
|
@@ -20,7 +25,7 @@ module Scanners
|
|
20
25
|
@html_scanner.reset
|
21
26
|
end
|
22
27
|
|
23
|
-
module Words
|
28
|
+
module Words # :nodoc:
|
24
29
|
|
25
30
|
# according to http://www.php.net/manual/en/reserved.keywords.php
|
26
31
|
KEYWORDS = %w[
|
@@ -176,20 +181,20 @@ module Scanners
|
|
176
181
|
$argc $argv
|
177
182
|
]
|
178
183
|
|
179
|
-
IDENT_KIND =
|
180
|
-
add(KEYWORDS, :
|
181
|
-
add(TYPES, :
|
182
|
-
add(LANGUAGE_CONSTRUCTS, :
|
184
|
+
IDENT_KIND = WordList::CaseIgnoring.new(:ident).
|
185
|
+
add(KEYWORDS, :keyword).
|
186
|
+
add(TYPES, :predefined_type).
|
187
|
+
add(LANGUAGE_CONSTRUCTS, :keyword).
|
183
188
|
add(BUILTIN_FUNCTIONS, :predefined).
|
184
|
-
add(CLASSES, :
|
189
|
+
add(CLASSES, :predefined_constant).
|
185
190
|
add(EXCEPTIONS, :exception).
|
186
|
-
add(CONSTANTS, :
|
191
|
+
add(CONSTANTS, :predefined_constant)
|
187
192
|
|
188
193
|
VARIABLE_KIND = WordList.new(:local_variable).
|
189
194
|
add(PREDEFINED, :predefined)
|
190
195
|
end
|
191
196
|
|
192
|
-
module RE
|
197
|
+
module RE # :nodoc:
|
193
198
|
|
194
199
|
PHP_START = /
|
195
200
|
<script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
|
@@ -224,17 +229,13 @@ module Scanners
|
|
224
229
|
|
225
230
|
end
|
226
231
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
self.string = string.encode Encoding::ASCII_8BIT,
|
231
|
-
:invalid => :replace, :undef => :replace, :replace => '?'
|
232
|
-
end
|
233
|
-
end
|
232
|
+
protected
|
233
|
+
|
234
|
+
def scan_tokens encoder, options
|
234
235
|
|
235
236
|
if check(RE::PHP_START) || # starts with <?
|
236
|
-
(match?(/\s*<\S/) &&
|
237
|
-
|
237
|
+
(match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
|
238
|
+
check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
|
238
239
|
check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
|
239
240
|
# is HTML with embedded PHP, so start with HTML
|
240
241
|
states = [:initial]
|
@@ -252,29 +253,24 @@ module Scanners
|
|
252
253
|
|
253
254
|
until eos?
|
254
255
|
|
255
|
-
match = nil
|
256
|
-
kind = nil
|
257
|
-
|
258
256
|
case states.last
|
259
257
|
|
260
258
|
when :initial # HTML
|
261
|
-
if scan
|
262
|
-
|
259
|
+
if match = scan(RE::PHP_START)
|
260
|
+
encoder.text_token match, :inline_delimiter
|
263
261
|
label_expected = true
|
264
262
|
states << :php
|
265
263
|
else
|
266
264
|
match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
|
267
265
|
@html_scanner.tokenize match unless match.empty?
|
268
|
-
next
|
269
266
|
end
|
270
267
|
|
271
268
|
when :php
|
272
269
|
if match = scan(/\s+/)
|
273
|
-
|
274
|
-
next
|
270
|
+
encoder.text_token match, :space
|
275
271
|
|
276
|
-
elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
|
277
|
-
|
272
|
+
elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
|
273
|
+
encoder.text_token match, :comment
|
278
274
|
|
279
275
|
elsif match = scan(RE::IDENTIFIER)
|
280
276
|
kind = Words::IDENT_KIND[match]
|
@@ -285,7 +281,7 @@ module Scanners
|
|
285
281
|
label_expected = false
|
286
282
|
if kind == :ident && match =~ /^[A-Z]/
|
287
283
|
kind = :constant
|
288
|
-
elsif kind == :
|
284
|
+
elsif kind == :keyword
|
289
285
|
case match
|
290
286
|
when 'class'
|
291
287
|
states << :class_expected
|
@@ -299,77 +295,68 @@ module Scanners
|
|
299
295
|
next
|
300
296
|
end
|
301
297
|
end
|
298
|
+
encoder.text_token match, kind
|
302
299
|
|
303
|
-
elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
|
300
|
+
elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
|
304
301
|
label_expected = false
|
305
|
-
|
302
|
+
encoder.text_token match, :float
|
306
303
|
|
307
|
-
elsif scan(/0x[0-9a-fA-F]+/)
|
304
|
+
elsif match = scan(/0x[0-9a-fA-F]+/)
|
308
305
|
label_expected = false
|
309
|
-
|
306
|
+
encoder.text_token match, :hex
|
310
307
|
|
311
|
-
elsif scan(/\d+/)
|
308
|
+
elsif match = scan(/\d+/)
|
312
309
|
label_expected = false
|
313
|
-
|
314
|
-
|
315
|
-
elsif scan(/'/)
|
316
|
-
tokens << [:open, :string]
|
317
|
-
if modifier
|
318
|
-
tokens << [modifier, :modifier]
|
319
|
-
modifier = nil
|
320
|
-
end
|
321
|
-
kind = :delimiter
|
322
|
-
states.push :sqstring
|
310
|
+
encoder.text_token match, :integer
|
323
311
|
|
324
|
-
elsif match = scan(/["`]/)
|
325
|
-
|
312
|
+
elsif match = scan(/['"`]/)
|
313
|
+
encoder.begin_group :string
|
326
314
|
if modifier
|
327
|
-
|
315
|
+
encoder.text_token modifier, :modifier
|
328
316
|
modifier = nil
|
329
317
|
end
|
330
318
|
delimiter = match
|
331
|
-
|
332
|
-
states.push :dqstring
|
319
|
+
encoder.text_token match, :delimiter
|
320
|
+
states.push match == "'" ? :sqstring : :dqstring
|
333
321
|
|
334
322
|
elsif match = scan(RE::VARIABLE)
|
335
323
|
label_expected = false
|
336
|
-
|
324
|
+
encoder.text_token match, Words::VARIABLE_KIND[match]
|
337
325
|
|
338
|
-
elsif scan(/\{/)
|
339
|
-
|
326
|
+
elsif match = scan(/\{/)
|
327
|
+
encoder.text_token match, :operator
|
340
328
|
label_expected = true
|
341
329
|
states.push :php
|
342
330
|
|
343
|
-
elsif scan(/\}/)
|
331
|
+
elsif match = scan(/\}/)
|
344
332
|
if states.size == 1
|
345
|
-
|
333
|
+
encoder.text_token match, :error
|
346
334
|
else
|
347
335
|
states.pop
|
348
336
|
if states.last.is_a?(::Array)
|
349
337
|
delimiter = states.last[1]
|
350
338
|
states[-1] = states.last[0]
|
351
|
-
|
352
|
-
|
353
|
-
next
|
339
|
+
encoder.text_token match, :delimiter
|
340
|
+
encoder.end_group :inline
|
354
341
|
else
|
355
|
-
|
342
|
+
encoder.text_token match, :operator
|
356
343
|
label_expected = true
|
357
344
|
end
|
358
345
|
end
|
359
346
|
|
360
|
-
elsif scan(/@/)
|
347
|
+
elsif match = scan(/@/)
|
361
348
|
label_expected = false
|
362
|
-
|
349
|
+
encoder.text_token match, :exception
|
363
350
|
|
364
|
-
elsif scan
|
365
|
-
|
351
|
+
elsif match = scan(RE::PHP_END)
|
352
|
+
encoder.text_token match, :inline_delimiter
|
366
353
|
states = [:initial]
|
367
354
|
|
368
355
|
elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
|
369
|
-
|
370
|
-
warn 'heredoc in heredoc?' if heredoc_delimiter
|
356
|
+
encoder.begin_group :string
|
357
|
+
# warn 'heredoc in heredoc?' if heredoc_delimiter
|
371
358
|
heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
|
372
|
-
|
359
|
+
encoder.text_token match, :delimiter
|
373
360
|
states.push self[3] ? :sqstring : :dqstring
|
374
361
|
heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
|
375
362
|
|
@@ -379,152 +366,141 @@ module Scanners
|
|
379
366
|
label_expected = true if match == ':'
|
380
367
|
case_expected = false
|
381
368
|
end
|
382
|
-
|
369
|
+
encoder.text_token match, :operator
|
383
370
|
|
384
371
|
else
|
385
|
-
getch
|
386
|
-
kind = :error
|
372
|
+
encoder.text_token getch, :error
|
387
373
|
|
388
374
|
end
|
389
375
|
|
390
376
|
when :sqstring
|
391
|
-
if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
|
392
|
-
|
393
|
-
elsif !heredoc_delimiter && scan(/'/)
|
394
|
-
|
395
|
-
|
377
|
+
if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
|
378
|
+
encoder.text_token match, :content
|
379
|
+
elsif !heredoc_delimiter && match = scan(/'/)
|
380
|
+
encoder.text_token match, :delimiter
|
381
|
+
encoder.end_group :string
|
396
382
|
delimiter = nil
|
397
383
|
label_expected = false
|
398
384
|
states.pop
|
399
|
-
next
|
400
385
|
elsif heredoc_delimiter && match = scan(/\n/)
|
401
|
-
kind = :content
|
402
386
|
if scan heredoc_delimiter
|
403
|
-
|
404
|
-
|
405
|
-
|
387
|
+
encoder.text_token "\n", :content
|
388
|
+
encoder.text_token matched, :delimiter
|
389
|
+
encoder.end_group :string
|
406
390
|
heredoc_delimiter = nil
|
407
391
|
label_expected = false
|
408
392
|
states.pop
|
409
|
-
|
393
|
+
else
|
394
|
+
encoder.text_token match, :content
|
410
395
|
end
|
411
|
-
elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
|
412
|
-
|
413
|
-
elsif scan(/\\./m)
|
414
|
-
|
415
|
-
elsif scan(/\\/)
|
416
|
-
|
396
|
+
elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
|
397
|
+
encoder.text_token match, :char
|
398
|
+
elsif match = scan(/\\./m)
|
399
|
+
encoder.text_token match, :content
|
400
|
+
elsif match = scan(/\\/)
|
401
|
+
encoder.text_token match, :error
|
402
|
+
else
|
403
|
+
states.pop
|
417
404
|
end
|
418
405
|
|
419
406
|
when :dqstring
|
420
|
-
if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
|
421
|
-
|
422
|
-
elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
|
423
|
-
|
424
|
-
|
407
|
+
if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
|
408
|
+
encoder.text_token match, :content
|
409
|
+
elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
|
410
|
+
encoder.text_token match, :delimiter
|
411
|
+
encoder.end_group :string
|
425
412
|
delimiter = nil
|
426
413
|
label_expected = false
|
427
414
|
states.pop
|
428
|
-
next
|
429
415
|
elsif heredoc_delimiter && match = scan(/\n/)
|
430
|
-
kind = :content
|
431
416
|
if scan heredoc_delimiter
|
432
|
-
|
433
|
-
|
434
|
-
|
417
|
+
encoder.text_token "\n", :content
|
418
|
+
encoder.text_token matched, :delimiter
|
419
|
+
encoder.end_group :string
|
435
420
|
heredoc_delimiter = nil
|
436
421
|
label_expected = false
|
437
422
|
states.pop
|
438
|
-
|
423
|
+
else
|
424
|
+
encoder.text_token match, :content
|
439
425
|
end
|
440
|
-
elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
|
441
|
-
|
442
|
-
elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
|
443
|
-
|
444
|
-
elsif scan(/\\./m)
|
445
|
-
|
446
|
-
elsif scan(/\\/)
|
447
|
-
|
426
|
+
elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
|
427
|
+
encoder.text_token match, :char
|
428
|
+
elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
|
429
|
+
encoder.text_token match, :char
|
430
|
+
elsif match = scan(/\\./m)
|
431
|
+
encoder.text_token match, :content
|
432
|
+
elsif match = scan(/\\/)
|
433
|
+
encoder.text_token match, :error
|
448
434
|
elsif match = scan(/#{RE::VARIABLE}/o)
|
449
|
-
kind = :local_variable
|
450
435
|
if check(/\[#{RE::IDENTIFIER}\]/o)
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
next
|
436
|
+
encoder.begin_group :inline
|
437
|
+
encoder.text_token match, :local_variable
|
438
|
+
encoder.text_token scan(/\[/), :operator
|
439
|
+
encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
|
440
|
+
encoder.text_token scan(/\]/), :operator
|
441
|
+
encoder.end_group :inline
|
458
442
|
elsif check(/\[/)
|
459
443
|
match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
|
460
|
-
|
444
|
+
encoder.text_token match, :error
|
461
445
|
elsif check(/->#{RE::IDENTIFIER}/o)
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
next
|
446
|
+
encoder.begin_group :inline
|
447
|
+
encoder.text_token match, :local_variable
|
448
|
+
encoder.text_token scan(/->/), :operator
|
449
|
+
encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
|
450
|
+
encoder.end_group :inline
|
468
451
|
elsif check(/->/)
|
469
452
|
match << scan(/->/)
|
470
|
-
|
453
|
+
encoder.text_token match, :error
|
454
|
+
else
|
455
|
+
encoder.text_token match, :local_variable
|
471
456
|
end
|
472
457
|
elsif match = scan(/\{/)
|
473
458
|
if check(/\$/)
|
474
|
-
|
459
|
+
encoder.begin_group :inline
|
475
460
|
states[-1] = [states.last, delimiter]
|
476
461
|
delimiter = nil
|
477
462
|
states.push :php
|
478
|
-
|
463
|
+
encoder.text_token match, :delimiter
|
479
464
|
else
|
480
|
-
|
465
|
+
encoder.text_token match, :content
|
481
466
|
end
|
482
|
-
elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
|
483
|
-
|
484
|
-
elsif scan(/\$/)
|
485
|
-
|
467
|
+
elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
|
468
|
+
encoder.text_token match, :local_variable
|
469
|
+
elsif match = scan(/\$/)
|
470
|
+
encoder.text_token match, :content
|
471
|
+
else
|
472
|
+
states.pop
|
486
473
|
end
|
487
474
|
|
488
475
|
when :class_expected
|
489
|
-
if scan(/\s+/)
|
490
|
-
|
476
|
+
if match = scan(/\s+/)
|
477
|
+
encoder.text_token match, :space
|
491
478
|
elsif match = scan(/#{RE::IDENTIFIER}/o)
|
492
|
-
|
479
|
+
encoder.text_token match, :class
|
493
480
|
states.pop
|
494
481
|
else
|
495
482
|
states.pop
|
496
|
-
next
|
497
483
|
end
|
498
484
|
|
499
485
|
when :function_expected
|
500
|
-
if scan(/\s+/)
|
501
|
-
|
502
|
-
elsif scan(/&/)
|
503
|
-
|
486
|
+
if match = scan(/\s+/)
|
487
|
+
encoder.text_token match, :space
|
488
|
+
elsif match = scan(/&/)
|
489
|
+
encoder.text_token match, :operator
|
504
490
|
elsif match = scan(/#{RE::IDENTIFIER}/o)
|
505
|
-
|
491
|
+
encoder.text_token match, :function
|
506
492
|
states.pop
|
507
493
|
else
|
508
494
|
states.pop
|
509
|
-
next
|
510
495
|
end
|
511
496
|
|
512
497
|
else
|
513
|
-
raise_inspect 'Unknown state!',
|
498
|
+
raise_inspect 'Unknown state!', encoder, states
|
514
499
|
end
|
515
500
|
|
516
|
-
match ||= matched
|
517
|
-
if $CODERAY_DEBUG and not kind
|
518
|
-
raise_inspect 'Error token %p in line %d' %
|
519
|
-
[[match, kind], line], tokens, states
|
520
|
-
end
|
521
|
-
raise_inspect 'Empty token', tokens, states unless match
|
522
|
-
|
523
|
-
tokens << [match, kind]
|
524
|
-
|
525
501
|
end
|
526
502
|
|
527
|
-
|
503
|
+
encoder
|
528
504
|
end
|
529
505
|
|
530
506
|
end
|
@@ -1,12 +1,12 @@
|
|
1
1
|
module CodeRay
|
2
2
|
module Scanners
|
3
3
|
|
4
|
-
#
|
4
|
+
# Scanner for Python. Supports Python 3.
|
5
|
+
#
|
6
|
+
# Based on pygments' PythonLexer, see
|
5
7
|
# http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
|
6
8
|
class Python < Scanner
|
7
9
|
|
8
|
-
include Streamable
|
9
|
-
|
10
10
|
register_for :python
|
11
11
|
file_extension 'py'
|
12
12
|
|
@@ -16,11 +16,11 @@ module Scanners
|
|
16
16
|
'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
|
17
17
|
'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
|
18
18
|
'nonlocal', # new in Python 3
|
19
|
-
]
|
19
|
+
] # :nodoc:
|
20
20
|
|
21
21
|
OLD_KEYWORDS = [
|
22
22
|
'exec', 'print', # gone in Python 3
|
23
|
-
]
|
23
|
+
] # :nodoc:
|
24
24
|
|
25
25
|
PREDEFINED_METHODS_AND_TYPES = %w[
|
26
26
|
__import__ abs all any apply basestring bin bool buffer
|
@@ -32,7 +32,7 @@ module Scanners
|
|
32
32
|
raw_input reduce reload repr reversed round set setattr slice
|
33
33
|
sorted staticmethod str sum super tuple type unichr unicode
|
34
34
|
vars xrange zip
|
35
|
-
]
|
35
|
+
] # :nodoc:
|
36
36
|
|
37
37
|
PREDEFINED_EXCEPTIONS = %w[
|
38
38
|
ArithmeticError AssertionError AttributeError
|
@@ -47,23 +47,23 @@ module Scanners
|
|
47
47
|
TypeError UnboundLocalError UnicodeDecodeError
|
48
48
|
UnicodeEncodeError UnicodeError UnicodeTranslateError
|
49
49
|
UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
|
50
|
-
]
|
50
|
+
] # :nodoc:
|
51
51
|
|
52
52
|
PREDEFINED_VARIABLES_AND_CONSTANTS = [
|
53
|
-
'False', 'True', 'None',
|
53
|
+
'False', 'True', 'None', # "keywords" since Python 3
|
54
54
|
'self', 'Ellipsis', 'NotImplemented',
|
55
|
-
]
|
55
|
+
] # :nodoc:
|
56
56
|
|
57
57
|
IDENT_KIND = WordList.new(:ident).
|
58
58
|
add(KEYWORDS, :keyword).
|
59
59
|
add(OLD_KEYWORDS, :old_keyword).
|
60
60
|
add(PREDEFINED_METHODS_AND_TYPES, :predefined).
|
61
|
-
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :
|
62
|
-
add(PREDEFINED_EXCEPTIONS, :exception)
|
61
|
+
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
|
62
|
+
add(PREDEFINED_EXCEPTIONS, :exception) # :nodoc:
|
63
63
|
|
64
|
-
NAME = / [^\W\d] \w* /x
|
65
|
-
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
66
|
-
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x
|
64
|
+
NAME = / [^\W\d] \w* /x # :nodoc:
|
65
|
+
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
66
|
+
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x # :nodoc:
|
67
67
|
|
68
68
|
OPERATOR = /
|
69
69
|
\.\.\. | # ellipsis
|
@@ -73,95 +73,103 @@ module Scanners
|
|
73
73
|
[-+*\/%&|^]=? | # ordinary math and binary logic
|
74
74
|
[~`] | # binary complement and inspection
|
75
75
|
<<=? | >>=? | [<>=]=? | != # comparison and assignment
|
76
|
-
/x
|
76
|
+
/x # :nodoc:
|
77
77
|
|
78
|
-
STRING_DELIMITER_REGEXP = Hash.new
|
79
|
-
h[delimiter] = Regexp.union delimiter
|
80
|
-
|
78
|
+
STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
|
79
|
+
h[delimiter] = Regexp.union delimiter # :nodoc:
|
80
|
+
}
|
81
81
|
|
82
|
-
STRING_CONTENT_REGEXP = Hash.new
|
83
|
-
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
|
84
|
-
|
82
|
+
STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
|
83
|
+
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc:
|
84
|
+
}
|
85
85
|
|
86
86
|
DEF_NEW_STATE = WordList.new(:initial).
|
87
87
|
add(%w(def), :def_expected).
|
88
88
|
add(%w(import from), :include_expected).
|
89
|
-
add(%w(class), :class_expected)
|
89
|
+
add(%w(class), :class_expected) # :nodoc:
|
90
90
|
|
91
91
|
DESCRIPTOR = /
|
92
92
|
#{NAME}
|
93
93
|
(?: \. #{NAME} )*
|
94
94
|
| \*
|
95
|
-
/x
|
95
|
+
/x # :nodoc:
|
96
|
+
|
97
|
+
DOCSTRING_COMING = /
|
98
|
+
[ \t]* u?r? ("""|''')
|
99
|
+
/x # :nodoc:
|
96
100
|
|
97
|
-
|
101
|
+
protected
|
102
|
+
|
103
|
+
def scan_tokens encoder, options
|
98
104
|
|
99
105
|
state = :initial
|
100
106
|
string_delimiter = nil
|
101
107
|
string_raw = false
|
108
|
+
string_type = nil
|
109
|
+
docstring_coming = match?(/#{DOCSTRING_COMING}/o)
|
102
110
|
last_token_dot = false
|
103
111
|
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
104
112
|
from_import_state = []
|
105
113
|
|
106
114
|
until eos?
|
107
115
|
|
108
|
-
kind = nil
|
109
|
-
match = nil
|
110
|
-
|
111
116
|
if state == :string
|
112
|
-
if scan(STRING_DELIMITER_REGEXP[string_delimiter])
|
113
|
-
|
114
|
-
|
117
|
+
if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
|
118
|
+
encoder.text_token match, :delimiter
|
119
|
+
encoder.end_group string_type
|
120
|
+
string_type = nil
|
115
121
|
state = :initial
|
116
122
|
next
|
117
|
-
elsif string_delimiter.size == 3 && scan(/\n/)
|
118
|
-
|
119
|
-
elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
|
120
|
-
|
121
|
-
elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
|
122
|
-
|
123
|
-
elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
|
124
|
-
|
125
|
-
elsif scan(/ \\ . /x)
|
126
|
-
|
127
|
-
elsif scan(/ \\ | $ /x)
|
128
|
-
|
129
|
-
|
123
|
+
elsif string_delimiter.size == 3 && match = scan(/\n/)
|
124
|
+
encoder.text_token match, :content
|
125
|
+
elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
|
126
|
+
encoder.text_token match, :content
|
127
|
+
elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
|
128
|
+
encoder.text_token match, :char
|
129
|
+
elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
|
130
|
+
encoder.text_token match, :char
|
131
|
+
elsif match = scan(/ \\ . /x)
|
132
|
+
encoder.text_token match, :content
|
133
|
+
elsif match = scan(/ \\ | $ /x)
|
134
|
+
encoder.end_group string_type
|
135
|
+
string_type = nil
|
136
|
+
encoder.text_token match, :error
|
130
137
|
state = :initial
|
131
138
|
else
|
132
|
-
raise_inspect "else case \" reached; %p not handled." % peek(1),
|
139
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
|
133
140
|
end
|
134
141
|
|
135
|
-
elsif match = scan(/ [ \t]+ |
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
state = :initial if state == :include_expected
|
142
|
+
elsif match = scan(/ [ \t]+ | \\?\n /x)
|
143
|
+
encoder.text_token match, :space
|
144
|
+
if match == "\n"
|
145
|
+
state = :initial if state == :include_expected
|
146
|
+
docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
|
147
|
+
end
|
142
148
|
next
|
143
149
|
|
144
150
|
elsif match = scan(/ \# [^\n]* /mx)
|
145
|
-
|
151
|
+
encoder.text_token match, :comment
|
146
152
|
next
|
147
153
|
|
148
154
|
elsif state == :initial
|
149
155
|
|
150
|
-
if scan(/#{OPERATOR}/o)
|
151
|
-
|
156
|
+
if match = scan(/#{OPERATOR}/o)
|
157
|
+
encoder.text_token match, :operator
|
152
158
|
|
153
159
|
elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
|
154
|
-
tokens << [:open, :string]
|
155
160
|
string_delimiter = self[2]
|
161
|
+
string_type = docstring_coming ? :docstring : :string
|
162
|
+
docstring_coming = false if docstring_coming
|
163
|
+
encoder.begin_group string_type
|
156
164
|
string_raw = false
|
157
165
|
modifiers = self[1]
|
158
166
|
unless modifiers.empty?
|
159
167
|
string_raw = !!modifiers.index(?r)
|
160
|
-
|
168
|
+
encoder.text_token modifiers, :modifier
|
161
169
|
match = string_delimiter
|
162
170
|
end
|
163
171
|
state = :string
|
164
|
-
|
172
|
+
encoder.text_token match, :delimiter
|
165
173
|
|
166
174
|
# TODO: backticks
|
167
175
|
|
@@ -177,43 +185,45 @@ module Scanners
|
|
177
185
|
state = DEF_NEW_STATE[match]
|
178
186
|
from_import_state << match.to_sym if state == :include_expected
|
179
187
|
end
|
188
|
+
encoder.text_token match, kind
|
180
189
|
|
181
|
-
elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
|
182
|
-
|
190
|
+
elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
|
191
|
+
encoder.text_token match, :decorator
|
183
192
|
|
184
|
-
elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
|
185
|
-
|
193
|
+
elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
|
194
|
+
encoder.text_token match, :hex
|
186
195
|
|
187
|
-
elsif scan(/0[bB][01]+[lL]?/)
|
188
|
-
|
196
|
+
elsif match = scan(/0[bB][01]+[lL]?/)
|
197
|
+
encoder.text_token match, :binary
|
189
198
|
|
190
199
|
elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
|
191
|
-
kind = :float
|
192
200
|
if scan(/[jJ]/)
|
193
201
|
match << matched
|
194
|
-
|
202
|
+
encoder.text_token match, :imaginary
|
203
|
+
else
|
204
|
+
encoder.text_token match, :float
|
195
205
|
end
|
196
206
|
|
197
|
-
elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
|
198
|
-
|
207
|
+
elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
|
208
|
+
encoder.text_token match, :octal
|
199
209
|
|
200
210
|
elsif match = scan(/\d+([lL])?/)
|
201
|
-
kind = :integer
|
202
211
|
if self[1] == nil && scan(/[jJ]/)
|
203
212
|
match << matched
|
204
|
-
|
213
|
+
encoder.text_token match, :imaginary
|
214
|
+
else
|
215
|
+
encoder.text_token match, :integer
|
205
216
|
end
|
206
217
|
|
207
218
|
else
|
208
|
-
getch
|
209
|
-
kind = :error
|
219
|
+
encoder.text_token getch, :error
|
210
220
|
|
211
221
|
end
|
212
222
|
|
213
223
|
elsif state == :def_expected
|
214
224
|
state = :initial
|
215
225
|
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
216
|
-
|
226
|
+
encoder.text_token match, :method
|
217
227
|
else
|
218
228
|
next
|
219
229
|
end
|
@@ -221,33 +231,34 @@ module Scanners
|
|
221
231
|
elsif state == :class_expected
|
222
232
|
state = :initial
|
223
233
|
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
224
|
-
|
234
|
+
encoder.text_token match, :class
|
225
235
|
else
|
226
236
|
next
|
227
237
|
end
|
228
238
|
|
229
239
|
elsif state == :include_expected
|
230
240
|
if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
|
231
|
-
kind = :include
|
232
241
|
if match == 'as'
|
233
|
-
|
242
|
+
encoder.text_token match, :keyword
|
234
243
|
from_import_state << :as
|
235
244
|
elsif from_import_state.first == :from && match == 'import'
|
236
|
-
|
245
|
+
encoder.text_token match, :keyword
|
237
246
|
from_import_state << :import
|
238
247
|
elsif from_import_state.last == :as
|
239
|
-
#
|
240
|
-
|
248
|
+
# encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
|
249
|
+
encoder.text_token match, :ident
|
241
250
|
from_import_state.pop
|
242
251
|
elsif IDENT_KIND[match] == :keyword
|
243
252
|
unscan
|
244
253
|
match = nil
|
245
254
|
state = :initial
|
246
255
|
next
|
256
|
+
else
|
257
|
+
encoder.text_token match, :include
|
247
258
|
end
|
248
259
|
elsif match = scan(/,/)
|
249
260
|
from_import_state.pop if from_import_state.last == :as
|
250
|
-
|
261
|
+
encoder.text_token match, :operator
|
251
262
|
else
|
252
263
|
from_import_state = []
|
253
264
|
state = :initial
|
@@ -255,28 +266,19 @@ module Scanners
|
|
255
266
|
end
|
256
267
|
|
257
268
|
else
|
258
|
-
raise_inspect 'Unknown state',
|
269
|
+
raise_inspect 'Unknown state', encoder, state
|
259
270
|
|
260
271
|
end
|
261
272
|
|
262
|
-
match ||= matched
|
263
|
-
if $CODERAY_DEBUG and not kind
|
264
|
-
raise_inspect 'Error token %p in line %d' %
|
265
|
-
[[match, kind], line], tokens, state
|
266
|
-
end
|
267
|
-
raise_inspect 'Empty token', tokens, state unless match
|
268
|
-
|
269
273
|
last_token_dot = match == '.'
|
270
274
|
|
271
|
-
tokens << [match, kind]
|
272
|
-
|
273
275
|
end
|
274
276
|
|
275
277
|
if state == :string
|
276
|
-
|
278
|
+
encoder.end_group string_type
|
277
279
|
end
|
278
280
|
|
279
|
-
|
281
|
+
encoder
|
280
282
|
end
|
281
283
|
|
282
284
|
end
|