coderay 0.9.8 → 1.0.0
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- data/{lib/README → README_INDEX.rdoc} +10 -21
- data/Rakefile +6 -6
- data/bin/coderay +193 -64
- data/lib/coderay.rb +61 -105
- data/lib/coderay/duo.rb +17 -21
- data/lib/coderay/encoder.rb +100 -112
- data/lib/coderay/encoders/_map.rb +12 -7
- data/lib/coderay/encoders/comment_filter.rb +12 -30
- data/lib/coderay/encoders/count.rb +29 -11
- data/lib/coderay/encoders/debug.rb +32 -20
- data/lib/coderay/encoders/div.rb +13 -9
- data/lib/coderay/encoders/filter.rb +34 -51
- data/lib/coderay/encoders/html.rb +155 -161
- data/lib/coderay/encoders/html/css.rb +4 -9
- data/lib/coderay/encoders/html/numbering.rb +115 -0
- data/lib/coderay/encoders/html/output.rb +22 -70
- data/lib/coderay/encoders/json.rb +59 -45
- data/lib/coderay/encoders/lines_of_code.rb +12 -57
- data/lib/coderay/encoders/null.rb +6 -14
- data/lib/coderay/encoders/page.rb +13 -9
- data/lib/coderay/encoders/span.rb +13 -9
- data/lib/coderay/encoders/statistic.rb +58 -39
- data/lib/coderay/encoders/terminal.rb +179 -0
- data/lib/coderay/encoders/text.rb +31 -17
- data/lib/coderay/encoders/token_kind_filter.rb +111 -0
- data/lib/coderay/encoders/xml.rb +19 -18
- data/lib/coderay/encoders/yaml.rb +37 -9
- data/lib/coderay/for_redcloth.rb +4 -4
- data/lib/coderay/helpers/file_type.rb +127 -246
- data/lib/coderay/helpers/gzip.rb +41 -0
- data/lib/coderay/helpers/plugin.rb +241 -306
- data/lib/coderay/helpers/word_list.rb +65 -126
- data/lib/coderay/scanner.rb +173 -156
- data/lib/coderay/scanners/_map.rb +18 -17
- data/lib/coderay/scanners/c.rb +63 -77
- data/lib/coderay/scanners/clojure.rb +217 -0
- data/lib/coderay/scanners/cpp.rb +71 -84
- data/lib/coderay/scanners/css.rb +103 -120
- data/lib/coderay/scanners/debug.rb +47 -44
- data/lib/coderay/scanners/delphi.rb +70 -76
- data/lib/coderay/scanners/diff.rb +141 -50
- data/lib/coderay/scanners/erb.rb +81 -0
- data/lib/coderay/scanners/groovy.rb +104 -113
- data/lib/coderay/scanners/haml.rb +168 -0
- data/lib/coderay/scanners/html.rb +181 -110
- data/lib/coderay/scanners/java.rb +73 -75
- data/lib/coderay/scanners/java/builtin_types.rb +2 -0
- data/lib/coderay/scanners/java_script.rb +90 -101
- data/lib/coderay/scanners/json.rb +40 -53
- data/lib/coderay/scanners/php.rb +123 -147
- data/lib/coderay/scanners/python.rb +93 -91
- data/lib/coderay/scanners/raydebug.rb +66 -0
- data/lib/coderay/scanners/ruby.rb +343 -326
- data/lib/coderay/scanners/ruby/patterns.rb +40 -106
- data/lib/coderay/scanners/ruby/string_state.rb +71 -0
- data/lib/coderay/scanners/sql.rb +80 -66
- data/lib/coderay/scanners/text.rb +26 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +74 -73
- data/lib/coderay/style.rb +10 -7
- data/lib/coderay/styles/_map.rb +3 -3
- data/lib/coderay/styles/alpha.rb +143 -0
- data/lib/coderay/token_kinds.rb +90 -0
- data/lib/coderay/tokens.rb +102 -277
- data/lib/coderay/tokens_proxy.rb +55 -0
- data/lib/coderay/version.rb +3 -0
- data/test/functional/basic.rb +200 -18
- data/test/functional/examples.rb +130 -0
- data/test/functional/for_redcloth.rb +15 -8
- data/test/functional/suite.rb +9 -6
- metadata +103 -123
- data/FOLDERS +0 -53
- data/bin/coderay_stylesheet +0 -4
- data/lib/coderay/encoders/html/numerization.rb +0 -133
- data/lib/coderay/encoders/term.rb +0 -158
- data/lib/coderay/encoders/token_class_filter.rb +0 -84
- data/lib/coderay/helpers/gzip_simple.rb +0 -123
- data/lib/coderay/scanners/nitro_xhtml.rb +0 -136
- data/lib/coderay/scanners/plaintext.rb +0 -20
- data/lib/coderay/scanners/rhtml.rb +0 -78
- data/lib/coderay/scanners/scheme.rb +0 -145
- data/lib/coderay/styles/cycnus.rb +0 -152
- data/lib/coderay/styles/murphy.rb +0 -134
- data/lib/coderay/token_classes.rb +0 -86
- data/test/functional/load_plugin_scanner.rb +0 -11
- data/test/functional/vhdl.rb +0 -126
- data/test/functional/word_list.rb +0 -79
data/lib/coderay/scanners/php.rb
CHANGED
@@ -3,14 +3,19 @@ module Scanners
|
|
3
3
|
|
4
4
|
load :html
|
5
5
|
|
6
|
+
# Scanner for PHP.
|
7
|
+
#
|
6
8
|
# Original by Stefan Walk.
|
7
9
|
class PHP < Scanner
|
8
10
|
|
9
11
|
register_for :php
|
10
12
|
file_extension 'php'
|
13
|
+
encoding 'BINARY'
|
11
14
|
|
12
15
|
KINDS_NOT_LOC = HTML::KINDS_NOT_LOC
|
13
16
|
|
17
|
+
protected
|
18
|
+
|
14
19
|
def setup
|
15
20
|
@html_scanner = CodeRay.scanner :html, :tokens => @tokens, :keep_tokens => true, :keep_state => true
|
16
21
|
end
|
@@ -20,7 +25,7 @@ module Scanners
|
|
20
25
|
@html_scanner.reset
|
21
26
|
end
|
22
27
|
|
23
|
-
module Words
|
28
|
+
module Words # :nodoc:
|
24
29
|
|
25
30
|
# according to http://www.php.net/manual/en/reserved.keywords.php
|
26
31
|
KEYWORDS = %w[
|
@@ -176,20 +181,20 @@ module Scanners
|
|
176
181
|
$argc $argv
|
177
182
|
]
|
178
183
|
|
179
|
-
IDENT_KIND =
|
180
|
-
add(KEYWORDS, :
|
181
|
-
add(TYPES, :
|
182
|
-
add(LANGUAGE_CONSTRUCTS, :
|
184
|
+
IDENT_KIND = WordList::CaseIgnoring.new(:ident).
|
185
|
+
add(KEYWORDS, :keyword).
|
186
|
+
add(TYPES, :predefined_type).
|
187
|
+
add(LANGUAGE_CONSTRUCTS, :keyword).
|
183
188
|
add(BUILTIN_FUNCTIONS, :predefined).
|
184
|
-
add(CLASSES, :
|
189
|
+
add(CLASSES, :predefined_constant).
|
185
190
|
add(EXCEPTIONS, :exception).
|
186
|
-
add(CONSTANTS, :
|
191
|
+
add(CONSTANTS, :predefined_constant)
|
187
192
|
|
188
193
|
VARIABLE_KIND = WordList.new(:local_variable).
|
189
194
|
add(PREDEFINED, :predefined)
|
190
195
|
end
|
191
196
|
|
192
|
-
module RE
|
197
|
+
module RE # :nodoc:
|
193
198
|
|
194
199
|
PHP_START = /
|
195
200
|
<script\s+[^>]*?language\s*=\s*"php"[^>]*?> |
|
@@ -224,17 +229,13 @@ module Scanners
|
|
224
229
|
|
225
230
|
end
|
226
231
|
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
self.string = string.encode Encoding::ASCII_8BIT,
|
231
|
-
:invalid => :replace, :undef => :replace, :replace => '?'
|
232
|
-
end
|
233
|
-
end
|
232
|
+
protected
|
233
|
+
|
234
|
+
def scan_tokens encoder, options
|
234
235
|
|
235
236
|
if check(RE::PHP_START) || # starts with <?
|
236
|
-
(match?(/\s*<\S/) &&
|
237
|
-
|
237
|
+
(match?(/\s*<\S/) && check(/.{1,1000}#{RE::PHP_START}/om)) || # starts with tag and contains <?
|
238
|
+
check(/.{0,1000}#{RE::HTML_INDICATOR}/om) ||
|
238
239
|
check(/.{1,100}#{RE::PHP_START}/om) # PHP start after max 100 chars
|
239
240
|
# is HTML with embedded PHP, so start with HTML
|
240
241
|
states = [:initial]
|
@@ -252,29 +253,24 @@ module Scanners
|
|
252
253
|
|
253
254
|
until eos?
|
254
255
|
|
255
|
-
match = nil
|
256
|
-
kind = nil
|
257
|
-
|
258
256
|
case states.last
|
259
257
|
|
260
258
|
when :initial # HTML
|
261
|
-
if scan
|
262
|
-
|
259
|
+
if match = scan(RE::PHP_START)
|
260
|
+
encoder.text_token match, :inline_delimiter
|
263
261
|
label_expected = true
|
264
262
|
states << :php
|
265
263
|
else
|
266
264
|
match = scan_until(/(?=#{RE::PHP_START})/o) || scan_rest
|
267
265
|
@html_scanner.tokenize match unless match.empty?
|
268
|
-
next
|
269
266
|
end
|
270
267
|
|
271
268
|
when :php
|
272
269
|
if match = scan(/\s+/)
|
273
|
-
|
274
|
-
next
|
270
|
+
encoder.text_token match, :space
|
275
271
|
|
276
|
-
elsif scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
|
277
|
-
|
272
|
+
elsif match = scan(%r! (?m: \/\* (?: .*? \*\/ | .* ) ) | (?://|\#) .*? (?=#{RE::PHP_END}|$) !xo)
|
273
|
+
encoder.text_token match, :comment
|
278
274
|
|
279
275
|
elsif match = scan(RE::IDENTIFIER)
|
280
276
|
kind = Words::IDENT_KIND[match]
|
@@ -285,7 +281,7 @@ module Scanners
|
|
285
281
|
label_expected = false
|
286
282
|
if kind == :ident && match =~ /^[A-Z]/
|
287
283
|
kind = :constant
|
288
|
-
elsif kind == :
|
284
|
+
elsif kind == :keyword
|
289
285
|
case match
|
290
286
|
when 'class'
|
291
287
|
states << :class_expected
|
@@ -299,77 +295,68 @@ module Scanners
|
|
299
295
|
next
|
300
296
|
end
|
301
297
|
end
|
298
|
+
encoder.text_token match, kind
|
302
299
|
|
303
|
-
elsif scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
|
300
|
+
elsif match = scan(/(?:\d+\.\d*|\d*\.\d+)(?:e[-+]?\d+)?|\d+e[-+]?\d+/i)
|
304
301
|
label_expected = false
|
305
|
-
|
302
|
+
encoder.text_token match, :float
|
306
303
|
|
307
|
-
elsif scan(/0x[0-9a-fA-F]+/)
|
304
|
+
elsif match = scan(/0x[0-9a-fA-F]+/)
|
308
305
|
label_expected = false
|
309
|
-
|
306
|
+
encoder.text_token match, :hex
|
310
307
|
|
311
|
-
elsif scan(/\d+/)
|
308
|
+
elsif match = scan(/\d+/)
|
312
309
|
label_expected = false
|
313
|
-
|
314
|
-
|
315
|
-
elsif scan(/'/)
|
316
|
-
tokens << [:open, :string]
|
317
|
-
if modifier
|
318
|
-
tokens << [modifier, :modifier]
|
319
|
-
modifier = nil
|
320
|
-
end
|
321
|
-
kind = :delimiter
|
322
|
-
states.push :sqstring
|
310
|
+
encoder.text_token match, :integer
|
323
311
|
|
324
|
-
elsif match = scan(/["`]/)
|
325
|
-
|
312
|
+
elsif match = scan(/['"`]/)
|
313
|
+
encoder.begin_group :string
|
326
314
|
if modifier
|
327
|
-
|
315
|
+
encoder.text_token modifier, :modifier
|
328
316
|
modifier = nil
|
329
317
|
end
|
330
318
|
delimiter = match
|
331
|
-
|
332
|
-
states.push :dqstring
|
319
|
+
encoder.text_token match, :delimiter
|
320
|
+
states.push match == "'" ? :sqstring : :dqstring
|
333
321
|
|
334
322
|
elsif match = scan(RE::VARIABLE)
|
335
323
|
label_expected = false
|
336
|
-
|
324
|
+
encoder.text_token match, Words::VARIABLE_KIND[match]
|
337
325
|
|
338
|
-
elsif scan(/\{/)
|
339
|
-
|
326
|
+
elsif match = scan(/\{/)
|
327
|
+
encoder.text_token match, :operator
|
340
328
|
label_expected = true
|
341
329
|
states.push :php
|
342
330
|
|
343
|
-
elsif scan(/\}/)
|
331
|
+
elsif match = scan(/\}/)
|
344
332
|
if states.size == 1
|
345
|
-
|
333
|
+
encoder.text_token match, :error
|
346
334
|
else
|
347
335
|
states.pop
|
348
336
|
if states.last.is_a?(::Array)
|
349
337
|
delimiter = states.last[1]
|
350
338
|
states[-1] = states.last[0]
|
351
|
-
|
352
|
-
|
353
|
-
next
|
339
|
+
encoder.text_token match, :delimiter
|
340
|
+
encoder.end_group :inline
|
354
341
|
else
|
355
|
-
|
342
|
+
encoder.text_token match, :operator
|
356
343
|
label_expected = true
|
357
344
|
end
|
358
345
|
end
|
359
346
|
|
360
|
-
elsif scan(/@/)
|
347
|
+
elsif match = scan(/@/)
|
361
348
|
label_expected = false
|
362
|
-
|
349
|
+
encoder.text_token match, :exception
|
363
350
|
|
364
|
-
elsif scan
|
365
|
-
|
351
|
+
elsif match = scan(RE::PHP_END)
|
352
|
+
encoder.text_token match, :inline_delimiter
|
366
353
|
states = [:initial]
|
367
354
|
|
368
355
|
elsif match = scan(/<<<(?:(#{RE::IDENTIFIER})|"(#{RE::IDENTIFIER})"|'(#{RE::IDENTIFIER})')/o)
|
369
|
-
|
370
|
-
warn 'heredoc in heredoc?' if heredoc_delimiter
|
356
|
+
encoder.begin_group :string
|
357
|
+
# warn 'heredoc in heredoc?' if heredoc_delimiter
|
371
358
|
heredoc_delimiter = Regexp.escape(self[1] || self[2] || self[3])
|
372
|
-
|
359
|
+
encoder.text_token match, :delimiter
|
373
360
|
states.push self[3] ? :sqstring : :dqstring
|
374
361
|
heredoc_delimiter = /#{heredoc_delimiter}(?=;?$)/
|
375
362
|
|
@@ -379,152 +366,141 @@ module Scanners
|
|
379
366
|
label_expected = true if match == ':'
|
380
367
|
case_expected = false
|
381
368
|
end
|
382
|
-
|
369
|
+
encoder.text_token match, :operator
|
383
370
|
|
384
371
|
else
|
385
|
-
getch
|
386
|
-
kind = :error
|
372
|
+
encoder.text_token getch, :error
|
387
373
|
|
388
374
|
end
|
389
375
|
|
390
376
|
when :sqstring
|
391
|
-
if scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
|
392
|
-
|
393
|
-
elsif !heredoc_delimiter && scan(/'/)
|
394
|
-
|
395
|
-
|
377
|
+
if match = scan(heredoc_delimiter ? /[^\\\n]+/ : /[^'\\]+/)
|
378
|
+
encoder.text_token match, :content
|
379
|
+
elsif !heredoc_delimiter && match = scan(/'/)
|
380
|
+
encoder.text_token match, :delimiter
|
381
|
+
encoder.end_group :string
|
396
382
|
delimiter = nil
|
397
383
|
label_expected = false
|
398
384
|
states.pop
|
399
|
-
next
|
400
385
|
elsif heredoc_delimiter && match = scan(/\n/)
|
401
|
-
kind = :content
|
402
386
|
if scan heredoc_delimiter
|
403
|
-
|
404
|
-
|
405
|
-
|
387
|
+
encoder.text_token "\n", :content
|
388
|
+
encoder.text_token matched, :delimiter
|
389
|
+
encoder.end_group :string
|
406
390
|
heredoc_delimiter = nil
|
407
391
|
label_expected = false
|
408
392
|
states.pop
|
409
|
-
|
393
|
+
else
|
394
|
+
encoder.text_token match, :content
|
410
395
|
end
|
411
|
-
elsif scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
|
412
|
-
|
413
|
-
elsif scan(/\\./m)
|
414
|
-
|
415
|
-
elsif scan(/\\/)
|
416
|
-
|
396
|
+
elsif match = scan(heredoc_delimiter ? /\\\\/ : /\\[\\'\n]/)
|
397
|
+
encoder.text_token match, :char
|
398
|
+
elsif match = scan(/\\./m)
|
399
|
+
encoder.text_token match, :content
|
400
|
+
elsif match = scan(/\\/)
|
401
|
+
encoder.text_token match, :error
|
402
|
+
else
|
403
|
+
states.pop
|
417
404
|
end
|
418
405
|
|
419
406
|
when :dqstring
|
420
|
-
if scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
|
421
|
-
|
422
|
-
elsif !heredoc_delimiter && scan(delimiter == '"' ? /"/ : /`/)
|
423
|
-
|
424
|
-
|
407
|
+
if match = scan(heredoc_delimiter ? /[^${\\\n]+/ : (delimiter == '"' ? /[^"${\\]+/ : /[^`${\\]+/))
|
408
|
+
encoder.text_token match, :content
|
409
|
+
elsif !heredoc_delimiter && match = scan(delimiter == '"' ? /"/ : /`/)
|
410
|
+
encoder.text_token match, :delimiter
|
411
|
+
encoder.end_group :string
|
425
412
|
delimiter = nil
|
426
413
|
label_expected = false
|
427
414
|
states.pop
|
428
|
-
next
|
429
415
|
elsif heredoc_delimiter && match = scan(/\n/)
|
430
|
-
kind = :content
|
431
416
|
if scan heredoc_delimiter
|
432
|
-
|
433
|
-
|
434
|
-
|
417
|
+
encoder.text_token "\n", :content
|
418
|
+
encoder.text_token matched, :delimiter
|
419
|
+
encoder.end_group :string
|
435
420
|
heredoc_delimiter = nil
|
436
421
|
label_expected = false
|
437
422
|
states.pop
|
438
|
-
|
423
|
+
else
|
424
|
+
encoder.text_token match, :content
|
439
425
|
end
|
440
|
-
elsif scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
|
441
|
-
|
442
|
-
elsif scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
|
443
|
-
|
444
|
-
elsif scan(/\\./m)
|
445
|
-
|
446
|
-
elsif scan(/\\/)
|
447
|
-
|
426
|
+
elsif match = scan(/\\(?:x[0-9A-Fa-f]{1,2}|[0-7]{1,3})/)
|
427
|
+
encoder.text_token match, :char
|
428
|
+
elsif match = scan(heredoc_delimiter ? /\\[nrtvf\\$]/ : (delimiter == '"' ? /\\[nrtvf\\$"]/ : /\\[nrtvf\\$`]/))
|
429
|
+
encoder.text_token match, :char
|
430
|
+
elsif match = scan(/\\./m)
|
431
|
+
encoder.text_token match, :content
|
432
|
+
elsif match = scan(/\\/)
|
433
|
+
encoder.text_token match, :error
|
448
434
|
elsif match = scan(/#{RE::VARIABLE}/o)
|
449
|
-
kind = :local_variable
|
450
435
|
if check(/\[#{RE::IDENTIFIER}\]/o)
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
next
|
436
|
+
encoder.begin_group :inline
|
437
|
+
encoder.text_token match, :local_variable
|
438
|
+
encoder.text_token scan(/\[/), :operator
|
439
|
+
encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
|
440
|
+
encoder.text_token scan(/\]/), :operator
|
441
|
+
encoder.end_group :inline
|
458
442
|
elsif check(/\[/)
|
459
443
|
match << scan(/\[['"]?#{RE::IDENTIFIER}?['"]?\]?/o)
|
460
|
-
|
444
|
+
encoder.text_token match, :error
|
461
445
|
elsif check(/->#{RE::IDENTIFIER}/o)
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
next
|
446
|
+
encoder.begin_group :inline
|
447
|
+
encoder.text_token match, :local_variable
|
448
|
+
encoder.text_token scan(/->/), :operator
|
449
|
+
encoder.text_token scan(/#{RE::IDENTIFIER}/o), :ident
|
450
|
+
encoder.end_group :inline
|
468
451
|
elsif check(/->/)
|
469
452
|
match << scan(/->/)
|
470
|
-
|
453
|
+
encoder.text_token match, :error
|
454
|
+
else
|
455
|
+
encoder.text_token match, :local_variable
|
471
456
|
end
|
472
457
|
elsif match = scan(/\{/)
|
473
458
|
if check(/\$/)
|
474
|
-
|
459
|
+
encoder.begin_group :inline
|
475
460
|
states[-1] = [states.last, delimiter]
|
476
461
|
delimiter = nil
|
477
462
|
states.push :php
|
478
|
-
|
463
|
+
encoder.text_token match, :delimiter
|
479
464
|
else
|
480
|
-
|
465
|
+
encoder.text_token match, :content
|
481
466
|
end
|
482
|
-
elsif scan(/\$\{#{RE::IDENTIFIER}\}/o)
|
483
|
-
|
484
|
-
elsif scan(/\$/)
|
485
|
-
|
467
|
+
elsif match = scan(/\$\{#{RE::IDENTIFIER}\}/o)
|
468
|
+
encoder.text_token match, :local_variable
|
469
|
+
elsif match = scan(/\$/)
|
470
|
+
encoder.text_token match, :content
|
471
|
+
else
|
472
|
+
states.pop
|
486
473
|
end
|
487
474
|
|
488
475
|
when :class_expected
|
489
|
-
if scan(/\s+/)
|
490
|
-
|
476
|
+
if match = scan(/\s+/)
|
477
|
+
encoder.text_token match, :space
|
491
478
|
elsif match = scan(/#{RE::IDENTIFIER}/o)
|
492
|
-
|
479
|
+
encoder.text_token match, :class
|
493
480
|
states.pop
|
494
481
|
else
|
495
482
|
states.pop
|
496
|
-
next
|
497
483
|
end
|
498
484
|
|
499
485
|
when :function_expected
|
500
|
-
if scan(/\s+/)
|
501
|
-
|
502
|
-
elsif scan(/&/)
|
503
|
-
|
486
|
+
if match = scan(/\s+/)
|
487
|
+
encoder.text_token match, :space
|
488
|
+
elsif match = scan(/&/)
|
489
|
+
encoder.text_token match, :operator
|
504
490
|
elsif match = scan(/#{RE::IDENTIFIER}/o)
|
505
|
-
|
491
|
+
encoder.text_token match, :function
|
506
492
|
states.pop
|
507
493
|
else
|
508
494
|
states.pop
|
509
|
-
next
|
510
495
|
end
|
511
496
|
|
512
497
|
else
|
513
|
-
raise_inspect 'Unknown state!',
|
498
|
+
raise_inspect 'Unknown state!', encoder, states
|
514
499
|
end
|
515
500
|
|
516
|
-
match ||= matched
|
517
|
-
if $CODERAY_DEBUG and not kind
|
518
|
-
raise_inspect 'Error token %p in line %d' %
|
519
|
-
[[match, kind], line], tokens, states
|
520
|
-
end
|
521
|
-
raise_inspect 'Empty token', tokens, states unless match
|
522
|
-
|
523
|
-
tokens << [match, kind]
|
524
|
-
|
525
501
|
end
|
526
502
|
|
527
|
-
|
503
|
+
encoder
|
528
504
|
end
|
529
505
|
|
530
506
|
end
|
data/lib/coderay/scanners/python.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module CodeRay
|
2
2
|
module Scanners
|
3
3
|
|
4
|
-
#
|
4
|
+
# Scanner for Python. Supports Python 3.
|
5
|
+
#
|
6
|
+
# Based on pygments' PythonLexer, see
|
5
7
|
# http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
|
6
8
|
class Python < Scanner
|
7
9
|
|
8
|
-
include Streamable
|
9
|
-
|
10
10
|
register_for :python
|
11
11
|
file_extension 'py'
|
12
12
|
|
@@ -16,11 +16,11 @@ module Scanners
|
|
16
16
|
'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
|
17
17
|
'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
|
18
18
|
'nonlocal', # new in Python 3
|
19
|
-
]
|
19
|
+
] # :nodoc:
|
20
20
|
|
21
21
|
OLD_KEYWORDS = [
|
22
22
|
'exec', 'print', # gone in Python 3
|
23
|
-
]
|
23
|
+
] # :nodoc:
|
24
24
|
|
25
25
|
PREDEFINED_METHODS_AND_TYPES = %w[
|
26
26
|
__import__ abs all any apply basestring bin bool buffer
|
@@ -32,7 +32,7 @@ module Scanners
|
|
32
32
|
raw_input reduce reload repr reversed round set setattr slice
|
33
33
|
sorted staticmethod str sum super tuple type unichr unicode
|
34
34
|
vars xrange zip
|
35
|
-
]
|
35
|
+
] # :nodoc:
|
36
36
|
|
37
37
|
PREDEFINED_EXCEPTIONS = %w[
|
38
38
|
ArithmeticError AssertionError AttributeError
|
@@ -47,23 +47,23 @@ module Scanners
|
|
47
47
|
TypeError UnboundLocalError UnicodeDecodeError
|
48
48
|
UnicodeEncodeError UnicodeError UnicodeTranslateError
|
49
49
|
UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
|
50
|
-
]
|
50
|
+
] # :nodoc:
|
51
51
|
|
52
52
|
PREDEFINED_VARIABLES_AND_CONSTANTS = [
|
53
|
-
'False', 'True', 'None',
|
53
|
+
'False', 'True', 'None', # "keywords" since Python 3
|
54
54
|
'self', 'Ellipsis', 'NotImplemented',
|
55
|
-
]
|
55
|
+
] # :nodoc:
|
56
56
|
|
57
57
|
IDENT_KIND = WordList.new(:ident).
|
58
58
|
add(KEYWORDS, :keyword).
|
59
59
|
add(OLD_KEYWORDS, :old_keyword).
|
60
60
|
add(PREDEFINED_METHODS_AND_TYPES, :predefined).
|
61
|
-
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :
|
62
|
-
add(PREDEFINED_EXCEPTIONS, :exception)
|
61
|
+
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :predefined_constant).
|
62
|
+
add(PREDEFINED_EXCEPTIONS, :exception) # :nodoc:
|
63
63
|
|
64
|
-
NAME = / [^\W\d] \w* /x
|
65
|
-
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
66
|
-
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x
|
64
|
+
NAME = / [^\W\d] \w* /x # :nodoc:
|
65
|
+
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x # :nodoc:
|
66
|
+
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x # :nodoc:
|
67
67
|
|
68
68
|
OPERATOR = /
|
69
69
|
\.\.\. | # ellipsis
|
@@ -73,95 +73,103 @@ module Scanners
|
|
73
73
|
[-+*\/%&|^]=? | # ordinary math and binary logic
|
74
74
|
[~`] | # binary complement and inspection
|
75
75
|
<<=? | >>=? | [<>=]=? | != # comparison and assignment
|
76
|
-
/x
|
76
|
+
/x # :nodoc:
|
77
77
|
|
78
|
-
STRING_DELIMITER_REGEXP = Hash.new
|
79
|
-
h[delimiter] = Regexp.union delimiter
|
80
|
-
|
78
|
+
STRING_DELIMITER_REGEXP = Hash.new { |h, delimiter|
|
79
|
+
h[delimiter] = Regexp.union delimiter # :nodoc:
|
80
|
+
}
|
81
81
|
|
82
|
-
STRING_CONTENT_REGEXP = Hash.new
|
83
|
-
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
|
84
|
-
|
82
|
+
STRING_CONTENT_REGEXP = Hash.new { |h, delimiter|
|
83
|
+
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x # :nodoc:
|
84
|
+
}
|
85
85
|
|
86
86
|
DEF_NEW_STATE = WordList.new(:initial).
|
87
87
|
add(%w(def), :def_expected).
|
88
88
|
add(%w(import from), :include_expected).
|
89
|
-
add(%w(class), :class_expected)
|
89
|
+
add(%w(class), :class_expected) # :nodoc:
|
90
90
|
|
91
91
|
DESCRIPTOR = /
|
92
92
|
#{NAME}
|
93
93
|
(?: \. #{NAME} )*
|
94
94
|
| \*
|
95
|
-
/x
|
95
|
+
/x # :nodoc:
|
96
|
+
|
97
|
+
DOCSTRING_COMING = /
|
98
|
+
[ \t]* u?r? ("""|''')
|
99
|
+
/x # :nodoc:
|
96
100
|
|
97
|
-
|
101
|
+
protected
|
102
|
+
|
103
|
+
def scan_tokens encoder, options
|
98
104
|
|
99
105
|
state = :initial
|
100
106
|
string_delimiter = nil
|
101
107
|
string_raw = false
|
108
|
+
string_type = nil
|
109
|
+
docstring_coming = match?(/#{DOCSTRING_COMING}/o)
|
102
110
|
last_token_dot = false
|
103
111
|
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
104
112
|
from_import_state = []
|
105
113
|
|
106
114
|
until eos?
|
107
115
|
|
108
|
-
kind = nil
|
109
|
-
match = nil
|
110
|
-
|
111
116
|
if state == :string
|
112
|
-
if scan(STRING_DELIMITER_REGEXP[string_delimiter])
|
113
|
-
|
114
|
-
|
117
|
+
if match = scan(STRING_DELIMITER_REGEXP[string_delimiter])
|
118
|
+
encoder.text_token match, :delimiter
|
119
|
+
encoder.end_group string_type
|
120
|
+
string_type = nil
|
115
121
|
state = :initial
|
116
122
|
next
|
117
|
-
elsif string_delimiter.size == 3 && scan(/\n/)
|
118
|
-
|
119
|
-
elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
|
120
|
-
|
121
|
-
elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
|
122
|
-
|
123
|
-
elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
|
124
|
-
|
125
|
-
elsif scan(/ \\ . /x)
|
126
|
-
|
127
|
-
elsif scan(/ \\ | $ /x)
|
128
|
-
|
129
|
-
|
123
|
+
elsif string_delimiter.size == 3 && match = scan(/\n/)
|
124
|
+
encoder.text_token match, :content
|
125
|
+
elsif match = scan(STRING_CONTENT_REGEXP[string_delimiter])
|
126
|
+
encoder.text_token match, :content
|
127
|
+
elsif !string_raw && match = scan(/ \\ #{ESCAPE} /ox)
|
128
|
+
encoder.text_token match, :char
|
129
|
+
elsif match = scan(/ \\ #{UNICODE_ESCAPE} /ox)
|
130
|
+
encoder.text_token match, :char
|
131
|
+
elsif match = scan(/ \\ . /x)
|
132
|
+
encoder.text_token match, :content
|
133
|
+
elsif match = scan(/ \\ | $ /x)
|
134
|
+
encoder.end_group string_type
|
135
|
+
string_type = nil
|
136
|
+
encoder.text_token match, :error
|
130
137
|
state = :initial
|
131
138
|
else
|
132
|
-
raise_inspect "else case \" reached; %p not handled." % peek(1),
|
139
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), encoder, state
|
133
140
|
end
|
134
141
|
|
135
|
-
elsif match = scan(/ [ \t]+ |
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
state = :initial if state == :include_expected
|
142
|
+
elsif match = scan(/ [ \t]+ | \\?\n /x)
|
143
|
+
encoder.text_token match, :space
|
144
|
+
if match == "\n"
|
145
|
+
state = :initial if state == :include_expected
|
146
|
+
docstring_coming = true if match?(/#{DOCSTRING_COMING}/o)
|
147
|
+
end
|
142
148
|
next
|
143
149
|
|
144
150
|
elsif match = scan(/ \# [^\n]* /mx)
|
145
|
-
|
151
|
+
encoder.text_token match, :comment
|
146
152
|
next
|
147
153
|
|
148
154
|
elsif state == :initial
|
149
155
|
|
150
|
-
if scan(/#{OPERATOR}/o)
|
151
|
-
|
156
|
+
if match = scan(/#{OPERATOR}/o)
|
157
|
+
encoder.text_token match, :operator
|
152
158
|
|
153
159
|
elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
|
154
|
-
tokens << [:open, :string]
|
155
160
|
string_delimiter = self[2]
|
161
|
+
string_type = docstring_coming ? :docstring : :string
|
162
|
+
docstring_coming = false if docstring_coming
|
163
|
+
encoder.begin_group string_type
|
156
164
|
string_raw = false
|
157
165
|
modifiers = self[1]
|
158
166
|
unless modifiers.empty?
|
159
167
|
string_raw = !!modifiers.index(?r)
|
160
|
-
|
168
|
+
encoder.text_token modifiers, :modifier
|
161
169
|
match = string_delimiter
|
162
170
|
end
|
163
171
|
state = :string
|
164
|
-
|
172
|
+
encoder.text_token match, :delimiter
|
165
173
|
|
166
174
|
# TODO: backticks
|
167
175
|
|
@@ -177,43 +185,45 @@ module Scanners
|
|
177
185
|
state = DEF_NEW_STATE[match]
|
178
186
|
from_import_state << match.to_sym if state == :include_expected
|
179
187
|
end
|
188
|
+
encoder.text_token match, kind
|
180
189
|
|
181
|
-
elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
|
182
|
-
|
190
|
+
elsif match = scan(/@[a-zA-Z0-9_.]+[lL]?/)
|
191
|
+
encoder.text_token match, :decorator
|
183
192
|
|
184
|
-
elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
|
185
|
-
|
193
|
+
elsif match = scan(/0[xX][0-9A-Fa-f]+[lL]?/)
|
194
|
+
encoder.text_token match, :hex
|
186
195
|
|
187
|
-
elsif scan(/0[bB][01]+[lL]?/)
|
188
|
-
|
196
|
+
elsif match = scan(/0[bB][01]+[lL]?/)
|
197
|
+
encoder.text_token match, :binary
|
189
198
|
|
190
199
|
elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
|
191
|
-
kind = :float
|
192
200
|
if scan(/[jJ]/)
|
193
201
|
match << matched
|
194
|
-
|
202
|
+
encoder.text_token match, :imaginary
|
203
|
+
else
|
204
|
+
encoder.text_token match, :float
|
195
205
|
end
|
196
206
|
|
197
|
-
elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
|
198
|
-
|
207
|
+
elsif match = scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
|
208
|
+
encoder.text_token match, :octal
|
199
209
|
|
200
210
|
elsif match = scan(/\d+([lL])?/)
|
201
|
-
kind = :integer
|
202
211
|
if self[1] == nil && scan(/[jJ]/)
|
203
212
|
match << matched
|
204
|
-
|
213
|
+
encoder.text_token match, :imaginary
|
214
|
+
else
|
215
|
+
encoder.text_token match, :integer
|
205
216
|
end
|
206
217
|
|
207
218
|
else
|
208
|
-
getch
|
209
|
-
kind = :error
|
219
|
+
encoder.text_token getch, :error
|
210
220
|
|
211
221
|
end
|
212
222
|
|
213
223
|
elsif state == :def_expected
|
214
224
|
state = :initial
|
215
225
|
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
216
|
-
|
226
|
+
encoder.text_token match, :method
|
217
227
|
else
|
218
228
|
next
|
219
229
|
end
|
@@ -221,33 +231,34 @@ module Scanners
|
|
221
231
|
elsif state == :class_expected
|
222
232
|
state = :initial
|
223
233
|
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
224
|
-
|
234
|
+
encoder.text_token match, :class
|
225
235
|
else
|
226
236
|
next
|
227
237
|
end
|
228
238
|
|
229
239
|
elsif state == :include_expected
|
230
240
|
if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
|
231
|
-
kind = :include
|
232
241
|
if match == 'as'
|
233
|
-
|
242
|
+
encoder.text_token match, :keyword
|
234
243
|
from_import_state << :as
|
235
244
|
elsif from_import_state.first == :from && match == 'import'
|
236
|
-
|
245
|
+
encoder.text_token match, :keyword
|
237
246
|
from_import_state << :import
|
238
247
|
elsif from_import_state.last == :as
|
239
|
-
#
|
240
|
-
|
248
|
+
# encoder.text_token match, match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
|
249
|
+
encoder.text_token match, :ident
|
241
250
|
from_import_state.pop
|
242
251
|
elsif IDENT_KIND[match] == :keyword
|
243
252
|
unscan
|
244
253
|
match = nil
|
245
254
|
state = :initial
|
246
255
|
next
|
256
|
+
else
|
257
|
+
encoder.text_token match, :include
|
247
258
|
end
|
248
259
|
elsif match = scan(/,/)
|
249
260
|
from_import_state.pop if from_import_state.last == :as
|
250
|
-
|
261
|
+
encoder.text_token match, :operator
|
251
262
|
else
|
252
263
|
from_import_state = []
|
253
264
|
state = :initial
|
@@ -255,28 +266,19 @@ module Scanners
|
|
255
266
|
end
|
256
267
|
|
257
268
|
else
|
258
|
-
raise_inspect 'Unknown state',
|
269
|
+
raise_inspect 'Unknown state', encoder, state
|
259
270
|
|
260
271
|
end
|
261
272
|
|
262
|
-
match ||= matched
|
263
|
-
if $CODERAY_DEBUG and not kind
|
264
|
-
raise_inspect 'Error token %p in line %d' %
|
265
|
-
[[match, kind], line], tokens, state
|
266
|
-
end
|
267
|
-
raise_inspect 'Empty token', tokens, state unless match
|
268
|
-
|
269
273
|
last_token_dot = match == '.'
|
270
274
|
|
271
|
-
tokens << [match, kind]
|
272
|
-
|
273
275
|
end
|
274
276
|
|
275
277
|
if state == :string
|
276
|
-
|
278
|
+
encoder.end_group string_type
|
277
279
|
end
|
278
280
|
|
279
|
-
|
281
|
+
encoder
|
280
282
|
end
|
281
283
|
|
282
284
|
end
|