coderay 0.8.357 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/README +4 -3
- data/lib/coderay.rb +2 -1
- data/lib/coderay/encoder.rb +41 -15
- data/lib/coderay/encoders/_map.rb +3 -1
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/div.rb +2 -3
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +20 -3
- data/lib/coderay/encoders/html/css.rb +1 -1
- data/lib/coderay/encoders/html/numerization.rb +11 -2
- data/lib/coderay/encoders/html/output.rb +10 -1
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/page.rb +1 -2
- data/lib/coderay/encoders/span.rb +2 -3
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +4 -4
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +1 -0
- data/lib/coderay/for_redcloth.rb +9 -4
- data/lib/coderay/helpers/file_type.rb +54 -15
- data/lib/coderay/helpers/plugin.rb +21 -3
- data/lib/coderay/helpers/word_list.rb +19 -4
- data/lib/coderay/scanner.rb +33 -2
- data/lib/coderay/scanners/_map.rb +10 -4
- data/lib/coderay/scanners/c.rb +61 -23
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +9 -1
- data/lib/coderay/scanners/debug.rb +1 -0
- data/lib/coderay/scanners/delphi.rb +2 -2
- data/lib/coderay/scanners/diff.rb +1 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +9 -2
- data/lib/coderay/scanners/java.rb +18 -14
- data/lib/coderay/scanners/java_script.rb +42 -13
- data/lib/coderay/scanners/json.rb +7 -1
- data/lib/coderay/scanners/nitro_xhtml.rb +4 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +4 -1
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +3 -0
- data/lib/coderay/scanners/ruby.rb +29 -11
- data/lib/coderay/scanners/ruby/patterns.rb +26 -20
- data/lib/coderay/scanners/scheme.rb +3 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +4 -1
- data/lib/coderay/styles/cycnus.rb +11 -7
- data/lib/coderay/token_classes.rb +4 -1
- data/lib/coderay/tokens.rb +50 -46
- metadata +14 -4
- data/lib/coderay/encoders/tokens.rb +0 -44
@@ -4,9 +4,12 @@ module Scanners
|
|
4
4
|
class Plaintext < Scanner
|
5
5
|
|
6
6
|
register_for :plaintext, :plain
|
7
|
+
title 'Plain text'
|
7
8
|
|
8
9
|
include Streamable
|
9
|
-
|
10
|
+
|
11
|
+
KINDS_NOT_LOC = [:plain]
|
12
|
+
|
10
13
|
def scan_tokens tokens, options
|
11
14
|
text = (scan_until(/\z/) || '')
|
12
15
|
tokens << [text, :plain]
|
@@ -0,0 +1,285 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# Bases on pygments' PythonLexer, see
|
5
|
+
# http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
|
6
|
+
class Python < Scanner
|
7
|
+
|
8
|
+
include Streamable
|
9
|
+
|
10
|
+
register_for :python
|
11
|
+
file_extension 'py'
|
12
|
+
|
13
|
+
KEYWORDS = [
|
14
|
+
'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
|
15
|
+
'del', 'elif', 'else', 'except', 'finally', 'for',
|
16
|
+
'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
|
17
|
+
'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
|
18
|
+
'nonlocal', # new in Python 3
|
19
|
+
]
|
20
|
+
|
21
|
+
OLD_KEYWORDS = [
|
22
|
+
'exec', 'print', # gone in Python 3
|
23
|
+
]
|
24
|
+
|
25
|
+
PREDEFINED_METHODS_AND_TYPES = %w[
|
26
|
+
__import__ abs all any apply basestring bin bool buffer
|
27
|
+
bytearray bytes callable chr classmethod cmp coerce compile
|
28
|
+
complex delattr dict dir divmod enumerate eval execfile exit
|
29
|
+
file filter float frozenset getattr globals hasattr hash hex id
|
30
|
+
input int intern isinstance issubclass iter len list locals
|
31
|
+
long map max min next object oct open ord pow property range
|
32
|
+
raw_input reduce reload repr reversed round set setattr slice
|
33
|
+
sorted staticmethod str sum super tuple type unichr unicode
|
34
|
+
vars xrange zip
|
35
|
+
]
|
36
|
+
|
37
|
+
PREDEFINED_EXCEPTIONS = %w[
|
38
|
+
ArithmeticError AssertionError AttributeError
|
39
|
+
BaseException DeprecationWarning EOFError EnvironmentError
|
40
|
+
Exception FloatingPointError FutureWarning GeneratorExit IOError
|
41
|
+
ImportError ImportWarning IndentationError IndexError KeyError
|
42
|
+
KeyboardInterrupt LookupError MemoryError NameError
|
43
|
+
NotImplemented NotImplementedError OSError OverflowError
|
44
|
+
OverflowWarning PendingDeprecationWarning ReferenceError
|
45
|
+
RuntimeError RuntimeWarning StandardError StopIteration
|
46
|
+
SyntaxError SyntaxWarning SystemError SystemExit TabError
|
47
|
+
TypeError UnboundLocalError UnicodeDecodeError
|
48
|
+
UnicodeEncodeError UnicodeError UnicodeTranslateError
|
49
|
+
UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
|
50
|
+
]
|
51
|
+
|
52
|
+
PREDEFINED_VARIABLES_AND_CONSTANTS = [
|
53
|
+
'False', 'True', 'None', # "keywords" since Python 3
|
54
|
+
'self', 'Ellipsis', 'NotImplemented',
|
55
|
+
]
|
56
|
+
|
57
|
+
IDENT_KIND = WordList.new(:ident).
|
58
|
+
add(KEYWORDS, :keyword).
|
59
|
+
add(OLD_KEYWORDS, :old_keyword).
|
60
|
+
add(PREDEFINED_METHODS_AND_TYPES, :predefined).
|
61
|
+
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
|
62
|
+
add(PREDEFINED_EXCEPTIONS, :exception)
|
63
|
+
|
64
|
+
NAME = / [^\W\d] \w* /x
|
65
|
+
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
66
|
+
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x
|
67
|
+
|
68
|
+
OPERATOR = /
|
69
|
+
\.\.\. | # ellipsis
|
70
|
+
\.(?!\d) | # dot but not decimal point
|
71
|
+
[,;:()\[\]{}] | # simple delimiters
|
72
|
+
\/\/=? | \*\*=? | # special math
|
73
|
+
[-+*\/%&|^]=? | # ordinary math and binary logic
|
74
|
+
[~`] | # binary complement and inspection
|
75
|
+
<<=? | >>=? | [<>=]=? | != # comparison and assignment
|
76
|
+
/x
|
77
|
+
|
78
|
+
STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
|
79
|
+
h[delimiter] = Regexp.union delimiter
|
80
|
+
end
|
81
|
+
|
82
|
+
STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
|
83
|
+
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
|
84
|
+
end
|
85
|
+
|
86
|
+
DEF_NEW_STATE = WordList.new(:initial).
|
87
|
+
add(%w(def), :def_expected).
|
88
|
+
add(%w(import from), :include_expected).
|
89
|
+
add(%w(class), :class_expected)
|
90
|
+
|
91
|
+
DESCRIPTOR = /
|
92
|
+
#{NAME}
|
93
|
+
(?: \. #{NAME} )*
|
94
|
+
| \*
|
95
|
+
/x
|
96
|
+
|
97
|
+
def scan_tokens tokens, options
|
98
|
+
|
99
|
+
state = :initial
|
100
|
+
string_delimiter = nil
|
101
|
+
string_raw = false
|
102
|
+
import_clause = class_name_follows = last_token_dot = false
|
103
|
+
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
104
|
+
from_import_state = []
|
105
|
+
|
106
|
+
until eos?
|
107
|
+
|
108
|
+
kind = nil
|
109
|
+
match = nil
|
110
|
+
|
111
|
+
if state == :string
|
112
|
+
if scan(STRING_DELIMITER_REGEXP[string_delimiter])
|
113
|
+
tokens << [matched, :delimiter]
|
114
|
+
tokens << [:close, :string]
|
115
|
+
state = :initial
|
116
|
+
next
|
117
|
+
elsif string_delimiter.size == 3 && scan(/\n/)
|
118
|
+
kind = :content
|
119
|
+
elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
|
120
|
+
kind = :content
|
121
|
+
elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
|
122
|
+
kind = :char
|
123
|
+
elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
|
124
|
+
kind = :char
|
125
|
+
elsif scan(/ \\ . /x)
|
126
|
+
kind = :content
|
127
|
+
elsif scan(/ \\ | $ /x)
|
128
|
+
tokens << [:close, :string]
|
129
|
+
kind = :error
|
130
|
+
state = :initial
|
131
|
+
else
|
132
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
|
133
|
+
end
|
134
|
+
|
135
|
+
elsif match = scan(/ [ \t]+ | \\\n /x)
|
136
|
+
tokens << [match, :space]
|
137
|
+
next
|
138
|
+
|
139
|
+
elsif match = scan(/\n/)
|
140
|
+
tokens << [match, :space]
|
141
|
+
state = :initial if state == :include_expected
|
142
|
+
next
|
143
|
+
|
144
|
+
elsif match = scan(/ \# [^\n]* /mx)
|
145
|
+
tokens << [match, :comment]
|
146
|
+
next
|
147
|
+
|
148
|
+
elsif state == :initial
|
149
|
+
|
150
|
+
if scan(/#{OPERATOR}/o)
|
151
|
+
kind = :operator
|
152
|
+
|
153
|
+
elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
|
154
|
+
tokens << [:open, :string]
|
155
|
+
string_delimiter = self[2]
|
156
|
+
string_raw = false
|
157
|
+
modifiers = self[1]
|
158
|
+
unless modifiers.empty?
|
159
|
+
string_raw = !!modifiers.index(?r)
|
160
|
+
tokens << [modifiers, :modifier]
|
161
|
+
match = string_delimiter
|
162
|
+
end
|
163
|
+
state = :string
|
164
|
+
kind = :delimiter
|
165
|
+
|
166
|
+
# TODO: backticks
|
167
|
+
|
168
|
+
elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
169
|
+
kind = IDENT_KIND[match]
|
170
|
+
# TODO: keyword arguments
|
171
|
+
kind = :ident if last_token_dot
|
172
|
+
if kind == :old_keyword
|
173
|
+
kind = check(/\(/) ? :ident : :keyword
|
174
|
+
elsif kind == :predefined && check(/ *=/)
|
175
|
+
kind = :ident
|
176
|
+
elsif kind == :keyword
|
177
|
+
state = DEF_NEW_STATE[match]
|
178
|
+
from_import_state << match.to_sym if state == :include_expected
|
179
|
+
end
|
180
|
+
|
181
|
+
elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
|
182
|
+
kind = :decorator
|
183
|
+
|
184
|
+
elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
|
185
|
+
kind = :hex
|
186
|
+
|
187
|
+
elsif scan(/0[bB][01]+[lL]?/)
|
188
|
+
kind = :bin
|
189
|
+
|
190
|
+
elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
|
191
|
+
kind = :float
|
192
|
+
if scan(/[jJ]/)
|
193
|
+
match << matched
|
194
|
+
kind = :imaginary
|
195
|
+
end
|
196
|
+
|
197
|
+
elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
|
198
|
+
kind = :oct
|
199
|
+
|
200
|
+
elsif match = scan(/\d+([lL])?/)
|
201
|
+
kind = :integer
|
202
|
+
if self[1] == nil && scan(/[jJ]/)
|
203
|
+
match << matched
|
204
|
+
kind = :imaginary
|
205
|
+
end
|
206
|
+
|
207
|
+
else
|
208
|
+
getch
|
209
|
+
kind = :error
|
210
|
+
|
211
|
+
end
|
212
|
+
|
213
|
+
elsif state == :def_expected
|
214
|
+
state = :initial
|
215
|
+
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
216
|
+
kind = :method
|
217
|
+
else
|
218
|
+
next
|
219
|
+
end
|
220
|
+
|
221
|
+
elsif state == :class_expected
|
222
|
+
state = :initial
|
223
|
+
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
224
|
+
kind = :class
|
225
|
+
else
|
226
|
+
next
|
227
|
+
end
|
228
|
+
|
229
|
+
elsif state == :include_expected
|
230
|
+
if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
|
231
|
+
kind = :include
|
232
|
+
if match == 'as'
|
233
|
+
kind = :keyword
|
234
|
+
from_import_state << :as
|
235
|
+
elsif from_import_state.first == :from && match == 'import'
|
236
|
+
kind = :keyword
|
237
|
+
from_import_state << :import
|
238
|
+
elsif from_import_state.last == :as
|
239
|
+
# kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
|
240
|
+
kind = :ident
|
241
|
+
from_import_state.pop
|
242
|
+
elsif IDENT_KIND[match] == :keyword
|
243
|
+
unscan
|
244
|
+
match = nil
|
245
|
+
state = :initial
|
246
|
+
next
|
247
|
+
end
|
248
|
+
elsif match = scan(/,/)
|
249
|
+
from_import_state.pop if from_import_state.last == :as
|
250
|
+
kind = :operator
|
251
|
+
else
|
252
|
+
from_import_state = []
|
253
|
+
state = :initial
|
254
|
+
next
|
255
|
+
end
|
256
|
+
|
257
|
+
else
|
258
|
+
raise_inspect 'Unknown state', tokens, state
|
259
|
+
|
260
|
+
end
|
261
|
+
|
262
|
+
match ||= matched
|
263
|
+
if $DEBUG and not kind
|
264
|
+
raise_inspect 'Error token %p in line %d' %
|
265
|
+
[[match, kind], line], tokens, state
|
266
|
+
end
|
267
|
+
raise_inspect 'Empty token', tokens, state unless match
|
268
|
+
|
269
|
+
last_token_dot = match == '.'
|
270
|
+
|
271
|
+
tokens << [match, kind]
|
272
|
+
|
273
|
+
end
|
274
|
+
|
275
|
+
if state == :string
|
276
|
+
tokens << [:close, :string]
|
277
|
+
end
|
278
|
+
|
279
|
+
tokens
|
280
|
+
end
|
281
|
+
|
282
|
+
end
|
283
|
+
|
284
|
+
end
|
285
|
+
end
|
@@ -21,6 +21,10 @@ module Scanners
|
|
21
21
|
file_extension 'rb'
|
22
22
|
|
23
23
|
helper :patterns
|
24
|
+
|
25
|
+
if not defined? EncodingError
|
26
|
+
EncodingError = Class.new Exception
|
27
|
+
end
|
24
28
|
|
25
29
|
private
|
26
30
|
def scan_tokens tokens, options
|
@@ -31,9 +35,10 @@ module Scanners
|
|
31
35
|
state = :initial
|
32
36
|
depth = nil
|
33
37
|
inline_block_stack = []
|
34
|
-
|
38
|
+
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
39
|
+
|
35
40
|
patterns = Patterns # avoid constant lookup
|
36
|
-
|
41
|
+
|
37
42
|
until eos?
|
38
43
|
match = nil
|
39
44
|
kind = nil
|
@@ -125,14 +130,14 @@ module Scanners
|
|
125
130
|
if match = scan(/[ \t\f]+/)
|
126
131
|
kind = :space
|
127
132
|
match << scan(/\s*/) unless eos? || heredocs
|
128
|
-
value_expected = true if match.index(?\n)
|
133
|
+
value_expected = true if match.index(?\n)
|
129
134
|
tokens << [match, kind]
|
130
135
|
next
|
131
136
|
|
132
137
|
elsif match = scan(/\\?\n/)
|
133
138
|
kind = :space
|
134
139
|
if match == "\n"
|
135
|
-
value_expected = true
|
140
|
+
value_expected = true
|
136
141
|
state = :initial if state == :undef_comma_expected
|
137
142
|
end
|
138
143
|
if heredocs
|
@@ -154,14 +159,14 @@ module Scanners
|
|
154
159
|
elsif match = scan(/\#.*/) or
|
155
160
|
( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
|
156
161
|
kind = :comment
|
157
|
-
value_expected = true
|
158
162
|
tokens << [match, kind]
|
159
163
|
next
|
160
164
|
|
161
165
|
elsif state == :initial
|
162
166
|
|
163
167
|
# IDENTS #
|
164
|
-
if match = scan(/#{patterns::METHOD_NAME}/
|
168
|
+
if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
|
169
|
+
/#{patterns::METHOD_NAME}/o)
|
165
170
|
if last_token_dot
|
166
171
|
kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
|
167
172
|
else
|
@@ -170,9 +175,9 @@ module Scanners
|
|
170
175
|
kind = :constant
|
171
176
|
elsif kind == :reserved
|
172
177
|
state = patterns::DEF_NEW_STATE[match]
|
178
|
+
value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
|
173
179
|
end
|
174
180
|
end
|
175
|
-
## experimental!
|
176
181
|
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
|
177
182
|
|
178
183
|
elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
|
@@ -180,7 +185,6 @@ module Scanners
|
|
180
185
|
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
|
181
186
|
|
182
187
|
# OPERATORS #
|
183
|
-
# TODO: match (), [], {} as one single operator
|
184
188
|
elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
|
185
189
|
if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
|
186
190
|
value_expected = :set
|
@@ -281,13 +285,24 @@ module Scanners
|
|
281
285
|
|
282
286
|
else
|
283
287
|
kind = :error
|
284
|
-
match = getch
|
288
|
+
match = (scan(/./mu) rescue nil) || getch
|
289
|
+
if !unicode && match.size > 1
|
290
|
+
# warn 'Switching to unicode mode because of char %p' % [match]
|
291
|
+
unicode = true
|
292
|
+
unscan
|
293
|
+
next
|
294
|
+
end
|
285
295
|
|
286
296
|
end
|
287
297
|
|
288
298
|
elsif state == :def_expected
|
289
299
|
state = :initial
|
290
|
-
if
|
300
|
+
if scan(/self\./)
|
301
|
+
tokens << ['self', :pre_constant]
|
302
|
+
tokens << ['.', :operator]
|
303
|
+
end
|
304
|
+
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
|
305
|
+
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
|
291
306
|
kind = :method
|
292
307
|
else
|
293
308
|
next
|
@@ -327,7 +342,10 @@ module Scanners
|
|
327
342
|
end
|
328
343
|
|
329
344
|
elsif state == :alias_expected
|
330
|
-
|
345
|
+
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
|
346
|
+
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
|
347
|
+
|
348
|
+
if match
|
331
349
|
tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
|
332
350
|
tokens << [self[2], :space]
|
333
351
|
tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
module CodeRay
|
2
3
|
module Scanners
|
3
4
|
|
@@ -31,18 +32,18 @@ module Scanners
|
|
31
32
|
add(RESERVED_WORDS, :reserved).
|
32
33
|
add(PREDEFINED_CONSTANTS, :pre_constant)
|
33
34
|
|
34
|
-
IDENT = /[
|
35
|
+
IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/
|
35
36
|
|
36
37
|
METHOD_NAME = / #{IDENT} [?!]? /ox
|
37
38
|
METHOD_NAME_OPERATOR = /
|
38
39
|
\*\*? # multiplication and power
|
39
|
-
| [-+~]@? # plus, minus, tilde with and without
|
40
|
-
| [\/%&|^`] # division, modulo or format strings,
|
40
|
+
| [-+~]@? # plus, minus, tilde with and without at sign
|
41
|
+
| [\/%&|^`] # division, modulo or format strings, and, or, xor, system
|
41
42
|
| \[\]=? # array getter and setter
|
42
43
|
| << | >> # append or shift left, shift right
|
43
44
|
| <=?>? | >=? # comparison, rocket operator
|
44
45
|
| ===? | =~ # simple equality, case equality, match
|
45
|
-
| ![~=@]? # negation with and without
|
46
|
+
| ![~=@]? # negation with and without at sign, not-equal and not-match
|
46
47
|
/ox
|
47
48
|
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
|
48
49
|
INSTANCE_VARIABLE = / @ #{IDENT} /ox
|
@@ -59,7 +60,7 @@ module Scanners
|
|
59
60
|
QUOTE_TO_TYPE.default = :string
|
60
61
|
|
61
62
|
REGEXP_MODIFIERS = /[mixounse]*/
|
62
|
-
REGEXP_SYMBOLS = /[
|
63
|
+
REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/
|
63
64
|
|
64
65
|
DECIMAL = /\d+(?:_\d+)*/
|
65
66
|
OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
|
@@ -141,14 +142,19 @@ module Scanners
|
|
141
142
|
| #{CHARACTER}
|
142
143
|
)
|
143
144
|
/x
|
145
|
+
KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
|
146
|
+
and end in or unless begin
|
147
|
+
defined? ensure redo super until
|
148
|
+
break do next rescue then
|
149
|
+
when case else for retry
|
150
|
+
while elsif if not return
|
151
|
+
yield
|
152
|
+
])
|
144
153
|
|
145
154
|
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
|
146
155
|
|
147
156
|
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
|
148
157
|
|
149
|
-
# FIXME: \s and = are only a workaround, they are still allowed
|
150
|
-
# as delimiters.
|
151
|
-
FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
|
152
158
|
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
|
153
159
|
|
154
160
|
FancyStringType = {
|
@@ -174,15 +180,15 @@ module Scanners
|
|
174
180
|
CLOSING_PAREN.each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with <<
|
175
181
|
OPENING_PAREN = CLOSING_PAREN.invert
|
176
182
|
|
177
|
-
STRING_PATTERN = Hash.new
|
183
|
+
STRING_PATTERN = Hash.new do |h, k|
|
178
184
|
delim, interpreted = *k
|
179
|
-
delim_pattern = Regexp.escape(delim)
|
185
|
+
delim_pattern = Regexp.escape(delim.dup) # dup: workaround for old Ruby
|
180
186
|
if closing_paren = CLOSING_PAREN[delim]
|
181
187
|
delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION # JRuby fix
|
182
188
|
delim_pattern << Regexp.escape(closing_paren)
|
183
189
|
end
|
184
|
-
|
185
|
-
|
190
|
+
delim_pattern << '\\\\' unless delim == '\\'
|
191
|
+
|
186
192
|
special_escapes =
|
187
193
|
case interpreted
|
188
194
|
when :regexp_symbols
|
@@ -190,18 +196,18 @@ module Scanners
|
|
190
196
|
when :words
|
191
197
|
'| \s'
|
192
198
|
end
|
193
|
-
|
199
|
+
|
194
200
|
h[k] =
|
195
201
|
if interpreted and not delim == '#'
|
196
|
-
/ (?= [#{delim_pattern}
|
202
|
+
/ (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
|
197
203
|
else
|
198
|
-
/ (?= [#{delim_pattern}
|
204
|
+
/ (?= [#{delim_pattern}] #{special_escapes} ) /mx
|
199
205
|
end
|
200
|
-
|
206
|
+
end
|
201
207
|
|
202
|
-
HEREDOC_PATTERN = Hash.new
|
208
|
+
HEREDOC_PATTERN = Hash.new do |h, k|
|
203
209
|
delim, interpreted, indented = *k
|
204
|
-
delim_pattern = Regexp.escape(delim.dup)
|
210
|
+
delim_pattern = Regexp.escape(delim.dup) # dup: workaround for old Ruby
|
205
211
|
delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
|
206
212
|
h[k] =
|
207
213
|
if interpreted
|
@@ -209,12 +215,12 @@ module Scanners
|
|
209
215
|
else
|
210
216
|
/ (?= #{delim_pattern}() | \\ ) /mx
|
211
217
|
end
|
212
|
-
|
218
|
+
end
|
213
219
|
|
214
220
|
def initialize kind, interpreted, delim, heredoc = false
|
215
221
|
if heredoc
|
216
222
|
pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
|
217
|
-
delim
|
223
|
+
delim = nil
|
218
224
|
else
|
219
225
|
pattern = STRING_PATTERN[ [delim, interpreted] ]
|
220
226
|
if paren = CLOSING_PAREN[delim]
|