coderay 0.8.357 → 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/README +4 -3
- data/lib/coderay.rb +2 -1
- data/lib/coderay/encoder.rb +41 -15
- data/lib/coderay/encoders/_map.rb +3 -1
- data/lib/coderay/encoders/comment_filter.rb +43 -0
- data/lib/coderay/encoders/div.rb +2 -3
- data/lib/coderay/encoders/filter.rb +75 -0
- data/lib/coderay/encoders/html.rb +20 -3
- data/lib/coderay/encoders/html/css.rb +1 -1
- data/lib/coderay/encoders/html/numerization.rb +11 -2
- data/lib/coderay/encoders/html/output.rb +10 -1
- data/lib/coderay/encoders/json.rb +69 -0
- data/lib/coderay/encoders/lines_of_code.rb +90 -0
- data/lib/coderay/encoders/page.rb +1 -2
- data/lib/coderay/encoders/span.rb +2 -3
- data/lib/coderay/encoders/term.rb +137 -0
- data/lib/coderay/encoders/text.rb +4 -4
- data/lib/coderay/encoders/token_class_filter.rb +84 -0
- data/lib/coderay/encoders/xml.rb +1 -0
- data/lib/coderay/for_redcloth.rb +9 -4
- data/lib/coderay/helpers/file_type.rb +54 -15
- data/lib/coderay/helpers/plugin.rb +21 -3
- data/lib/coderay/helpers/word_list.rb +19 -4
- data/lib/coderay/scanner.rb +33 -2
- data/lib/coderay/scanners/_map.rb +10 -4
- data/lib/coderay/scanners/c.rb +61 -23
- data/lib/coderay/scanners/cpp.rb +228 -0
- data/lib/coderay/scanners/css.rb +9 -1
- data/lib/coderay/scanners/debug.rb +1 -0
- data/lib/coderay/scanners/delphi.rb +2 -2
- data/lib/coderay/scanners/diff.rb +1 -0
- data/lib/coderay/scanners/groovy.rb +263 -0
- data/lib/coderay/scanners/html.rb +9 -2
- data/lib/coderay/scanners/java.rb +18 -14
- data/lib/coderay/scanners/java_script.rb +42 -13
- data/lib/coderay/scanners/json.rb +7 -1
- data/lib/coderay/scanners/nitro_xhtml.rb +4 -0
- data/lib/coderay/scanners/php.rb +526 -0
- data/lib/coderay/scanners/plaintext.rb +4 -1
- data/lib/coderay/scanners/python.rb +285 -0
- data/lib/coderay/scanners/rhtml.rb +3 -0
- data/lib/coderay/scanners/ruby.rb +29 -11
- data/lib/coderay/scanners/ruby/patterns.rb +26 -20
- data/lib/coderay/scanners/scheme.rb +3 -0
- data/lib/coderay/scanners/sql.rb +162 -0
- data/lib/coderay/scanners/xml.rb +1 -1
- data/lib/coderay/scanners/yaml.rb +4 -1
- data/lib/coderay/styles/cycnus.rb +11 -7
- data/lib/coderay/token_classes.rb +4 -1
- data/lib/coderay/tokens.rb +50 -46
- metadata +14 -4
- data/lib/coderay/encoders/tokens.rb +0 -44
@@ -4,9 +4,12 @@ module Scanners
|
|
4
4
|
class Plaintext < Scanner
|
5
5
|
|
6
6
|
register_for :plaintext, :plain
|
7
|
+
title 'Plain text'
|
7
8
|
|
8
9
|
include Streamable
|
9
|
-
|
10
|
+
|
11
|
+
KINDS_NOT_LOC = [:plain]
|
12
|
+
|
10
13
|
def scan_tokens tokens, options
|
11
14
|
text = (scan_until(/\z/) || '')
|
12
15
|
tokens << [text, :plain]
|
@@ -0,0 +1,285 @@
|
|
1
|
+
module CodeRay
|
2
|
+
module Scanners
|
3
|
+
|
4
|
+
# Based on pygments' PythonLexer, see
|
5
|
+
# http://dev.pocoo.org/projects/pygments/browser/pygments/lexers/agile.py.
|
6
|
+
class Python < Scanner
|
7
|
+
|
8
|
+
include Streamable
|
9
|
+
|
10
|
+
register_for :python
|
11
|
+
file_extension 'py'
|
12
|
+
|
13
|
+
KEYWORDS = [
|
14
|
+
'and', 'as', 'assert', 'break', 'class', 'continue', 'def',
|
15
|
+
'del', 'elif', 'else', 'except', 'finally', 'for',
|
16
|
+
'from', 'global', 'if', 'import', 'in', 'is', 'lambda', 'not',
|
17
|
+
'or', 'pass', 'raise', 'return', 'try', 'while', 'with', 'yield',
|
18
|
+
'nonlocal', # new in Python 3
|
19
|
+
]
|
20
|
+
|
21
|
+
OLD_KEYWORDS = [
|
22
|
+
'exec', 'print', # gone in Python 3
|
23
|
+
]
|
24
|
+
|
25
|
+
PREDEFINED_METHODS_AND_TYPES = %w[
|
26
|
+
__import__ abs all any apply basestring bin bool buffer
|
27
|
+
bytearray bytes callable chr classmethod cmp coerce compile
|
28
|
+
complex delattr dict dir divmod enumerate eval execfile exit
|
29
|
+
file filter float frozenset getattr globals hasattr hash hex id
|
30
|
+
input int intern isinstance issubclass iter len list locals
|
31
|
+
long map max min next object oct open ord pow property range
|
32
|
+
raw_input reduce reload repr reversed round set setattr slice
|
33
|
+
sorted staticmethod str sum super tuple type unichr unicode
|
34
|
+
vars xrange zip
|
35
|
+
]
|
36
|
+
|
37
|
+
PREDEFINED_EXCEPTIONS = %w[
|
38
|
+
ArithmeticError AssertionError AttributeError
|
39
|
+
BaseException DeprecationWarning EOFError EnvironmentError
|
40
|
+
Exception FloatingPointError FutureWarning GeneratorExit IOError
|
41
|
+
ImportError ImportWarning IndentationError IndexError KeyError
|
42
|
+
KeyboardInterrupt LookupError MemoryError NameError
|
43
|
+
NotImplemented NotImplementedError OSError OverflowError
|
44
|
+
OverflowWarning PendingDeprecationWarning ReferenceError
|
45
|
+
RuntimeError RuntimeWarning StandardError StopIteration
|
46
|
+
SyntaxError SyntaxWarning SystemError SystemExit TabError
|
47
|
+
TypeError UnboundLocalError UnicodeDecodeError
|
48
|
+
UnicodeEncodeError UnicodeError UnicodeTranslateError
|
49
|
+
UnicodeWarning UserWarning ValueError Warning ZeroDivisionError
|
50
|
+
]
|
51
|
+
|
52
|
+
PREDEFINED_VARIABLES_AND_CONSTANTS = [
|
53
|
+
'False', 'True', 'None', # "keywords" since Python 3
|
54
|
+
'self', 'Ellipsis', 'NotImplemented',
|
55
|
+
]
|
56
|
+
|
57
|
+
IDENT_KIND = WordList.new(:ident).
|
58
|
+
add(KEYWORDS, :keyword).
|
59
|
+
add(OLD_KEYWORDS, :old_keyword).
|
60
|
+
add(PREDEFINED_METHODS_AND_TYPES, :predefined).
|
61
|
+
add(PREDEFINED_VARIABLES_AND_CONSTANTS, :pre_constant).
|
62
|
+
add(PREDEFINED_EXCEPTIONS, :exception)
|
63
|
+
|
64
|
+
NAME = / [^\W\d] \w* /x
|
65
|
+
ESCAPE = / [abfnrtv\n\\'"] | x[a-fA-F0-9]{1,2} | [0-7]{1,3} /x
|
66
|
+
UNICODE_ESCAPE = / u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} | N\{[-\w ]+\} /x
|
67
|
+
|
68
|
+
OPERATOR = /
|
69
|
+
\.\.\. | # ellipsis
|
70
|
+
\.(?!\d) | # dot but not decimal point
|
71
|
+
[,;:()\[\]{}] | # simple delimiters
|
72
|
+
\/\/=? | \*\*=? | # special math
|
73
|
+
[-+*\/%&|^]=? | # ordinary math and binary logic
|
74
|
+
[~`] | # binary complement and inspection
|
75
|
+
<<=? | >>=? | [<>=]=? | != # comparison and assignment
|
76
|
+
/x
|
77
|
+
|
78
|
+
STRING_DELIMITER_REGEXP = Hash.new do |h, delimiter|
|
79
|
+
h[delimiter] = Regexp.union delimiter
|
80
|
+
end
|
81
|
+
|
82
|
+
STRING_CONTENT_REGEXP = Hash.new do |h, delimiter|
|
83
|
+
h[delimiter] = / [^\\\n]+? (?= \\ | $ | #{Regexp.escape(delimiter)} ) /x
|
84
|
+
end
|
85
|
+
|
86
|
+
DEF_NEW_STATE = WordList.new(:initial).
|
87
|
+
add(%w(def), :def_expected).
|
88
|
+
add(%w(import from), :include_expected).
|
89
|
+
add(%w(class), :class_expected)
|
90
|
+
|
91
|
+
DESCRIPTOR = /
|
92
|
+
#{NAME}
|
93
|
+
(?: \. #{NAME} )*
|
94
|
+
| \*
|
95
|
+
/x
|
96
|
+
|
97
|
+
def scan_tokens tokens, options
|
98
|
+
|
99
|
+
state = :initial
|
100
|
+
string_delimiter = nil
|
101
|
+
string_raw = false
|
102
|
+
import_clause = class_name_follows = last_token_dot = false
|
103
|
+
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
104
|
+
from_import_state = []
|
105
|
+
|
106
|
+
until eos?
|
107
|
+
|
108
|
+
kind = nil
|
109
|
+
match = nil
|
110
|
+
|
111
|
+
if state == :string
|
112
|
+
if scan(STRING_DELIMITER_REGEXP[string_delimiter])
|
113
|
+
tokens << [matched, :delimiter]
|
114
|
+
tokens << [:close, :string]
|
115
|
+
state = :initial
|
116
|
+
next
|
117
|
+
elsif string_delimiter.size == 3 && scan(/\n/)
|
118
|
+
kind = :content
|
119
|
+
elsif scan(STRING_CONTENT_REGEXP[string_delimiter])
|
120
|
+
kind = :content
|
121
|
+
elsif !string_raw && scan(/ \\ #{ESCAPE} /ox)
|
122
|
+
kind = :char
|
123
|
+
elsif scan(/ \\ #{UNICODE_ESCAPE} /ox)
|
124
|
+
kind = :char
|
125
|
+
elsif scan(/ \\ . /x)
|
126
|
+
kind = :content
|
127
|
+
elsif scan(/ \\ | $ /x)
|
128
|
+
tokens << [:close, :string]
|
129
|
+
kind = :error
|
130
|
+
state = :initial
|
131
|
+
else
|
132
|
+
raise_inspect "else case \" reached; %p not handled." % peek(1), tokens, state
|
133
|
+
end
|
134
|
+
|
135
|
+
elsif match = scan(/ [ \t]+ | \\\n /x)
|
136
|
+
tokens << [match, :space]
|
137
|
+
next
|
138
|
+
|
139
|
+
elsif match = scan(/\n/)
|
140
|
+
tokens << [match, :space]
|
141
|
+
state = :initial if state == :include_expected
|
142
|
+
next
|
143
|
+
|
144
|
+
elsif match = scan(/ \# [^\n]* /mx)
|
145
|
+
tokens << [match, :comment]
|
146
|
+
next
|
147
|
+
|
148
|
+
elsif state == :initial
|
149
|
+
|
150
|
+
if scan(/#{OPERATOR}/o)
|
151
|
+
kind = :operator
|
152
|
+
|
153
|
+
elsif match = scan(/(u?r?|b)?("""|"|'''|')/i)
|
154
|
+
tokens << [:open, :string]
|
155
|
+
string_delimiter = self[2]
|
156
|
+
string_raw = false
|
157
|
+
modifiers = self[1]
|
158
|
+
unless modifiers.empty?
|
159
|
+
string_raw = !!modifiers.index(?r)
|
160
|
+
tokens << [modifiers, :modifier]
|
161
|
+
match = string_delimiter
|
162
|
+
end
|
163
|
+
state = :string
|
164
|
+
kind = :delimiter
|
165
|
+
|
166
|
+
# TODO: backticks
|
167
|
+
|
168
|
+
elsif match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
169
|
+
kind = IDENT_KIND[match]
|
170
|
+
# TODO: keyword arguments
|
171
|
+
kind = :ident if last_token_dot
|
172
|
+
if kind == :old_keyword
|
173
|
+
kind = check(/\(/) ? :ident : :keyword
|
174
|
+
elsif kind == :predefined && check(/ *=/)
|
175
|
+
kind = :ident
|
176
|
+
elsif kind == :keyword
|
177
|
+
state = DEF_NEW_STATE[match]
|
178
|
+
from_import_state << match.to_sym if state == :include_expected
|
179
|
+
end
|
180
|
+
|
181
|
+
elsif scan(/@[a-zA-Z0-9_.]+[lL]?/)
|
182
|
+
kind = :decorator
|
183
|
+
|
184
|
+
elsif scan(/0[xX][0-9A-Fa-f]+[lL]?/)
|
185
|
+
kind = :hex
|
186
|
+
|
187
|
+
elsif scan(/0[bB][01]+[lL]?/)
|
188
|
+
kind = :bin
|
189
|
+
|
190
|
+
elsif match = scan(/(?:\d*\.\d+|\d+\.\d*)(?:[eE][+-]?\d+)?|\d+[eE][+-]?\d+/)
|
191
|
+
kind = :float
|
192
|
+
if scan(/[jJ]/)
|
193
|
+
match << matched
|
194
|
+
kind = :imaginary
|
195
|
+
end
|
196
|
+
|
197
|
+
elsif scan(/0[oO][0-7]+|0[0-7]+(?![89.eE])[lL]?/)
|
198
|
+
kind = :oct
|
199
|
+
|
200
|
+
elsif match = scan(/\d+([lL])?/)
|
201
|
+
kind = :integer
|
202
|
+
if self[1] == nil && scan(/[jJ]/)
|
203
|
+
match << matched
|
204
|
+
kind = :imaginary
|
205
|
+
end
|
206
|
+
|
207
|
+
else
|
208
|
+
getch
|
209
|
+
kind = :error
|
210
|
+
|
211
|
+
end
|
212
|
+
|
213
|
+
elsif state == :def_expected
|
214
|
+
state = :initial
|
215
|
+
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
216
|
+
kind = :method
|
217
|
+
else
|
218
|
+
next
|
219
|
+
end
|
220
|
+
|
221
|
+
elsif state == :class_expected
|
222
|
+
state = :initial
|
223
|
+
if match = scan(unicode ? /#{NAME}/uo : /#{NAME}/o)
|
224
|
+
kind = :class
|
225
|
+
else
|
226
|
+
next
|
227
|
+
end
|
228
|
+
|
229
|
+
elsif state == :include_expected
|
230
|
+
if match = scan(unicode ? /#{DESCRIPTOR}/uo : /#{DESCRIPTOR}/o)
|
231
|
+
kind = :include
|
232
|
+
if match == 'as'
|
233
|
+
kind = :keyword
|
234
|
+
from_import_state << :as
|
235
|
+
elsif from_import_state.first == :from && match == 'import'
|
236
|
+
kind = :keyword
|
237
|
+
from_import_state << :import
|
238
|
+
elsif from_import_state.last == :as
|
239
|
+
# kind = match[0,1][unicode ? /[[:upper:]]/u : /[[:upper:]]/] ? :class : :method
|
240
|
+
kind = :ident
|
241
|
+
from_import_state.pop
|
242
|
+
elsif IDENT_KIND[match] == :keyword
|
243
|
+
unscan
|
244
|
+
match = nil
|
245
|
+
state = :initial
|
246
|
+
next
|
247
|
+
end
|
248
|
+
elsif match = scan(/,/)
|
249
|
+
from_import_state.pop if from_import_state.last == :as
|
250
|
+
kind = :operator
|
251
|
+
else
|
252
|
+
from_import_state = []
|
253
|
+
state = :initial
|
254
|
+
next
|
255
|
+
end
|
256
|
+
|
257
|
+
else
|
258
|
+
raise_inspect 'Unknown state', tokens, state
|
259
|
+
|
260
|
+
end
|
261
|
+
|
262
|
+
match ||= matched
|
263
|
+
if $DEBUG and not kind
|
264
|
+
raise_inspect 'Error token %p in line %d' %
|
265
|
+
[[match, kind], line], tokens, state
|
266
|
+
end
|
267
|
+
raise_inspect 'Empty token', tokens, state unless match
|
268
|
+
|
269
|
+
last_token_dot = match == '.'
|
270
|
+
|
271
|
+
tokens << [match, kind]
|
272
|
+
|
273
|
+
end
|
274
|
+
|
275
|
+
if state == :string
|
276
|
+
tokens << [:close, :string]
|
277
|
+
end
|
278
|
+
|
279
|
+
tokens
|
280
|
+
end
|
281
|
+
|
282
|
+
end
|
283
|
+
|
284
|
+
end
|
285
|
+
end
|
@@ -21,6 +21,10 @@ module Scanners
|
|
21
21
|
file_extension 'rb'
|
22
22
|
|
23
23
|
helper :patterns
|
24
|
+
|
25
|
+
if not defined? EncodingError
|
26
|
+
EncodingError = Class.new Exception
|
27
|
+
end
|
24
28
|
|
25
29
|
private
|
26
30
|
def scan_tokens tokens, options
|
@@ -31,9 +35,10 @@ module Scanners
|
|
31
35
|
state = :initial
|
32
36
|
depth = nil
|
33
37
|
inline_block_stack = []
|
34
|
-
|
38
|
+
unicode = string.respond_to?(:encoding) && string.encoding.name == 'UTF-8'
|
39
|
+
|
35
40
|
patterns = Patterns # avoid constant lookup
|
36
|
-
|
41
|
+
|
37
42
|
until eos?
|
38
43
|
match = nil
|
39
44
|
kind = nil
|
@@ -125,14 +130,14 @@ module Scanners
|
|
125
130
|
if match = scan(/[ \t\f]+/)
|
126
131
|
kind = :space
|
127
132
|
match << scan(/\s*/) unless eos? || heredocs
|
128
|
-
value_expected = true if match.index(?\n)
|
133
|
+
value_expected = true if match.index(?\n)
|
129
134
|
tokens << [match, kind]
|
130
135
|
next
|
131
136
|
|
132
137
|
elsif match = scan(/\\?\n/)
|
133
138
|
kind = :space
|
134
139
|
if match == "\n"
|
135
|
-
value_expected = true
|
140
|
+
value_expected = true
|
136
141
|
state = :initial if state == :undef_comma_expected
|
137
142
|
end
|
138
143
|
if heredocs
|
@@ -154,14 +159,14 @@ module Scanners
|
|
154
159
|
elsif match = scan(/\#.*/) or
|
155
160
|
( bol? and match = scan(/#{patterns::RUBYDOC_OR_DATA}/o) )
|
156
161
|
kind = :comment
|
157
|
-
value_expected = true
|
158
162
|
tokens << [match, kind]
|
159
163
|
next
|
160
164
|
|
161
165
|
elsif state == :initial
|
162
166
|
|
163
167
|
# IDENTS #
|
164
|
-
if match = scan(/#{patterns::METHOD_NAME}/
|
168
|
+
if match = scan(unicode ? /#{patterns::METHOD_NAME}/uo :
|
169
|
+
/#{patterns::METHOD_NAME}/o)
|
165
170
|
if last_token_dot
|
166
171
|
kind = if match[/^[A-Z]/] and not match?(/\(/) then :constant else :ident end
|
167
172
|
else
|
@@ -170,9 +175,9 @@ module Scanners
|
|
170
175
|
kind = :constant
|
171
176
|
elsif kind == :reserved
|
172
177
|
state = patterns::DEF_NEW_STATE[match]
|
178
|
+
value_expected = :set if patterns::KEYWORDS_EXPECTING_VALUE[match]
|
173
179
|
end
|
174
180
|
end
|
175
|
-
## experimental!
|
176
181
|
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
|
177
182
|
|
178
183
|
elsif last_token_dot and match = scan(/#{patterns::METHOD_NAME_OPERATOR}|\(/o)
|
@@ -180,7 +185,6 @@ module Scanners
|
|
180
185
|
value_expected = :set if check(/#{patterns::VALUE_FOLLOWS}/o)
|
181
186
|
|
182
187
|
# OPERATORS #
|
183
|
-
# TODO: match (), [], {} as one single operator
|
184
188
|
elsif not last_token_dot and match = scan(/ \.\.\.? | (?:\.|::)() | [,\(\)\[\]\{\}] | ==?=? /x)
|
185
189
|
if match !~ / [.\)\]\}] /x or match =~ /\.\.\.?/
|
186
190
|
value_expected = :set
|
@@ -281,13 +285,24 @@ module Scanners
|
|
281
285
|
|
282
286
|
else
|
283
287
|
kind = :error
|
284
|
-
match = getch
|
288
|
+
match = (scan(/./mu) rescue nil) || getch
|
289
|
+
if !unicode && match.size > 1
|
290
|
+
# warn 'Switching to unicode mode because of char %p' % [match]
|
291
|
+
unicode = true
|
292
|
+
unscan
|
293
|
+
next
|
294
|
+
end
|
285
295
|
|
286
296
|
end
|
287
297
|
|
288
298
|
elsif state == :def_expected
|
289
299
|
state = :initial
|
290
|
-
if
|
300
|
+
if scan(/self\./)
|
301
|
+
tokens << ['self', :pre_constant]
|
302
|
+
tokens << ['.', :operator]
|
303
|
+
end
|
304
|
+
if match = scan(unicode ? /(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/uo :
|
305
|
+
/(?>#{patterns::METHOD_NAME_EX})(?!\.|::)/o)
|
291
306
|
kind = :method
|
292
307
|
else
|
293
308
|
next
|
@@ -327,7 +342,10 @@ module Scanners
|
|
327
342
|
end
|
328
343
|
|
329
344
|
elsif state == :alias_expected
|
330
|
-
|
345
|
+
match = scan(unicode ? /(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/uo :
|
346
|
+
/(#{patterns::METHOD_NAME_OR_SYMBOL})([ \t]+)(#{patterns::METHOD_NAME_OR_SYMBOL})/o)
|
347
|
+
|
348
|
+
if match
|
331
349
|
tokens << [self[1], (self[1][0] == ?: ? :symbol : :method)]
|
332
350
|
tokens << [self[2], :space]
|
333
351
|
tokens << [self[3], (self[3][0] == ?: ? :symbol : :method)]
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# encoding: utf-8
|
1
2
|
module CodeRay
|
2
3
|
module Scanners
|
3
4
|
|
@@ -31,18 +32,18 @@ module Scanners
|
|
31
32
|
add(RESERVED_WORDS, :reserved).
|
32
33
|
add(PREDEFINED_CONSTANTS, :pre_constant)
|
33
34
|
|
34
|
-
IDENT = /[
|
35
|
+
IDENT = 'ä'[/[[:alpha:]]/] == 'ä' ? /[[:alpha:]_][[:alnum:]_]*/ : /[^\W\d]\w*/
|
35
36
|
|
36
37
|
METHOD_NAME = / #{IDENT} [?!]? /ox
|
37
38
|
METHOD_NAME_OPERATOR = /
|
38
39
|
\*\*? # multiplication and power
|
39
|
-
| [-+~]@? # plus, minus, tilde with and without
|
40
|
-
| [\/%&|^`] # division, modulo or format strings,
|
40
|
+
| [-+~]@? # plus, minus, tilde with and without at sign
|
41
|
+
| [\/%&|^`] # division, modulo or format strings, and, or, xor, system
|
41
42
|
| \[\]=? # array getter and setter
|
42
43
|
| << | >> # append or shift left, shift right
|
43
44
|
| <=?>? | >=? # comparison, rocket operator
|
44
45
|
| ===? | =~ # simple equality, case equality, match
|
45
|
-
| ![~=@]? # negation with and without
|
46
|
+
| ![~=@]? # negation with and without at sign, not-equal and not-match
|
46
47
|
/ox
|
47
48
|
METHOD_NAME_EX = / #{IDENT} (?:[?!]|=(?!>))? | #{METHOD_NAME_OPERATOR} /ox
|
48
49
|
INSTANCE_VARIABLE = / @ #{IDENT} /ox
|
@@ -59,7 +60,7 @@ module Scanners
|
|
59
60
|
QUOTE_TO_TYPE.default = :string
|
60
61
|
|
61
62
|
REGEXP_MODIFIERS = /[mixounse]*/
|
62
|
-
REGEXP_SYMBOLS = /[
|
63
|
+
REGEXP_SYMBOLS = /[|?*+(){}\[\].^$]/
|
63
64
|
|
64
65
|
DECIMAL = /\d+(?:_\d+)*/
|
65
66
|
OCTAL = /0_?[0-7]+(?:_[0-7]+)*/
|
@@ -141,14 +142,19 @@ module Scanners
|
|
141
142
|
| #{CHARACTER}
|
142
143
|
)
|
143
144
|
/x
|
145
|
+
KEYWORDS_EXPECTING_VALUE = WordList.new.add(%w[
|
146
|
+
and end in or unless begin
|
147
|
+
defined? ensure redo super until
|
148
|
+
break do next rescue then
|
149
|
+
when case else for retry
|
150
|
+
while elsif if not return
|
151
|
+
yield
|
152
|
+
])
|
144
153
|
|
145
154
|
RUBYDOC_OR_DATA = / #{RUBYDOC} | #{DATA} /xo
|
146
155
|
|
147
156
|
RDOC_DATA_START = / ^=begin (?!\S) | ^__END__$ /x
|
148
157
|
|
149
|
-
# FIXME: \s and = are only a workaround, they are still allowed
|
150
|
-
# as delimiters.
|
151
|
-
FANCY_START_SAVE = / % ( [qQwWxsr] | (?![a-zA-Z0-9\s=]) ) ([^a-zA-Z0-9]) /mx
|
152
158
|
FANCY_START_CORRECT = / % ( [qQwWxsr] | (?![a-zA-Z0-9]) ) ([^a-zA-Z0-9]) /mx
|
153
159
|
|
154
160
|
FancyStringType = {
|
@@ -174,15 +180,15 @@ module Scanners
|
|
174
180
|
CLOSING_PAREN.each { |k,v| k.freeze; v.freeze } # debug, if I try to change it with <<
|
175
181
|
OPENING_PAREN = CLOSING_PAREN.invert
|
176
182
|
|
177
|
-
STRING_PATTERN = Hash.new
|
183
|
+
STRING_PATTERN = Hash.new do |h, k|
|
178
184
|
delim, interpreted = *k
|
179
|
-
delim_pattern = Regexp.escape(delim)
|
185
|
+
delim_pattern = Regexp.escape(delim.dup) # dup: workaround for old Ruby
|
180
186
|
if closing_paren = CLOSING_PAREN[delim]
|
181
187
|
delim_pattern = delim_pattern[0..-1] if defined? JRUBY_VERSION # JRuby fix
|
182
188
|
delim_pattern << Regexp.escape(closing_paren)
|
183
189
|
end
|
184
|
-
|
185
|
-
|
190
|
+
delim_pattern << '\\\\' unless delim == '\\'
|
191
|
+
|
186
192
|
special_escapes =
|
187
193
|
case interpreted
|
188
194
|
when :regexp_symbols
|
@@ -190,18 +196,18 @@ module Scanners
|
|
190
196
|
when :words
|
191
197
|
'| \s'
|
192
198
|
end
|
193
|
-
|
199
|
+
|
194
200
|
h[k] =
|
195
201
|
if interpreted and not delim == '#'
|
196
|
-
/ (?= [#{delim_pattern}
|
202
|
+
/ (?= [#{delim_pattern}] | \# [{$@] #{special_escapes} ) /mx
|
197
203
|
else
|
198
|
-
/ (?= [#{delim_pattern}
|
204
|
+
/ (?= [#{delim_pattern}] #{special_escapes} ) /mx
|
199
205
|
end
|
200
|
-
|
206
|
+
end
|
201
207
|
|
202
|
-
HEREDOC_PATTERN = Hash.new
|
208
|
+
HEREDOC_PATTERN = Hash.new do |h, k|
|
203
209
|
delim, interpreted, indented = *k
|
204
|
-
delim_pattern = Regexp.escape(delim.dup)
|
210
|
+
delim_pattern = Regexp.escape(delim.dup) # dup: workaround for old Ruby
|
205
211
|
delim_pattern = / \n #{ '(?>[\ \t]*)' if indented } #{ Regexp.new delim_pattern } $ /x
|
206
212
|
h[k] =
|
207
213
|
if interpreted
|
@@ -209,12 +215,12 @@ module Scanners
|
|
209
215
|
else
|
210
216
|
/ (?= #{delim_pattern}() | \\ ) /mx
|
211
217
|
end
|
212
|
-
|
218
|
+
end
|
213
219
|
|
214
220
|
def initialize kind, interpreted, delim, heredoc = false
|
215
221
|
if heredoc
|
216
222
|
pattern = HEREDOC_PATTERN[ [delim, interpreted, heredoc == :indented] ]
|
217
|
-
delim
|
223
|
+
delim = nil
|
218
224
|
else
|
219
225
|
pattern = STRING_PATTERN[ [delim, interpreted] ]
|
220
226
|
if paren = CLOSING_PAREN[delim]
|