rouge 0.2.6 → 0.2.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -259,9 +259,22 @@ module Rouge
259
259
  # @example
260
260
  # debug { "hello, world!" }
261
261
  def debug(&b)
262
- @debug = option(:debug) unless instance_variable_defined?(:@debug)
262
+ # This method is a hotspot, unfortunately.
263
+ #
264
+ # For performance reasons, the "debug" option of a lexer cannot
265
+ # be changed once it has begun lexing. This method will redefine
266
+ # itself on the first call to a noop if "debug" is not set.
267
+ if option(:debug)
268
+ class << self
269
+ def debug; puts yield; end
270
+ end
271
+ else
272
+ class << self
273
+ def debug; end
274
+ end
275
+ end
263
276
 
264
- puts(b.call) if @debug
277
+ debug(&b)
265
278
  end
266
279
 
267
280
  # @abstract
@@ -11,18 +11,28 @@ module Rouge
11
11
  ws = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
12
12
  id = /[a-zA-Z_][a-zA-Z0-9_]*/
13
13
 
14
- keywords = %w(
15
- auto break case const continue default do else enum extern
16
- for goto if register restricted return sizeof static struct
17
- switch typedef union volatile virtual while
18
- )
14
+ def self.keywords
15
+ @keywords ||= Set.new %w(
16
+ auto break case const continue default do else enum extern
17
+ for goto if register restricted return sizeof static struct
18
+ switch typedef union volatile virtual while
19
+ )
20
+ end
19
21
 
20
- keywords_type = %w(int long float short double char unsigned signed void)
22
+ def self.keywords_type
23
+ @keywords_type ||= Set.new %w(
24
+ int long float short double char unsigned signed void
25
+ )
26
+ end
21
27
 
22
- __reserved = %w(
23
- asm int8 based except int16 stdcall cdecl fastcall int32
24
- declspec finally int61 try leave
25
- )
28
+ def self.reserved
29
+ @reserved ||= Set.new %w(
30
+ __asm __int8 __based __except __int16 __stdcall __cdecl
31
+ __fastcall __int32 __declspec __finally __int61 __try __leave
32
+ inline _inline __inline naked _naked __naked restrict _restrict
33
+ __restrict thread _thread __thread typename _typename __typename
34
+ )
35
+ end
26
36
 
27
37
  state :whitespace do
28
38
  rule /^#if\s+0\b/, 'Comment.Preproc', :if_0
@@ -41,6 +51,8 @@ module Rouge
41
51
  end
42
52
 
43
53
  state :statements do
54
+ rule /\s+/m, 'Text'
55
+
44
56
  rule /L?"/, 'Literal.String', :string
45
57
  rule %r(L?'(\\.|\\[0-7]{1,3}|\\x[a-f0-9]{1,2}|[^\\'\n])')i, 'Literal.String.Char'
46
58
  rule %r((\d+\.\d*|\.\d+|\d+)[e][+-]?\d+[lu]*)i, 'Literal.Number.Float'
@@ -51,13 +63,20 @@ module Rouge
51
63
  rule %r([~!%^&*+=\|?:<>/-]), 'Operator'
52
64
  rule /[()\[\],.]/, 'Punctuation'
53
65
  rule /\bcase\b/, 'Keyword', :case
54
- rule /(?:#{keywords.join('|')})\b/, 'Keyword'
55
- rule /(?:#{keywords_type.join('|')})\b/, 'Keyword.Type'
56
- rule /(?:_{0,2}inline|naked|restrict|thread|typename)\b/, 'Keyword.Reserved'
57
- rule /__(?:#{__reserved.join('|')})\b/, 'Keyword.Reserved'
58
66
  rule /(?:true|false|NULL)\b/, 'Name.Builtin'
59
- rule id, 'Name'
60
- rule /\s+/m, 'Text'
67
+ rule id do |m|
68
+ name = m[0]
69
+
70
+ if self.class.keywords.include? name
71
+ token 'Keyword'
72
+ elsif self.class.keywords_type.include? name
73
+ token 'Keyword.Type'
74
+ elsif self.class.reserved.include? name
75
+ token 'Keyword.Reserved'
76
+ else
77
+ token 'Name'
78
+ end
79
+ end
61
80
  end
62
81
 
63
82
  state :case do
@@ -208,28 +208,6 @@ module Rouge
208
208
  structure-object symbol synonym-stream t two-way-stream vector
209
209
  )
210
210
 
211
- postprocess 'Name.Variable' do |tok, val|
212
- tok = if BUILTIN_FUNCTIONS.include? val
213
- 'Name.Builtin'
214
- elsif SPECIAL_FORMS.include? val
215
- 'Keyword'
216
- elsif MACROS.include? val
217
- 'Name.Builtin'
218
- elsif LAMBDA_LIST_KEYWORDS.include? val
219
- 'Keyword'
220
- elsif DECLARATIONS.include? val
221
- 'Keyword'
222
- elsif BUILTIN_TYPES.include? val
223
- 'Keyword.Type'
224
- elsif BUILTIN_CLASSES.include? val
225
- 'Name.Class'
226
- else
227
- 'Name.Variable'
228
- end
229
-
230
- token tok, val
231
- end
232
-
233
211
  nonmacro = /\\.|[a-zA-Z0-9!$%&*+-\/<=>?@\[\]^_{}~]/
234
212
  constituent = /#{nonmacro}|[#.:]/
235
213
  terminated = /(?=[ "'()\n,;`])/ # whitespace or terminating macro chars
@@ -319,7 +297,27 @@ module Rouge
319
297
  # functions and variables
320
298
  # note that these get filtered through in stream_tokens
321
299
  rule /\*#{symbol}\*/, 'Name.Variable.Global'
322
- rule symbol, 'Name.Variable'
300
+ rule symbol do |m|
301
+ sym = m[0]
302
+
303
+ if BUILTIN_FUNCTIONS.include? sym
304
+ token 'Name.Builtin'
305
+ elsif SPECIAL_FORMS.include? sym
306
+ token 'Keyword'
307
+ elsif MACROS.include? sym
308
+ token 'Name.Builtin'
309
+ elsif LAMBDA_LIST_KEYWORDS.include? sym
310
+ token 'Keyword'
311
+ elsif DECLARATIONS.include? sym
312
+ token 'Keyword'
313
+ elsif BUILTIN_TYPES.include? sym
314
+ token 'Keyword.Type'
315
+ elsif BUILTIN_CLASSES.include? sym
316
+ token 'Name.Class'
317
+ else
318
+ token 'Name.Variable'
319
+ end
320
+ end
323
321
 
324
322
  rule /\(/, 'Punctuation', :root
325
323
  rule /\)/, 'Punctuation' do
@@ -12,26 +12,37 @@ module Rouge
12
12
  '*.cxx', '*.hxx'
13
13
  mimetypes 'text/x-c++hdr', 'text/x-c++src'
14
14
 
15
- keywords = %w(
16
- asm auto break case catch const const_cast continue
17
- default delete do dynamic_cast else enum explicit export
18
- extern for friend goto if mutable namespace new operator
19
- private protected public register reinterpret_cast return
20
- restrict sizeof static static_cast struct switch template
21
- this throw throws try typedef typeid typename union using
22
- volatile virtual while
23
- )
24
-
25
- keywords_type = %w(
26
- bool int long float short double char unsigned signed void wchar_t
27
- )
28
-
29
- __reserved = %w(
30
- asm int8 based except int16 stdcall cdecl fastcall int32 declspec
31
- finally int64 try leave wchar_t w64 virtual_inheritance uuidof
32
- unaligned super single_inheritance raise noop multiple_inheritance
33
- m128i m128d m128 m64 interface identifier forceinline event assume
34
- )
15
+ def self.keywords
16
+ @keywords ||= Set.new %w(
17
+ asm auto break case catch const const_cast continue
18
+ default delete do dynamic_cast else enum explicit export
19
+ extern for friend goto if mutable namespace new operator
20
+ private protected public register reinterpret_cast return
21
+ restrict sizeof static static_cast struct switch template
22
+ this throw throws try typedef typeid typename union using
23
+ volatile virtual while
24
+ )
25
+ end
26
+
27
+ def self.keywords_type
28
+ @keywords_type ||= Set.new %w(
29
+ bool int long float short double char unsigned signed void wchar_t
30
+ )
31
+ end
32
+
33
+ def self.reserved
34
+ @reserved ||= Set.new %w(
35
+ __asm __int8 __based __except __int16 __stdcall __cdecl
36
+ __fastcall __int32 __declspec __finally __int64 __try
37
+ __leave __wchar_t __w64 __virtual_inheritance __uuidof
38
+ __unaligned __super __single_inheritance __raise __noop
39
+ __multiple_inheritance __m128i __m128d __m128 __m64 __interface
40
+ __identifier __forceinline __event __assume
41
+ inline _inline __inline
42
+ naked _naked __naked
43
+ thread _thread __thread
44
+ )
45
+ end
35
46
 
36
47
  # optional comments or whitespace
37
48
  ws = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
@@ -61,18 +72,27 @@ module Rouge
61
72
  rule %r([~!%^&*+=\|?:<>/-]), 'Operator'
62
73
  rule /[()\[\],.;{}]/, 'Punctuation'
63
74
 
64
- rule /(?:#{keywords.join('|')})\b/, 'Keyword'
65
75
  rule /class\b/, 'Keyword', :classname
66
- rule /(?:#{keywords_type.join('|')})\b/, 'Keyword.Type'
67
- rule /(?:_{0,2}inline|naked|thread)\b/, 'Keyword.Reserved'
68
- rule /__(?:#{__reserved.join('|')})\b/, 'Keyoword.Reserved'
76
+
69
77
  # Offload C++ extensions, http://offload.codeplay.com/
70
78
  rule /(?:__offload|__blockingoffload|__outer)\b/, 'Keyword.Pseudo'
71
79
 
72
80
  rule /(true|false)\b/, 'Keyword.Constant'
73
81
  rule /NULL\b/, 'Name.Builtin'
74
82
  rule /#{id}:(?!:)/, 'Name.Label'
75
- rule id, 'Name'
83
+ rule id do |m|
84
+ name = m[0]
85
+
86
+ if self.class.keywords.include? name
87
+ token 'Keyword'
88
+ elsif self.class.keywords_type.include? name
89
+ token 'Keyword.Type'
90
+ elsif self.class.reserved.include? name
91
+ token 'Keyword.Reserved'
92
+ else
93
+ token 'Name'
94
+ end
95
+ end
76
96
  end
77
97
 
78
98
  state :classname do
@@ -262,8 +262,15 @@ module Rouge
262
262
  rule /(?:deprecated|final|foldable|flushable|inline|recursive)(?=\s)/,
263
263
  'Keyword'
264
264
 
265
- # words, to be postprocessed for builtins and things
266
- rule /\S+/, 'Postprocess.Word'
265
+ rule /\S+/ do |m|
266
+ name = m[0]
267
+
268
+ if self.class.builtins.values.any? { |b| b.include? name }
269
+ token 'Name.Builtin'
270
+ else
271
+ token 'Name'
272
+ end
273
+ end
267
274
  end
268
275
 
269
276
  state :stack_effect do
@@ -286,16 +293,6 @@ module Rouge
286
293
  rule /\s+/, 'Text'
287
294
  rule /\S+/, 'Name.Namespace'
288
295
  end
289
-
290
- postprocess 'Postprocess.Word' do |tok, val|
291
- tok = if self.class.builtins.values.any? { |b| b.include? val }
292
- 'Name.Builtin'
293
- else
294
- 'Name'
295
- end
296
-
297
- token tok, val
298
- end
299
296
  end
300
297
  end
301
298
  end
@@ -43,17 +43,19 @@ module Rouge
43
43
  push :php if start_inline?
44
44
  end
45
45
 
46
- keywords = %w(
47
- and E_PARSE old_function E_ERROR or as E_WARNING parent eval
48
- PHP_OS break exit case extends PHP_VERSION cfunction FALSE
49
- print for require continue foreach require_once declare return
50
- default static do switch die stdClass echo else TRUE elseif
51
- var empty if xor enddeclare include virtual endfor include_once
52
- while endforeach global __FILE__ endif list __LINE__ endswitch
53
- new __sleep endwhile not array __wakeup E_ALL NULL final
54
- php_user_filter interface implements public private protected
55
- abstract clone try catch throw this use namespace
56
- )
46
+ def self.keywords
47
+ @keywords ||= Set.new %w(
48
+ and E_PARSE old_function E_ERROR or as E_WARNING parent eval
49
+ PHP_OS break exit case extends PHP_VERSION cfunction FALSE
50
+ print for require continue foreach require_once declare return
51
+ default static do switch die stdClass echo else TRUE elseif
52
+ var empty if xor enddeclare include virtual endfor include_once
53
+ while endforeach global __FILE__ endif list __LINE__ endswitch
54
+ new __sleep endwhile not array __wakeup E_ALL NULL final
55
+ php_user_filter interface implements public private protected
56
+ abstract clone try catch throw this use namespace
57
+ )
58
+ end
57
59
 
58
60
  state :root do
59
61
  rule /<\?(php|=)?/, 'Comment.Preproc', :php
@@ -93,13 +95,22 @@ module Rouge
93
95
  group 'Keyword'; group 'Text'; group 'Name.Constant'
94
96
  end
95
97
 
96
- rule /(?:#{keywords.join('|')})\b/, 'Keyword'
97
98
  rule /(true|false|null)\b/, 'Keyword.Constant'
98
99
  rule /\$\{\$+[a-z_]\w*\}/i, 'Name.Variable'
99
100
  rule /\$+[a-z_]\w*/i, 'Name.Variable'
100
101
 
101
102
  # may be intercepted for builtin highlighting
102
- rule /[\\a-z_][\\\w]*/i, 'Name.Other'
103
+ rule /[\\a-z_][\\\w]*/i do |m|
104
+ name = m[0]
105
+
106
+ if self.class.keywords.include? name
107
+ token 'Keyword'
108
+ elsif self.builtins.include? name
109
+ token 'Name.Builtin'
110
+ else
111
+ token 'Name.Other'
112
+ end
113
+ end
103
114
 
104
115
  rule /(\d+\.\d*|\d*\.\d+)(e[+-]?\d+)?/i, 'Literal.Number.Float'
105
116
  rule /\d+e[+-]?\d+/i, 'Literal.Number.Float'
@@ -146,12 +157,6 @@ module Rouge
146
157
  rule /\}/, 'Literal.String.Interpol', :pop!
147
158
  mixin :php
148
159
  end
149
-
150
- postprocess 'Name.Other' do |tok, val|
151
- tok = 'Name.Builtin' if builtins.include? val
152
-
153
- token tok, val
154
- end
155
160
  end
156
161
  end
157
162
  end
@@ -5,7 +5,9 @@ module Rouge
5
5
 
6
6
  tag 'shell'
7
7
  aliases 'bash', 'zsh', 'ksh', 'sh'
8
- filenames '*.sh', '*.bash', '*.zsh', '*.ksh'
8
+ filenames '*.sh', '*.bash', '*.zsh', '*.ksh',
9
+ '.bashrc', '.zshrc', '.kshrc', '.profile'
10
+
9
11
  mimetypes 'application/x-sh', 'application/x-shellscript'
10
12
 
11
13
  def self.analyze_text(text)
@@ -41,26 +41,25 @@ module Rouge
41
41
  rule /\b(NONE|bold|italic|underline|dark|light)\b/, 'Name.Builtin'
42
42
 
43
43
  rule /[absg]:\w+\b/, 'Name.Variable'
44
- rule /\b\w+\b/, 'Postprocess.Name'
44
+ rule /\b\w+\b/ do |m|
45
+ name = m[0]
46
+ keywords = self.class.keywords
47
+
48
+ if mapping_contains?(keywords[:command], name)
49
+ token 'Keyword'
50
+ elsif mapping_contains?(keywords[:option], name)
51
+ token 'Name.Builtin'
52
+ elsif mapping_contains?(keywords[:auto], name)
53
+ token 'Name.Builtin'
54
+ else
55
+ token 'Text'
56
+ end
57
+ end
45
58
 
46
59
  # no errors in VimL!
47
60
  rule /./m, 'Text'
48
61
  end
49
62
 
50
- postprocess 'Postprocess.Name' do |tok, name|
51
- keywords = self.class.keywords
52
-
53
- if mapping_contains?(keywords[:command], name)
54
- token 'Keyword', name
55
- elsif mapping_contains?(keywords[:option], name)
56
- token 'Name.Builtin', name
57
- elsif mapping_contains?(keywords[:auto], name)
58
- token 'Name.Builtin', name
59
- else
60
- token 'Text', name
61
- end
62
- end
63
-
64
63
  def mapping_contains?(mapping, word)
65
64
  shortest, longest = find_likely_mapping(mapping, word)
66
65
 
@@ -45,10 +45,15 @@ module Rouge
45
45
  @rules ||= []
46
46
  end
47
47
 
48
- def load!
48
+ def load!(lexer_class)
49
49
  return self if @loaded
50
50
  @loaded = true
51
51
  StateDSL.new(rules).instance_eval(&@defn)
52
+
53
+ rules.map! do |rule|
54
+ rule.is_a?(String) ? lexer_class.get_state(rule) : rule
55
+ end
56
+
52
57
  self
53
58
  end
54
59
  end
@@ -79,20 +84,13 @@ module Rouge
79
84
  # {RegexLexer#token}, and {RegexLexer#delegate}. The first
80
85
  # argument can be used to access the match groups.
81
86
  def rule(re, tok=nil, next_state=nil, &callback)
82
- if block_given?
83
- next_state = tok
87
+ callback ||= case next_state
88
+ when :pop!
89
+ proc { token tok; pop! }
90
+ when Symbol
91
+ proc { token tok; push next_state }
84
92
  else
85
- tok = Token[tok]
86
-
87
- callback = proc do
88
- token tok
89
- case next_state
90
- when :pop!
91
- pop!
92
- when Symbol
93
- push next_state
94
- end # else pass
95
- end
93
+ proc { token tok }
96
94
  end
97
95
 
98
96
  rules << Rule.new(re, callback)
@@ -127,25 +125,6 @@ module Rouge
127
125
  start_procs << b
128
126
  end
129
127
 
130
- # Specify a filter to be applied as the lexer yields tokens.
131
- #
132
- # @param toktype
133
- # The token type to postprocess
134
- # @yield [tok, val]
135
- # The token and the matched value. The block will be evaluated in
136
- # the context of the lexer, and it must yield an equivalent
137
- # token/value pair, usually by calling #token.
138
- def self.postprocess(toktype, &b)
139
- postprocesses << [Token[toktype], b]
140
- end
141
-
142
- # where the postprocess blocks are stored.
143
- # @see postprocess
144
- def self.postprocesses
145
- @postprocesses ||= InheritableList.new(superclass.postprocesses)
146
- end
147
- @postprocesses = []
148
-
149
128
  # Define a new state for this lexer with the given name.
150
129
  # The block will be evaluated in the context of a {StateDSL}.
151
130
  def self.state(name, &b)
@@ -159,7 +138,7 @@ module Rouge
159
138
 
160
139
  state = states[name.to_s]
161
140
  raise "unknown state: #{name}" unless state
162
- state.load!
141
+ state.load!(self)
163
142
  end
164
143
 
165
144
  # @private
@@ -205,21 +184,6 @@ module Rouge
205
184
  #
206
185
  # @see #step #step (where (2.) is implemented)
207
186
  def stream_tokens(stream, &b)
208
- stream_without_postprocessing(stream) do |tok, val|
209
- _, processor = self.class.postprocesses.find { |t, _| t === tok }
210
-
211
- if processor
212
- with_output_stream(b) do
213
- instance_exec(tok, val, &processor)
214
- end
215
- else
216
- yield tok, val
217
- end
218
- end
219
- end
220
-
221
- # @private
222
- def stream_without_postprocessing(stream, &b)
223
187
  until stream.eos?
224
188
  debug { "lexer: #{self.class.tag}" }
225
189
  debug { "stack: #{stack.map(&:name).inspect}" }
@@ -240,35 +204,25 @@ module Rouge
240
204
  # @return false otherwise.
241
205
  def step(state, stream, &b)
242
206
  state.rules.each do |rule|
243
- return true if run_rule(rule, stream, &b)
244
- end
207
+ case rule
208
+ when State
209
+ debug { " entering mixin #{rule.name}" }
210
+ return true if step(rule, stream, &b)
211
+ debug { " exiting mixin #{rule.name}" }
212
+ when Rule
213
+ debug { " trying #{rule.inspect}" }
245
214
 
246
- false
247
- end
215
+ if run_rule(rule, stream)
216
+ debug { " got #{stream[0].inspect}" }
248
217
 
249
- # @private
250
- def run_rule(rule, stream, &b)
251
- case rule
252
- when String
253
- debug { " entering mixin #{rule}" }
254
- res = step(get_state(rule), stream, &b)
255
- debug { " exiting mixin #{rule}" }
256
- res
257
- when Rule
258
- debug { " trying #{rule.inspect}" }
259
- # XXX HACK XXX
260
- # StringScanner's implementation of ^ is b0rken.
261
- # see http://bugs.ruby-lang.org/issues/7092
262
- # TODO: this doesn't cover cases like /(a|^b)/, but it's
263
- # the most common, for now...
264
- return false if rule.beginning_of_line? && !stream.beginning_of_line?
265
-
266
- scan(stream, rule.re) do
267
- debug { " got #{stream[0].inspect}" }
268
-
269
- run_callback(stream, rule.callback, &b)
218
+ run_callback(stream, rule.callback, &b)
219
+
220
+ return true
221
+ end
270
222
  end
271
223
  end
224
+
225
+ false
272
226
  end
273
227
 
274
228
  # @private
@@ -286,28 +240,28 @@ module Rouge
286
240
  MAX_NULL_SCANS = 5
287
241
 
288
242
  # @private
289
- def scan(scanner, re, &b)
290
- @null_steps ||= 0
291
-
292
- if @null_steps >= MAX_NULL_SCANS
243
+ def run_rule(rule, scanner, &b)
244
+ # XXX HACK XXX
245
+ # StringScanner's implementation of ^ is b0rken.
246
+ # see http://bugs.ruby-lang.org/issues/7092
247
+ # TODO: this doesn't cover cases like /(a|^b)/, but it's
248
+ # the most common, for now...
249
+ return false if rule.beginning_of_line? && !scanner.beginning_of_line?
250
+
251
+ if (@null_steps ||= 0) >= MAX_NULL_SCANS
293
252
  debug { " too many scans without consuming the string!" }
294
253
  return false
295
254
  end
296
255
 
297
- scanner.scan(re)
256
+ scanner.scan(rule.re) or return false
298
257
 
299
- if scanner.matched?
300
- if scanner.matched_size == 0
301
- @null_steps += 1
302
- else
303
- @null_steps = 0
304
- end
305
-
306
- yield self
307
- return true
258
+ if scanner.matched_size.zero?
259
+ @null_steps += 1
260
+ else
261
+ @null_steps = 0
308
262
  end
309
263
 
310
- return false
264
+ true
311
265
  end
312
266
 
313
267
  # Yield a token.
@@ -358,7 +312,7 @@ module Rouge
358
312
  push_state = if state_name
359
313
  get_state(state_name)
360
314
  elsif block_given?
361
- State.new(b.inspect, &b).load!
315
+ State.new(b.inspect, &b).load!(self.class)
362
316
  else
363
317
  # use the top of the stack by default
364
318
  self.state
@@ -1,5 +1,5 @@
1
1
  module Rouge
2
2
  def self.version
3
- "0.2.6"
3
+ "0.2.7"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rouge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-21 00:00:00.000000000 Z
12
+ date: 2012-10-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: thor