rouge 0.2.6 → 0.2.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -259,9 +259,22 @@ module Rouge
259
259
  # @example
260
260
  # debug { "hello, world!" }
261
261
  def debug(&b)
262
- @debug = option(:debug) unless instance_variable_defined?(:@debug)
262
+ # This method is a hotspot, unfortunately.
263
+ #
264
+ # For performance reasons, the "debug" option of a lexer cannot
265
+ # be changed once it has begun lexing. This method will redefine
266
+ # itself on the first call to a noop if "debug" is not set.
267
+ if option(:debug)
268
+ class << self
269
+ def debug; puts yield; end
270
+ end
271
+ else
272
+ class << self
273
+ def debug; end
274
+ end
275
+ end
263
276
 
264
- puts(b.call) if @debug
277
+ debug(&b)
265
278
  end
266
279
 
267
280
  # @abstract
@@ -11,18 +11,28 @@ module Rouge
11
11
  ws = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
12
12
  id = /[a-zA-Z_][a-zA-Z0-9_]*/
13
13
 
14
- keywords = %w(
15
- auto break case const continue default do else enum extern
16
- for goto if register restricted return sizeof static struct
17
- switch typedef union volatile virtual while
18
- )
14
+ def self.keywords
15
+ @keywords ||= Set.new %w(
16
+ auto break case const continue default do else enum extern
17
+ for goto if register restricted return sizeof static struct
18
+ switch typedef union volatile virtual while
19
+ )
20
+ end
19
21
 
20
- keywords_type = %w(int long float short double char unsigned signed void)
22
+ def self.keywords_type
23
+ @keywords_type ||= Set.new %w(
24
+ int long float short double char unsigned signed void
25
+ )
26
+ end
21
27
 
22
- __reserved = %w(
23
- asm int8 based except int16 stdcall cdecl fastcall int32
24
- declspec finally int61 try leave
25
- )
28
+ def self.reserved
29
+ @reserved ||= Set.new %w(
30
+ __asm __int8 __based __except __int16 __stdcall __cdecl
31
+ __fastcall __int32 __declspec __finally __int61 __try __leave
32
+ inline _inline __inline naked _naked __naked restrict _restrict
33
+ __restrict thread _thread __thread typename _typename __typename
34
+ )
35
+ end
26
36
 
27
37
  state :whitespace do
28
38
  rule /^#if\s+0\b/, 'Comment.Preproc', :if_0
@@ -41,6 +51,8 @@ module Rouge
41
51
  end
42
52
 
43
53
  state :statements do
54
+ rule /\s+/m, 'Text'
55
+
44
56
  rule /L?"/, 'Literal.String', :string
45
57
  rule %r(L?'(\\.|\\[0-7]{1,3}|\\x[a-f0-9]{1,2}|[^\\'\n])')i, 'Literal.String.Char'
46
58
  rule %r((\d+\.\d*|\.\d+|\d+)[e][+-]?\d+[lu]*)i, 'Literal.Number.Float'
@@ -51,13 +63,20 @@ module Rouge
51
63
  rule %r([~!%^&*+=\|?:<>/-]), 'Operator'
52
64
  rule /[()\[\],.]/, 'Punctuation'
53
65
  rule /\bcase\b/, 'Keyword', :case
54
- rule /(?:#{keywords.join('|')})\b/, 'Keyword'
55
- rule /(?:#{keywords_type.join('|')})\b/, 'Keyword.Type'
56
- rule /(?:_{0,2}inline|naked|restrict|thread|typename)\b/, 'Keyword.Reserved'
57
- rule /__(?:#{__reserved.join('|')})\b/, 'Keyword.Reserved'
58
66
  rule /(?:true|false|NULL)\b/, 'Name.Builtin'
59
- rule id, 'Name'
60
- rule /\s+/m, 'Text'
67
+ rule id do |m|
68
+ name = m[0]
69
+
70
+ if self.class.keywords.include? name
71
+ token 'Keyword'
72
+ elsif self.class.keywords_type.include? name
73
+ token 'Keyword.Type'
74
+ elsif self.class.reserved.include? name
75
+ token 'Keyword.Reserved'
76
+ else
77
+ token 'Name'
78
+ end
79
+ end
61
80
  end
62
81
 
63
82
  state :case do
@@ -208,28 +208,6 @@ module Rouge
208
208
  structure-object symbol synonym-stream t two-way-stream vector
209
209
  )
210
210
 
211
- postprocess 'Name.Variable' do |tok, val|
212
- tok = if BUILTIN_FUNCTIONS.include? val
213
- 'Name.Builtin'
214
- elsif SPECIAL_FORMS.include? val
215
- 'Keyword'
216
- elsif MACROS.include? val
217
- 'Name.Builtin'
218
- elsif LAMBDA_LIST_KEYWORDS.include? val
219
- 'Keyword'
220
- elsif DECLARATIONS.include? val
221
- 'Keyword'
222
- elsif BUILTIN_TYPES.include? val
223
- 'Keyword.Type'
224
- elsif BUILTIN_CLASSES.include? val
225
- 'Name.Class'
226
- else
227
- 'Name.Variable'
228
- end
229
-
230
- token tok, val
231
- end
232
-
233
211
  nonmacro = /\\.|[a-zA-Z0-9!$%&*+-\/<=>?@\[\]^_{}~]/
234
212
  constituent = /#{nonmacro}|[#.:]/
235
213
  terminated = /(?=[ "'()\n,;`])/ # whitespace or terminating macro chars
@@ -319,7 +297,27 @@ module Rouge
319
297
  # functions and variables
320
298
  # note that these get filtered through in stream_tokens
321
299
  rule /\*#{symbol}\*/, 'Name.Variable.Global'
322
- rule symbol, 'Name.Variable'
300
+ rule symbol do |m|
301
+ sym = m[0]
302
+
303
+ if BUILTIN_FUNCTIONS.include? sym
304
+ token 'Name.Builtin'
305
+ elsif SPECIAL_FORMS.include? sym
306
+ token 'Keyword'
307
+ elsif MACROS.include? sym
308
+ token 'Name.Builtin'
309
+ elsif LAMBDA_LIST_KEYWORDS.include? sym
310
+ token 'Keyword'
311
+ elsif DECLARATIONS.include? sym
312
+ token 'Keyword'
313
+ elsif BUILTIN_TYPES.include? sym
314
+ token 'Keyword.Type'
315
+ elsif BUILTIN_CLASSES.include? sym
316
+ token 'Name.Class'
317
+ else
318
+ token 'Name.Variable'
319
+ end
320
+ end
323
321
 
324
322
  rule /\(/, 'Punctuation', :root
325
323
  rule /\)/, 'Punctuation' do
@@ -12,26 +12,37 @@ module Rouge
12
12
  '*.cxx', '*.hxx'
13
13
  mimetypes 'text/x-c++hdr', 'text/x-c++src'
14
14
 
15
- keywords = %w(
16
- asm auto break case catch const const_cast continue
17
- default delete do dynamic_cast else enum explicit export
18
- extern for friend goto if mutable namespace new operator
19
- private protected public register reinterpret_cast return
20
- restrict sizeof static static_cast struct switch template
21
- this throw throws try typedef typeid typename union using
22
- volatile virtual while
23
- )
24
-
25
- keywords_type = %w(
26
- bool int long float short double char unsigned signed void wchar_t
27
- )
28
-
29
- __reserved = %w(
30
- asm int8 based except int16 stdcall cdecl fastcall int32 declspec
31
- finally int64 try leave wchar_t w64 virtual_inheritance uuidof
32
- unaligned super single_inheritance raise noop multiple_inheritance
33
- m128i m128d m128 m64 interface identifier forceinline event assume
34
- )
15
+ def self.keywords
16
+ @keywords ||= Set.new %w(
17
+ asm auto break case catch const const_cast continue
18
+ default delete do dynamic_cast else enum explicit export
19
+ extern for friend goto if mutable namespace new operator
20
+ private protected public register reinterpret_cast return
21
+ restrict sizeof static static_cast struct switch template
22
+ this throw throws try typedef typeid typename union using
23
+ volatile virtual while
24
+ )
25
+ end
26
+
27
+ def self.keywords_type
28
+ @keywords_type ||= Set.new %w(
29
+ bool int long float short double char unsigned signed void wchar_t
30
+ )
31
+ end
32
+
33
+ def self.reserved
34
+ @reserved ||= Set.new %w(
35
+ __asm __int8 __based __except __int16 __stdcall __cdecl
36
+ __fastcall __int32 __declspec __finally __int64 __try
37
+ __leave __wchar_t __w64 __virtual_inheritance __uuidof
38
+ __unaligned __super __single_inheritance __raise __noop
39
+ __multiple_inheritance __m128i __m128d __m128 __m64 __interface
40
+ __identifier __forceinline __event __assume
41
+ inline _inline __inline
42
+ naked _naked __naked
43
+ thread _thread __thread
44
+ )
45
+ end
35
46
 
36
47
  # optional comments or whitespace
37
48
  ws = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
@@ -61,18 +72,27 @@ module Rouge
61
72
  rule %r([~!%^&*+=\|?:<>/-]), 'Operator'
62
73
  rule /[()\[\],.;{}]/, 'Punctuation'
63
74
 
64
- rule /(?:#{keywords.join('|')})\b/, 'Keyword'
65
75
  rule /class\b/, 'Keyword', :classname
66
- rule /(?:#{keywords_type.join('|')})\b/, 'Keyword.Type'
67
- rule /(?:_{0,2}inline|naked|thread)\b/, 'Keyword.Reserved'
68
- rule /__(?:#{__reserved.join('|')})\b/, 'Keyoword.Reserved'
76
+
69
77
  # Offload C++ extensions, http://offload.codeplay.com/
70
78
  rule /(?:__offload|__blockingoffload|__outer)\b/, 'Keyword.Pseudo'
71
79
 
72
80
  rule /(true|false)\b/, 'Keyword.Constant'
73
81
  rule /NULL\b/, 'Name.Builtin'
74
82
  rule /#{id}:(?!:)/, 'Name.Label'
75
- rule id, 'Name'
83
+ rule id do |m|
84
+ name = m[0]
85
+
86
+ if self.class.keywords.include? name
87
+ token 'Keyword'
88
+ elsif self.class.keywords_type.include? name
89
+ token 'Keyword.Type'
90
+ elsif self.class.reserved.include? name
91
+ token 'Keyword.Reserved'
92
+ else
93
+ token 'Name'
94
+ end
95
+ end
76
96
  end
77
97
 
78
98
  state :classname do
@@ -262,8 +262,15 @@ module Rouge
262
262
  rule /(?:deprecated|final|foldable|flushable|inline|recursive)(?=\s)/,
263
263
  'Keyword'
264
264
 
265
- # words, to be postprocessed for builtins and things
266
- rule /\S+/, 'Postprocess.Word'
265
+ rule /\S+/ do |m|
266
+ name = m[0]
267
+
268
+ if self.class.builtins.values.any? { |b| b.include? name }
269
+ token 'Name.Builtin'
270
+ else
271
+ token 'Name'
272
+ end
273
+ end
267
274
  end
268
275
 
269
276
  state :stack_effect do
@@ -286,16 +293,6 @@ module Rouge
286
293
  rule /\s+/, 'Text'
287
294
  rule /\S+/, 'Name.Namespace'
288
295
  end
289
-
290
- postprocess 'Postprocess.Word' do |tok, val|
291
- tok = if self.class.builtins.values.any? { |b| b.include? val }
292
- 'Name.Builtin'
293
- else
294
- 'Name'
295
- end
296
-
297
- token tok, val
298
- end
299
296
  end
300
297
  end
301
298
  end
@@ -43,17 +43,19 @@ module Rouge
43
43
  push :php if start_inline?
44
44
  end
45
45
 
46
- keywords = %w(
47
- and E_PARSE old_function E_ERROR or as E_WARNING parent eval
48
- PHP_OS break exit case extends PHP_VERSION cfunction FALSE
49
- print for require continue foreach require_once declare return
50
- default static do switch die stdClass echo else TRUE elseif
51
- var empty if xor enddeclare include virtual endfor include_once
52
- while endforeach global __FILE__ endif list __LINE__ endswitch
53
- new __sleep endwhile not array __wakeup E_ALL NULL final
54
- php_user_filter interface implements public private protected
55
- abstract clone try catch throw this use namespace
56
- )
46
+ def self.keywords
47
+ @keywords ||= Set.new %w(
48
+ and E_PARSE old_function E_ERROR or as E_WARNING parent eval
49
+ PHP_OS break exit case extends PHP_VERSION cfunction FALSE
50
+ print for require continue foreach require_once declare return
51
+ default static do switch die stdClass echo else TRUE elseif
52
+ var empty if xor enddeclare include virtual endfor include_once
53
+ while endforeach global __FILE__ endif list __LINE__ endswitch
54
+ new __sleep endwhile not array __wakeup E_ALL NULL final
55
+ php_user_filter interface implements public private protected
56
+ abstract clone try catch throw this use namespace
57
+ )
58
+ end
57
59
 
58
60
  state :root do
59
61
  rule /<\?(php|=)?/, 'Comment.Preproc', :php
@@ -93,13 +95,22 @@ module Rouge
93
95
  group 'Keyword'; group 'Text'; group 'Name.Constant'
94
96
  end
95
97
 
96
- rule /(?:#{keywords.join('|')})\b/, 'Keyword'
97
98
  rule /(true|false|null)\b/, 'Keyword.Constant'
98
99
  rule /\$\{\$+[a-z_]\w*\}/i, 'Name.Variable'
99
100
  rule /\$+[a-z_]\w*/i, 'Name.Variable'
100
101
 
101
102
  # may be intercepted for builtin highlighting
102
- rule /[\\a-z_][\\\w]*/i, 'Name.Other'
103
+ rule /[\\a-z_][\\\w]*/i do |m|
104
+ name = m[0]
105
+
106
+ if self.class.keywords.include? name
107
+ token 'Keyword'
108
+ elsif self.builtins.include? name
109
+ token 'Name.Builtin'
110
+ else
111
+ token 'Name.Other'
112
+ end
113
+ end
103
114
 
104
115
  rule /(\d+\.\d*|\d*\.\d+)(e[+-]?\d+)?/i, 'Literal.Number.Float'
105
116
  rule /\d+e[+-]?\d+/i, 'Literal.Number.Float'
@@ -146,12 +157,6 @@ module Rouge
146
157
  rule /\}/, 'Literal.String.Interpol', :pop!
147
158
  mixin :php
148
159
  end
149
-
150
- postprocess 'Name.Other' do |tok, val|
151
- tok = 'Name.Builtin' if builtins.include? val
152
-
153
- token tok, val
154
- end
155
160
  end
156
161
  end
157
162
  end
@@ -5,7 +5,9 @@ module Rouge
5
5
 
6
6
  tag 'shell'
7
7
  aliases 'bash', 'zsh', 'ksh', 'sh'
8
- filenames '*.sh', '*.bash', '*.zsh', '*.ksh'
8
+ filenames '*.sh', '*.bash', '*.zsh', '*.ksh',
9
+ '.bashrc', '.zshrc', '.kshrc', '.profile'
10
+
9
11
  mimetypes 'application/x-sh', 'application/x-shellscript'
10
12
 
11
13
  def self.analyze_text(text)
@@ -41,26 +41,25 @@ module Rouge
41
41
  rule /\b(NONE|bold|italic|underline|dark|light)\b/, 'Name.Builtin'
42
42
 
43
43
  rule /[absg]:\w+\b/, 'Name.Variable'
44
- rule /\b\w+\b/, 'Postprocess.Name'
44
+ rule /\b\w+\b/ do |m|
45
+ name = m[0]
46
+ keywords = self.class.keywords
47
+
48
+ if mapping_contains?(keywords[:command], name)
49
+ token 'Keyword'
50
+ elsif mapping_contains?(keywords[:option], name)
51
+ token 'Name.Builtin'
52
+ elsif mapping_contains?(keywords[:auto], name)
53
+ token 'Name.Builtin'
54
+ else
55
+ token 'Text'
56
+ end
57
+ end
45
58
 
46
59
  # no errors in VimL!
47
60
  rule /./m, 'Text'
48
61
  end
49
62
 
50
- postprocess 'Postprocess.Name' do |tok, name|
51
- keywords = self.class.keywords
52
-
53
- if mapping_contains?(keywords[:command], name)
54
- token 'Keyword', name
55
- elsif mapping_contains?(keywords[:option], name)
56
- token 'Name.Builtin', name
57
- elsif mapping_contains?(keywords[:auto], name)
58
- token 'Name.Builtin', name
59
- else
60
- token 'Text', name
61
- end
62
- end
63
-
64
63
  def mapping_contains?(mapping, word)
65
64
  shortest, longest = find_likely_mapping(mapping, word)
66
65
 
@@ -45,10 +45,15 @@ module Rouge
45
45
  @rules ||= []
46
46
  end
47
47
 
48
- def load!
48
+ def load!(lexer_class)
49
49
  return self if @loaded
50
50
  @loaded = true
51
51
  StateDSL.new(rules).instance_eval(&@defn)
52
+
53
+ rules.map! do |rule|
54
+ rule.is_a?(String) ? lexer_class.get_state(rule) : rule
55
+ end
56
+
52
57
  self
53
58
  end
54
59
  end
@@ -79,20 +84,13 @@ module Rouge
79
84
  # {RegexLexer#token}, and {RegexLexer#delegate}. The first
80
85
  # argument can be used to access the match groups.
81
86
  def rule(re, tok=nil, next_state=nil, &callback)
82
- if block_given?
83
- next_state = tok
87
+ callback ||= case next_state
88
+ when :pop!
89
+ proc { token tok; pop! }
90
+ when Symbol
91
+ proc { token tok; push next_state }
84
92
  else
85
- tok = Token[tok]
86
-
87
- callback = proc do
88
- token tok
89
- case next_state
90
- when :pop!
91
- pop!
92
- when Symbol
93
- push next_state
94
- end # else pass
95
- end
93
+ proc { token tok }
96
94
  end
97
95
 
98
96
  rules << Rule.new(re, callback)
@@ -127,25 +125,6 @@ module Rouge
127
125
  start_procs << b
128
126
  end
129
127
 
130
- # Specify a filter to be applied as the lexer yields tokens.
131
- #
132
- # @param toktype
133
- # The token type to postprocess
134
- # @yield [tok, val]
135
- # The token and the matched value. The block will be evaluated in
136
- # the context of the lexer, and it must yield an equivalent
137
- # token/value pair, usually by calling #token.
138
- def self.postprocess(toktype, &b)
139
- postprocesses << [Token[toktype], b]
140
- end
141
-
142
- # where the postprocess blocks are stored.
143
- # @see postprocess
144
- def self.postprocesses
145
- @postprocesses ||= InheritableList.new(superclass.postprocesses)
146
- end
147
- @postprocesses = []
148
-
149
128
  # Define a new state for this lexer with the given name.
150
129
  # The block will be evaluated in the context of a {StateDSL}.
151
130
  def self.state(name, &b)
@@ -159,7 +138,7 @@ module Rouge
159
138
 
160
139
  state = states[name.to_s]
161
140
  raise "unknown state: #{name}" unless state
162
- state.load!
141
+ state.load!(self)
163
142
  end
164
143
 
165
144
  # @private
@@ -205,21 +184,6 @@ module Rouge
205
184
  #
206
185
  # @see #step #step (where (2.) is implemented)
207
186
  def stream_tokens(stream, &b)
208
- stream_without_postprocessing(stream) do |tok, val|
209
- _, processor = self.class.postprocesses.find { |t, _| t === tok }
210
-
211
- if processor
212
- with_output_stream(b) do
213
- instance_exec(tok, val, &processor)
214
- end
215
- else
216
- yield tok, val
217
- end
218
- end
219
- end
220
-
221
- # @private
222
- def stream_without_postprocessing(stream, &b)
223
187
  until stream.eos?
224
188
  debug { "lexer: #{self.class.tag}" }
225
189
  debug { "stack: #{stack.map(&:name).inspect}" }
@@ -240,35 +204,25 @@ module Rouge
240
204
  # @return false otherwise.
241
205
  def step(state, stream, &b)
242
206
  state.rules.each do |rule|
243
- return true if run_rule(rule, stream, &b)
244
- end
207
+ case rule
208
+ when State
209
+ debug { " entering mixin #{rule.name}" }
210
+ return true if step(rule, stream, &b)
211
+ debug { " exiting mixin #{rule.name}" }
212
+ when Rule
213
+ debug { " trying #{rule.inspect}" }
245
214
 
246
- false
247
- end
215
+ if run_rule(rule, stream)
216
+ debug { " got #{stream[0].inspect}" }
248
217
 
249
- # @private
250
- def run_rule(rule, stream, &b)
251
- case rule
252
- when String
253
- debug { " entering mixin #{rule}" }
254
- res = step(get_state(rule), stream, &b)
255
- debug { " exiting mixin #{rule}" }
256
- res
257
- when Rule
258
- debug { " trying #{rule.inspect}" }
259
- # XXX HACK XXX
260
- # StringScanner's implementation of ^ is b0rken.
261
- # see http://bugs.ruby-lang.org/issues/7092
262
- # TODO: this doesn't cover cases like /(a|^b)/, but it's
263
- # the most common, for now...
264
- return false if rule.beginning_of_line? && !stream.beginning_of_line?
265
-
266
- scan(stream, rule.re) do
267
- debug { " got #{stream[0].inspect}" }
268
-
269
- run_callback(stream, rule.callback, &b)
218
+ run_callback(stream, rule.callback, &b)
219
+
220
+ return true
221
+ end
270
222
  end
271
223
  end
224
+
225
+ false
272
226
  end
273
227
 
274
228
  # @private
@@ -286,28 +240,28 @@ module Rouge
286
240
  MAX_NULL_SCANS = 5
287
241
 
288
242
  # @private
289
- def scan(scanner, re, &b)
290
- @null_steps ||= 0
291
-
292
- if @null_steps >= MAX_NULL_SCANS
243
+ def run_rule(rule, scanner, &b)
244
+ # XXX HACK XXX
245
+ # StringScanner's implementation of ^ is b0rken.
246
+ # see http://bugs.ruby-lang.org/issues/7092
247
+ # TODO: this doesn't cover cases like /(a|^b)/, but it's
248
+ # the most common, for now...
249
+ return false if rule.beginning_of_line? && !scanner.beginning_of_line?
250
+
251
+ if (@null_steps ||= 0) >= MAX_NULL_SCANS
293
252
  debug { " too many scans without consuming the string!" }
294
253
  return false
295
254
  end
296
255
 
297
- scanner.scan(re)
256
+ scanner.scan(rule.re) or return false
298
257
 
299
- if scanner.matched?
300
- if scanner.matched_size == 0
301
- @null_steps += 1
302
- else
303
- @null_steps = 0
304
- end
305
-
306
- yield self
307
- return true
258
+ if scanner.matched_size.zero?
259
+ @null_steps += 1
260
+ else
261
+ @null_steps = 0
308
262
  end
309
263
 
310
- return false
264
+ true
311
265
  end
312
266
 
313
267
  # Yield a token.
@@ -358,7 +312,7 @@ module Rouge
358
312
  push_state = if state_name
359
313
  get_state(state_name)
360
314
  elsif block_given?
361
- State.new(b.inspect, &b).load!
315
+ State.new(b.inspect, &b).load!(self.class)
362
316
  else
363
317
  # use the top of the stack by default
364
318
  self.state
@@ -1,5 +1,5 @@
1
1
  module Rouge
2
2
  def self.version
3
- "0.2.6"
3
+ "0.2.7"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rouge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.2.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-21 00:00:00.000000000 Z
12
+ date: 2012-10-22 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: thor