rouge 1.1.0 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. data/Gemfile +2 -0
  2. data/lib/rouge/cli.rb +15 -0
  3. data/lib/rouge/demos/matlab +6 -0
  4. data/lib/rouge/demos/ocaml +12 -0
  5. data/lib/rouge/demos/scala +3 -0
  6. data/lib/rouge/demos/sml +4 -0
  7. data/lib/rouge/demos/vb +4 -0
  8. data/lib/rouge/formatters/html.rb +13 -17
  9. data/lib/rouge/formatters/terminal256.rb +2 -2
  10. data/lib/rouge/lexer.rb +12 -14
  11. data/lib/rouge/lexers/coffeescript.rb +2 -4
  12. data/lib/rouge/lexers/common_lisp.rb +2 -4
  13. data/lib/rouge/lexers/erlang.rb +4 -13
  14. data/lib/rouge/lexers/factor.rb +25 -24
  15. data/lib/rouge/lexers/gherkin.rb +6 -6
  16. data/lib/rouge/lexers/groovy.rb +1 -2
  17. data/lib/rouge/lexers/haml.rb +2 -2
  18. data/lib/rouge/lexers/haskell.rb +18 -20
  19. data/lib/rouge/lexers/http.rb +13 -12
  20. data/lib/rouge/lexers/ini.rb +1 -2
  21. data/lib/rouge/lexers/java.rb +1 -2
  22. data/lib/rouge/lexers/lua.rb +1 -1
  23. data/lib/rouge/lexers/make.rb +4 -5
  24. data/lib/rouge/lexers/markdown.rb +4 -12
  25. data/lib/rouge/lexers/matlab.rb +71 -0
  26. data/lib/rouge/lexers/matlab/builtins.rb +10 -0
  27. data/lib/rouge/lexers/nginx.rb +3 -4
  28. data/lib/rouge/lexers/objective_c.rb +1 -1
  29. data/lib/rouge/lexers/ocaml.rb +109 -0
  30. data/lib/rouge/lexers/perl.rb +2 -5
  31. data/lib/rouge/lexers/php.rb +5 -5
  32. data/lib/rouge/lexers/php/builtins.rb +4 -2
  33. data/lib/rouge/lexers/puppet.rb +1 -2
  34. data/lib/rouge/lexers/python.rb +5 -11
  35. data/lib/rouge/lexers/racket.rb +1 -3
  36. data/lib/rouge/lexers/ruby.rb +8 -8
  37. data/lib/rouge/lexers/rust.rb +3 -3
  38. data/lib/rouge/lexers/sass/common.rb +4 -4
  39. data/lib/rouge/lexers/scala.rb +141 -0
  40. data/lib/rouge/lexers/scheme.rb +1 -3
  41. data/lib/rouge/lexers/sed.rb +4 -4
  42. data/lib/rouge/lexers/shell.rb +1 -2
  43. data/lib/rouge/lexers/smalltalk.rb +6 -7
  44. data/lib/rouge/lexers/sml.rb +236 -6
  45. data/lib/rouge/lexers/tex.rb +1 -4
  46. data/lib/rouge/lexers/toml.rb +1 -2
  47. data/lib/rouge/lexers/vb.rb +162 -0
  48. data/lib/rouge/lexers/viml.rb +1 -1
  49. data/lib/rouge/lexers/yaml.rb +10 -11
  50. data/lib/rouge/regex_lexer.rb +77 -92
  51. data/lib/rouge/token.rb +3 -3
  52. data/lib/rouge/util.rb +4 -4
  53. data/lib/rouge/version.rb +1 -1
  54. metadata +13 -4
@@ -30,10 +30,7 @@ module Rouge
30
30
  rule /\\(begin|end)\{.*?\}/, Name::Tag
31
31
 
32
32
  rule /(\\verb)\b(\S)(.*?)(\2)/ do |m|
33
- group Name::Builtin
34
- group Keyword::Pseudo
35
- group Str::Other
36
- group Keyword::Pseudo
33
+ groups Name::Builtin, Keyword::Pseudo, Str::Other, Keyword::Pseudo
37
34
  end
38
35
 
39
36
  rule command, Keyword, :command
@@ -30,8 +30,7 @@ module Rouge
30
30
  mixin :basic
31
31
 
32
32
  rule /(#{identifier})(\s*)(=)/ do
33
- group Name::Property; group Text
34
- group Punctuation
33
+ groups Name::Property, Text, Punctuation
35
34
  push :value
36
35
  end
37
36
 
@@ -0,0 +1,162 @@
1
+ module Rouge
2
+ module Lexers
3
+ class VisualBasic < RegexLexer
4
+ desc "Visual Basic"
5
+ tag 'vb'
6
+ aliases 'visualbasic'
7
+ filenames '*.vbs'
8
+ mimetypes 'text/x-visualbasic', 'application/x-visualbasic'
9
+
10
+ def self.keywords
11
+ @keywords ||= Set.new %w(
12
+ AddHandler Alias ByRef ByVal CBool CByte CChar CDate CDbl CDec
13
+ CInt CLng CObj CSByte CShort CSng CStr CType CUInt CULng CUShort
14
+ Call Case Catch Class Const Continue Declare Default Delegate
15
+ Dim DirectCast Do Each Else ElseIf End EndIf Enum Erase Error
16
+ Event Exit False Finally For Friend Function Get Global GoSub
17
+ GoTo Handles If Implements Imports Inherits Interface Let
18
+ Lib Loop Me Module MustInherit MustOverride MyBase MyClass
19
+ Namespace Narrowing New Next Not NotInheritable NotOverridable
20
+ Nothing Of On Operator Option Optional Overloads Overridable
21
+ Overrides ParamArray Partial Private Property Protected Public
22
+ RaiseEvent ReDim ReadOnly RemoveHandler Resume Return Select Set
23
+ Shadows Shared Single Static Step Stop Structure Sub SyncLock
24
+ Then Throw To True Try TryCast Using Wend When While Widening
25
+ With WithEvents WriteOnly
26
+ )
27
+ end
28
+
29
+ def self.keywords_type
30
+ @keywords_type ||= Set.new %w(
31
+ Boolean Byte Char Date Decimal Double Integer Long Object
32
+ SByte Short Single String Variant UInteger ULong UShort
33
+ )
34
+ end
35
+
36
+ def self.operator_words
37
+ @operator_words ||= Set.new %w(
38
+ AddressOf And AndAlso As GetType In Is IsNot Like Mod Or OrElse
39
+ TypeOf Xor
40
+ )
41
+ end
42
+
43
+ def self.builtins
44
+ @builtins ||= Set.new %w(
45
+ Console ConsoleColor
46
+ )
47
+ end
48
+
49
+ id = /[a-z_]\w*/i
50
+ upper_id = /[A-Z]\w*/
51
+
52
+ state :whitespace do
53
+ rule /\s+/, Text
54
+ rule /\n/, Text, :bol
55
+ rule /rem\b.*?$/i, Comment::Single
56
+ rule %r(%\{.*?%\})m, Comment::Multiline
57
+ rule /'.*$/, Comment::Single
58
+ end
59
+
60
+ state :bol do
61
+ rule /\s+/, Text
62
+ rule /<.*?>/, Name::Attribute
63
+ rule(//) { :pop! }
64
+ end
65
+
66
+ state :root do
67
+ mixin :whitespace
68
+ rule %r(
69
+ [#]If\b .*? \bThen
70
+ | [#]ElseIf\b .*? \bThen
71
+ | [#]End \s+ If
72
+ | [#]Const
73
+ | [#]ExternalSource .*? \n
74
+ | [#]End \s+ ExternalSource
75
+ | [#]Region .*? \n
76
+ | [#]End \s+ Region
77
+ | [#]ExternalChecksum
78
+ )x, Comment::Preproc
79
+ rule /[.]/, Punctuation, :dotted
80
+ rule /[(){}!#,:]/, Punctuation
81
+ rule /Option\s+(Strict|Explicit|Compare)\s+(On|Off|Binary|Text)/,
82
+ Keyword::Declaration
83
+ rule /End\b/, Keyword, :end
84
+ rule /(Dim|Const)\b/, Keyword, :dim
85
+ rule /(Function|Sub|Property)\b/, Keyword, :funcname
86
+ rule /(Class|Structure|Enum)\b/, Keyword, :classname
87
+ rule /(Module|Namespace|Imports)\b/, Keyword, :namespace
88
+
89
+ rule upper_id do |m|
90
+ match = m[0]
91
+ if self.class.keywords.include? match
92
+ token Keyword
93
+ elsif self.class.keywords_type.include? match
94
+ token Keyword::Type
95
+ elsif self.class.operator_words.include? match
96
+ token Operator::Word
97
+ elsif self.class.builtins.include? match
98
+ token Name::Builtin
99
+ else
100
+ token Name
101
+ end
102
+ end
103
+
104
+ rule(
105
+ %r(&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|<=|>=|<>|[-&*/\\^+=<>.]),
106
+ Operator
107
+ )
108
+
109
+ rule /"/, Str, :string
110
+ rule /#{id}[%&@!#\$]?/, Name
111
+ rule /#.*?#/, Literal::Date
112
+
113
+ rule /(\d+\.\d*|\d*\.\d+)(f[+-]?\d+)?/i, Num::Float
114
+ rule /\d+([SILDFR]|US|UI|UL)?/, Num::Integer
115
+ rule /&H[0-9a-f]+([SILDFR]|US|UI|UL)?/, Num::Integer
116
+ rule /&O[0-7]+([SILDFR]|US|UI|UL)?/, Num::Integer
117
+
118
+ rule /_\n/, Keyword
119
+ end
120
+
121
+ state :dotted do
122
+ mixin :whitespace
123
+ rule id, Name, :pop!
124
+ end
125
+
126
+ state :string do
127
+ rule /""/, Str::Escape
128
+ rule /"C?/, Str, :pop!
129
+ rule /[^"]+/, Str
130
+ end
131
+
132
+ state :dim do
133
+ mixin :whitespace
134
+ rule id, Name::Variable, :pop!
135
+ rule(//) { pop! }
136
+ end
137
+
138
+ state :funcname do
139
+ mixin :whitespace
140
+ rule id, Name::Function, :pop!
141
+ end
142
+
143
+ state :classname do
144
+ mixin :whitespace
145
+ rule id, Name::Class, :pop!
146
+ end
147
+
148
+ state :namespace do
149
+ mixin :whitespace
150
+ rule /#{id}([.]#{id})*/, Name::Namespace, :pop!
151
+ end
152
+
153
+ state :end do
154
+ mixin :whitespace
155
+ rule /(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b/,
156
+ Keyword, :pop!
157
+ rule(//) { pop! }
158
+ end
159
+ end
160
+ end
161
+ end
162
+
@@ -16,7 +16,7 @@ module Rouge
16
16
 
17
17
  state :root do
18
18
  rule /^(\s*)(".*?)$/ do
19
- group Text; group Comment
19
+ groups Text, Comment
20
20
  end
21
21
 
22
22
  rule /^\s*\\/, Str::Escape
@@ -17,7 +17,7 @@ module Rouge
17
17
 
18
18
  # reset the indentation levels
19
19
  def reset_indent
20
- debug { " yaml: reset_indent" }
20
+ puts " yaml: reset_indent" if @debug
21
21
  @indent_stack = [0]
22
22
  @next_indent = 0
23
23
  @block_scalar_indent = nil
@@ -39,12 +39,12 @@ module Rouge
39
39
  # Save a possible indentation level
40
40
  def save_indent(match)
41
41
  @next_indent = match.size
42
- debug { " yaml: indent: #{self.indent}/#@next_indent" }
43
- debug { " yaml: popping indent stack - before: #@indent_stack" }
42
+ puts " yaml: indent: #{self.indent}/#@next_indent" if @debug
43
+ puts " yaml: popping indent stack - before: #@indent_stack" if @debug
44
44
  if dedent?(@next_indent)
45
45
  @indent_stack.pop while dedent?(@next_indent)
46
- debug { " yaml: popping indent stack - after: #@indent_stack" }
47
- debug { " yaml: indent: #{self.indent}/#@next_indent" }
46
+ puts " yaml: popping indent stack - after: #@indent_stack" if @debug
47
+ puts " yaml: indent: #{self.indent}/#@next_indent" if @debug
48
48
 
49
49
  # dedenting to a state not previously indented to is an error
50
50
  [match[0...self.indent], match[self.indent..-1]]
@@ -54,7 +54,7 @@ module Rouge
54
54
  end
55
55
 
56
56
  def continue_indent(match)
57
- debug { " yaml: continue_indent" }
57
+ puts " yaml: continue_indent" if @debug
58
58
  @next_indent += match.size
59
59
  end
60
60
 
@@ -342,8 +342,8 @@ module Rouge
342
342
 
343
343
  state :yaml_directive do
344
344
  rule /([ ]+)(\d+\.\d+)/ do
345
- group Text; group Num
346
- pop!; push :ignored_line
345
+ groups Text, Num
346
+ goto :ignored_line
347
347
  end
348
348
  end
349
349
 
@@ -352,9 +352,8 @@ module Rouge
352
352
  ([ ]+)(!|![\w-]*!) # prefix
353
353
  ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag handle
354
354
  )x do
355
- group Text; group Keyword::Type
356
- group Text; group Keyword::Type
357
- pop!; push :ignored_line
355
+ groups Text, Keyword::Type, Text, Keyword::Type
356
+ goto :ignored_line
358
357
  end
359
358
  end
360
359
  end
@@ -10,18 +10,10 @@ module Rouge
10
10
  class Rule
11
11
  attr_reader :callback
12
12
  attr_reader :re
13
+ attr_reader :beginning_of_line
13
14
  def initialize(re, callback)
14
15
  @re = re
15
16
  @callback = callback
16
- end
17
-
18
- # Does the regex start with a ^?
19
- #
20
- # Since Regexps are immuntable, this is cached to avoid
21
- # calling Regexp#source more than once.
22
- def beginning_of_line?
23
- return @beginning_of_line if instance_variable_defined?(:@beginning_of_line)
24
-
25
17
  @beginning_of_line = re.source[0] == ?^
26
18
  end
27
19
 
@@ -101,11 +93,25 @@ module Rouge
101
93
  def rule(re, tok=nil, next_state=nil, &callback)
102
94
  callback ||= case next_state
103
95
  when :pop!
104
- proc { token tok; pop! }
96
+ proc do |stream|
97
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
98
+ @output_stream.call(tok, stream[0])
99
+ puts " popping stack: #{1}" if @debug
100
+ @stack.pop or raise 'empty stack!'
101
+ end
105
102
  when Symbol
106
- proc { token tok; push next_state }
103
+ proc do |stream|
104
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
105
+ @output_stream.call(tok, stream[0])
106
+ state = @states[next_state] || self.class.get_state(next_state)
107
+ puts " pushing #{state.name}" if @debug
108
+ @stack.push(state)
109
+ end
107
110
  else
108
- proc { token tok }
111
+ proc do |stream|
112
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
113
+ @output_stream.call(tok, stream[0])
114
+ end
109
115
  end
110
116
 
111
117
  rules << Rule.new(re, callback)
@@ -180,10 +186,8 @@ module Rouge
180
186
  def self.get_state(name)
181
187
  return name if name.is_a? State
182
188
 
183
- name = name.to_s
184
-
185
- states[name] ||= begin
186
- defn = state_definitions[name] or raise "unknown state: #{name.inspect}"
189
+ states[name.to_sym] ||= begin
190
+ defn = state_definitions[name.to_s] or raise "unknown state: #{name.inspect}"
187
191
  defn.to_state(self)
188
192
  end
189
193
  end
@@ -235,39 +239,65 @@ module Rouge
235
239
  stream = StringScanner.new(str)
236
240
 
237
241
  @current_stream = stream
242
+ @output_stream = b
243
+ @states = self.class.states
244
+ @null_steps = 0
238
245
 
239
246
  until stream.eos?
240
- debug { "lexer: #{self.class.tag}" }
241
- debug { "stack: #{stack.map(&:name).inspect}" }
242
- debug { "stream: #{stream.peek(20).inspect}" }
243
- success = step(get_state(state), stream, &b)
247
+ if @debug
248
+ puts "lexer: #{self.class.tag}"
249
+ puts "stack: #{stack.map(&:name).inspect}"
250
+ puts "stream: #{stream.peek(20).inspect}"
251
+ end
252
+
253
+ success = step(state, stream)
244
254
 
245
255
  if !success
246
- debug { " no match, yielding Error" }
256
+ puts " no match, yielding Error" if @debug
247
257
  b.call(Token::Tokens::Error, stream.getch)
248
258
  end
249
259
  end
250
260
  end
251
261
 
262
+ # The number of successive scans permitted without consuming
263
+ # the input stream. If this is exceeded, the match fails.
264
+ MAX_NULL_SCANS = 5
265
+
252
266
  # Runs one step of the lex. Rules in the current state are tried
253
267
  # until one matches, at which point its callback is called.
254
268
  #
255
269
  # @return true if a rule was tried successfully
256
270
  # @return false otherwise.
257
- def step(state, stream, &b)
271
+ def step(state, stream)
258
272
  state.rules.each do |rule|
259
- case rule
260
- when State
261
- debug { " entering mixin #{rule.name}" }
262
- return true if step(rule, stream, &b)
263
- debug { " exiting mixin #{rule.name}" }
264
- when Rule
265
- debug { " trying #{rule.inspect}" }
266
-
267
- if run_rule(rule, stream)
268
- debug { " got #{stream[0].inspect}" }
269
-
270
- run_callback(stream, rule.callback, &b)
273
+ if rule.is_a?(State)
274
+ puts " entering mixin #{rule.name}" if @debug
275
+ return true if step(rule, stream)
276
+ puts " exiting mixin #{rule.name}" if @debug
277
+ else
278
+ puts " trying #{rule.inspect}" if @debug
279
+
280
+ # XXX HACK XXX
281
+ # StringScanner's implementation of ^ is b0rken.
282
+ # see http://bugs.ruby-lang.org/issues/7092
283
+ # TODO: this doesn't cover cases like /(a|^b)/, but it's
284
+ # the most common, for now...
285
+ next if rule.beginning_of_line && !stream.beginning_of_line?
286
+
287
+ if size = stream.skip(rule.re)
288
+ puts " got #{stream[0].inspect}" if @debug
289
+
290
+ instance_exec(stream, &rule.callback)
291
+
292
+ if size.zero?
293
+ @null_steps += 1
294
+ if @null_steps > MAX_NULL_SCANS
295
+ puts " too many scans without consuming the string!" if @debug
296
+ return false
297
+ end
298
+ else
299
+ @null_steps = 0
300
+ end
271
301
 
272
302
  return true
273
303
  end
@@ -277,43 +307,6 @@ module Rouge
277
307
  false
278
308
  end
279
309
 
280
- # @private
281
- def run_callback(stream, callback, &output_stream)
282
- with_output_stream(output_stream) do
283
- @group_count = 0
284
- instance_exec(stream, &callback)
285
- end
286
- end
287
-
288
- # The number of successive scans permitted without consuming
289
- # the input stream. If this is exceeded, the match fails.
290
- MAX_NULL_SCANS = 5
291
-
292
- # @private
293
- def run_rule(rule, scanner)
294
- # XXX HACK XXX
295
- # StringScanner's implementation of ^ is b0rken.
296
- # see http://bugs.ruby-lang.org/issues/7092
297
- # TODO: this doesn't cover cases like /(a|^b)/, but it's
298
- # the most common, for now...
299
- return false if rule.beginning_of_line? && !scanner.beginning_of_line?
300
-
301
- if (@null_steps ||= 0) >= MAX_NULL_SCANS
302
- debug { " too many scans without consuming the string!" }
303
- return false
304
- end
305
-
306
- scanner.scan(rule.re) or return false
307
-
308
- if scanner.matched_size.zero?
309
- @null_steps += 1
310
- else
311
- @null_steps = 0
312
- end
313
-
314
- true
315
- end
316
-
317
310
  # Yield a token.
318
311
  #
319
312
  # @param tok
@@ -321,17 +314,20 @@ module Rouge
321
314
  # @param val
322
315
  # (optional) the string value to yield. If absent, this defaults
323
316
  # to the entire last match.
324
- def token(tok, val=:__absent__)
325
- val = @current_stream[0] if val == :__absent__
317
+ def token(tok, val=@current_stream[0])
326
318
  yield_token(tok, val)
327
319
  end
328
320
 
321
+ # @deprecated
322
+ #
329
323
  # Yield a token with the next matched group. Subsequent calls
330
324
  # to this method will yield subsequent groups.
331
325
  def group(tok)
332
- yield_token(tok, @current_stream[@group_count += 1])
326
+ raise "RegexLexer#group is deprecated: use #groups instead"
333
327
  end
334
328
 
329
+ # Yield tokens corresponding to the matched groups of the current
330
+ # match.
335
331
  def groups(*tokens)
336
332
  tokens.each_with_index do |tok, i|
337
333
  yield_token(tok, @current_stream[i+1])
@@ -348,11 +344,11 @@ module Rouge
348
344
  # @param [String] text
349
345
  # The text to delegate. This defaults to the last matched string.
350
346
  def delegate(lexer, text=nil)
351
- debug { " delegating to #{lexer.inspect}" }
347
+ puts " delegating to #{lexer.inspect}" if @debug
352
348
  text ||= @current_stream[0]
353
349
 
354
350
  lexer.lex(text, :continue => true) do |tok, val|
355
- debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
351
+ puts " delegated token: #{tok.inspect}, #{val.inspect}" if @debug
356
352
  yield_token(tok, val)
357
353
  end
358
354
  end
@@ -374,7 +370,7 @@ module Rouge
374
370
  self.state
375
371
  end
376
372
 
377
- debug { " pushing #{push_state.name}" }
373
+ puts " pushing #{push_state.name}" if @debug
378
374
  stack.push(push_state)
379
375
  end
380
376
 
@@ -383,7 +379,7 @@ module Rouge
383
379
  def pop!(times=1)
384
380
  raise 'empty stack!' if stack.empty?
385
381
 
386
- debug { " popping stack: #{times}" }
382
+ puts " popping stack: #{times}" if @debug
387
383
 
388
384
  stack.pop(times)
389
385
 
@@ -393,12 +389,14 @@ module Rouge
393
389
  # replace the head of the stack with the given state
394
390
  def goto(state_name)
395
391
  raise 'empty stack!' if stack.empty?
392
+
393
+ puts " going to state #{state_name} " if @debug
396
394
  stack[-1] = get_state(state_name)
397
395
  end
398
396
 
399
397
  # reset the stack back to `[:root]`.
400
398
  def reset_stack
401
- debug { ' resetting stack' }
399
+ puts ' resetting stack' if @debug
402
400
  stack.clear
403
401
  stack.push get_state(:root)
404
402
  end
@@ -417,19 +415,6 @@ module Rouge
417
415
  end
418
416
 
419
417
  private
420
- def with_output_stream(output_stream, &b)
421
- old_output_stream = @output_stream
422
- @output_stream = Enumerator::Yielder.new do |tok, val|
423
- debug { " yielding #{tok.qualname}, #{val.inspect}" }
424
- output_stream.call(tok, val)
425
- end
426
-
427
- yield
428
-
429
- ensure
430
- @output_stream = old_output_stream
431
- end
432
-
433
418
  def yield_token(tok, val)
434
419
  return if val.nil? || val.empty?
435
420
  @output_stream.yield(tok, val)