rouge 1.1.0 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54)
  1. data/Gemfile +2 -0
  2. data/lib/rouge/cli.rb +15 -0
  3. data/lib/rouge/demos/matlab +6 -0
  4. data/lib/rouge/demos/ocaml +12 -0
  5. data/lib/rouge/demos/scala +3 -0
  6. data/lib/rouge/demos/sml +4 -0
  7. data/lib/rouge/demos/vb +4 -0
  8. data/lib/rouge/formatters/html.rb +13 -17
  9. data/lib/rouge/formatters/terminal256.rb +2 -2
  10. data/lib/rouge/lexer.rb +12 -14
  11. data/lib/rouge/lexers/coffeescript.rb +2 -4
  12. data/lib/rouge/lexers/common_lisp.rb +2 -4
  13. data/lib/rouge/lexers/erlang.rb +4 -13
  14. data/lib/rouge/lexers/factor.rb +25 -24
  15. data/lib/rouge/lexers/gherkin.rb +6 -6
  16. data/lib/rouge/lexers/groovy.rb +1 -2
  17. data/lib/rouge/lexers/haml.rb +2 -2
  18. data/lib/rouge/lexers/haskell.rb +18 -20
  19. data/lib/rouge/lexers/http.rb +13 -12
  20. data/lib/rouge/lexers/ini.rb +1 -2
  21. data/lib/rouge/lexers/java.rb +1 -2
  22. data/lib/rouge/lexers/lua.rb +1 -1
  23. data/lib/rouge/lexers/make.rb +4 -5
  24. data/lib/rouge/lexers/markdown.rb +4 -12
  25. data/lib/rouge/lexers/matlab.rb +71 -0
  26. data/lib/rouge/lexers/matlab/builtins.rb +10 -0
  27. data/lib/rouge/lexers/nginx.rb +3 -4
  28. data/lib/rouge/lexers/objective_c.rb +1 -1
  29. data/lib/rouge/lexers/ocaml.rb +109 -0
  30. data/lib/rouge/lexers/perl.rb +2 -5
  31. data/lib/rouge/lexers/php.rb +5 -5
  32. data/lib/rouge/lexers/php/builtins.rb +4 -2
  33. data/lib/rouge/lexers/puppet.rb +1 -2
  34. data/lib/rouge/lexers/python.rb +5 -11
  35. data/lib/rouge/lexers/racket.rb +1 -3
  36. data/lib/rouge/lexers/ruby.rb +8 -8
  37. data/lib/rouge/lexers/rust.rb +3 -3
  38. data/lib/rouge/lexers/sass/common.rb +4 -4
  39. data/lib/rouge/lexers/scala.rb +141 -0
  40. data/lib/rouge/lexers/scheme.rb +1 -3
  41. data/lib/rouge/lexers/sed.rb +4 -4
  42. data/lib/rouge/lexers/shell.rb +1 -2
  43. data/lib/rouge/lexers/smalltalk.rb +6 -7
  44. data/lib/rouge/lexers/sml.rb +236 -6
  45. data/lib/rouge/lexers/tex.rb +1 -4
  46. data/lib/rouge/lexers/toml.rb +1 -2
  47. data/lib/rouge/lexers/vb.rb +162 -0
  48. data/lib/rouge/lexers/viml.rb +1 -1
  49. data/lib/rouge/lexers/yaml.rb +10 -11
  50. data/lib/rouge/regex_lexer.rb +77 -92
  51. data/lib/rouge/token.rb +3 -3
  52. data/lib/rouge/util.rb +4 -4
  53. data/lib/rouge/version.rb +1 -1
  54. metadata +13 -4
data/lib/rouge/lexers/tex.rb
@@ -30,10 +30,7 @@ module Rouge
30
30
  rule /\\(begin|end)\{.*?\}/, Name::Tag
31
31
 
32
32
  rule /(\\verb)\b(\S)(.*?)(\2)/ do |m|
33
- group Name::Builtin
34
- group Keyword::Pseudo
35
- group Str::Other
36
- group Keyword::Pseudo
33
+ groups Name::Builtin, Keyword::Pseudo, Str::Other, Keyword::Pseudo
37
34
  end
38
35
 
39
36
  rule command, Keyword, :command
data/lib/rouge/lexers/toml.rb
@@ -30,8 +30,7 @@ module Rouge
30
30
  mixin :basic
31
31
 
32
32
  rule /(#{identifier})(\s*)(=)/ do
33
- group Name::Property; group Text
34
- group Punctuation
33
+ groups Name::Property, Text, Punctuation
35
34
  push :value
36
35
  end
37
36
 
data/lib/rouge/lexers/vb.rb
@@ -0,0 +1,162 @@
1
+ module Rouge
2
+ module Lexers
3
+ class VisualBasic < RegexLexer
4
+ desc "Visual Basic"
5
+ tag 'vb'
6
+ aliases 'visualbasic'
7
+ filenames '*.vbs'
8
+ mimetypes 'text/x-visualbasic', 'application/x-visualbasic'
9
+
10
+ def self.keywords
11
+ @keywords ||= Set.new %w(
12
+ AddHandler Alias ByRef ByVal CBool CByte CChar CDate CDbl CDec
13
+ CInt CLng CObj CSByte CShort CSng CStr CType CUInt CULng CUShort
14
+ Call Case Catch Class Const Continue Declare Default Delegate
15
+ Dim DirectCast Do Each Else ElseIf End EndIf Enum Erase Error
16
+ Event Exit False Finally For Friend Function Get Global GoSub
17
+ GoTo Handles If Implements Imports Inherits Interface Let
18
+ Lib Loop Me Module MustInherit MustOverride MyBase MyClass
19
+ Namespace Narrowing New Next Not NotInheritable NotOverridable
20
+ Nothing Of On Operator Option Optional Overloads Overridable
21
+ Overrides ParamArray Partial Private Property Protected Public
22
+ RaiseEvent ReDim ReadOnly RemoveHandler Resume Return Select Set
23
+ Shadows Shared Single Static Step Stop Structure Sub SyncLock
24
+ Then Throw To True Try TryCast Using Wend When While Widening
25
+ With WithEvents WriteOnly
26
+ )
27
+ end
28
+
29
+ def self.keywords_type
30
+ @keywords_type ||= Set.new %w(
31
+ Boolean Byte Char Date Decimal Double Integer Long Object
32
+ SByte Short Single String Variant UInteger ULong UShort
33
+ )
34
+ end
35
+
36
+ def self.operator_words
37
+ @operator_words ||= Set.new %w(
38
+ AddressOf And AndAlso As GetType In Is IsNot Like Mod Or OrElse
39
+ TypeOf Xor
40
+ )
41
+ end
42
+
43
+ def self.builtins
44
+ @builtins ||= Set.new %w(
45
+ Console ConsoleColor
46
+ )
47
+ end
48
+
49
+ id = /[a-z_]\w*/i
50
+ upper_id = /[A-Z]\w*/
51
+
52
+ state :whitespace do
53
+ rule /\s+/, Text
54
+ rule /\n/, Text, :bol
55
+ rule /rem\b.*?$/i, Comment::Single
56
+ rule %r(%\{.*?%\})m, Comment::Multiline
57
+ rule /'.*$/, Comment::Single
58
+ end
59
+
60
+ state :bol do
61
+ rule /\s+/, Text
62
+ rule /<.*?>/, Name::Attribute
63
+ rule(//) { :pop! }
64
+ end
65
+
66
+ state :root do
67
+ mixin :whitespace
68
+ rule %r(
69
+ [#]If\b .*? \bThen
70
+ | [#]ElseIf\b .*? \bThen
71
+ | [#]End \s+ If
72
+ | [#]Const
73
+ | [#]ExternalSource .*? \n
74
+ | [#]End \s+ ExternalSource
75
+ | [#]Region .*? \n
76
+ | [#]End \s+ Region
77
+ | [#]ExternalChecksum
78
+ )x, Comment::Preproc
79
+ rule /[.]/, Punctuation, :dotted
80
+ rule /[(){}!#,:]/, Punctuation
81
+ rule /Option\s+(Strict|Explicit|Compare)\s+(On|Off|Binary|Text)/,
82
+ Keyword::Declaration
83
+ rule /End\b/, Keyword, :end
84
+ rule /(Dim|Const)\b/, Keyword, :dim
85
+ rule /(Function|Sub|Property)\b/, Keyword, :funcname
86
+ rule /(Class|Structure|Enum)\b/, Keyword, :classname
87
+ rule /(Module|Namespace|Imports)\b/, Keyword, :namespace
88
+
89
+ rule upper_id do |m|
90
+ match = m[0]
91
+ if self.class.keywords.include? match
92
+ token Keyword
93
+ elsif self.class.keywords_type.include? match
94
+ token Keyword::Type
95
+ elsif self.class.operator_words.include? match
96
+ token Operator::Word
97
+ elsif self.class.builtins.include? match
98
+ token Name::Builtin
99
+ else
100
+ token Name
101
+ end
102
+ end
103
+
104
+ rule(
105
+ %r(&=|[*]=|/=|\\=|\^=|\+=|-=|<<=|>>=|<<|>>|:=|<=|>=|<>|[-&*/\\^+=<>.]),
106
+ Operator
107
+ )
108
+
109
+ rule /"/, Str, :string
110
+ rule /#{id}[%&@!#\$]?/, Name
111
+ rule /#.*?#/, Literal::Date
112
+
113
+ rule /(\d+\.\d*|\d*\.\d+)(f[+-]?\d+)?/i, Num::Float
114
+ rule /\d+([SILDFR]|US|UI|UL)?/, Num::Integer
115
+ rule /&H[0-9a-f]+([SILDFR]|US|UI|UL)?/, Num::Integer
116
+ rule /&O[0-7]+([SILDFR]|US|UI|UL)?/, Num::Integer
117
+
118
+ rule /_\n/, Keyword
119
+ end
120
+
121
+ state :dotted do
122
+ mixin :whitespace
123
+ rule id, Name, :pop!
124
+ end
125
+
126
+ state :string do
127
+ rule /""/, Str::Escape
128
+ rule /"C?/, Str, :pop!
129
+ rule /[^"]+/, Str
130
+ end
131
+
132
+ state :dim do
133
+ mixin :whitespace
134
+ rule id, Name::Variable, :pop!
135
+ rule(//) { pop! }
136
+ end
137
+
138
+ state :funcname do
139
+ mixin :whitespace
140
+ rule id, Name::Function, :pop!
141
+ end
142
+
143
+ state :classname do
144
+ mixin :whitespace
145
+ rule id, Name::Class, :pop!
146
+ end
147
+
148
+ state :namespace do
149
+ mixin :whitespace
150
+ rule /#{id}([.]#{id})*/, Name::Namespace, :pop!
151
+ end
152
+
153
+ state :end do
154
+ mixin :whitespace
155
+ rule /(Function|Sub|Property|Class|Structure|Enum|Module|Namespace)\b/,
156
+ Keyword, :pop!
157
+ rule(//) { pop! }
158
+ end
159
+ end
160
+ end
161
+ end
162
+
data/lib/rouge/lexers/viml.rb
@@ -16,7 +16,7 @@ module Rouge
16
16
 
17
17
  state :root do
18
18
  rule /^(\s*)(".*?)$/ do
19
- group Text; group Comment
19
+ groups Text, Comment
20
20
  end
21
21
 
22
22
  rule /^\s*\\/, Str::Escape
data/lib/rouge/lexers/yaml.rb
@@ -17,7 +17,7 @@ module Rouge
17
17
 
18
18
  # reset the indentation levels
19
19
  def reset_indent
20
- debug { " yaml: reset_indent" }
20
+ puts " yaml: reset_indent" if @debug
21
21
  @indent_stack = [0]
22
22
  @next_indent = 0
23
23
  @block_scalar_indent = nil
@@ -39,12 +39,12 @@ module Rouge
39
39
  # Save a possible indentation level
40
40
  def save_indent(match)
41
41
  @next_indent = match.size
42
- debug { " yaml: indent: #{self.indent}/#@next_indent" }
43
- debug { " yaml: popping indent stack - before: #@indent_stack" }
42
+ puts " yaml: indent: #{self.indent}/#@next_indent" if @debug
43
+ puts " yaml: popping indent stack - before: #@indent_stack" if @debug
44
44
  if dedent?(@next_indent)
45
45
  @indent_stack.pop while dedent?(@next_indent)
46
- debug { " yaml: popping indent stack - after: #@indent_stack" }
47
- debug { " yaml: indent: #{self.indent}/#@next_indent" }
46
+ puts " yaml: popping indent stack - after: #@indent_stack" if @debug
47
+ puts " yaml: indent: #{self.indent}/#@next_indent" if @debug
48
48
 
49
49
  # dedenting to a state not previously indented to is an error
50
50
  [match[0...self.indent], match[self.indent..-1]]
@@ -54,7 +54,7 @@ module Rouge
54
54
  end
55
55
 
56
56
  def continue_indent(match)
57
- debug { " yaml: continue_indent" }
57
+ puts " yaml: continue_indent" if @debug
58
58
  @next_indent += match.size
59
59
  end
60
60
 
@@ -342,8 +342,8 @@ module Rouge
342
342
 
343
343
  state :yaml_directive do
344
344
  rule /([ ]+)(\d+\.\d+)/ do
345
- group Text; group Num
346
- pop!; push :ignored_line
345
+ groups Text, Num
346
+ goto :ignored_line
347
347
  end
348
348
  end
349
349
 
@@ -352,9 +352,8 @@ module Rouge
352
352
  ([ ]+)(!|![\w-]*!) # prefix
353
353
  ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag handle
354
354
  )x do
355
- group Text; group Keyword::Type
356
- group Text; group Keyword::Type
357
- pop!; push :ignored_line
355
+ groups Text, Keyword::Type, Text, Keyword::Type
356
+ goto :ignored_line
358
357
  end
359
358
  end
360
359
  end
data/lib/rouge/regex_lexer.rb
@@ -10,18 +10,10 @@ module Rouge
10
10
  class Rule
11
11
  attr_reader :callback
12
12
  attr_reader :re
13
+ attr_reader :beginning_of_line
13
14
  def initialize(re, callback)
14
15
  @re = re
15
16
  @callback = callback
16
- end
17
-
18
- # Does the regex start with a ^?
19
- #
20
- # Since Regexps are immuntable, this is cached to avoid
21
- # calling Regexp#source more than once.
22
- def beginning_of_line?
23
- return @beginning_of_line if instance_variable_defined?(:@beginning_of_line)
24
-
25
17
  @beginning_of_line = re.source[0] == ?^
26
18
  end
27
19
 
@@ -101,11 +93,25 @@ module Rouge
101
93
  def rule(re, tok=nil, next_state=nil, &callback)
102
94
  callback ||= case next_state
103
95
  when :pop!
104
- proc { token tok; pop! }
96
+ proc do |stream|
97
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
98
+ @output_stream.call(tok, stream[0])
99
+ puts " popping stack: #{1}" if @debug
100
+ @stack.pop or raise 'empty stack!'
101
+ end
105
102
  when Symbol
106
- proc { token tok; push next_state }
103
+ proc do |stream|
104
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
105
+ @output_stream.call(tok, stream[0])
106
+ state = @states[next_state] || self.class.get_state(next_state)
107
+ puts " pushing #{state.name}" if @debug
108
+ @stack.push(state)
109
+ end
107
110
  else
108
- proc { token tok }
111
+ proc do |stream|
112
+ puts " yielding #{tok.qualname}, #{stream[0].inspect}" if @debug
113
+ @output_stream.call(tok, stream[0])
114
+ end
109
115
  end
110
116
 
111
117
  rules << Rule.new(re, callback)
@@ -180,10 +186,8 @@ module Rouge
180
186
  def self.get_state(name)
181
187
  return name if name.is_a? State
182
188
 
183
- name = name.to_s
184
-
185
- states[name] ||= begin
186
- defn = state_definitions[name] or raise "unknown state: #{name.inspect}"
189
+ states[name.to_sym] ||= begin
190
+ defn = state_definitions[name.to_s] or raise "unknown state: #{name.inspect}"
187
191
  defn.to_state(self)
188
192
  end
189
193
  end
@@ -235,39 +239,65 @@ module Rouge
235
239
  stream = StringScanner.new(str)
236
240
 
237
241
  @current_stream = stream
242
+ @output_stream = b
243
+ @states = self.class.states
244
+ @null_steps = 0
238
245
 
239
246
  until stream.eos?
240
- debug { "lexer: #{self.class.tag}" }
241
- debug { "stack: #{stack.map(&:name).inspect}" }
242
- debug { "stream: #{stream.peek(20).inspect}" }
243
- success = step(get_state(state), stream, &b)
247
+ if @debug
248
+ puts "lexer: #{self.class.tag}"
249
+ puts "stack: #{stack.map(&:name).inspect}"
250
+ puts "stream: #{stream.peek(20).inspect}"
251
+ end
252
+
253
+ success = step(state, stream)
244
254
 
245
255
  if !success
246
- debug { " no match, yielding Error" }
256
+ puts " no match, yielding Error" if @debug
247
257
  b.call(Token::Tokens::Error, stream.getch)
248
258
  end
249
259
  end
250
260
  end
251
261
 
262
+ # The number of successive scans permitted without consuming
263
+ # the input stream. If this is exceeded, the match fails.
264
+ MAX_NULL_SCANS = 5
265
+
252
266
  # Runs one step of the lex. Rules in the current state are tried
253
267
  # until one matches, at which point its callback is called.
254
268
  #
255
269
  # @return true if a rule was tried successfully
256
270
  # @return false otherwise.
257
- def step(state, stream, &b)
271
+ def step(state, stream)
258
272
  state.rules.each do |rule|
259
- case rule
260
- when State
261
- debug { " entering mixin #{rule.name}" }
262
- return true if step(rule, stream, &b)
263
- debug { " exiting mixin #{rule.name}" }
264
- when Rule
265
- debug { " trying #{rule.inspect}" }
266
-
267
- if run_rule(rule, stream)
268
- debug { " got #{stream[0].inspect}" }
269
-
270
- run_callback(stream, rule.callback, &b)
273
+ if rule.is_a?(State)
274
+ puts " entering mixin #{rule.name}" if @debug
275
+ return true if step(rule, stream)
276
+ puts " exiting mixin #{rule.name}" if @debug
277
+ else
278
+ puts " trying #{rule.inspect}" if @debug
279
+
280
+ # XXX HACK XXX
281
+ # StringScanner's implementation of ^ is b0rken.
282
+ # see http://bugs.ruby-lang.org/issues/7092
283
+ # TODO: this doesn't cover cases like /(a|^b)/, but it's
284
+ # the most common, for now...
285
+ next if rule.beginning_of_line && !stream.beginning_of_line?
286
+
287
+ if size = stream.skip(rule.re)
288
+ puts " got #{stream[0].inspect}" if @debug
289
+
290
+ instance_exec(stream, &rule.callback)
291
+
292
+ if size.zero?
293
+ @null_steps += 1
294
+ if @null_steps > MAX_NULL_SCANS
295
+ puts " too many scans without consuming the string!" if @debug
296
+ return false
297
+ end
298
+ else
299
+ @null_steps = 0
300
+ end
271
301
 
272
302
  return true
273
303
  end
@@ -277,43 +307,6 @@ module Rouge
277
307
  false
278
308
  end
279
309
 
280
- # @private
281
- def run_callback(stream, callback, &output_stream)
282
- with_output_stream(output_stream) do
283
- @group_count = 0
284
- instance_exec(stream, &callback)
285
- end
286
- end
287
-
288
- # The number of successive scans permitted without consuming
289
- # the input stream. If this is exceeded, the match fails.
290
- MAX_NULL_SCANS = 5
291
-
292
- # @private
293
- def run_rule(rule, scanner)
294
- # XXX HACK XXX
295
- # StringScanner's implementation of ^ is b0rken.
296
- # see http://bugs.ruby-lang.org/issues/7092
297
- # TODO: this doesn't cover cases like /(a|^b)/, but it's
298
- # the most common, for now...
299
- return false if rule.beginning_of_line? && !scanner.beginning_of_line?
300
-
301
- if (@null_steps ||= 0) >= MAX_NULL_SCANS
302
- debug { " too many scans without consuming the string!" }
303
- return false
304
- end
305
-
306
- scanner.scan(rule.re) or return false
307
-
308
- if scanner.matched_size.zero?
309
- @null_steps += 1
310
- else
311
- @null_steps = 0
312
- end
313
-
314
- true
315
- end
316
-
317
310
  # Yield a token.
318
311
  #
319
312
  # @param tok
@@ -321,17 +314,20 @@ module Rouge
321
314
  # @param val
322
315
  # (optional) the string value to yield. If absent, this defaults
323
316
  # to the entire last match.
324
- def token(tok, val=:__absent__)
325
- val = @current_stream[0] if val == :__absent__
317
+ def token(tok, val=@current_stream[0])
326
318
  yield_token(tok, val)
327
319
  end
328
320
 
321
+ # @deprecated
322
+ #
329
323
  # Yield a token with the next matched group. Subsequent calls
330
324
  # to this method will yield subsequent groups.
331
325
  def group(tok)
332
- yield_token(tok, @current_stream[@group_count += 1])
326
+ raise "RegexLexer#group is deprecated: use #groups instead"
333
327
  end
334
328
 
329
+ # Yield tokens corresponding to the matched groups of the current
330
+ # match.
335
331
  def groups(*tokens)
336
332
  tokens.each_with_index do |tok, i|
337
333
  yield_token(tok, @current_stream[i+1])
@@ -348,11 +344,11 @@ module Rouge
348
344
  # @param [String] text
349
345
  # The text to delegate. This defaults to the last matched string.
350
346
  def delegate(lexer, text=nil)
351
- debug { " delegating to #{lexer.inspect}" }
347
+ puts " delegating to #{lexer.inspect}" if @debug
352
348
  text ||= @current_stream[0]
353
349
 
354
350
  lexer.lex(text, :continue => true) do |tok, val|
355
- debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
351
+ puts " delegated token: #{tok.inspect}, #{val.inspect}" if @debug
356
352
  yield_token(tok, val)
357
353
  end
358
354
  end
@@ -374,7 +370,7 @@ module Rouge
374
370
  self.state
375
371
  end
376
372
 
377
- debug { " pushing #{push_state.name}" }
373
+ puts " pushing #{push_state.name}" if @debug
378
374
  stack.push(push_state)
379
375
  end
380
376
 
@@ -383,7 +379,7 @@ module Rouge
383
379
  def pop!(times=1)
384
380
  raise 'empty stack!' if stack.empty?
385
381
 
386
- debug { " popping stack: #{times}" }
382
+ puts " popping stack: #{times}" if @debug
387
383
 
388
384
  stack.pop(times)
389
385
 
@@ -393,12 +389,14 @@ module Rouge
393
389
  # replace the head of the stack with the given state
394
390
  def goto(state_name)
395
391
  raise 'empty stack!' if stack.empty?
392
+
393
+ puts " going to state #{state_name} " if @debug
396
394
  stack[-1] = get_state(state_name)
397
395
  end
398
396
 
399
397
  # reset the stack back to `[:root]`.
400
398
  def reset_stack
401
- debug { ' resetting stack' }
399
+ puts ' resetting stack' if @debug
402
400
  stack.clear
403
401
  stack.push get_state(:root)
404
402
  end
@@ -417,19 +415,6 @@ module Rouge
417
415
  end
418
416
 
419
417
  private
420
- def with_output_stream(output_stream, &b)
421
- old_output_stream = @output_stream
422
- @output_stream = Enumerator::Yielder.new do |tok, val|
423
- debug { " yielding #{tok.qualname}, #{val.inspect}" }
424
- output_stream.call(tok, val)
425
- end
426
-
427
- yield
428
-
429
- ensure
430
- @output_stream = old_output_stream
431
- end
432
-
433
418
  def yield_token(tok, val)
434
419
  return if val.nil? || val.empty?
435
420
  @output_stream.yield(tok, val)