rouge 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +3 -0
- data/lib/rouge.rb +9 -0
- data/lib/rouge/lexer.rb +83 -275
- data/lib/rouge/lexers/common_lisp.rb +19 -24
- data/lib/rouge/lexers/erb.rb +3 -9
- data/lib/rouge/lexers/factor.rb +300 -0
- data/lib/rouge/lexers/haml.rb +238 -0
- data/lib/rouge/lexers/html.rb +1 -0
- data/lib/rouge/lexers/markdown.rb +173 -0
- data/lib/rouge/lexers/php.rb +11 -13
- data/lib/rouge/lexers/ruby.rb +2 -2
- data/lib/rouge/lexers/yaml.rb +358 -0
- data/lib/rouge/regex_lexer.rb +300 -0
- data/lib/rouge/template_lexer.rb +14 -0
- data/lib/rouge/theme.rb +0 -34
- data/lib/rouge/themes/thankful_eyes.rb +4 -3
- data/lib/rouge/util.rb +63 -0
- data/lib/rouge/version.rb +1 -1
- metadata +10 -2
data/Gemfile
CHANGED
data/lib/rouge.rb
CHANGED
@@ -15,13 +15,20 @@ end
|
|
15
15
|
load_dir = Pathname.new(__FILE__).dirname
|
16
16
|
load load_dir.join('rouge/version.rb')
|
17
17
|
|
18
|
+
load load_dir.join('rouge/util.rb')
|
19
|
+
|
18
20
|
load load_dir.join('rouge/text_analyzer.rb')
|
19
21
|
load load_dir.join('rouge/token.rb')
|
22
|
+
|
20
23
|
load load_dir.join('rouge/lexer.rb')
|
24
|
+
load load_dir.join('rouge/regex_lexer.rb')
|
25
|
+
load load_dir.join('rouge/template_lexer.rb')
|
21
26
|
|
22
27
|
load load_dir.join('rouge/lexers/text.rb')
|
23
28
|
load load_dir.join('rouge/lexers/diff.rb')
|
24
29
|
load load_dir.join('rouge/lexers/tex.rb')
|
30
|
+
load load_dir.join('rouge/lexers/markdown.rb')
|
31
|
+
load load_dir.join('rouge/lexers/yaml.rb')
|
25
32
|
|
26
33
|
load load_dir.join('rouge/lexers/make.rb')
|
27
34
|
load load_dir.join('rouge/lexers/shell.rb')
|
@@ -29,6 +36,7 @@ load load_dir.join('rouge/lexers/shell.rb')
|
|
29
36
|
load load_dir.join('rouge/lexers/javascript.rb')
|
30
37
|
load load_dir.join('rouge/lexers/css.rb')
|
31
38
|
load load_dir.join('rouge/lexers/html.rb')
|
39
|
+
load load_dir.join('rouge/lexers/haml.rb')
|
32
40
|
load load_dir.join('rouge/lexers/xml.rb')
|
33
41
|
load load_dir.join('rouge/lexers/php.rb')
|
34
42
|
|
@@ -38,6 +46,7 @@ load load_dir.join('rouge/lexers/tcl.rb')
|
|
38
46
|
load load_dir.join('rouge/lexers/python.rb')
|
39
47
|
load load_dir.join('rouge/lexers/ruby.rb')
|
40
48
|
load load_dir.join('rouge/lexers/perl.rb')
|
49
|
+
load load_dir.join('rouge/lexers/factor.rb')
|
41
50
|
|
42
51
|
load load_dir.join('rouge/lexers/haskell.rb')
|
43
52
|
load load_dir.join('rouge/lexers/scheme.rb')
|
data/lib/rouge/lexer.rb
CHANGED
@@ -4,18 +4,37 @@ require 'strscan'
|
|
4
4
|
module Rouge
|
5
5
|
class Lexer
|
6
6
|
class << self
|
7
|
+
# Lexes `stream` with the given options. The lex is delegated to a
|
8
|
+
# new instance.
|
9
|
+
#
|
10
|
+
# @see #lex
|
7
11
|
def lex(stream, opts={}, &b)
|
8
12
|
new(opts).lex(stream, &b)
|
9
13
|
end
|
10
14
|
|
11
|
-
def default_options
|
15
|
+
def default_options(o={})
|
12
16
|
@default_options ||= {}
|
17
|
+
@default_options.merge!(o)
|
18
|
+
@default_options
|
13
19
|
end
|
14
20
|
|
21
|
+
# Given a string, return the correct lexer class.
|
15
22
|
def find(name)
|
16
23
|
registry[name.to_s]
|
17
24
|
end
|
18
25
|
|
26
|
+
# Guess which lexer to use based on a hash of info.
|
27
|
+
#
|
28
|
+
# @option info :mimetype
|
29
|
+
# A mimetype to guess by
|
30
|
+
# @option info :filename
|
31
|
+
# A filename to guess by
|
32
|
+
# @option info :source
|
33
|
+
# The source itself, which, if guessing by mimetype or filename
|
34
|
+
# fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
|
35
|
+
# other hints.
|
36
|
+
#
|
37
|
+
# @see Lexer.analyze_text
|
19
38
|
def guess(info={})
|
20
39
|
by_mimetype = guess_by_mimetype(info[:mimetype]) if info[:mimetype]
|
21
40
|
return by_mimetype if by_mimetype
|
@@ -67,6 +86,16 @@ module Rouge
|
|
67
86
|
registry[name.to_s] = lexer
|
68
87
|
end
|
69
88
|
|
89
|
+
# Used to specify or get the canonical name of this lexer class.
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# class MyLexer < Lexer
|
93
|
+
# tag 'foo'
|
94
|
+
# end
|
95
|
+
#
|
96
|
+
# MyLexer.tag # => 'foo'
|
97
|
+
#
|
98
|
+
# Lexer.find('foo') # => MyLexer
|
70
99
|
def tag(t=nil)
|
71
100
|
return @tag if t.nil?
|
72
101
|
|
@@ -74,14 +103,35 @@ module Rouge
|
|
74
103
|
aliases @tag
|
75
104
|
end
|
76
105
|
|
106
|
+
# Used to specify alternate names this lexer class may be found by.
|
107
|
+
#
|
108
|
+
# @example
|
109
|
+
# class Erb < Lexer
|
110
|
+
# tag 'erb'
|
111
|
+
# aliases 'eruby', 'rhtml'
|
112
|
+
# end
|
113
|
+
#
|
114
|
+
# Lexer.find('eruby') # => Erb
|
77
115
|
def aliases(*args)
|
78
116
|
args.each { |arg| Lexer.register(arg, self) }
|
79
117
|
end
|
80
118
|
|
119
|
+
# Specify a list of filename globs associated with this lexer
|
120
|
+
#
|
121
|
+
# @example
|
122
|
+
# class Ruby < Lexer
|
123
|
+
# filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
|
124
|
+
# end
|
81
125
|
def filenames(*fnames)
|
82
126
|
(@filenames ||= []).concat(fnames)
|
83
127
|
end
|
84
128
|
|
129
|
+
# Specify a list of mimetypes associated with this lexer.
|
130
|
+
#
|
131
|
+
# @example
|
132
|
+
# class Html < Lexer
|
133
|
+
# mimetypes 'text/html', 'application/xhtml+xml'
|
134
|
+
# end
|
85
135
|
def mimetypes(*mts)
|
86
136
|
(@mimetypes ||= []).concat(mts)
|
87
137
|
end
|
@@ -94,7 +144,7 @@ module Rouge
|
|
94
144
|
|
95
145
|
# -*- instance methods -*- #
|
96
146
|
|
97
|
-
def initialize(opts={}
|
147
|
+
def initialize(opts={})
|
98
148
|
options(opts)
|
99
149
|
end
|
100
150
|
|
@@ -112,18 +162,28 @@ module Rouge
|
|
112
162
|
end
|
113
163
|
end
|
114
164
|
|
165
|
+
# Leave a debug message if the `:debug` option is set. The message
|
166
|
+
# is given as a block because some debug messages contain calculated
|
167
|
+
# information that is unnecessary for lexing in the real world.
|
168
|
+
#
|
169
|
+
# @example
|
170
|
+
# debug { "hello, world!" }
|
115
171
|
def debug(&b)
|
116
172
|
puts(b.call) if option :debug
|
117
173
|
end
|
118
174
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
175
|
+
# @abstract
|
176
|
+
#
|
177
|
+
# Called after each lex is finished. The default implementation
|
178
|
+
# is a noop.
|
123
179
|
def reset!
|
124
|
-
# noop, called after each lex is finished
|
125
180
|
end
|
126
181
|
|
182
|
+
# Given a string, yield [token, chunk] pairs. If no block is given,
|
183
|
+
# an enumerator is returned.
|
184
|
+
#
|
185
|
+
# @option opts :continue
|
186
|
+
# Continue the lex from the previous state (i.e. don't call #reset!)
|
127
187
|
def lex(string, opts={}, &b)
|
128
188
|
return enum_for(:lex, string) unless block_given?
|
129
189
|
|
@@ -147,280 +207,28 @@ module Rouge
|
|
147
207
|
b.call(last_token, last_val) if last_token
|
148
208
|
end
|
149
209
|
|
210
|
+
# @abstract
|
211
|
+
#
|
212
|
+
# Yield [token, chunk] pairs, given a prepared input stream. This
|
213
|
+
# must be implemented.
|
214
|
+
#
|
215
|
+
# @param [StringScanner] stream
|
216
|
+
# the stream
|
150
217
|
def stream_tokens(stream, &b)
|
151
218
|
raise 'abstract'
|
152
219
|
end
|
153
220
|
|
154
|
-
#
|
155
|
-
#
|
156
|
-
#
|
221
|
+
# @abstract
|
222
|
+
#
|
223
|
+
# return a number between 0 and 1 indicating the likelihood that
|
224
|
+
# the text given should be lexed with this lexer. The default
|
225
|
+
# implementation returns 0.
|
226
|
+
#
|
227
|
+
# @param [TextAnalyzer] text
|
228
|
+
# the text to be analyzed, with a couple of handy methods on it,
|
229
|
+
# like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
|
157
230
|
def self.analyze_text(text)
|
158
231
|
0
|
159
232
|
end
|
160
233
|
end
|
161
|
-
|
162
|
-
class RegexLexer < Lexer
|
163
|
-
class Rule
|
164
|
-
attr_reader :callback
|
165
|
-
attr_reader :next_state
|
166
|
-
attr_reader :re
|
167
|
-
def initialize(re, callback, next_state)
|
168
|
-
@re = re
|
169
|
-
@callback = callback
|
170
|
-
@next_state = next_state
|
171
|
-
end
|
172
|
-
|
173
|
-
def inspect
|
174
|
-
"#<Rule #{@re.inspect}>"
|
175
|
-
end
|
176
|
-
|
177
|
-
def consume(stream, &b)
|
178
|
-
stream.scan(@re)
|
179
|
-
|
180
|
-
if stream.matched?
|
181
|
-
yield stream
|
182
|
-
return true
|
183
|
-
end
|
184
|
-
|
185
|
-
false
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
|
-
class State
|
190
|
-
attr_reader :name
|
191
|
-
def initialize(lexer_class, name, &defn)
|
192
|
-
@lexer_class = lexer_class
|
193
|
-
@name = name
|
194
|
-
@defn = defn
|
195
|
-
end
|
196
|
-
|
197
|
-
def relative_state(state_name=nil, &b)
|
198
|
-
if state_name
|
199
|
-
@lexer_class.get_state(state_name)
|
200
|
-
else
|
201
|
-
State.new(@lexer_class, b.inspect, &b).load!
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
def rules
|
206
|
-
@rules ||= []
|
207
|
-
end
|
208
|
-
|
209
|
-
def load!
|
210
|
-
return self if @loaded
|
211
|
-
@loaded = true
|
212
|
-
StateDSL.new(rules).instance_eval(&@defn)
|
213
|
-
self
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
class StateDSL
|
218
|
-
attr_reader :rules
|
219
|
-
def initialize(rules)
|
220
|
-
@rules = rules
|
221
|
-
end
|
222
|
-
|
223
|
-
def rule(re, tok=nil, next_state=nil, &callback)
|
224
|
-
if block_given?
|
225
|
-
next_state = tok
|
226
|
-
else
|
227
|
-
tok = Token[tok]
|
228
|
-
|
229
|
-
callback = proc do
|
230
|
-
token tok
|
231
|
-
case next_state
|
232
|
-
when :pop!
|
233
|
-
pop!
|
234
|
-
when Symbol
|
235
|
-
push next_state
|
236
|
-
end # else pass
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
rules << Rule.new(re, callback, next_state)
|
241
|
-
end
|
242
|
-
|
243
|
-
def mixin(lexer_name)
|
244
|
-
rules << lexer_name.to_s
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
def self.states
|
249
|
-
@states ||= {}
|
250
|
-
end
|
251
|
-
|
252
|
-
def self.start_procs
|
253
|
-
@start_procs ||= []
|
254
|
-
end
|
255
|
-
|
256
|
-
def self.start(&b)
|
257
|
-
start_procs << b
|
258
|
-
end
|
259
|
-
|
260
|
-
def self.state(name, &b)
|
261
|
-
name = name.to_s
|
262
|
-
states[name] = State.new(self, name, &b)
|
263
|
-
end
|
264
|
-
|
265
|
-
def self.get_state(name)
|
266
|
-
return name if name.is_a? State
|
267
|
-
|
268
|
-
state = states[name.to_s]
|
269
|
-
raise "unknown state: #{name}" unless state
|
270
|
-
state.load!
|
271
|
-
end
|
272
|
-
|
273
|
-
def self.[](name)
|
274
|
-
get_state(name)
|
275
|
-
end
|
276
|
-
|
277
|
-
def get_state(name)
|
278
|
-
self.class.get_state(name)
|
279
|
-
end
|
280
|
-
|
281
|
-
def stack
|
282
|
-
@stack ||= [get_state(:root)]
|
283
|
-
end
|
284
|
-
|
285
|
-
def state
|
286
|
-
stack.last or raise 'empty stack!'
|
287
|
-
end
|
288
|
-
|
289
|
-
def reset!
|
290
|
-
@scan_state = nil
|
291
|
-
|
292
|
-
self.class.start_procs.each do |pr|
|
293
|
-
instance_eval(&pr)
|
294
|
-
end
|
295
|
-
end
|
296
|
-
|
297
|
-
def stream_tokens(stream, &b)
|
298
|
-
until stream.eos?
|
299
|
-
debug { "lexer: #{self.class.tag}" }
|
300
|
-
debug { "stack: #{stack.map(&:name).inspect}" }
|
301
|
-
debug { "stream: #{stream.peek(20).inspect}" }
|
302
|
-
success = step(get_state(state), stream, &b)
|
303
|
-
|
304
|
-
if !success
|
305
|
-
debug { " no match, yielding Error" }
|
306
|
-
b.call(Token['Error'], stream.getch)
|
307
|
-
end
|
308
|
-
end
|
309
|
-
end
|
310
|
-
|
311
|
-
def step(state, stream, &b)
|
312
|
-
state.rules.each do |rule|
|
313
|
-
return true if run_rule(rule, stream, &b)
|
314
|
-
end
|
315
|
-
|
316
|
-
false
|
317
|
-
end
|
318
|
-
|
319
|
-
def run_rule(rule, stream, &b)
|
320
|
-
case rule
|
321
|
-
when String
|
322
|
-
debug { " entering mixin #{rule}" }
|
323
|
-
res = step(get_state(rule), stream, &b)
|
324
|
-
debug { " exiting mixin #{rule}" }
|
325
|
-
res
|
326
|
-
when Rule
|
327
|
-
debug { " trying #{rule.inspect}" }
|
328
|
-
scan(stream, rule.re) do
|
329
|
-
debug { " got #{stream[0].inspect}" }
|
330
|
-
|
331
|
-
run_callback(stream, &rule.callback).each do |tok, res|
|
332
|
-
debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
|
333
|
-
b.call(Token[tok], res)
|
334
|
-
end
|
335
|
-
end
|
336
|
-
end
|
337
|
-
end
|
338
|
-
|
339
|
-
def run_callback(stream, &callback)
|
340
|
-
Enumerator.new do |y|
|
341
|
-
@output_stream = y
|
342
|
-
@group_count = 0
|
343
|
-
@last_matches = stream
|
344
|
-
instance_exec(stream, &callback)
|
345
|
-
@last_matches = nil
|
346
|
-
@output_stream = nil
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
MAX_NULL_STEPS = 5
|
351
|
-
def scan(scanner, re, &b)
|
352
|
-
@null_steps ||= 0
|
353
|
-
|
354
|
-
if @null_steps >= MAX_NULL_STEPS
|
355
|
-
debug { " too many scans without consuming the string!" }
|
356
|
-
return false
|
357
|
-
end
|
358
|
-
|
359
|
-
scanner.scan(re)
|
360
|
-
|
361
|
-
if scanner.matched?
|
362
|
-
if scanner.matched_size == 0
|
363
|
-
@null_steps += 1
|
364
|
-
else
|
365
|
-
@null_steps = 0
|
366
|
-
end
|
367
|
-
|
368
|
-
yield self
|
369
|
-
return true
|
370
|
-
end
|
371
|
-
|
372
|
-
return false
|
373
|
-
end
|
374
|
-
|
375
|
-
def token(tok, val=:__absent__)
|
376
|
-
val = @last_matches[0] if val == :__absent__
|
377
|
-
val ||= ''
|
378
|
-
|
379
|
-
raise 'no output stream' unless @output_stream
|
380
|
-
|
381
|
-
@output_stream << [Token[tok], val]
|
382
|
-
end
|
383
|
-
|
384
|
-
def group(tok)
|
385
|
-
token(tok, @last_matches[@group_count += 1])
|
386
|
-
end
|
387
|
-
|
388
|
-
def delegate(lexer, text=nil)
|
389
|
-
debug { " delegating to #{lexer.inspect}" }
|
390
|
-
text ||= @last_matches[0]
|
391
|
-
|
392
|
-
lexer.lex(text, :continue => true) do |tok, val|
|
393
|
-
debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
|
394
|
-
token(tok, val)
|
395
|
-
end
|
396
|
-
end
|
397
|
-
|
398
|
-
def push(state_name=nil, &b)
|
399
|
-
# use the top of the stack by default
|
400
|
-
if state_name || b
|
401
|
-
push_state = state.relative_state(state_name, &b)
|
402
|
-
else
|
403
|
-
push_state = self.state
|
404
|
-
end
|
405
|
-
|
406
|
-
debug { " pushing #{push_state.name}" }
|
407
|
-
stack.push(push_state)
|
408
|
-
end
|
409
|
-
|
410
|
-
def pop!
|
411
|
-
raise 'empty stack!' if stack.empty?
|
412
|
-
|
413
|
-
debug { " popping stack" }
|
414
|
-
stack.pop
|
415
|
-
end
|
416
|
-
|
417
|
-
def in_state?(state_name)
|
418
|
-
stack.map(&:name).include? state_name.to_s
|
419
|
-
end
|
420
|
-
|
421
|
-
def state?(state_name)
|
422
|
-
state_name.to_s == state.name
|
423
|
-
end
|
424
|
-
|
425
|
-
end
|
426
234
|
end
|