rouge 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -0
- data/lib/rouge.rb +9 -0
- data/lib/rouge/lexer.rb +83 -275
- data/lib/rouge/lexers/common_lisp.rb +19 -24
- data/lib/rouge/lexers/erb.rb +3 -9
- data/lib/rouge/lexers/factor.rb +300 -0
- data/lib/rouge/lexers/haml.rb +238 -0
- data/lib/rouge/lexers/html.rb +1 -0
- data/lib/rouge/lexers/markdown.rb +173 -0
- data/lib/rouge/lexers/php.rb +11 -13
- data/lib/rouge/lexers/ruby.rb +2 -2
- data/lib/rouge/lexers/yaml.rb +358 -0
- data/lib/rouge/regex_lexer.rb +300 -0
- data/lib/rouge/template_lexer.rb +14 -0
- data/lib/rouge/theme.rb +0 -34
- data/lib/rouge/themes/thankful_eyes.rb +4 -3
- data/lib/rouge/util.rb +63 -0
- data/lib/rouge/version.rb +1 -1
- metadata +10 -2
data/Gemfile
CHANGED
data/lib/rouge.rb
CHANGED
@@ -15,13 +15,20 @@ end
|
|
15
15
|
load_dir = Pathname.new(__FILE__).dirname
|
16
16
|
load load_dir.join('rouge/version.rb')
|
17
17
|
|
18
|
+
load load_dir.join('rouge/util.rb')
|
19
|
+
|
18
20
|
load load_dir.join('rouge/text_analyzer.rb')
|
19
21
|
load load_dir.join('rouge/token.rb')
|
22
|
+
|
20
23
|
load load_dir.join('rouge/lexer.rb')
|
24
|
+
load load_dir.join('rouge/regex_lexer.rb')
|
25
|
+
load load_dir.join('rouge/template_lexer.rb')
|
21
26
|
|
22
27
|
load load_dir.join('rouge/lexers/text.rb')
|
23
28
|
load load_dir.join('rouge/lexers/diff.rb')
|
24
29
|
load load_dir.join('rouge/lexers/tex.rb')
|
30
|
+
load load_dir.join('rouge/lexers/markdown.rb')
|
31
|
+
load load_dir.join('rouge/lexers/yaml.rb')
|
25
32
|
|
26
33
|
load load_dir.join('rouge/lexers/make.rb')
|
27
34
|
load load_dir.join('rouge/lexers/shell.rb')
|
@@ -29,6 +36,7 @@ load load_dir.join('rouge/lexers/shell.rb')
|
|
29
36
|
load load_dir.join('rouge/lexers/javascript.rb')
|
30
37
|
load load_dir.join('rouge/lexers/css.rb')
|
31
38
|
load load_dir.join('rouge/lexers/html.rb')
|
39
|
+
load load_dir.join('rouge/lexers/haml.rb')
|
32
40
|
load load_dir.join('rouge/lexers/xml.rb')
|
33
41
|
load load_dir.join('rouge/lexers/php.rb')
|
34
42
|
|
@@ -38,6 +46,7 @@ load load_dir.join('rouge/lexers/tcl.rb')
|
|
38
46
|
load load_dir.join('rouge/lexers/python.rb')
|
39
47
|
load load_dir.join('rouge/lexers/ruby.rb')
|
40
48
|
load load_dir.join('rouge/lexers/perl.rb')
|
49
|
+
load load_dir.join('rouge/lexers/factor.rb')
|
41
50
|
|
42
51
|
load load_dir.join('rouge/lexers/haskell.rb')
|
43
52
|
load load_dir.join('rouge/lexers/scheme.rb')
|
data/lib/rouge/lexer.rb
CHANGED
@@ -4,18 +4,37 @@ require 'strscan'
|
|
4
4
|
module Rouge
|
5
5
|
class Lexer
|
6
6
|
class << self
|
7
|
+
# Lexes `stream` with the given options. The lex is delegated to a
|
8
|
+
# new instance.
|
9
|
+
#
|
10
|
+
# @see #lex
|
7
11
|
def lex(stream, opts={}, &b)
|
8
12
|
new(opts).lex(stream, &b)
|
9
13
|
end
|
10
14
|
|
11
|
-
def default_options
|
15
|
+
def default_options(o={})
|
12
16
|
@default_options ||= {}
|
17
|
+
@default_options.merge!(o)
|
18
|
+
@default_options
|
13
19
|
end
|
14
20
|
|
21
|
+
# Given a string, return the correct lexer class.
|
15
22
|
def find(name)
|
16
23
|
registry[name.to_s]
|
17
24
|
end
|
18
25
|
|
26
|
+
# Guess which lexer to use based on a hash of info.
|
27
|
+
#
|
28
|
+
# @option info :mimetype
|
29
|
+
# A mimetype to guess by
|
30
|
+
# @option info :filename
|
31
|
+
# A filename to guess by
|
32
|
+
# @option info :source
|
33
|
+
# The source itself, which, if guessing by mimetype or filename
|
34
|
+
# fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
|
35
|
+
# other hints.
|
36
|
+
#
|
37
|
+
# @see Lexer.analyze_text
|
19
38
|
def guess(info={})
|
20
39
|
by_mimetype = guess_by_mimetype(info[:mimetype]) if info[:mimetype]
|
21
40
|
return by_mimetype if by_mimetype
|
@@ -67,6 +86,16 @@ module Rouge
|
|
67
86
|
registry[name.to_s] = lexer
|
68
87
|
end
|
69
88
|
|
89
|
+
# Used to specify or get the canonical name of this lexer class.
|
90
|
+
#
|
91
|
+
# @example
|
92
|
+
# class MyLexer < Lexer
|
93
|
+
# tag 'foo'
|
94
|
+
# end
|
95
|
+
#
|
96
|
+
# MyLexer.tag # => 'foo'
|
97
|
+
#
|
98
|
+
# Lexer.find('foo') # => MyLexer
|
70
99
|
def tag(t=nil)
|
71
100
|
return @tag if t.nil?
|
72
101
|
|
@@ -74,14 +103,35 @@ module Rouge
|
|
74
103
|
aliases @tag
|
75
104
|
end
|
76
105
|
|
106
|
+
# Used to specify alternate names this lexer class may be found by.
|
107
|
+
#
|
108
|
+
# @example
|
109
|
+
# class Erb < Lexer
|
110
|
+
# tag 'erb'
|
111
|
+
# aliases 'eruby', 'rhtml'
|
112
|
+
# end
|
113
|
+
#
|
114
|
+
# Lexer.find('eruby') # => Erb
|
77
115
|
def aliases(*args)
|
78
116
|
args.each { |arg| Lexer.register(arg, self) }
|
79
117
|
end
|
80
118
|
|
119
|
+
# Specify a list of filename globs associated with this lexer
|
120
|
+
#
|
121
|
+
# @example
|
122
|
+
# class Ruby < Lexer
|
123
|
+
# filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
|
124
|
+
# end
|
81
125
|
def filenames(*fnames)
|
82
126
|
(@filenames ||= []).concat(fnames)
|
83
127
|
end
|
84
128
|
|
129
|
+
# Specify a list of mimetypes associated with this lexer.
|
130
|
+
#
|
131
|
+
# @example
|
132
|
+
# class Html < Lexer
|
133
|
+
# mimetypes 'text/html', 'application/xhtml+xml'
|
134
|
+
# end
|
85
135
|
def mimetypes(*mts)
|
86
136
|
(@mimetypes ||= []).concat(mts)
|
87
137
|
end
|
@@ -94,7 +144,7 @@ module Rouge
|
|
94
144
|
|
95
145
|
# -*- instance methods -*- #
|
96
146
|
|
97
|
-
def initialize(opts={}
|
147
|
+
def initialize(opts={})
|
98
148
|
options(opts)
|
99
149
|
end
|
100
150
|
|
@@ -112,18 +162,28 @@ module Rouge
|
|
112
162
|
end
|
113
163
|
end
|
114
164
|
|
165
|
+
# Leave a debug message if the `:debug` option is set. The message
|
166
|
+
# is given as a block because some debug messages contain calculated
|
167
|
+
# information that is unnecessary for lexing in the real world.
|
168
|
+
#
|
169
|
+
# @example
|
170
|
+
# debug { "hello, world!" }
|
115
171
|
def debug(&b)
|
116
172
|
puts(b.call) if option :debug
|
117
173
|
end
|
118
174
|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
175
|
+
# @abstract
|
176
|
+
#
|
177
|
+
# Called after each lex is finished. The default implementation
|
178
|
+
# is a noop.
|
123
179
|
def reset!
|
124
|
-
# noop, called after each lex is finished
|
125
180
|
end
|
126
181
|
|
182
|
+
# Given a string, yield [token, chunk] pairs. If no block is given,
|
183
|
+
# an enumerator is returned.
|
184
|
+
#
|
185
|
+
# @option opts :continue
|
186
|
+
# Continue the lex from the previous state (i.e. don't call #reset!)
|
127
187
|
def lex(string, opts={}, &b)
|
128
188
|
return enum_for(:lex, string) unless block_given?
|
129
189
|
|
@@ -147,280 +207,28 @@ module Rouge
|
|
147
207
|
b.call(last_token, last_val) if last_token
|
148
208
|
end
|
149
209
|
|
210
|
+
# @abstract
|
211
|
+
#
|
212
|
+
# Yield [token, chunk] pairs, given a prepared input stream. This
|
213
|
+
# must be implemented.
|
214
|
+
#
|
215
|
+
# @param [StringScanner] stream
|
216
|
+
# the stream
|
150
217
|
def stream_tokens(stream, &b)
|
151
218
|
raise 'abstract'
|
152
219
|
end
|
153
220
|
|
154
|
-
#
|
155
|
-
#
|
156
|
-
#
|
221
|
+
# @abstract
|
222
|
+
#
|
223
|
+
# return a number between 0 and 1 indicating the likelihood that
|
224
|
+
# the text given should be lexed with this lexer. The default
|
225
|
+
# implementation returns 0.
|
226
|
+
#
|
227
|
+
# @param [TextAnalyzer] text
|
228
|
+
# the text to be analyzed, with a couple of handy methods on it,
|
229
|
+
# like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
|
157
230
|
def self.analyze_text(text)
|
158
231
|
0
|
159
232
|
end
|
160
233
|
end
|
161
|
-
|
162
|
-
class RegexLexer < Lexer
|
163
|
-
class Rule
|
164
|
-
attr_reader :callback
|
165
|
-
attr_reader :next_state
|
166
|
-
attr_reader :re
|
167
|
-
def initialize(re, callback, next_state)
|
168
|
-
@re = re
|
169
|
-
@callback = callback
|
170
|
-
@next_state = next_state
|
171
|
-
end
|
172
|
-
|
173
|
-
def inspect
|
174
|
-
"#<Rule #{@re.inspect}>"
|
175
|
-
end
|
176
|
-
|
177
|
-
def consume(stream, &b)
|
178
|
-
stream.scan(@re)
|
179
|
-
|
180
|
-
if stream.matched?
|
181
|
-
yield stream
|
182
|
-
return true
|
183
|
-
end
|
184
|
-
|
185
|
-
false
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
|
-
class State
|
190
|
-
attr_reader :name
|
191
|
-
def initialize(lexer_class, name, &defn)
|
192
|
-
@lexer_class = lexer_class
|
193
|
-
@name = name
|
194
|
-
@defn = defn
|
195
|
-
end
|
196
|
-
|
197
|
-
def relative_state(state_name=nil, &b)
|
198
|
-
if state_name
|
199
|
-
@lexer_class.get_state(state_name)
|
200
|
-
else
|
201
|
-
State.new(@lexer_class, b.inspect, &b).load!
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
def rules
|
206
|
-
@rules ||= []
|
207
|
-
end
|
208
|
-
|
209
|
-
def load!
|
210
|
-
return self if @loaded
|
211
|
-
@loaded = true
|
212
|
-
StateDSL.new(rules).instance_eval(&@defn)
|
213
|
-
self
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
class StateDSL
|
218
|
-
attr_reader :rules
|
219
|
-
def initialize(rules)
|
220
|
-
@rules = rules
|
221
|
-
end
|
222
|
-
|
223
|
-
def rule(re, tok=nil, next_state=nil, &callback)
|
224
|
-
if block_given?
|
225
|
-
next_state = tok
|
226
|
-
else
|
227
|
-
tok = Token[tok]
|
228
|
-
|
229
|
-
callback = proc do
|
230
|
-
token tok
|
231
|
-
case next_state
|
232
|
-
when :pop!
|
233
|
-
pop!
|
234
|
-
when Symbol
|
235
|
-
push next_state
|
236
|
-
end # else pass
|
237
|
-
end
|
238
|
-
end
|
239
|
-
|
240
|
-
rules << Rule.new(re, callback, next_state)
|
241
|
-
end
|
242
|
-
|
243
|
-
def mixin(lexer_name)
|
244
|
-
rules << lexer_name.to_s
|
245
|
-
end
|
246
|
-
end
|
247
|
-
|
248
|
-
def self.states
|
249
|
-
@states ||= {}
|
250
|
-
end
|
251
|
-
|
252
|
-
def self.start_procs
|
253
|
-
@start_procs ||= []
|
254
|
-
end
|
255
|
-
|
256
|
-
def self.start(&b)
|
257
|
-
start_procs << b
|
258
|
-
end
|
259
|
-
|
260
|
-
def self.state(name, &b)
|
261
|
-
name = name.to_s
|
262
|
-
states[name] = State.new(self, name, &b)
|
263
|
-
end
|
264
|
-
|
265
|
-
def self.get_state(name)
|
266
|
-
return name if name.is_a? State
|
267
|
-
|
268
|
-
state = states[name.to_s]
|
269
|
-
raise "unknown state: #{name}" unless state
|
270
|
-
state.load!
|
271
|
-
end
|
272
|
-
|
273
|
-
def self.[](name)
|
274
|
-
get_state(name)
|
275
|
-
end
|
276
|
-
|
277
|
-
def get_state(name)
|
278
|
-
self.class.get_state(name)
|
279
|
-
end
|
280
|
-
|
281
|
-
def stack
|
282
|
-
@stack ||= [get_state(:root)]
|
283
|
-
end
|
284
|
-
|
285
|
-
def state
|
286
|
-
stack.last or raise 'empty stack!'
|
287
|
-
end
|
288
|
-
|
289
|
-
def reset!
|
290
|
-
@scan_state = nil
|
291
|
-
|
292
|
-
self.class.start_procs.each do |pr|
|
293
|
-
instance_eval(&pr)
|
294
|
-
end
|
295
|
-
end
|
296
|
-
|
297
|
-
def stream_tokens(stream, &b)
|
298
|
-
until stream.eos?
|
299
|
-
debug { "lexer: #{self.class.tag}" }
|
300
|
-
debug { "stack: #{stack.map(&:name).inspect}" }
|
301
|
-
debug { "stream: #{stream.peek(20).inspect}" }
|
302
|
-
success = step(get_state(state), stream, &b)
|
303
|
-
|
304
|
-
if !success
|
305
|
-
debug { " no match, yielding Error" }
|
306
|
-
b.call(Token['Error'], stream.getch)
|
307
|
-
end
|
308
|
-
end
|
309
|
-
end
|
310
|
-
|
311
|
-
def step(state, stream, &b)
|
312
|
-
state.rules.each do |rule|
|
313
|
-
return true if run_rule(rule, stream, &b)
|
314
|
-
end
|
315
|
-
|
316
|
-
false
|
317
|
-
end
|
318
|
-
|
319
|
-
def run_rule(rule, stream, &b)
|
320
|
-
case rule
|
321
|
-
when String
|
322
|
-
debug { " entering mixin #{rule}" }
|
323
|
-
res = step(get_state(rule), stream, &b)
|
324
|
-
debug { " exiting mixin #{rule}" }
|
325
|
-
res
|
326
|
-
when Rule
|
327
|
-
debug { " trying #{rule.inspect}" }
|
328
|
-
scan(stream, rule.re) do
|
329
|
-
debug { " got #{stream[0].inspect}" }
|
330
|
-
|
331
|
-
run_callback(stream, &rule.callback).each do |tok, res|
|
332
|
-
debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
|
333
|
-
b.call(Token[tok], res)
|
334
|
-
end
|
335
|
-
end
|
336
|
-
end
|
337
|
-
end
|
338
|
-
|
339
|
-
def run_callback(stream, &callback)
|
340
|
-
Enumerator.new do |y|
|
341
|
-
@output_stream = y
|
342
|
-
@group_count = 0
|
343
|
-
@last_matches = stream
|
344
|
-
instance_exec(stream, &callback)
|
345
|
-
@last_matches = nil
|
346
|
-
@output_stream = nil
|
347
|
-
end
|
348
|
-
end
|
349
|
-
|
350
|
-
MAX_NULL_STEPS = 5
|
351
|
-
def scan(scanner, re, &b)
|
352
|
-
@null_steps ||= 0
|
353
|
-
|
354
|
-
if @null_steps >= MAX_NULL_STEPS
|
355
|
-
debug { " too many scans without consuming the string!" }
|
356
|
-
return false
|
357
|
-
end
|
358
|
-
|
359
|
-
scanner.scan(re)
|
360
|
-
|
361
|
-
if scanner.matched?
|
362
|
-
if scanner.matched_size == 0
|
363
|
-
@null_steps += 1
|
364
|
-
else
|
365
|
-
@null_steps = 0
|
366
|
-
end
|
367
|
-
|
368
|
-
yield self
|
369
|
-
return true
|
370
|
-
end
|
371
|
-
|
372
|
-
return false
|
373
|
-
end
|
374
|
-
|
375
|
-
def token(tok, val=:__absent__)
|
376
|
-
val = @last_matches[0] if val == :__absent__
|
377
|
-
val ||= ''
|
378
|
-
|
379
|
-
raise 'no output stream' unless @output_stream
|
380
|
-
|
381
|
-
@output_stream << [Token[tok], val]
|
382
|
-
end
|
383
|
-
|
384
|
-
def group(tok)
|
385
|
-
token(tok, @last_matches[@group_count += 1])
|
386
|
-
end
|
387
|
-
|
388
|
-
def delegate(lexer, text=nil)
|
389
|
-
debug { " delegating to #{lexer.inspect}" }
|
390
|
-
text ||= @last_matches[0]
|
391
|
-
|
392
|
-
lexer.lex(text, :continue => true) do |tok, val|
|
393
|
-
debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
|
394
|
-
token(tok, val)
|
395
|
-
end
|
396
|
-
end
|
397
|
-
|
398
|
-
def push(state_name=nil, &b)
|
399
|
-
# use the top of the stack by default
|
400
|
-
if state_name || b
|
401
|
-
push_state = state.relative_state(state_name, &b)
|
402
|
-
else
|
403
|
-
push_state = self.state
|
404
|
-
end
|
405
|
-
|
406
|
-
debug { " pushing #{push_state.name}" }
|
407
|
-
stack.push(push_state)
|
408
|
-
end
|
409
|
-
|
410
|
-
def pop!
|
411
|
-
raise 'empty stack!' if stack.empty?
|
412
|
-
|
413
|
-
debug { " popping stack" }
|
414
|
-
stack.pop
|
415
|
-
end
|
416
|
-
|
417
|
-
def in_state?(state_name)
|
418
|
-
stack.map(&:name).include? state_name.to_s
|
419
|
-
end
|
420
|
-
|
421
|
-
def state?(state_name)
|
422
|
-
state_name.to_s == state.name
|
423
|
-
end
|
424
|
-
|
425
|
-
end
|
426
234
|
end
|