rouge 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rouge.rb +7 -2
- data/lib/rouge/formatters/html.rb +4 -3
- data/lib/rouge/lexer.rb +259 -122
- data/lib/rouge/lexers/css.rb +165 -0
- data/lib/rouge/lexers/diff.rb +28 -0
- data/lib/rouge/lexers/html.rb +65 -0
- data/lib/rouge/lexers/javascript.rb +22 -18
- data/lib/rouge/lexers/shell.rb +40 -24
- data/lib/rouge/lexers/text.rb +11 -0
- data/lib/rouge/theme.rb +36 -47
- data/lib/rouge/themes/colorful.rb +63 -0
- data/lib/rouge/themes/thankful_eyes.rb +11 -9
- data/lib/rouge/token.rb +14 -2
- data/lib/rouge/version.rb +1 -1
- metadata +7 -2
data/lib/rouge.rb
CHANGED
@@ -3,8 +3,8 @@ require 'pathname'
|
|
3
3
|
|
4
4
|
module Rouge
|
5
5
|
class << self
|
6
|
-
def highlight(text,
|
7
|
-
lexer = Lexer.find(
|
6
|
+
def highlight(text, lexer, formatter)
|
7
|
+
lexer = Lexer.find(lexer) unless lexer.is_a?(Lexer)
|
8
8
|
raise "unknown lexer #{lexer_name}" unless lexer
|
9
9
|
|
10
10
|
formatter.render(lexer.lex(text))
|
@@ -15,11 +15,16 @@ end
|
|
15
15
|
load_dir = Pathname.new(__FILE__).dirname
|
16
16
|
load load_dir.join('rouge/token.rb')
|
17
17
|
load load_dir.join('rouge/lexer.rb')
|
18
|
+
load load_dir.join('rouge/lexers/text.rb')
|
19
|
+
load load_dir.join('rouge/lexers/diff.rb')
|
18
20
|
load load_dir.join('rouge/lexers/shell.rb')
|
19
21
|
load load_dir.join('rouge/lexers/javascript.rb')
|
22
|
+
load load_dir.join('rouge/lexers/css.rb')
|
23
|
+
load load_dir.join('rouge/lexers/html.rb')
|
20
24
|
|
21
25
|
load load_dir.join('rouge/formatter.rb')
|
22
26
|
load load_dir.join('rouge/formatters/html.rb')
|
23
27
|
|
24
28
|
load load_dir.join('rouge/theme.rb')
|
25
29
|
load load_dir.join('rouge/themes/thankful_eyes.rb')
|
30
|
+
load load_dir.join('rouge/themes/colorful.rb')
|
data/lib/rouge/formatters/html.rb
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
# stdlib
|
2
|
+
require 'cgi'
|
3
|
+
|
1
4
|
module Rouge
|
2
5
|
module Formatters
|
3
6
|
class HTML < Formatter
|
@@ -9,9 +12,7 @@ module Rouge
|
|
9
12
|
yield "<pre class=#{@css_class.inspect}>"
|
10
13
|
tokens.each do |tok, val|
|
11
14
|
# TODO: properly html-encode val
|
12
|
-
val
|
13
|
-
val.gsub! '<', '&lt;'
|
14
|
-
val.gsub! '>', '&gt;'
|
15
|
+
val = CGI.escape_html(val)
|
15
16
|
|
16
17
|
case tok.shortname
|
17
18
|
when ''
|
data/lib/rouge/lexer.rb
CHANGED
@@ -1,8 +1,40 @@
|
|
1
|
+
# stdlib
|
2
|
+
require 'strscan'
|
3
|
+
|
1
4
|
module Rouge
|
2
5
|
class Lexer
|
3
6
|
class << self
|
4
|
-
def
|
5
|
-
|
7
|
+
def make(opts={}, &b)
|
8
|
+
_sup = self
|
9
|
+
|
10
|
+
Class.new(self) do
|
11
|
+
@lazy_load_proc = b
|
12
|
+
@default_options = _sup.default_options.merge(opts)
|
13
|
+
@parent = _sup
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def lex(stream, opts={}, &b)
|
18
|
+
new(opts).lex(stream, &b)
|
19
|
+
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
def force_load!
|
23
|
+
return self if @force_load
|
24
|
+
@force_load = true
|
25
|
+
@lazy_load_proc && instance_eval(&@lazy_load_proc)
|
26
|
+
|
27
|
+
self
|
28
|
+
end
|
29
|
+
public
|
30
|
+
|
31
|
+
def new(*a, &b)
|
32
|
+
force_load!
|
33
|
+
super(*a, &b)
|
34
|
+
end
|
35
|
+
|
36
|
+
def default_options
|
37
|
+
@default_options ||= {}
|
6
38
|
end
|
7
39
|
|
8
40
|
def find(name)
|
@@ -13,43 +45,51 @@ module Rouge
|
|
13
45
|
registry[name.to_s] = lexer
|
14
46
|
end
|
15
47
|
|
48
|
+
def tag(t=nil)
|
49
|
+
return @tag if t.nil?
|
50
|
+
|
51
|
+
@tag = t.to_s
|
52
|
+
aliases @tag
|
53
|
+
end
|
54
|
+
|
55
|
+
def aliases(*args)
|
56
|
+
args.each { |arg| Lexer.register(arg, self) }
|
57
|
+
end
|
58
|
+
|
59
|
+
def extensions(*exts)
|
60
|
+
exts.each do |ext|
|
61
|
+
Lexer.extension_registry[ext] = self
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def extension_registry
|
66
|
+
@extension_registry ||= {}
|
67
|
+
end
|
68
|
+
|
16
69
|
private
|
17
70
|
def registry
|
18
71
|
@registry ||= {}
|
19
72
|
end
|
20
73
|
end
|
21
74
|
|
22
|
-
|
23
|
-
return @name if n.nil?
|
24
|
-
|
25
|
-
@name = n.to_s
|
26
|
-
aliases @name
|
27
|
-
end
|
28
|
-
|
29
|
-
def aliases(*args)
|
30
|
-
args.each { |arg| Lexer.register(arg, self) }
|
31
|
-
end
|
75
|
+
# -*- instance methods -*- #
|
32
76
|
|
33
77
|
def initialize(opts={}, &b)
|
34
|
-
options
|
78
|
+
options(opts)
|
35
79
|
@lazy_load_proc = b
|
36
80
|
end
|
37
81
|
|
38
|
-
def default_options
|
39
|
-
{}
|
40
|
-
end
|
41
|
-
|
42
82
|
def options(o={})
|
43
|
-
(@options ||=
|
83
|
+
(@options ||= {}).merge!(o)
|
44
84
|
|
45
|
-
@options
|
85
|
+
self.class.default_options.merge(@options)
|
46
86
|
end
|
47
87
|
|
48
88
|
def option(k, v=:absent)
|
49
89
|
if v == :absent
|
50
|
-
options[k
|
90
|
+
options[k]
|
51
91
|
else
|
52
|
-
options({ k
|
92
|
+
options({ k => v })
|
53
93
|
end
|
54
94
|
end
|
55
95
|
|
@@ -61,51 +101,52 @@ module Rouge
|
|
61
101
|
lex(stream).to_a
|
62
102
|
end
|
63
103
|
|
64
|
-
def lex(
|
65
|
-
return enum_for(:lex,
|
104
|
+
def lex(string, &b)
|
105
|
+
return enum_for(:lex, string) unless block_given?
|
66
106
|
|
67
|
-
|
68
|
-
|
107
|
+
last_token = nil
|
108
|
+
last_val = nil
|
109
|
+
stream_tokens(StringScanner.new(string)) do |tok, val|
|
110
|
+
next if val.empty?
|
69
111
|
|
70
|
-
|
71
|
-
|
72
|
-
|
112
|
+
if tok == last_token
|
113
|
+
last_val << val
|
114
|
+
next
|
115
|
+
end
|
73
116
|
|
74
|
-
|
117
|
+
b.call(last_token, last_val) if last_token
|
118
|
+
last_token = tok
|
119
|
+
last_val = val
|
120
|
+
end
|
75
121
|
|
76
|
-
|
77
|
-
|
78
|
-
@force_load = true
|
79
|
-
instance_eval &@lazy_load_proc
|
122
|
+
b.call(last_token, last_val) if last_token
|
123
|
+
end
|
80
124
|
|
81
|
-
|
125
|
+
def stream_tokens(stream, &b)
|
126
|
+
raise 'abstract'
|
82
127
|
end
|
83
128
|
end
|
84
129
|
|
85
130
|
class RegexLexer < Lexer
|
86
131
|
class Rule
|
87
132
|
attr_reader :callback
|
88
|
-
attr_reader :
|
133
|
+
attr_reader :next_state
|
89
134
|
attr_reader :re
|
90
|
-
def initialize(re, callback,
|
91
|
-
@
|
92
|
-
@re = Regexp.new %/\\A(?:#{re.source})/
|
135
|
+
def initialize(re, callback, next_state)
|
136
|
+
@re = re
|
93
137
|
@callback = callback
|
94
|
-
@
|
138
|
+
@next_state = next_state
|
95
139
|
end
|
96
140
|
|
97
141
|
def inspect
|
98
|
-
"#<Rule #{@
|
142
|
+
"#<Rule #{@re.inspect}>"
|
99
143
|
end
|
100
144
|
|
101
145
|
def consume(stream, &b)
|
102
|
-
|
103
|
-
# also, encapsulate the stream in its own class.
|
104
|
-
match = stream.match(@re)
|
146
|
+
stream.scan(@re)
|
105
147
|
|
106
|
-
if
|
107
|
-
stream
|
108
|
-
yield match
|
148
|
+
if stream.matched?
|
149
|
+
yield stream
|
109
150
|
return true
|
110
151
|
end
|
111
152
|
|
@@ -113,122 +154,218 @@ module Rouge
|
|
113
154
|
end
|
114
155
|
end
|
115
156
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
157
|
+
class State
|
158
|
+
attr_reader :name
|
159
|
+
def initialize(lexer_class, name, &defn)
|
160
|
+
@lexer_class = lexer_class
|
161
|
+
@name = name
|
162
|
+
@defn = defn
|
120
163
|
end
|
121
164
|
|
122
|
-
|
123
|
-
|
165
|
+
def relative_state(state_name)
|
166
|
+
@lexer_class.get_state(state_name)
|
167
|
+
end
|
168
|
+
|
169
|
+
def rules
|
170
|
+
@rules ||= []
|
171
|
+
end
|
172
|
+
|
173
|
+
def load!
|
174
|
+
return self if @loaded
|
175
|
+
@loaded = true
|
176
|
+
StateDSL.new(rules).instance_eval(&@defn)
|
177
|
+
self
|
178
|
+
end
|
124
179
|
end
|
125
180
|
|
126
|
-
|
127
|
-
|
128
|
-
|
181
|
+
class ScanState
|
182
|
+
def self.delegate(m, target)
|
183
|
+
define_method(m) do |*a, &b|
|
184
|
+
send(target).send(m, *a, &b)
|
185
|
+
end
|
186
|
+
end
|
129
187
|
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
@
|
188
|
+
attr_accessor :scanner
|
189
|
+
attr_accessor :stack
|
190
|
+
attr_accessor :lexer
|
191
|
+
def initialize(lexer, scanner, stack=nil)
|
192
|
+
@lexer = lexer
|
193
|
+
@scanner = scanner
|
194
|
+
@stack = stack || [lexer.get_state(:root)]
|
195
|
+
end
|
196
|
+
|
197
|
+
def pop!
|
198
|
+
raise 'empty stack!' if stack.empty?
|
199
|
+
|
200
|
+
debug { " popping stack" }
|
201
|
+
stack.pop
|
202
|
+
end
|
203
|
+
|
204
|
+
def push(state_name)
|
205
|
+
debug { " pushing #{state_name}" }
|
206
|
+
stack.push(state.relative_state(state_name))
|
207
|
+
end
|
208
|
+
|
209
|
+
delegate :debug, :lexer
|
210
|
+
|
211
|
+
delegate :[], :scanner
|
212
|
+
delegate :captures, :scanner
|
213
|
+
delegate :peek, :scanner
|
214
|
+
delegate :eos?, :scanner
|
215
|
+
|
216
|
+
def run_callback(&callback)
|
217
|
+
Enumerator.new do |y|
|
218
|
+
@output_stream = y
|
219
|
+
@group_count = 0
|
220
|
+
instance_exec(self, &callback)
|
221
|
+
@output_stream = nil
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
def token(tok, val=nil)
|
226
|
+
raise 'no output stream' unless @output_stream
|
227
|
+
|
228
|
+
@output_stream << [Token[tok], val || scanner[0]]
|
229
|
+
end
|
230
|
+
|
231
|
+
def group(tok)
|
232
|
+
token(tok, scanner[@group_count += 1])
|
233
|
+
end
|
234
|
+
|
235
|
+
def delegate(lexer, text=nil)
|
236
|
+
debug { " delegating to #{lexer.name}" }
|
237
|
+
text ||= scanner[0]
|
238
|
+
|
239
|
+
lexer.lex(text) do |tok, val|
|
240
|
+
debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
|
241
|
+
token(tok, val)
|
242
|
+
end
|
243
|
+
end
|
244
|
+
|
245
|
+
def state
|
246
|
+
raise 'empty stack!' if stack.empty?
|
247
|
+
stack.last
|
248
|
+
end
|
249
|
+
|
250
|
+
def scan(re, &b)
|
251
|
+
scanner.scan(re)
|
252
|
+
|
253
|
+
if scanner.matched?
|
254
|
+
yield self
|
255
|
+
return true
|
256
|
+
end
|
257
|
+
|
258
|
+
return false
|
136
259
|
end
|
137
260
|
end
|
138
261
|
|
139
|
-
|
140
|
-
|
141
|
-
|
262
|
+
class StateDSL
|
263
|
+
attr_reader :rules
|
264
|
+
def initialize(rules)
|
265
|
+
@rules = rules
|
266
|
+
end
|
142
267
|
|
143
|
-
|
268
|
+
def rule(re, tok=nil, next_state=nil, &callback)
|
269
|
+
if block_given?
|
270
|
+
next_state = tok
|
271
|
+
else
|
272
|
+
tok = Token[tok]
|
273
|
+
|
274
|
+
callback = proc do |ss|
|
275
|
+
token tok, ss[0]
|
276
|
+
case next_state
|
277
|
+
when :pop!
|
278
|
+
pop!
|
279
|
+
when Symbol
|
280
|
+
push next_state
|
281
|
+
end # else pass
|
282
|
+
end
|
283
|
+
end
|
284
|
+
|
285
|
+
rules << Rule.new(re, callback, next_state)
|
286
|
+
end
|
287
|
+
|
288
|
+
def mixin(lexer_name)
|
289
|
+
rules << lexer_name.to_s
|
290
|
+
end
|
144
291
|
end
|
145
292
|
|
146
|
-
def
|
147
|
-
|
148
|
-
@rules ||= []
|
293
|
+
def self.states
|
294
|
+
@states ||= {}
|
149
295
|
end
|
150
296
|
|
151
|
-
def
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
if token.is_a? String
|
156
|
-
token = Token[token]
|
157
|
-
end
|
297
|
+
def self.state(name, &b)
|
298
|
+
name = name.to_s
|
299
|
+
states[name] = State.new(self, name, &b)
|
300
|
+
end
|
158
301
|
|
159
|
-
|
302
|
+
def initialize(parent=nil, opts={}, &defn)
|
303
|
+
if parent.is_a? Hash
|
304
|
+
opts = parent
|
305
|
+
parent = nil
|
160
306
|
end
|
161
307
|
|
162
|
-
|
308
|
+
@parent = parent
|
309
|
+
super(opts, &defn)
|
163
310
|
end
|
164
311
|
|
165
|
-
def
|
166
|
-
|
167
|
-
stack = [self]
|
312
|
+
def self.get_state(name)
|
313
|
+
return name if name.is_a? State
|
168
314
|
|
169
|
-
|
315
|
+
state = states[name.to_s]
|
316
|
+
raise "unknown state: #{name}" unless state
|
317
|
+
state.load!
|
170
318
|
end
|
171
319
|
|
172
|
-
def
|
173
|
-
|
320
|
+
def self.[](name)
|
321
|
+
get_state(name)
|
322
|
+
end
|
174
323
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
324
|
+
def get_state(name)
|
325
|
+
self.class.get_state(name)
|
326
|
+
end
|
327
|
+
|
328
|
+
def stream_tokens(stream, &b)
|
329
|
+
scan_state = ScanState.new(self, stream)
|
330
|
+
|
331
|
+
stream_with_state(scan_state, &b)
|
332
|
+
end
|
333
|
+
|
334
|
+
def stream_with_state(scan_state, &b)
|
335
|
+
until scan_state.eos?
|
336
|
+
debug { "stack: #{scan_state.stack.map(&:name).inspect}" }
|
337
|
+
debug { "stream: #{scan_state.scanner.peek(20).inspect}" }
|
338
|
+
success = step(get_state(scan_state.state), scan_state, &b)
|
179
339
|
|
180
340
|
if !success
|
181
341
|
debug { " no match, yielding Error" }
|
182
|
-
b.call(Token['Error'],
|
342
|
+
b.call(Token['Error'], scan_state.scanner.getch)
|
183
343
|
end
|
184
344
|
end
|
185
345
|
end
|
186
346
|
|
187
|
-
def step(
|
188
|
-
rules.each do |rule|
|
189
|
-
return true if run_rule(rule,
|
347
|
+
def step(state, scan_state, &b)
|
348
|
+
state.rules.each do |rule|
|
349
|
+
return true if run_rule(rule, scan_state, &b)
|
190
350
|
end
|
191
351
|
|
192
352
|
false
|
193
353
|
end
|
194
354
|
|
195
355
|
private
|
196
|
-
def
|
197
|
-
case o
|
198
|
-
when RegexLexer, :pop!
|
199
|
-
o
|
200
|
-
else
|
201
|
-
lexer o
|
202
|
-
end
|
203
|
-
end
|
204
|
-
|
205
|
-
def run_rule(rule, stream, stack, &b)
|
356
|
+
def run_rule(rule, scan_state, &b)
|
206
357
|
case rule
|
207
|
-
when String
|
208
|
-
|
209
|
-
|
210
|
-
get_lexer(rule).step(stream, stack, &b)
|
358
|
+
when String
|
359
|
+
debug { " entering mixin #{rule}" }
|
360
|
+
step(get_state(rule), scan_state, &b)
|
211
361
|
when Rule
|
212
362
|
debug { " trying #{rule.inspect}" }
|
213
|
-
rule.
|
363
|
+
scan_state.scan(rule.re) do |match|
|
214
364
|
debug { " got #{match[0].inspect}" }
|
215
365
|
|
216
|
-
rule.callback.
|
217
|
-
|
218
|
-
|
219
|
-
end
|
220
|
-
|
221
|
-
debug { " yielding #{tok.name.inspect}, #{res.inspect}" }
|
222
|
-
b.call(tok, res)
|
223
|
-
end
|
224
|
-
|
225
|
-
if rule.next_lexer == :pop!
|
226
|
-
debug { " popping stack" }
|
227
|
-
stack.pop
|
228
|
-
elsif rule.next_lexer
|
229
|
-
lexer = get_lexer(rule.next_lexer)
|
230
|
-
debug { " entering #{lexer.name}" }
|
231
|
-
stack.push lexer
|
366
|
+
scan_state.run_callback(&rule.callback).each do |tok, res|
|
367
|
+
debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
|
368
|
+
b.call(Token[tok], res)
|
232
369
|
end
|
233
370
|
end
|
234
371
|
end
|