rouge 0.0.2 → 0.0.3

lib/rouge.rb
@@ -3,8 +3,8 @@ require 'pathname'
 
 module Rouge
   class << self
-    def highlight(text, lexer_name, formatter)
-      lexer = Lexer.find(lexer_name)
+    def highlight(text, lexer, formatter)
+      lexer = Lexer.find(lexer) unless lexer.is_a?(Lexer)
       raise "unknown lexer #{lexer_name}" unless lexer
 
       formatter.render(lexer.lex(text))
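 
The hunk above lets highlight accept either a registered lexer name or an already-built lexer instance; names still go through Lexer.find, instances are used as-is. A minimal usage sketch (the formatter's constructor arguments and the Shell class name are assumptions, not shown in this diff):

require 'rouge'

formatter = Rouge::Formatters::HTML.new    # assumed to need no arguments

# look the lexer up by its registered tag, as before
html = Rouge.highlight("ls -la", 'shell', formatter)

# or pass a lexer instance directly; the is_a?(Lexer) check skips the registry lookup
lexer = Rouge::Lexers::Shell.new           # hypothetical class name for lexers/shell.rb
html = Rouge.highlight("ls -la", lexer, formatter)

Note that the raise line still interpolates lexer_name, which is no longer defined after the rename, so a failed lookup would surface as a NameError rather than the intended message.
 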
@@ -15,11 +15,16 @@ end
 load_dir = Pathname.new(__FILE__).dirname
 load load_dir.join('rouge/token.rb')
 load load_dir.join('rouge/lexer.rb')
+load load_dir.join('rouge/lexers/text.rb')
+load load_dir.join('rouge/lexers/diff.rb')
 load load_dir.join('rouge/lexers/shell.rb')
 load load_dir.join('rouge/lexers/javascript.rb')
+load load_dir.join('rouge/lexers/css.rb')
+load load_dir.join('rouge/lexers/html.rb')
 
 load load_dir.join('rouge/formatter.rb')
 load load_dir.join('rouge/formatters/html.rb')
 
 load load_dir.join('rouge/theme.rb')
 load load_dir.join('rouge/themes/thankful_eyes.rb')
+load load_dir.join('rouge/themes/colorful.rb')
 
lib/rouge/formatters/html.rb
@@ -1,3 +1,6 @@
+# stdlib
+require 'cgi'
+
 module Rouge
   module Formatters
     class HTML < Formatter
@@ -9,9 +12,7 @@ module Rouge
         yield "<pre class=#{@css_class.inspect}>"
         tokens.each do |tok, val|
           # TODO: properly html-encode val
-          val.gsub! '&', '&amp;'
-          val.gsub! '<', '&lt;'
-          val.gsub! '>', '&gt;'
+          val = CGI.escape_html(val)
 
           case tok.shortname
           when ''
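 
For reference, this is plain stdlib behavior rather than anything from the diff: CGI.escape_html also covers the double and single quote characters that the old three-step gsub! chain left untouched, and it returns a new string instead of mutating val in place.

require 'cgi'

val = %q{<a href="x">&'</a>}

# old approach: in-place substitutions, quotes pass through unescaped
escaped = val.dup
escaped.gsub! '&', '&amp;'
escaped.gsub! '<', '&lt;'
escaped.gsub! '>', '&gt;'
escaped               # => "&lt;a href=\"x\"&gt;&amp;'&lt;/a&gt;"

# new approach: one call, quotes included
CGI.escape_html(val)  # => "&lt;a href=&quot;x&quot;&gt;&amp;&#39;&lt;/a&gt;"
 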
lib/rouge/lexer.rb
@@ -1,8 +1,40 @@
+# stdlib
+require 'strscan'
+
 module Rouge
   class Lexer
     class << self
-      def create(opts={}, &b)
-        new(opts, &b).send(:force_load!)
+      def make(opts={}, &b)
+        _sup = self
+
+        Class.new(self) do
+          @lazy_load_proc = b
+          @default_options = _sup.default_options.merge(opts)
+          @parent = _sup
+        end
+      end
+
+      def lex(stream, opts={}, &b)
+        new(opts).lex(stream, &b)
+      end
+
+      protected
+      def force_load!
+        return self if @force_load
+        @force_load = true
+        @lazy_load_proc && instance_eval(&@lazy_load_proc)
+
+        self
+      end
+      public
+
+      def new(*a, &b)
+        force_load!
+        super(*a, &b)
+      end
+
+      def default_options
+        @default_options ||= {}
       end
 
       def find(name)
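 
In short, make replaces create: instead of running the definition block immediately, it returns an anonymous subclass that stores the block and only instance_evals it (via force_load!) the first time the class is instantiated. A rough illustration of the laziness, using a hypothetical class:

Loud = Rouge::Lexer.make(foo: 1) do
  puts "defining now"     # stored in @lazy_load_proc, not run yet
end

Loud.default_options      # => {:foo=>1}, merged over Lexer.default_options by make
Loud.new                  # prints "defining now" -- force_load! fires on first new
Loud.new                  # silent: @force_load guards against a second run

The class-level lex shortcut added here (Lexer.lex(stream, opts)) just builds an instance and delegates, so callers never have to force-load anything explicitly.
 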
@@ -13,43 +45,51 @@ module Rouge
         registry[name.to_s] = lexer
       end
 
+      def tag(t=nil)
+        return @tag if t.nil?
+
+        @tag = t.to_s
+        aliases @tag
+      end
+
+      def aliases(*args)
+        args.each { |arg| Lexer.register(arg, self) }
+      end
+
+      def extensions(*exts)
+        exts.each do |ext|
+          Lexer.extension_registry[ext] = self
+        end
+      end
+
+      def extension_registry
+        @extension_registry ||= {}
+      end
+
       private
       def registry
         @registry ||= {}
       end
     end
 
-    def name(n=nil)
-      return @name if n.nil?
-
-      @name = n.to_s
-      aliases @name
-    end
-
-    def aliases(*args)
-      args.each { |arg| Lexer.register(arg, self) }
-    end
+    # -*- instance methods -*- #
 
     def initialize(opts={}, &b)
-      options opts
+      options(opts)
      @lazy_load_proc = b
     end
 
-    def default_options
-      {}
-    end
-
     def options(o={})
-      (@options ||= default_options).merge!(o)
+      (@options ||= {}).merge!(o)
 
-      @options
+      self.class.default_options.merge(@options)
     end
 
     def option(k, v=:absent)
       if v == :absent
-        options[k.to_s]
+        options[k]
       else
-        options({ k.to_s => v })
+        options({ k => v })
       end
     end
 
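 
The name/aliases pair moves from instance level up to the class as tag/aliases, joined by extensions and extension_registry. A hedged sketch of how a lexer might register itself under the new scheme (the class, tag, and extension values are made up; find is assumed to read the registry that register fills):

class IniLexer < Rouge::RegexLexer            # hypothetical lexer
  tag 'ini'                                   # canonical tag, also registered as an alias
  aliases 'cfg', 'dosini'                     # extra names for Lexer.find
  extensions 'ini', 'cfg'                     # keys stored in Lexer.extension_registry
end

Rouge::Lexer.find('dosini')                   # => IniLexer
Rouge::Lexer.extension_registry['cfg']        # => IniLexer

Options change in the same spirit: default_options now lives on the class (where make can seed it), and the instance-level options method merges those defaults in at read time rather than copying them once at initialization.
 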
@@ -61,51 +101,52 @@ module Rouge
       lex(stream).to_a
     end
 
-    def lex(stream, &b)
-      return enum_for(:lex, stream) unless block_given?
+    def lex(string, &b)
+      return enum_for(:lex, string) unless block_given?
 
-      stream_tokens(stream, &b)
-    end
+      last_token = nil
+      last_val = nil
+      stream_tokens(StringScanner.new(string)) do |tok, val|
+        next if val.empty?
 
-    def stream_tokens(stream, &b)
-      raise 'abstract'
-    end
+        if tok == last_token
+          last_val << val
+          next
+        end
 
-    protected
+        b.call(last_token, last_val) if last_token
+        last_token = tok
+        last_val = val
+      end
 
-    def force_load!
-      return self if @force_load
-      @force_load = true
-      instance_eval &@lazy_load_proc
+      b.call(last_token, last_val) if last_token
+    end
 
-      self
+    def stream_tokens(stream, &b)
+      raise 'abstract'
     end
   end
 
   class RegexLexer < Lexer
     class Rule
       attr_reader :callback
-      attr_reader :next_lexer
+      attr_reader :next_state
       attr_reader :re
-      def initialize(re, callback, next_lexer)
-        @orig_re = re
-        @re = Regexp.new %/\\A(?:#{re.source})/
+      def initialize(re, callback, next_state)
+        @re = re
         @callback = callback
-        @next_lexer = next_lexer
+        @next_state = next_state
       end
 
       def inspect
-        "#<Rule #{@orig_re.inspect}>"
+        "#<Rule #{@re.inspect}>"
       end
 
       def consume(stream, &b)
-        # TODO: I'm sure there is a much faster way of doing this.
-        # also, encapsulate the stream in its own class.
-        match = stream.match(@re)
+        stream.scan(@re)
 
-        if match
-          stream.slice!(0...$&.size)
-          yield match
+        if stream.matched?
+          yield stream
           return true
         end
 
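 
The lexer core now drives a StringScanner instead of destructively slicing the input string, and Lexer#lex coalesces adjacent chunks that carry the same token. For orientation, the stdlib scanner calls the new code leans on behave like this (plain Ruby, nothing rouge-specific):

require 'strscan'

ss = StringScanner.new("foo   bar")
ss.scan(/\w+/)    # => "foo"   (match anchored at the current position)
ss.matched?       # => true
ss[0]             # => "foo"   (what a rule callback reads via scanner[0])
ss.scan(/\d+/)    # => nil     (no match, position unchanged)
ss.matched?       # => false
ss.scan(/\s+/)    # => "   "
ss.peek(20)       # => "bar"   (used for the debug output)
ss.getch          # => "b"     (how an unmatched character becomes an Error token)
ss.eos?           # => false
 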
@@ -113,122 +154,218 @@ module Rouge
       end
     end
 
-    def initialize(parent=nil, opts={}, &defn)
-      if parent.is_a? Hash
-        opts = parent
-        parent = nil
+    class State
+      attr_reader :name
+      def initialize(lexer_class, name, &defn)
+        @lexer_class = lexer_class
+        @name = name
+        @defn = defn
       end
 
-      @parent = parent
-      super(opts, &defn)
+      def relative_state(state_name)
+        @lexer_class.get_state(state_name)
+      end
+
+      def rules
+        @rules ||= []
+      end
+
+      def load!
+        return self if @loaded
+        @loaded = true
+        StateDSL.new(rules).instance_eval(&@defn)
+        self
+      end
     end
 
-    def lexer(name, opts={}, &defn)
-      @scope ||= {}
-      name = name.to_s
+    class ScanState
+      def self.delegate(m, target)
+        define_method(m) do |*a, &b|
+          send(target).send(m, *a, &b)
+        end
+      end
 
-      if block_given?
-        l = @scope[name] = RegexLexer.new(self, options.merge(opts), &defn)
-        l.instance_variable_set :@name, name
-        l
-      else
-        @scope[name] || @parent && @parent.lexer(name)
+      attr_accessor :scanner
+      attr_accessor :stack
+      attr_accessor :lexer
+      def initialize(lexer, scanner, stack=nil)
+        @lexer = lexer
+        @scanner = scanner
+        @stack = stack || [lexer.get_state(:root)]
+      end
+
+      def pop!
+        raise 'empty stack!' if stack.empty?
+
+        debug { " popping stack" }
+        stack.pop
+      end
+
+      def push(state_name)
+        debug { " pushing #{state_name}" }
+        stack.push(state.relative_state(state_name))
+      end
+
+      delegate :debug, :lexer
+
+      delegate :[], :scanner
+      delegate :captures, :scanner
+      delegate :peek, :scanner
+      delegate :eos?, :scanner
+
+      def run_callback(&callback)
+        Enumerator.new do |y|
+          @output_stream = y
+          @group_count = 0
+          instance_exec(self, &callback)
+          @output_stream = nil
+        end
+      end
+
+      def token(tok, val=nil)
+        raise 'no output stream' unless @output_stream
+
+        @output_stream << [Token[tok], val || scanner[0]]
+      end
+
+      def group(tok)
+        token(tok, scanner[@group_count += 1])
+      end
+
+      def delegate(lexer, text=nil)
+        debug { " delegating to #{lexer.name}" }
+        text ||= scanner[0]
+
+        lexer.lex(text) do |tok, val|
+          debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
+          token(tok, val)
+        end
+      end
+
+      def state
+        raise 'empty stack!' if stack.empty?
+        stack.last
+      end
+
+      def scan(re, &b)
+        scanner.scan(re)
+
+        if scanner.matched?
+          yield self
+          return true
+        end
+
+        return false
       end
     end
 
-    def mixin(lexer)
-      lexer = get_lexer(lexer)
-      lexer.force_load!
+    class StateDSL
+      attr_reader :rules
+      def initialize(rules)
+        @rules = rules
+      end
 
-      rules << lexer
+      def rule(re, tok=nil, next_state=nil, &callback)
+        if block_given?
+          next_state = tok
+        else
+          tok = Token[tok]
+
+          callback = proc do |ss|
+            token tok, ss[0]
+            case next_state
+            when :pop!
+              pop!
+            when Symbol
+              push next_state
+            end # else pass
+          end
+        end
+
+        rules << Rule.new(re, callback, next_state)
+      end
+
+      def mixin(lexer_name)
+        rules << lexer_name.to_s
+      end
     end
 
-    def rules
-      force_load!
-      @rules ||= []
+    def self.states
+      @states ||= {}
     end
 
-    def rule(re, token=nil, next_lexer=nil, &callback)
-      if block_given?
-        next_lexer = token
-      else
-        if token.is_a? String
-          token = Token[token]
-        end
+    def self.state(name, &b)
+      name = name.to_s
+      states[name] = State.new(self, name, &b)
+    end
 
-        callback = proc { |match, &b| b.call token, match }
+    def initialize(parent=nil, opts={}, &defn)
+      if parent.is_a? Hash
+        opts = parent
+        parent = nil
       end
 
-      rules << Rule.new(re, callback, get_lexer(next_lexer))
+      @parent = parent
+      super(opts, &defn)
     end
 
-    def stream_tokens(stream, &b)
-      stream = stream.dup
-      stack = [self]
+    def self.get_state(name)
+      return name if name.is_a? State
 
-      stream_with_stack(stream.dup, [self], &b)
+      state = states[name.to_s]
+      raise "unknown state: #{name}" unless state
+      state.load!
     end
 
-    def stream_with_stack(stream, stack, &b)
-      return true if stream.empty?
+    def self.[](name)
+      get_state(name)
+    end
 
-      until stream.empty?
-        debug { "stack: #{stack.map(&:name).inspect}" }
-        debug { "parsing #{stream.slice(0..20).inspect}" }
-        success = stack.last.step(stream, stack, &b)
+    def get_state(name)
+      self.class.get_state(name)
+    end
+
+    def stream_tokens(stream, &b)
+      scan_state = ScanState.new(self, stream)
+
+      stream_with_state(scan_state, &b)
+    end
+
+    def stream_with_state(scan_state, &b)
+      until scan_state.eos?
+        debug { "stack: #{scan_state.stack.map(&:name).inspect}" }
+        debug { "stream: #{scan_state.scanner.peek(20).inspect}" }
+        success = step(get_state(scan_state.state), scan_state, &b)
 
         if !success
           debug { " no match, yielding Error" }
-          b.call(Token['Error'], stream.slice!(0..0))
+          b.call(Token['Error'], scan_state.scanner.getch)
         end
       end
     end
 
-    def step(stream, stack, &b)
-      rules.each do |rule|
-        return true if run_rule(rule, stream, stack, &b)
+    def step(state, scan_state, &b)
+      state.rules.each do |rule|
+        return true if run_rule(rule, scan_state, &b)
       end
 
      false
    end
 
     private
-    def get_lexer(o)
-      case o
-      when RegexLexer, :pop!
-        o
-      else
-        lexer o
-      end
-    end
-
-    def run_rule(rule, stream, stack, &b)
+    def run_rule(rule, scan_state, &b)
      case rule
-      when String, RegexLexer
-        lexer = get_lexer(rule)
-        debug { " entering mixin #{lexer.name}" }
-        get_lexer(rule).step(stream, stack, &b)
+      when String
+        debug { " entering mixin #{rule}" }
+        step(get_state(rule), scan_state, &b)
      when Rule
        debug { " trying #{rule.inspect}" }
-        rule.consume(stream) do |match|
+        scan_state.scan(rule.re) do |match|
          debug { " got #{match[0].inspect}" }
 
-          rule.callback.call(*match) do |tok, res|
-            if tok.is_a? String
-              tok = Token[tok]
-            end
-
-            debug { " yielding #{tok.name.inspect}, #{res.inspect}" }
-            b.call(tok, res)
-          end
-
-          if rule.next_lexer == :pop!
-            debug { " popping stack" }
-            stack.pop
-          elsif rule.next_lexer
-            lexer = get_lexer(rule.next_lexer)
-            debug { " entering #{lexer.name}" }
-            stack.push lexer
+          scan_state.run_callback(&rule.callback).each do |tok, res|
+            debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
+            b.call(Token[tok], res)
          end
        end
      end
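 
Putting the new pieces together: states are declared at the class level with state, each state's rules are built by StateDSL, and a ScanState carries the scanner plus the state stack while rule callbacks emit tokens through it. A hedged end-to-end sketch (the lexer, its tag, and the token names are invented for illustration and are not part of this diff):

class TinyStrings < Rouge::RegexLexer        # hypothetical lexer
  tag 'tiny_strings'

  state :root do
    rule /\s+/, 'Text'
    rule /"/, 'Literal.String', :string      # push the :string state
    rule /\w+/ do |ss|
      token 'Name', ss[0]                    # explicit callback via ScanState#token
    end
  end

  state :string do
    rule /[^"]+/, 'Literal.String'
    rule /"/, 'Literal.String', :pop!        # pop back to :root
  end
end

TinyStrings.lex(%q{say "hi there"}).to_a
# Lexer#lex merges adjacent values carrying the same token, so roughly:
# [[Token['Name'], "say"], [Token['Text'], " "], [Token['Literal.String'], "\"hi there\""]]

Anything no rule can match falls through to stream_with_state's Error branch, which consumes one character at a time via scanner.getch.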