rouge 0.0.2 → 0.0.3

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -3,8 +3,8 @@ require 'pathname'
3
3
 
4
4
  module Rouge
5
5
  class << self
6
- def highlight(text, lexer_name, formatter)
7
- lexer = Lexer.find(lexer_name)
6
+ def highlight(text, lexer, formatter)
7
+ lexer = Lexer.find(lexer) unless lexer.is_a?(Lexer)
8
8
  raise "unknown lexer #{lexer_name}" unless lexer
9
9
 
10
10
  formatter.render(lexer.lex(text))
@@ -15,11 +15,16 @@ end
15
15
  load_dir = Pathname.new(__FILE__).dirname
16
16
  load load_dir.join('rouge/token.rb')
17
17
  load load_dir.join('rouge/lexer.rb')
18
+ load load_dir.join('rouge/lexers/text.rb')
19
+ load load_dir.join('rouge/lexers/diff.rb')
18
20
  load load_dir.join('rouge/lexers/shell.rb')
19
21
  load load_dir.join('rouge/lexers/javascript.rb')
22
+ load load_dir.join('rouge/lexers/css.rb')
23
+ load load_dir.join('rouge/lexers/html.rb')
20
24
 
21
25
  load load_dir.join('rouge/formatter.rb')
22
26
  load load_dir.join('rouge/formatters/html.rb')
23
27
 
24
28
  load load_dir.join('rouge/theme.rb')
25
29
  load load_dir.join('rouge/themes/thankful_eyes.rb')
30
+ load load_dir.join('rouge/themes/colorful.rb')
@@ -1,3 +1,6 @@
1
+ # stdlib
2
+ require 'cgi'
3
+
1
4
  module Rouge
2
5
  module Formatters
3
6
  class HTML < Formatter
@@ -9,9 +12,7 @@ module Rouge
9
12
  yield "<pre class=#{@css_class.inspect}>"
10
13
  tokens.each do |tok, val|
11
14
  # TODO: properly html-encode val
12
- val.gsub! '&', '&amp;'
13
- val.gsub! '<', '&lt;'
14
- val.gsub! '>', '&gt;'
15
+ val = CGI.escape_html(val)
15
16
 
16
17
  case tok.shortname
17
18
  when ''
@@ -1,8 +1,40 @@
1
+ # stdlib
2
+ require 'strscan'
3
+
1
4
  module Rouge
2
5
  class Lexer
3
6
  class << self
4
- def create(opts={}, &b)
5
- new(opts, &b).send(:force_load!)
7
+ def make(opts={}, &b)
8
+ _sup = self
9
+
10
+ Class.new(self) do
11
+ @lazy_load_proc = b
12
+ @default_options = _sup.default_options.merge(opts)
13
+ @parent = _sup
14
+ end
15
+ end
16
+
17
+ def lex(stream, opts={}, &b)
18
+ new(opts).lex(stream, &b)
19
+ end
20
+
21
+ protected
22
+ def force_load!
23
+ return self if @force_load
24
+ @force_load = true
25
+ @lazy_load_proc && instance_eval(&@lazy_load_proc)
26
+
27
+ self
28
+ end
29
+ public
30
+
31
+ def new(*a, &b)
32
+ force_load!
33
+ super(*a, &b)
34
+ end
35
+
36
+ def default_options
37
+ @default_options ||= {}
6
38
  end
7
39
 
8
40
  def find(name)
@@ -13,43 +45,51 @@ module Rouge
13
45
  registry[name.to_s] = lexer
14
46
  end
15
47
 
48
+ def tag(t=nil)
49
+ return @tag if t.nil?
50
+
51
+ @tag = t.to_s
52
+ aliases @tag
53
+ end
54
+
55
+ def aliases(*args)
56
+ args.each { |arg| Lexer.register(arg, self) }
57
+ end
58
+
59
+ def extensions(*exts)
60
+ exts.each do |ext|
61
+ Lexer.extension_registry[ext] = self
62
+ end
63
+ end
64
+
65
+ def extension_registry
66
+ @extension_registry ||= {}
67
+ end
68
+
16
69
  private
17
70
  def registry
18
71
  @registry ||= {}
19
72
  end
20
73
  end
21
74
 
22
- def name(n=nil)
23
- return @name if n.nil?
24
-
25
- @name = n.to_s
26
- aliases @name
27
- end
28
-
29
- def aliases(*args)
30
- args.each { |arg| Lexer.register(arg, self) }
31
- end
75
+ # -*- instance methods -*- #
32
76
 
33
77
  def initialize(opts={}, &b)
34
- options opts
78
+ options(opts)
35
79
  @lazy_load_proc = b
36
80
  end
37
81
 
38
- def default_options
39
- {}
40
- end
41
-
42
82
  def options(o={})
43
- (@options ||= default_options).merge!(o)
83
+ (@options ||= {}).merge!(o)
44
84
 
45
- @options
85
+ self.class.default_options.merge(@options)
46
86
  end
47
87
 
48
88
  def option(k, v=:absent)
49
89
  if v == :absent
50
- options[k.to_s]
90
+ options[k]
51
91
  else
52
- options({ k.to_s => v })
92
+ options({ k => v })
53
93
  end
54
94
  end
55
95
 
@@ -61,51 +101,52 @@ module Rouge
61
101
  lex(stream).to_a
62
102
  end
63
103
 
64
- def lex(stream, &b)
65
- return enum_for(:lex, stream) unless block_given?
104
+ def lex(string, &b)
105
+ return enum_for(:lex, string) unless block_given?
66
106
 
67
- stream_tokens(stream, &b)
68
- end
107
+ last_token = nil
108
+ last_val = nil
109
+ stream_tokens(StringScanner.new(string)) do |tok, val|
110
+ next if val.empty?
69
111
 
70
- def stream_tokens(stream, &b)
71
- raise 'abstract'
72
- end
112
+ if tok == last_token
113
+ last_val << val
114
+ next
115
+ end
73
116
 
74
- protected
117
+ b.call(last_token, last_val) if last_token
118
+ last_token = tok
119
+ last_val = val
120
+ end
75
121
 
76
- def force_load!
77
- return self if @force_load
78
- @force_load = true
79
- instance_eval &@lazy_load_proc
122
+ b.call(last_token, last_val) if last_token
123
+ end
80
124
 
81
- self
125
+ def stream_tokens(stream, &b)
126
+ raise 'abstract'
82
127
  end
83
128
  end
84
129
 
85
130
  class RegexLexer < Lexer
86
131
  class Rule
87
132
  attr_reader :callback
88
- attr_reader :next_lexer
133
+ attr_reader :next_state
89
134
  attr_reader :re
90
- def initialize(re, callback, next_lexer)
91
- @orig_re = re
92
- @re = Regexp.new %/\\A(?:#{re.source})/
135
+ def initialize(re, callback, next_state)
136
+ @re = re
93
137
  @callback = callback
94
- @next_lexer = next_lexer
138
+ @next_state = next_state
95
139
  end
96
140
 
97
141
  def inspect
98
- "#<Rule #{@orig_re.inspect}>"
142
+ "#<Rule #{@re.inspect}>"
99
143
  end
100
144
 
101
145
  def consume(stream, &b)
102
- # TODO: I'm sure there is a much faster way of doing this.
103
- # also, encapsulate the stream in its own class.
104
- match = stream.match(@re)
146
+ stream.scan(@re)
105
147
 
106
- if match
107
- stream.slice!(0...$&.size)
108
- yield match
148
+ if stream.matched?
149
+ yield stream
109
150
  return true
110
151
  end
111
152
 
@@ -113,122 +154,218 @@ module Rouge
113
154
  end
114
155
  end
115
156
 
116
- def initialize(parent=nil, opts={}, &defn)
117
- if parent.is_a? Hash
118
- opts = parent
119
- parent = nil
157
+ class State
158
+ attr_reader :name
159
+ def initialize(lexer_class, name, &defn)
160
+ @lexer_class = lexer_class
161
+ @name = name
162
+ @defn = defn
120
163
  end
121
164
 
122
- @parent = parent
123
- super(opts, &defn)
165
+ def relative_state(state_name)
166
+ @lexer_class.get_state(state_name)
167
+ end
168
+
169
+ def rules
170
+ @rules ||= []
171
+ end
172
+
173
+ def load!
174
+ return self if @loaded
175
+ @loaded = true
176
+ StateDSL.new(rules).instance_eval(&@defn)
177
+ self
178
+ end
124
179
  end
125
180
 
126
- def lexer(name, opts={}, &defn)
127
- @scope ||= {}
128
- name = name.to_s
181
+ class ScanState
182
+ def self.delegate(m, target)
183
+ define_method(m) do |*a, &b|
184
+ send(target).send(m, *a, &b)
185
+ end
186
+ end
129
187
 
130
- if block_given?
131
- l = @scope[name] = RegexLexer.new(self, options.merge(opts), &defn)
132
- l.instance_variable_set :@name, name
133
- l
134
- else
135
- @scope[name] || @parent && @parent.lexer(name)
188
+ attr_accessor :scanner
189
+ attr_accessor :stack
190
+ attr_accessor :lexer
191
+ def initialize(lexer, scanner, stack=nil)
192
+ @lexer = lexer
193
+ @scanner = scanner
194
+ @stack = stack || [lexer.get_state(:root)]
195
+ end
196
+
197
+ def pop!
198
+ raise 'empty stack!' if stack.empty?
199
+
200
+ debug { " popping stack" }
201
+ stack.pop
202
+ end
203
+
204
+ def push(state_name)
205
+ debug { " pushing #{state_name}" }
206
+ stack.push(state.relative_state(state_name))
207
+ end
208
+
209
+ delegate :debug, :lexer
210
+
211
+ delegate :[], :scanner
212
+ delegate :captures, :scanner
213
+ delegate :peek, :scanner
214
+ delegate :eos?, :scanner
215
+
216
+ def run_callback(&callback)
217
+ Enumerator.new do |y|
218
+ @output_stream = y
219
+ @group_count = 0
220
+ instance_exec(self, &callback)
221
+ @output_stream = nil
222
+ end
223
+ end
224
+
225
+ def token(tok, val=nil)
226
+ raise 'no output stream' unless @output_stream
227
+
228
+ @output_stream << [Token[tok], val || scanner[0]]
229
+ end
230
+
231
+ def group(tok)
232
+ token(tok, scanner[@group_count += 1])
233
+ end
234
+
235
+ def delegate(lexer, text=nil)
236
+ debug { " delegating to #{lexer.name}" }
237
+ text ||= scanner[0]
238
+
239
+ lexer.lex(text) do |tok, val|
240
+ debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
241
+ token(tok, val)
242
+ end
243
+ end
244
+
245
+ def state
246
+ raise 'empty stack!' if stack.empty?
247
+ stack.last
248
+ end
249
+
250
+ def scan(re, &b)
251
+ scanner.scan(re)
252
+
253
+ if scanner.matched?
254
+ yield self
255
+ return true
256
+ end
257
+
258
+ return false
136
259
  end
137
260
  end
138
261
 
139
- def mixin(lexer)
140
- lexer = get_lexer(lexer)
141
- lexer.force_load!
262
+ class StateDSL
263
+ attr_reader :rules
264
+ def initialize(rules)
265
+ @rules = rules
266
+ end
142
267
 
143
- rules << lexer
268
+ def rule(re, tok=nil, next_state=nil, &callback)
269
+ if block_given?
270
+ next_state = tok
271
+ else
272
+ tok = Token[tok]
273
+
274
+ callback = proc do |ss|
275
+ token tok, ss[0]
276
+ case next_state
277
+ when :pop!
278
+ pop!
279
+ when Symbol
280
+ push next_state
281
+ end # else pass
282
+ end
283
+ end
284
+
285
+ rules << Rule.new(re, callback, next_state)
286
+ end
287
+
288
+ def mixin(lexer_name)
289
+ rules << lexer_name.to_s
290
+ end
144
291
  end
145
292
 
146
- def rules
147
- force_load!
148
- @rules ||= []
293
+ def self.states
294
+ @states ||= {}
149
295
  end
150
296
 
151
- def rule(re, token=nil, next_lexer=nil, &callback)
152
- if block_given?
153
- next_lexer = token
154
- else
155
- if token.is_a? String
156
- token = Token[token]
157
- end
297
+ def self.state(name, &b)
298
+ name = name.to_s
299
+ states[name] = State.new(self, name, &b)
300
+ end
158
301
 
159
- callback = proc { |match, &b| b.call token, match }
302
+ def initialize(parent=nil, opts={}, &defn)
303
+ if parent.is_a? Hash
304
+ opts = parent
305
+ parent = nil
160
306
  end
161
307
 
162
- rules << Rule.new(re, callback, get_lexer(next_lexer))
308
+ @parent = parent
309
+ super(opts, &defn)
163
310
  end
164
311
 
165
- def stream_tokens(stream, &b)
166
- stream = stream.dup
167
- stack = [self]
312
+ def self.get_state(name)
313
+ return name if name.is_a? State
168
314
 
169
- stream_with_stack(stream.dup, [self], &b)
315
+ state = states[name.to_s]
316
+ raise "unknown state: #{name}" unless state
317
+ state.load!
170
318
  end
171
319
 
172
- def stream_with_stack(stream, stack, &b)
173
- return true if stream.empty?
320
+ def self.[](name)
321
+ get_state(name)
322
+ end
174
323
 
175
- until stream.empty?
176
- debug { "stack: #{stack.map(&:name).inspect}" }
177
- debug { "parsing #{stream.slice(0..20).inspect}" }
178
- success = stack.last.step(stream, stack, &b)
324
+ def get_state(name)
325
+ self.class.get_state(name)
326
+ end
327
+
328
+ def stream_tokens(stream, &b)
329
+ scan_state = ScanState.new(self, stream)
330
+
331
+ stream_with_state(scan_state, &b)
332
+ end
333
+
334
+ def stream_with_state(scan_state, &b)
335
+ until scan_state.eos?
336
+ debug { "stack: #{scan_state.stack.map(&:name).inspect}" }
337
+ debug { "stream: #{scan_state.scanner.peek(20).inspect}" }
338
+ success = step(get_state(scan_state.state), scan_state, &b)
179
339
 
180
340
  if !success
181
341
  debug { " no match, yielding Error" }
182
- b.call(Token['Error'], stream.slice!(0..0))
342
+ b.call(Token['Error'], scan_state.scanner.getch)
183
343
  end
184
344
  end
185
345
  end
186
346
 
187
- def step(stream, stack, &b)
188
- rules.each do |rule|
189
- return true if run_rule(rule, stream, stack, &b)
347
+ def step(state, scan_state, &b)
348
+ state.rules.each do |rule|
349
+ return true if run_rule(rule, scan_state, &b)
190
350
  end
191
351
 
192
352
  false
193
353
  end
194
354
 
195
355
  private
196
- def get_lexer(o)
197
- case o
198
- when RegexLexer, :pop!
199
- o
200
- else
201
- lexer o
202
- end
203
- end
204
-
205
- def run_rule(rule, stream, stack, &b)
356
+ def run_rule(rule, scan_state, &b)
206
357
  case rule
207
- when String, RegexLexer
208
- lexer = get_lexer(rule)
209
- debug { " entering mixin #{lexer.name}" }
210
- get_lexer(rule).step(stream, stack, &b)
358
+ when String
359
+ debug { " entering mixin #{rule}" }
360
+ step(get_state(rule), scan_state, &b)
211
361
  when Rule
212
362
  debug { " trying #{rule.inspect}" }
213
- rule.consume(stream) do |match|
363
+ scan_state.scan(rule.re) do |match|
214
364
  debug { " got #{match[0].inspect}" }
215
365
 
216
- rule.callback.call(*match) do |tok, res|
217
- if tok.is_a? String
218
- tok = Token[tok]
219
- end
220
-
221
- debug { " yielding #{tok.name.inspect}, #{res.inspect}" }
222
- b.call(tok, res)
223
- end
224
-
225
- if rule.next_lexer == :pop!
226
- debug { " popping stack" }
227
- stack.pop
228
- elsif rule.next_lexer
229
- lexer = get_lexer(rule.next_lexer)
230
- debug { " entering #{lexer.name}" }
231
- stack.push lexer
366
+ scan_state.run_callback(&rule.callback).each do |tok, res|
367
+ debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
368
+ b.call(Token[tok], res)
232
369
  end
233
370
  end
234
371
  end