rouge 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -12,3 +12,6 @@ gem 'redcarpet'
12
12
 
13
13
  # for visual tests
14
14
  gem 'sinatra'
15
+
16
+ # docs
17
+ gem 'yard'
@@ -15,13 +15,20 @@ end
15
15
  load_dir = Pathname.new(__FILE__).dirname
16
16
  load load_dir.join('rouge/version.rb')
17
17
 
18
+ load load_dir.join('rouge/util.rb')
19
+
18
20
  load load_dir.join('rouge/text_analyzer.rb')
19
21
  load load_dir.join('rouge/token.rb')
22
+
20
23
  load load_dir.join('rouge/lexer.rb')
24
+ load load_dir.join('rouge/regex_lexer.rb')
25
+ load load_dir.join('rouge/template_lexer.rb')
21
26
 
22
27
  load load_dir.join('rouge/lexers/text.rb')
23
28
  load load_dir.join('rouge/lexers/diff.rb')
24
29
  load load_dir.join('rouge/lexers/tex.rb')
30
+ load load_dir.join('rouge/lexers/markdown.rb')
31
+ load load_dir.join('rouge/lexers/yaml.rb')
25
32
 
26
33
  load load_dir.join('rouge/lexers/make.rb')
27
34
  load load_dir.join('rouge/lexers/shell.rb')
@@ -29,6 +36,7 @@ load load_dir.join('rouge/lexers/shell.rb')
29
36
  load load_dir.join('rouge/lexers/javascript.rb')
30
37
  load load_dir.join('rouge/lexers/css.rb')
31
38
  load load_dir.join('rouge/lexers/html.rb')
39
+ load load_dir.join('rouge/lexers/haml.rb')
32
40
  load load_dir.join('rouge/lexers/xml.rb')
33
41
  load load_dir.join('rouge/lexers/php.rb')
34
42
 
@@ -38,6 +46,7 @@ load load_dir.join('rouge/lexers/tcl.rb')
38
46
  load load_dir.join('rouge/lexers/python.rb')
39
47
  load load_dir.join('rouge/lexers/ruby.rb')
40
48
  load load_dir.join('rouge/lexers/perl.rb')
49
+ load load_dir.join('rouge/lexers/factor.rb')
41
50
 
42
51
  load load_dir.join('rouge/lexers/haskell.rb')
43
52
  load load_dir.join('rouge/lexers/scheme.rb')
@@ -4,18 +4,37 @@ require 'strscan'
4
4
  module Rouge
5
5
  class Lexer
6
6
  class << self
7
+ # Lexes `stream` with the given options. The lex is delegated to a
8
+ # new instance.
9
+ #
10
+ # @see #lex
7
11
  def lex(stream, opts={}, &b)
8
12
  new(opts).lex(stream, &b)
9
13
  end
10
14
 
11
- def default_options
15
+ def default_options(o={})
12
16
  @default_options ||= {}
17
+ @default_options.merge!(o)
18
+ @default_options
13
19
  end
14
20
 
21
+ # Given a tag or alias (string or symbol), return the lexer class registered under that name.
15
22
  def find(name)
16
23
  registry[name.to_s]
17
24
  end
18
25
 
26
+ # Guess which lexer to use based on a hash of info.
27
+ #
28
+ # @option info :mimetype
29
+ # A mimetype to guess by
30
+ # @option info :filename
31
+ # A filename to guess by
32
+ # @option info :source
33
+ # The source itself, which, if guessing by mimetype or filename
34
+ # fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
35
+ # other hints.
36
+ #
37
+ # @see Lexer.analyze_text
19
38
  def guess(info={})
20
39
  by_mimetype = guess_by_mimetype(info[:mimetype]) if info[:mimetype]
21
40
  return by_mimetype if by_mimetype
@@ -67,6 +86,16 @@ module Rouge
67
86
  registry[name.to_s] = lexer
68
87
  end
69
88
 
89
+ # Used to specify or get the canonical name of this lexer class.
90
+ #
91
+ # @example
92
+ # class MyLexer < Lexer
93
+ # tag 'foo'
94
+ # end
95
+ #
96
+ # MyLexer.tag # => 'foo'
97
+ #
98
+ # Lexer.find('foo') # => MyLexer
70
99
  def tag(t=nil)
71
100
  return @tag if t.nil?
72
101
 
@@ -74,14 +103,35 @@ module Rouge
74
103
  aliases @tag
75
104
  end
76
105
 
106
+ # Used to specify alternate names this lexer class may be found by.
107
+ #
108
+ # @example
109
+ # class Erb < Lexer
110
+ # tag 'erb'
111
+ # aliases 'eruby', 'rhtml'
112
+ # end
113
+ #
114
+ # Lexer.find('eruby') # => Erb
77
115
  def aliases(*args)
78
116
  args.each { |arg| Lexer.register(arg, self) }
79
117
  end
80
118
 
119
+ # Specify a list of filename globs associated with this lexer
120
+ #
121
+ # @example
122
+ # class Ruby < Lexer
123
+ # filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
124
+ # end
81
125
  def filenames(*fnames)
82
126
  (@filenames ||= []).concat(fnames)
83
127
  end
84
128
 
129
+ # Specify a list of mimetypes associated with this lexer.
130
+ #
131
+ # @example
132
+ # class Html < Lexer
133
+ # mimetypes 'text/html', 'application/xhtml+xml'
134
+ # end
85
135
  def mimetypes(*mts)
86
136
  (@mimetypes ||= []).concat(mts)
87
137
  end
@@ -94,7 +144,7 @@ module Rouge
94
144
 
95
145
  # -*- instance methods -*- #
96
146
 
97
- def initialize(opts={}, &b)
147
+ def initialize(opts={})
98
148
  options(opts)
99
149
  end
100
150
 
@@ -112,18 +162,28 @@ module Rouge
112
162
  end
113
163
  end
114
164
 
165
+ # Leave a debug message if the `:debug` option is set. The message
166
+ # is given as a block because some debug messages contain calculated
167
+ # information that is unnecessary for lexing in the real world.
168
+ #
169
+ # @example
170
+ # debug { "hello, world!" }
115
171
  def debug(&b)
116
172
  puts(b.call) if option :debug
117
173
  end
118
174
 
119
- def get_tokens(stream)
120
- lex(stream).to_a
121
- end
122
-
175
+ # @abstract
176
+ #
177
+ # Called after each lex is finished. The default implementation
178
+ # is a noop.
123
179
  def reset!
124
- # noop, called after each lex is finished
125
180
  end
126
181
 
182
+ # Given a string, yield [token, chunk] pairs. If no block is given,
183
+ # an enumerator is returned.
184
+ #
185
+ # @option opts :continue
186
+ # Continue the lex from the previous state (i.e. don't call #reset!)
127
187
  def lex(string, opts={}, &b)
128
188
  return enum_for(:lex, string) unless block_given?
129
189
 
@@ -147,280 +207,28 @@ module Rouge
147
207
  b.call(last_token, last_val) if last_token
148
208
  end
149
209
 
210
+ # @abstract
211
+ #
212
+ # Yield [token, chunk] pairs, given a prepared input stream. This
213
+ # must be implemented by subclasses.
214
+ #
215
+ # @param [StringScanner] stream
216
+ # the stream
150
217
  def stream_tokens(stream, &b)
151
218
  raise 'abstract'
152
219
  end
153
220
 
154
- # return a number between 0 and 1 indicating the
155
- # likelihood that the text given should be lexed
156
- # with this lexer.
221
+ # @abstract
222
+ #
223
+ # Return a number between 0 and 1 indicating the likelihood that
224
+ # the text given should be lexed with this lexer. The default
225
+ # implementation returns 0.
226
+ #
227
+ # @param [TextAnalyzer] text
228
+ # the text to be analyzed, with a couple of handy methods on it,
229
+ # like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
157
230
  def self.analyze_text(text)
158
231
  0
159
232
  end
160
233
  end
161
-
162
- class RegexLexer < Lexer
163
- class Rule
164
- attr_reader :callback
165
- attr_reader :next_state
166
- attr_reader :re
167
- def initialize(re, callback, next_state)
168
- @re = re
169
- @callback = callback
170
- @next_state = next_state
171
- end
172
-
173
- def inspect
174
- "#<Rule #{@re.inspect}>"
175
- end
176
-
177
- def consume(stream, &b)
178
- stream.scan(@re)
179
-
180
- if stream.matched?
181
- yield stream
182
- return true
183
- end
184
-
185
- false
186
- end
187
- end
188
-
189
- class State
190
- attr_reader :name
191
- def initialize(lexer_class, name, &defn)
192
- @lexer_class = lexer_class
193
- @name = name
194
- @defn = defn
195
- end
196
-
197
- def relative_state(state_name=nil, &b)
198
- if state_name
199
- @lexer_class.get_state(state_name)
200
- else
201
- State.new(@lexer_class, b.inspect, &b).load!
202
- end
203
- end
204
-
205
- def rules
206
- @rules ||= []
207
- end
208
-
209
- def load!
210
- return self if @loaded
211
- @loaded = true
212
- StateDSL.new(rules).instance_eval(&@defn)
213
- self
214
- end
215
- end
216
-
217
- class StateDSL
218
- attr_reader :rules
219
- def initialize(rules)
220
- @rules = rules
221
- end
222
-
223
- def rule(re, tok=nil, next_state=nil, &callback)
224
- if block_given?
225
- next_state = tok
226
- else
227
- tok = Token[tok]
228
-
229
- callback = proc do
230
- token tok
231
- case next_state
232
- when :pop!
233
- pop!
234
- when Symbol
235
- push next_state
236
- end # else pass
237
- end
238
- end
239
-
240
- rules << Rule.new(re, callback, next_state)
241
- end
242
-
243
- def mixin(lexer_name)
244
- rules << lexer_name.to_s
245
- end
246
- end
247
-
248
- def self.states
249
- @states ||= {}
250
- end
251
-
252
- def self.start_procs
253
- @start_procs ||= []
254
- end
255
-
256
- def self.start(&b)
257
- start_procs << b
258
- end
259
-
260
- def self.state(name, &b)
261
- name = name.to_s
262
- states[name] = State.new(self, name, &b)
263
- end
264
-
265
- def self.get_state(name)
266
- return name if name.is_a? State
267
-
268
- state = states[name.to_s]
269
- raise "unknown state: #{name}" unless state
270
- state.load!
271
- end
272
-
273
- def self.[](name)
274
- get_state(name)
275
- end
276
-
277
- def get_state(name)
278
- self.class.get_state(name)
279
- end
280
-
281
- def stack
282
- @stack ||= [get_state(:root)]
283
- end
284
-
285
- def state
286
- stack.last or raise 'empty stack!'
287
- end
288
-
289
- def reset!
290
- @scan_state = nil
291
-
292
- self.class.start_procs.each do |pr|
293
- instance_eval(&pr)
294
- end
295
- end
296
-
297
- def stream_tokens(stream, &b)
298
- until stream.eos?
299
- debug { "lexer: #{self.class.tag}" }
300
- debug { "stack: #{stack.map(&:name).inspect}" }
301
- debug { "stream: #{stream.peek(20).inspect}" }
302
- success = step(get_state(state), stream, &b)
303
-
304
- if !success
305
- debug { " no match, yielding Error" }
306
- b.call(Token['Error'], stream.getch)
307
- end
308
- end
309
- end
310
-
311
- def step(state, stream, &b)
312
- state.rules.each do |rule|
313
- return true if run_rule(rule, stream, &b)
314
- end
315
-
316
- false
317
- end
318
-
319
- def run_rule(rule, stream, &b)
320
- case rule
321
- when String
322
- debug { " entering mixin #{rule}" }
323
- res = step(get_state(rule), stream, &b)
324
- debug { " exiting mixin #{rule}" }
325
- res
326
- when Rule
327
- debug { " trying #{rule.inspect}" }
328
- scan(stream, rule.re) do
329
- debug { " got #{stream[0].inspect}" }
330
-
331
- run_callback(stream, &rule.callback).each do |tok, res|
332
- debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
333
- b.call(Token[tok], res)
334
- end
335
- end
336
- end
337
- end
338
-
339
- def run_callback(stream, &callback)
340
- Enumerator.new do |y|
341
- @output_stream = y
342
- @group_count = 0
343
- @last_matches = stream
344
- instance_exec(stream, &callback)
345
- @last_matches = nil
346
- @output_stream = nil
347
- end
348
- end
349
-
350
- MAX_NULL_STEPS = 5
351
- def scan(scanner, re, &b)
352
- @null_steps ||= 0
353
-
354
- if @null_steps >= MAX_NULL_STEPS
355
- debug { " too many scans without consuming the string!" }
356
- return false
357
- end
358
-
359
- scanner.scan(re)
360
-
361
- if scanner.matched?
362
- if scanner.matched_size == 0
363
- @null_steps += 1
364
- else
365
- @null_steps = 0
366
- end
367
-
368
- yield self
369
- return true
370
- end
371
-
372
- return false
373
- end
374
-
375
- def token(tok, val=:__absent__)
376
- val = @last_matches[0] if val == :__absent__
377
- val ||= ''
378
-
379
- raise 'no output stream' unless @output_stream
380
-
381
- @output_stream << [Token[tok], val]
382
- end
383
-
384
- def group(tok)
385
- token(tok, @last_matches[@group_count += 1])
386
- end
387
-
388
- def delegate(lexer, text=nil)
389
- debug { " delegating to #{lexer.inspect}" }
390
- text ||= @last_matches[0]
391
-
392
- lexer.lex(text, :continue => true) do |tok, val|
393
- debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
394
- token(tok, val)
395
- end
396
- end
397
-
398
- def push(state_name=nil, &b)
399
- # use the top of the stack by default
400
- if state_name || b
401
- push_state = state.relative_state(state_name, &b)
402
- else
403
- push_state = self.state
404
- end
405
-
406
- debug { " pushing #{push_state.name}" }
407
- stack.push(push_state)
408
- end
409
-
410
- def pop!
411
- raise 'empty stack!' if stack.empty?
412
-
413
- debug { " popping stack" }
414
- stack.pop
415
- end
416
-
417
- def in_state?(state_name)
418
- stack.map(&:name).include? state_name.to_s
419
- end
420
-
421
- def state?(state_name)
422
- state_name.to_s == state.name
423
- end
424
-
425
- end
426
234
  end