rouge 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -12,3 +12,6 @@ gem 'redcarpet'
12
12
 
13
13
  # for visual tests
14
14
  gem 'sinatra'
15
+
16
+ # docs
17
+ gem 'yard'
@@ -15,13 +15,20 @@ end
15
15
  load_dir = Pathname.new(__FILE__).dirname
16
16
  load load_dir.join('rouge/version.rb')
17
17
 
18
+ load load_dir.join('rouge/util.rb')
19
+
18
20
  load load_dir.join('rouge/text_analyzer.rb')
19
21
  load load_dir.join('rouge/token.rb')
22
+
20
23
  load load_dir.join('rouge/lexer.rb')
24
+ load load_dir.join('rouge/regex_lexer.rb')
25
+ load load_dir.join('rouge/template_lexer.rb')
21
26
 
22
27
  load load_dir.join('rouge/lexers/text.rb')
23
28
  load load_dir.join('rouge/lexers/diff.rb')
24
29
  load load_dir.join('rouge/lexers/tex.rb')
30
+ load load_dir.join('rouge/lexers/markdown.rb')
31
+ load load_dir.join('rouge/lexers/yaml.rb')
25
32
 
26
33
  load load_dir.join('rouge/lexers/make.rb')
27
34
  load load_dir.join('rouge/lexers/shell.rb')
@@ -29,6 +36,7 @@ load load_dir.join('rouge/lexers/shell.rb')
29
36
  load load_dir.join('rouge/lexers/javascript.rb')
30
37
  load load_dir.join('rouge/lexers/css.rb')
31
38
  load load_dir.join('rouge/lexers/html.rb')
39
+ load load_dir.join('rouge/lexers/haml.rb')
32
40
  load load_dir.join('rouge/lexers/xml.rb')
33
41
  load load_dir.join('rouge/lexers/php.rb')
34
42
 
@@ -38,6 +46,7 @@ load load_dir.join('rouge/lexers/tcl.rb')
38
46
  load load_dir.join('rouge/lexers/python.rb')
39
47
  load load_dir.join('rouge/lexers/ruby.rb')
40
48
  load load_dir.join('rouge/lexers/perl.rb')
49
+ load load_dir.join('rouge/lexers/factor.rb')
41
50
 
42
51
  load load_dir.join('rouge/lexers/haskell.rb')
43
52
  load load_dir.join('rouge/lexers/scheme.rb')
@@ -4,18 +4,37 @@ require 'strscan'
4
4
  module Rouge
5
5
  class Lexer
6
6
  class << self
7
+ # Lexes `stream` with the given options. The lex is delegated to a
8
+ # new instance.
9
+ #
10
+ # @see #lex
7
11
  def lex(stream, opts={}, &b)
8
12
  new(opts).lex(stream, &b)
9
13
  end
10
14
 
11
- def default_options
15
+ def default_options(o={})
12
16
  @default_options ||= {}
17
+ @default_options.merge!(o)
18
+ @default_options
13
19
  end
14
20
 
21
+ # Given a name (a tag or an alias), return the lexer class registered under it.
15
22
  def find(name)
16
23
  registry[name.to_s]
17
24
  end
18
25
 
26
+ # Guess which lexer to use based on a hash of info.
27
+ #
28
+ # @option info :mimetype
29
+ # A mimetype to guess by
30
+ # @option info :filename
31
+ # A filename to guess by
32
+ # @option info :source
33
+ # The source itself, which, if guessing by mimetype or filename
34
+ # fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
35
+ # other hints.
36
+ #
37
+ # @see Lexer.analyze_text
19
38
  def guess(info={})
20
39
  by_mimetype = guess_by_mimetype(info[:mimetype]) if info[:mimetype]
21
40
  return by_mimetype if by_mimetype
@@ -67,6 +86,16 @@ module Rouge
67
86
  registry[name.to_s] = lexer
68
87
  end
69
88
 
89
+ # Used to specify or get the canonical name of this lexer class.
90
+ #
91
+ # @example
92
+ # class MyLexer < Lexer
93
+ # tag 'foo'
94
+ # end
95
+ #
96
+ # MyLexer.tag # => 'foo'
97
+ #
98
+ # Lexer.find('foo') # => MyLexer
70
99
  def tag(t=nil)
71
100
  return @tag if t.nil?
72
101
 
@@ -74,14 +103,35 @@ module Rouge
74
103
  aliases @tag
75
104
  end
76
105
 
106
+ # Used to specify alternate names this lexer class may be found by.
107
+ #
108
+ # @example
109
+ # class Erb < Lexer
110
+ # tag 'erb'
111
+ # aliases 'eruby', 'rhtml'
112
+ # end
113
+ #
114
+ # Lexer.find('eruby') # => Erb
77
115
  def aliases(*args)
78
116
  args.each { |arg| Lexer.register(arg, self) }
79
117
  end
80
118
 
119
+ # Specify a list of filename globs associated with this lexer.
120
+ #
121
+ # @example
122
+ # class Ruby < Lexer
123
+ # filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
124
+ # end
81
125
  def filenames(*fnames)
82
126
  (@filenames ||= []).concat(fnames)
83
127
  end
84
128
 
129
+ # Specify a list of mimetypes associated with this lexer.
130
+ #
131
+ # @example
132
+ # class Html < Lexer
133
+ # mimetypes 'text/html', 'application/xhtml+xml'
134
+ # end
85
135
  def mimetypes(*mts)
86
136
  (@mimetypes ||= []).concat(mts)
87
137
  end
@@ -94,7 +144,7 @@ module Rouge
94
144
 
95
145
  # -*- instance methods -*- #
96
146
 
97
- def initialize(opts={}, &b)
147
+ def initialize(opts={})
98
148
  options(opts)
99
149
  end
100
150
 
@@ -112,18 +162,28 @@ module Rouge
112
162
  end
113
163
  end
114
164
 
165
+ # Leave a debug message if the `:debug` option is set. The message
166
+ # is given as a block because some debug messages contain calculated
167
+ # information that is unnecessary for lexing in the real world.
168
+ #
169
+ # @example
170
+ # debug { "hello, world!" }
115
171
  def debug(&b)
116
172
  puts(b.call) if option :debug
117
173
  end
118
174
 
119
- def get_tokens(stream)
120
- lex(stream).to_a
121
- end
122
-
175
+ # @abstract
176
+ #
177
+ # Called after each lex is finished. The default implementation
178
+ # is a noop.
123
179
  def reset!
124
- # noop, called after each lex is finished
125
180
  end
126
181
 
182
+ # Given a string, yield [token, chunk] pairs. If no block is given,
183
+ # an enumerator is returned.
184
+ #
185
+ # @option opts :continue
186
+ # Continue the lex from the previous state (i.e. don't call #reset!)
127
187
  def lex(string, opts={}, &b)
128
188
  return enum_for(:lex, string) unless block_given?
129
189
 
@@ -147,280 +207,28 @@ module Rouge
147
207
  b.call(last_token, last_val) if last_token
148
208
  end
149
209
 
210
+ # @abstract
211
+ #
212
+ # Yield [token, chunk] pairs, given a prepared input stream. This
213
+ # must be implemented.
214
+ #
215
+ # @param [StringScanner] stream
216
+ # the stream
150
217
  def stream_tokens(stream, &b)
151
218
  raise 'abstract'
152
219
  end
153
220
 
154
- # return a number between 0 and 1 indicating the
155
- # likelihood that the text given should be lexed
156
- # with this lexer.
221
+ # @abstract
222
+ #
223
+ # Return a number between 0 and 1 indicating the likelihood that
224
+ # the text given should be lexed with this lexer. The default
225
+ # implementation returns 0.
226
+ #
227
+ # @param [TextAnalyzer] text
228
+ # the text to be analyzed, with a couple of handy methods on it,
229
+ # like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
157
230
  def self.analyze_text(text)
158
231
  0
159
232
  end
160
233
  end
161
-
162
- class RegexLexer < Lexer
163
- class Rule
164
- attr_reader :callback
165
- attr_reader :next_state
166
- attr_reader :re
167
- def initialize(re, callback, next_state)
168
- @re = re
169
- @callback = callback
170
- @next_state = next_state
171
- end
172
-
173
- def inspect
174
- "#<Rule #{@re.inspect}>"
175
- end
176
-
177
- def consume(stream, &b)
178
- stream.scan(@re)
179
-
180
- if stream.matched?
181
- yield stream
182
- return true
183
- end
184
-
185
- false
186
- end
187
- end
188
-
189
- class State
190
- attr_reader :name
191
- def initialize(lexer_class, name, &defn)
192
- @lexer_class = lexer_class
193
- @name = name
194
- @defn = defn
195
- end
196
-
197
- def relative_state(state_name=nil, &b)
198
- if state_name
199
- @lexer_class.get_state(state_name)
200
- else
201
- State.new(@lexer_class, b.inspect, &b).load!
202
- end
203
- end
204
-
205
- def rules
206
- @rules ||= []
207
- end
208
-
209
- def load!
210
- return self if @loaded
211
- @loaded = true
212
- StateDSL.new(rules).instance_eval(&@defn)
213
- self
214
- end
215
- end
216
-
217
- class StateDSL
218
- attr_reader :rules
219
- def initialize(rules)
220
- @rules = rules
221
- end
222
-
223
- def rule(re, tok=nil, next_state=nil, &callback)
224
- if block_given?
225
- next_state = tok
226
- else
227
- tok = Token[tok]
228
-
229
- callback = proc do
230
- token tok
231
- case next_state
232
- when :pop!
233
- pop!
234
- when Symbol
235
- push next_state
236
- end # else pass
237
- end
238
- end
239
-
240
- rules << Rule.new(re, callback, next_state)
241
- end
242
-
243
- def mixin(lexer_name)
244
- rules << lexer_name.to_s
245
- end
246
- end
247
-
248
- def self.states
249
- @states ||= {}
250
- end
251
-
252
- def self.start_procs
253
- @start_procs ||= []
254
- end
255
-
256
- def self.start(&b)
257
- start_procs << b
258
- end
259
-
260
- def self.state(name, &b)
261
- name = name.to_s
262
- states[name] = State.new(self, name, &b)
263
- end
264
-
265
- def self.get_state(name)
266
- return name if name.is_a? State
267
-
268
- state = states[name.to_s]
269
- raise "unknown state: #{name}" unless state
270
- state.load!
271
- end
272
-
273
- def self.[](name)
274
- get_state(name)
275
- end
276
-
277
- def get_state(name)
278
- self.class.get_state(name)
279
- end
280
-
281
- def stack
282
- @stack ||= [get_state(:root)]
283
- end
284
-
285
- def state
286
- stack.last or raise 'empty stack!'
287
- end
288
-
289
- def reset!
290
- @scan_state = nil
291
-
292
- self.class.start_procs.each do |pr|
293
- instance_eval(&pr)
294
- end
295
- end
296
-
297
- def stream_tokens(stream, &b)
298
- until stream.eos?
299
- debug { "lexer: #{self.class.tag}" }
300
- debug { "stack: #{stack.map(&:name).inspect}" }
301
- debug { "stream: #{stream.peek(20).inspect}" }
302
- success = step(get_state(state), stream, &b)
303
-
304
- if !success
305
- debug { " no match, yielding Error" }
306
- b.call(Token['Error'], stream.getch)
307
- end
308
- end
309
- end
310
-
311
- def step(state, stream, &b)
312
- state.rules.each do |rule|
313
- return true if run_rule(rule, stream, &b)
314
- end
315
-
316
- false
317
- end
318
-
319
- def run_rule(rule, stream, &b)
320
- case rule
321
- when String
322
- debug { " entering mixin #{rule}" }
323
- res = step(get_state(rule), stream, &b)
324
- debug { " exiting mixin #{rule}" }
325
- res
326
- when Rule
327
- debug { " trying #{rule.inspect}" }
328
- scan(stream, rule.re) do
329
- debug { " got #{stream[0].inspect}" }
330
-
331
- run_callback(stream, &rule.callback).each do |tok, res|
332
- debug { " yielding #{tok.to_s.inspect}, #{res.inspect}" }
333
- b.call(Token[tok], res)
334
- end
335
- end
336
- end
337
- end
338
-
339
- def run_callback(stream, &callback)
340
- Enumerator.new do |y|
341
- @output_stream = y
342
- @group_count = 0
343
- @last_matches = stream
344
- instance_exec(stream, &callback)
345
- @last_matches = nil
346
- @output_stream = nil
347
- end
348
- end
349
-
350
- MAX_NULL_STEPS = 5
351
- def scan(scanner, re, &b)
352
- @null_steps ||= 0
353
-
354
- if @null_steps >= MAX_NULL_STEPS
355
- debug { " too many scans without consuming the string!" }
356
- return false
357
- end
358
-
359
- scanner.scan(re)
360
-
361
- if scanner.matched?
362
- if scanner.matched_size == 0
363
- @null_steps += 1
364
- else
365
- @null_steps = 0
366
- end
367
-
368
- yield self
369
- return true
370
- end
371
-
372
- return false
373
- end
374
-
375
- def token(tok, val=:__absent__)
376
- val = @last_matches[0] if val == :__absent__
377
- val ||= ''
378
-
379
- raise 'no output stream' unless @output_stream
380
-
381
- @output_stream << [Token[tok], val]
382
- end
383
-
384
- def group(tok)
385
- token(tok, @last_matches[@group_count += 1])
386
- end
387
-
388
- def delegate(lexer, text=nil)
389
- debug { " delegating to #{lexer.inspect}" }
390
- text ||= @last_matches[0]
391
-
392
- lexer.lex(text, :continue => true) do |tok, val|
393
- debug { " delegated token: #{tok.inspect}, #{val.inspect}" }
394
- token(tok, val)
395
- end
396
- end
397
-
398
- def push(state_name=nil, &b)
399
- # use the top of the stack by default
400
- if state_name || b
401
- push_state = state.relative_state(state_name, &b)
402
- else
403
- push_state = self.state
404
- end
405
-
406
- debug { " pushing #{push_state.name}" }
407
- stack.push(push_state)
408
- end
409
-
410
- def pop!
411
- raise 'empty stack!' if stack.empty?
412
-
413
- debug { " popping stack" }
414
- stack.pop
415
- end
416
-
417
- def in_state?(state_name)
418
- stack.map(&:name).include? state_name.to_s
419
- end
420
-
421
- def state?(state_name)
422
- state_name.to_s == state.name
423
- end
424
-
425
- end
426
234
  end