rugments 1.0.0.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +52 -0
- data/README.md +195 -0
- data/bin/rugmentize +6 -0
- data/lib/rugments/cli.rb +357 -0
- data/lib/rugments/formatter.rb +29 -0
- data/lib/rugments/formatters/html.rb +142 -0
- data/lib/rugments/formatters/null.rb +17 -0
- data/lib/rugments/formatters/terminal256.rb +174 -0
- data/lib/rugments/lexer.rb +431 -0
- data/lib/rugments/lexers/apache/keywords.yml +453 -0
- data/lib/rugments/lexers/apache.rb +67 -0
- data/lib/rugments/lexers/apple_script.rb +366 -0
- data/lib/rugments/lexers/c.rb +210 -0
- data/lib/rugments/lexers/clojure.rb +109 -0
- data/lib/rugments/lexers/coffeescript.rb +172 -0
- data/lib/rugments/lexers/common_lisp.rb +343 -0
- data/lib/rugments/lexers/conf.rb +22 -0
- data/lib/rugments/lexers/cpp.rb +63 -0
- data/lib/rugments/lexers/csharp.rb +85 -0
- data/lib/rugments/lexers/css.rb +269 -0
- data/lib/rugments/lexers/dart.rb +102 -0
- data/lib/rugments/lexers/diff.rb +39 -0
- data/lib/rugments/lexers/elixir.rb +105 -0
- data/lib/rugments/lexers/erb.rb +54 -0
- data/lib/rugments/lexers/erlang.rb +116 -0
- data/lib/rugments/lexers/factor.rb +300 -0
- data/lib/rugments/lexers/gherkin/keywords.rb +13 -0
- data/lib/rugments/lexers/gherkin.rb +135 -0
- data/lib/rugments/lexers/go.rb +176 -0
- data/lib/rugments/lexers/groovy.rb +102 -0
- data/lib/rugments/lexers/haml.rb +226 -0
- data/lib/rugments/lexers/handlebars.rb +77 -0
- data/lib/rugments/lexers/haskell.rb +181 -0
- data/lib/rugments/lexers/html.rb +92 -0
- data/lib/rugments/lexers/http.rb +78 -0
- data/lib/rugments/lexers/ini.rb +55 -0
- data/lib/rugments/lexers/io.rb +66 -0
- data/lib/rugments/lexers/java.rb +74 -0
- data/lib/rugments/lexers/javascript.rb +258 -0
- data/lib/rugments/lexers/literate_coffeescript.rb +31 -0
- data/lib/rugments/lexers/literate_haskell.rb +34 -0
- data/lib/rugments/lexers/llvm.rb +82 -0
- data/lib/rugments/lexers/lua/builtins.rb +21 -0
- data/lib/rugments/lexers/lua.rb +120 -0
- data/lib/rugments/lexers/make.rb +114 -0
- data/lib/rugments/lexers/markdown.rb +151 -0
- data/lib/rugments/lexers/matlab/builtins.rb +10 -0
- data/lib/rugments/lexers/matlab.rb +70 -0
- data/lib/rugments/lexers/moonscript.rb +108 -0
- data/lib/rugments/lexers/nginx.rb +69 -0
- data/lib/rugments/lexers/nim.rb +149 -0
- data/lib/rugments/lexers/objective_c.rb +188 -0
- data/lib/rugments/lexers/ocaml.rb +109 -0
- data/lib/rugments/lexers/perl.rb +195 -0
- data/lib/rugments/lexers/php/builtins.rb +192 -0
- data/lib/rugments/lexers/php.rb +162 -0
- data/lib/rugments/lexers/plain_text.rb +23 -0
- data/lib/rugments/lexers/prolog.rb +62 -0
- data/lib/rugments/lexers/properties.rb +53 -0
- data/lib/rugments/lexers/puppet.rb +126 -0
- data/lib/rugments/lexers/python.rb +225 -0
- data/lib/rugments/lexers/qml.rb +70 -0
- data/lib/rugments/lexers/r.rb +55 -0
- data/lib/rugments/lexers/racket.rb +540 -0
- data/lib/rugments/lexers/ruby.rb +413 -0
- data/lib/rugments/lexers/rust.rb +188 -0
- data/lib/rugments/lexers/sass/common.rb +172 -0
- data/lib/rugments/lexers/sass.rb +72 -0
- data/lib/rugments/lexers/scala.rb +140 -0
- data/lib/rugments/lexers/scheme.rb +109 -0
- data/lib/rugments/lexers/scss.rb +32 -0
- data/lib/rugments/lexers/sed.rb +167 -0
- data/lib/rugments/lexers/shell.rb +150 -0
- data/lib/rugments/lexers/slim.rb +222 -0
- data/lib/rugments/lexers/smalltalk.rb +114 -0
- data/lib/rugments/lexers/sml.rb +345 -0
- data/lib/rugments/lexers/sql.rb +138 -0
- data/lib/rugments/lexers/swift.rb +153 -0
- data/lib/rugments/lexers/tcl.rb +189 -0
- data/lib/rugments/lexers/tex.rb +70 -0
- data/lib/rugments/lexers/toml.rb +68 -0
- data/lib/rugments/lexers/vb.rb +162 -0
- data/lib/rugments/lexers/viml/keywords.rb +11 -0
- data/lib/rugments/lexers/viml.rb +99 -0
- data/lib/rugments/lexers/xml.rb +57 -0
- data/lib/rugments/lexers/yaml.rb +362 -0
- data/lib/rugments/plugins/redcarpet.rb +28 -0
- data/lib/rugments/regex_lexer.rb +432 -0
- data/lib/rugments/template_lexer.rb +23 -0
- data/lib/rugments/text_analyzer.rb +46 -0
- data/lib/rugments/theme.rb +202 -0
- data/lib/rugments/themes/base16.rb +128 -0
- data/lib/rugments/themes/colorful.rb +65 -0
- data/lib/rugments/themes/github.rb +69 -0
- data/lib/rugments/themes/monokai.rb +88 -0
- data/lib/rugments/themes/monokai_sublime.rb +89 -0
- data/lib/rugments/themes/thankful_eyes.rb +69 -0
- data/lib/rugments/token.rb +180 -0
- data/lib/rugments/util.rb +99 -0
- data/lib/rugments/version.rb +3 -0
- data/lib/rugments.rb +33 -0
- metadata +149 -0
@@ -0,0 +1,431 @@
|
|
1
|
+
require 'cgi'
require 'pathname'
require 'set'
require 'strscan'
|
4
|
+
|
5
|
+
|
6
|
+
module Rugments
  # @abstract
  # A lexer transforms text into a stream of `[token, chunk]` pairs.
  class Lexer
    include Token::Tokens

    class << self
      # Lexes `stream` with the given options. The lex is delegated to a
      # new instance.
      #
      # @see #lex
      def lex(stream, opts = {}, &b)
        new(opts).lex(stream, &b)
      end

      # Accumulate (and return) the default options shared by every
      # instance of this lexer class.
      def default_options(o = {})
        @default_options ||= {}
        @default_options.merge!(o)
        @default_options
      end

      # Given a string, return the correct lexer class.
      def find(name)
        registry[name.to_s]
      end

      # Find a lexer, with fancy shiny features.
      #
      # * The string you pass can include CGI-style options
      #
      #     Lexer.find_fancy('erb?parent=tex')
      #
      # * You can pass the special name 'guess' so we guess for you,
      #   and you can pass a second argument of the code to guess by
      #
      #     Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
      #
      # This is used in the Redcarpet plugin as well as Rouge's own
      # markdown lexer for highlighting internal code blocks.
      #
      # @return [Lexer, nil] an instantiated lexer, or nil if none was found
      def find_fancy(str, code = nil)
        name, opts = str ? str.split('?', 2) : [nil, '']

        # parse the options hash from a cgi-style string
        opts = CGI.parse(opts || '').map do |k, vals|
          [k.to_sym, vals.empty? ? true : vals[0]]
        end

        opts = Hash[opts]

        lexer_class = case name
                      when 'guess', nil
                        guess(source: code, mimetype: opts[:mimetype])
                      when String
                        find(name)
                      end

        lexer_class && lexer_class.new(opts)
      end

      # Specify or get this lexer's title. Meant to be human-readable.
      def title(t = nil)
        if t.nil?
          t = tag.capitalize
        end
        @title ||= t
      end

      # Specify or get this lexer's description.
      def desc(arg = :absent)
        if arg == :absent
          @desc
        else
          @desc = arg
        end
      end

      # Specify or get the path name containing a small demo for
      # this lexer (can be overridden by {demo}).
      def demo_file(arg = :absent)
        return @demo_file = Pathname.new(arg) unless arg == :absent

        @demo_file = Pathname.new(__FILE__).dirname.join('demos', tag)
      end

      # Specify or get a small demo string for this lexer
      def demo(arg = :absent)
        return @demo = arg unless arg == :absent

        @demo = File.read(demo_file, encoding: 'utf-8')
      end

      # @return a list of all lexers.
      def all
        registry.values.uniq
      end

      # Guess which lexer to use based on a hash of info.
      #
      # This accepts the same arguments as Lexer.guess, but will never throw
      # an error. It will return a (possibly empty) list of potential lexers
      # to use.
      def guesses(info = {})
        mimetype, filename, source = info.values_at(:mimetype, :filename, :source)
        lexers = registry.values.uniq
        total_size = lexers.size

        lexers = filter_by_mimetype(lexers, mimetype) if mimetype
        return lexers if lexers.size == 1

        lexers = filter_by_filename(lexers, filename) if filename
        return lexers if lexers.size == 1

        if source
          # If we're filtering against *all* lexers, we only use confident return
          # values from analyze_text. But if we've filtered down already, we can trust
          # the analysis more.
          source_threshold = lexers.size < total_size ? 0 : 0.5
          return [best_by_source(lexers, source, source_threshold)].compact
        end

        []
      end

      # Raised by {Lexer.guess} when more than one lexer matches with
      # equal confidence.
      class AmbiguousGuess < StandardError
        attr_reader :alternatives

        def initialize(alternatives)
          @alternatives = alternatives
        end

        def message
          "Ambiguous guess: can't decide between #{alternatives.map(&:tag).inspect}"
        end
      end

      # Guess which lexer to use based on a hash of info.
      #
      # @option info :mimetype
      #   A mimetype to guess by
      # @option info :filename
      #   A filename to guess by
      # @option info :source
      #   The source itself, which, if guessing by mimetype or filename
      #   fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
      #   other hints.
      #
      # @raise [AmbiguousGuess] if several lexers match with equal confidence
      # @see Lexer.analyze_text
      # @see Lexer.multi_guess
      def guess(info = {})
        lexers = guesses(info)

        return Lexers::PlainText if lexers.empty?
        return lexers[0] if lexers.size == 1

        fail AmbiguousGuess.new(lexers)
      end

      def guess_by_mimetype(mt)
        guess mimetype: mt
      end

      def guess_by_filename(fname)
        guess filename: fname
      end

      def guess_by_source(source)
        guess source: source
      end

      private

      # Keep only lexers claiming the given mimetype; if none match,
      # return the input list unchanged so later filters can still run.
      def filter_by_mimetype(lexers, mt)
        filtered = lexers.select { |lexer| lexer.mimetypes.include? mt }
        filtered.any? ? filtered : lexers
      end

      # returns a list of lexers that match the given filename with
      # equal specificity (i.e. number of wildcards in the pattern).
      # This helps disambiguate between, e.g. the Nginx lexer, which
      # matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
      # In this case, nginx will win because the pattern has no wildcards,
      # while `*.conf` has one.
      def filter_by_filename(lexers, fname)
        fname = File.basename(fname)

        out = []
        best_seen = nil
        lexers.each do |lexer|
          score = lexer.filenames.map do |pattern|
            if File.fnmatch?(pattern, fname, File::FNM_DOTMATCH)
              # specificity is better the fewer wildcards there are
              pattern.scan(/[*?\[]/).size
            end
          end.compact.min

          next unless score

          if best_seen.nil? || score < best_seen
            best_seen = score
            out = [lexer]
          elsif score == best_seen
            out << lexer
          end
        end

        out.any? ? out : lexers
      end

      # Pick the lexer whose analyze_text score on `source` is highest,
      # provided it exceeds `threshold`. A perfect score (1) wins
      # immediately; returns nil when nothing clears the threshold.
      def best_by_source(lexers, source, threshold = 0)
        source = case source
                 when String
                   source
                 when ->(s) { s.respond_to? :read }
                   source.read
                 else
                   fail 'invalid source'
                 end

        assert_utf8!(source)

        source = TextAnalyzer.new(source)

        best_result = threshold
        best_match = nil
        lexers.each do |lexer|
          result = lexer.analyze_text(source) || 0
          return lexer if result == 1

          if result > best_result
            best_match = lexer
            best_result = result
          end
        end

        best_match
      end

      protected

      # @private
      def register(name, lexer)
        registry[name.to_s] = lexer
      end

      public

      # Used to specify or get the canonical name of this lexer class.
      #
      # @example
      #   class MyLexer < Lexer
      #     tag 'foo'
      #   end
      #
      #   MyLexer.tag # => 'foo'
      #
      #   Lexer.find('foo') # => MyLexer
      def tag(t = nil)
        return @tag if t.nil?

        @tag = t.to_s
        Lexer.register(@tag, self)
      end

      # Used to specify alternate names this lexer class may be found by.
      #
      # @example
      #   class Erb < Lexer
      #     tag 'erb'
      #     aliases 'eruby', 'rhtml'
      #   end
      #
      #   Lexer.find('eruby') # => Erb
      def aliases(*args)
        args.map!(&:to_s)
        args.each { |arg| Lexer.register(arg, self) }
        (@aliases ||= []).concat(args)
      end

      # Specify a list of filename globs associated with this lexer.
      #
      # @example
      #   class Ruby < Lexer
      #     filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
      #   end
      def filenames(*fnames)
        (@filenames ||= []).concat(fnames)
      end

      # Specify a list of mimetypes associated with this lexer.
      #
      # @example
      #   class Html < Lexer
      #     mimetypes 'text/html', 'application/xhtml+xml'
      #   end
      def mimetypes(*mts)
        (@mimetypes ||= []).concat(mts)
      end

      # @private
      def assert_utf8!(str)
        return if %w(US-ASCII UTF-8 ASCII-8BIT).include? str.encoding.name
        fail EncodingError.new(
          "Bad encoding: #{str.encoding.names.join(',')}. " \
          'Please convert your string to UTF-8.'
        )
      end

      private

      def registry
        @registry ||= {}
      end
    end

    # -*- instance methods -*- #

    # Create a new lexer with the given options. Individual lexers may
    # specify extra options. The only current globally accepted option
    # is `:debug`.
    #
    # @option opts :debug
    #   Prints debug information to stdout. The particular info depends
    #   on the lexer in question. In regex lexers, this will log the
    #   state stack at the beginning of each step, along with each regex
    #   tried and each stream consumed. Try it, it's pretty useful.
    def initialize(opts = {})
      options(opts)

      @debug = option(:debug)
    end

    # get and/or specify the options for this lexer.
    def options(o = {})
      (@options ||= {}).merge!(o)

      self.class.default_options.merge(@options)
    end

    # get or specify one option for this lexer
    def option(k, v = :absent)
      if v == :absent
        options[k]
      else
        options(k => v)
      end
    end

    # @abstract
    #
    # Called after each lex is finished. The default implementation
    # is a noop.
    def reset!
    end

    # Given a string, yield [token, chunk] pairs. If no block is given,
    # an enumerator is returned.
    #
    # @option opts :continue
    #   Continue the lex from the previous state (i.e. don't call #reset!)
    def lex(string, opts = {}, &b)
      # FIX: forward opts into the enumerator; previously they were
      # silently dropped (e.g. `continue: true`) when no block was given.
      return enum_for(:lex, string, opts) unless block_given?

      Lexer.assert_utf8!(string)

      reset! unless opts[:continue]

      # consolidate consecutive tokens of the same type
      last_token = nil
      last_val = nil
      stream_tokens(string) do |tok, val|
        next if val.empty?

        if tok == last_token
          last_val << val
          next
        end

        b.call(last_token, last_val) if last_token
        last_token = tok
        last_val = val
      end

      b.call(last_token, last_val) if last_token
    end

    # delegated to {Lexer.tag}
    def tag
      self.class.tag
    end

    # @abstract
    #
    # Yield `[token, chunk]` pairs, given a prepared input stream. This
    # must be implemented.
    #
    # @param [StringScanner] stream
    #   the stream
    def stream_tokens(_stream, &_b)
      fail 'abstract'
    end

    # @abstract
    #
    # Return a number between 0 and 1 indicating the likelihood that
    # the text given should be lexed with this lexer. The default
    # implementation returns 0. Values under 0.5 will only be used
    # to disambiguate filename or mimetype matches.
    #
    # @param [TextAnalyzer] text
    #   the text to be analyzed, with a couple of handy methods on it,
    #   like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
    def self.analyze_text(_text)
      0
    end
  end

  module Lexers
    # Load the lexer source file at `relpath` (relative to the bundled
    # `lexers/` directory) unless `const_name` is already defined.
    def self.load_const(const_name, relpath)
      return if const_defined?(const_name)

      root = Pathname.new(__FILE__).dirname.join('lexers')
      load root.join(relpath)
    end
  end
end