rugments 1.0.0.beta1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (103) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +52 -0
  3. data/README.md +195 -0
  4. data/bin/rugmentize +6 -0
  5. data/lib/rugments/cli.rb +357 -0
  6. data/lib/rugments/formatter.rb +29 -0
  7. data/lib/rugments/formatters/html.rb +142 -0
  8. data/lib/rugments/formatters/null.rb +17 -0
  9. data/lib/rugments/formatters/terminal256.rb +174 -0
  10. data/lib/rugments/lexer.rb +431 -0
  11. data/lib/rugments/lexers/apache/keywords.yml +453 -0
  12. data/lib/rugments/lexers/apache.rb +67 -0
  13. data/lib/rugments/lexers/apple_script.rb +366 -0
  14. data/lib/rugments/lexers/c.rb +210 -0
  15. data/lib/rugments/lexers/clojure.rb +109 -0
  16. data/lib/rugments/lexers/coffeescript.rb +172 -0
  17. data/lib/rugments/lexers/common_lisp.rb +343 -0
  18. data/lib/rugments/lexers/conf.rb +22 -0
  19. data/lib/rugments/lexers/cpp.rb +63 -0
  20. data/lib/rugments/lexers/csharp.rb +85 -0
  21. data/lib/rugments/lexers/css.rb +269 -0
  22. data/lib/rugments/lexers/dart.rb +102 -0
  23. data/lib/rugments/lexers/diff.rb +39 -0
  24. data/lib/rugments/lexers/elixir.rb +105 -0
  25. data/lib/rugments/lexers/erb.rb +54 -0
  26. data/lib/rugments/lexers/erlang.rb +116 -0
  27. data/lib/rugments/lexers/factor.rb +300 -0
  28. data/lib/rugments/lexers/gherkin/keywords.rb +13 -0
  29. data/lib/rugments/lexers/gherkin.rb +135 -0
  30. data/lib/rugments/lexers/go.rb +176 -0
  31. data/lib/rugments/lexers/groovy.rb +102 -0
  32. data/lib/rugments/lexers/haml.rb +226 -0
  33. data/lib/rugments/lexers/handlebars.rb +77 -0
  34. data/lib/rugments/lexers/haskell.rb +181 -0
  35. data/lib/rugments/lexers/html.rb +92 -0
  36. data/lib/rugments/lexers/http.rb +78 -0
  37. data/lib/rugments/lexers/ini.rb +55 -0
  38. data/lib/rugments/lexers/io.rb +66 -0
  39. data/lib/rugments/lexers/java.rb +74 -0
  40. data/lib/rugments/lexers/javascript.rb +258 -0
  41. data/lib/rugments/lexers/literate_coffeescript.rb +31 -0
  42. data/lib/rugments/lexers/literate_haskell.rb +34 -0
  43. data/lib/rugments/lexers/llvm.rb +82 -0
  44. data/lib/rugments/lexers/lua/builtins.rb +21 -0
  45. data/lib/rugments/lexers/lua.rb +120 -0
  46. data/lib/rugments/lexers/make.rb +114 -0
  47. data/lib/rugments/lexers/markdown.rb +151 -0
  48. data/lib/rugments/lexers/matlab/builtins.rb +10 -0
  49. data/lib/rugments/lexers/matlab.rb +70 -0
  50. data/lib/rugments/lexers/moonscript.rb +108 -0
  51. data/lib/rugments/lexers/nginx.rb +69 -0
  52. data/lib/rugments/lexers/nim.rb +149 -0
  53. data/lib/rugments/lexers/objective_c.rb +188 -0
  54. data/lib/rugments/lexers/ocaml.rb +109 -0
  55. data/lib/rugments/lexers/perl.rb +195 -0
  56. data/lib/rugments/lexers/php/builtins.rb +192 -0
  57. data/lib/rugments/lexers/php.rb +162 -0
  58. data/lib/rugments/lexers/plain_text.rb +23 -0
  59. data/lib/rugments/lexers/prolog.rb +62 -0
  60. data/lib/rugments/lexers/properties.rb +53 -0
  61. data/lib/rugments/lexers/puppet.rb +126 -0
  62. data/lib/rugments/lexers/python.rb +225 -0
  63. data/lib/rugments/lexers/qml.rb +70 -0
  64. data/lib/rugments/lexers/r.rb +55 -0
  65. data/lib/rugments/lexers/racket.rb +540 -0
  66. data/lib/rugments/lexers/ruby.rb +413 -0
  67. data/lib/rugments/lexers/rust.rb +188 -0
  68. data/lib/rugments/lexers/sass/common.rb +172 -0
  69. data/lib/rugments/lexers/sass.rb +72 -0
  70. data/lib/rugments/lexers/scala.rb +140 -0
  71. data/lib/rugments/lexers/scheme.rb +109 -0
  72. data/lib/rugments/lexers/scss.rb +32 -0
  73. data/lib/rugments/lexers/sed.rb +167 -0
  74. data/lib/rugments/lexers/shell.rb +150 -0
  75. data/lib/rugments/lexers/slim.rb +222 -0
  76. data/lib/rugments/lexers/smalltalk.rb +114 -0
  77. data/lib/rugments/lexers/sml.rb +345 -0
  78. data/lib/rugments/lexers/sql.rb +138 -0
  79. data/lib/rugments/lexers/swift.rb +153 -0
  80. data/lib/rugments/lexers/tcl.rb +189 -0
  81. data/lib/rugments/lexers/tex.rb +70 -0
  82. data/lib/rugments/lexers/toml.rb +68 -0
  83. data/lib/rugments/lexers/vb.rb +162 -0
  84. data/lib/rugments/lexers/viml/keywords.rb +11 -0
  85. data/lib/rugments/lexers/viml.rb +99 -0
  86. data/lib/rugments/lexers/xml.rb +57 -0
  87. data/lib/rugments/lexers/yaml.rb +362 -0
  88. data/lib/rugments/plugins/redcarpet.rb +28 -0
  89. data/lib/rugments/regex_lexer.rb +432 -0
  90. data/lib/rugments/template_lexer.rb +23 -0
  91. data/lib/rugments/text_analyzer.rb +46 -0
  92. data/lib/rugments/theme.rb +202 -0
  93. data/lib/rugments/themes/base16.rb +128 -0
  94. data/lib/rugments/themes/colorful.rb +65 -0
  95. data/lib/rugments/themes/github.rb +69 -0
  96. data/lib/rugments/themes/monokai.rb +88 -0
  97. data/lib/rugments/themes/monokai_sublime.rb +89 -0
  98. data/lib/rugments/themes/thankful_eyes.rb +69 -0
  99. data/lib/rugments/token.rb +180 -0
  100. data/lib/rugments/util.rb +99 -0
  101. data/lib/rugments/version.rb +3 -0
  102. data/lib/rugments.rb +33 -0
  103. metadata +149 -0
@@ -0,0 +1,431 @@
1
+ require 'strscan'
2
+ require 'cgi'
3
+ require 'set'
4
+
5
+
6
+ module Rugments
7
+ # @abstract
8
+ # A lexer transforms text into a stream of `[token, chunk]` pairs.
9
+ class Lexer
10
+ include Token::Tokens
11
+
12
+ class << self
13
+ # Lexes `stream` with the given options. The lex is delegated to a
14
+ # new instance.
15
+ #
16
+ # @see #lex
17
def lex(stream, opts = {}, &b)
  # Build a throwaway lexer configured with `opts`, then hand it both the
  # input and the caller's block (if any).
  lexer = new(opts)
  lexer.lex(stream, &b)
end
20
+
21
# Get this lexer class's default options, first merging `o` into them.
# The same hash object is returned on every call.
def default_options(o = {})
  (@default_options ||= {}).merge!(o)
end
26
+
27
+ # Given a string, return the correct lexer class.
28
def find(name)
  # Registry keys are always strings, so symbols are normalized first.
  key = name.to_s
  registry[key]
end
31
+
32
+ # Find a lexer, with fancy shiny features.
33
+ #
34
+ # * The string you pass can include CGI-style options
35
+ #
36
+ # Lexer.find_fancy('erb?parent=tex')
37
+ #
38
+ # * You can pass the special name 'guess' so we guess for you,
39
+ # and you can pass a second argument of the code to guess by
40
+ #
41
+ # Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
42
+ #
43
+ # This is used in the Redcarpet plugin as well as Rouge's own
44
+ # markdown lexer for highlighting internal code blocks.
45
+ #
46
+ def find_fancy(str, code = nil)
47
+ name, opts = str ? str.split('?', 2) : [nil, '']
48
+
49
+ # parse the options hash from a cgi-style string
50
+ opts = CGI.parse(opts || '').map do |k, vals|
51
+ [k.to_sym, vals.empty? ? true : vals[0]]
52
+ end
53
+
54
+ opts = Hash[opts]
55
+
56
+ lexer_class = case name
57
+ when 'guess', nil
58
+ guess(source: code, mimetype: opts[:mimetype])
59
+ when String
60
+ find(name)
61
+ end
62
+
63
+ lexer_class && lexer_class.new(opts)
64
+ end
65
+
66
+ # Specify or get this lexer's title. Meant to be human-readable.
67
# With an argument, stores it as the title and returns it. Without one,
# returns the stored title, falling back to the capitalized tag.
#
# @param t [String, nil] the title to assign; omit (or pass nil) to read
# @return [String, nil] the title, or nil when neither a title nor a tag
#   has been set
def title(t = nil)
  # An explicit title always wins, even if a default was cached by an
  # earlier read.
  return @title = t unless t.nil?
  return @title if @title

  # No tag yet: report "no title" instead of calling capitalize on nil.
  default = tag
  @title = default.capitalize if default
end
73
+
74
+ # Specify or get this lexer's description.
75
def desc(arg = :absent)
  # The :absent sentinel lets callers store nil as a description.
  return @desc if arg == :absent

  @desc = arg
end
82
+
83
+ # Specify or get the path name containing a small demo for
84
+ # this lexer (can be overridden by {demo}).
85
# With an argument, stores it (wrapped in a Pathname) as the demo path.
# Without one, returns the stored path, defaulting to `demos/<tag>` next
# to this source file.
#
# @param arg [String, Pathname] the demo path to assign; omit to read
# @return [Pathname] the demo file path
def demo_file(arg = :absent)
  return @demo_file = Pathname.new(arg) unless arg == :absent

  # Only fall back to the default when nothing was set; plain assignment
  # here would throw away a path supplied through the setter form.
  @demo_file ||= Pathname.new(__FILE__).dirname.join('demos', tag)
end
90
+
91
+ # Specify or get a small demo string for this lexer
92
# With an argument, stores it as the demo string. Without one, returns
# the stored demo, reading it from {demo_file} (as UTF-8) the first time
# when none was set.
#
# @param arg [String] the demo source to assign; omit to read
# @return [String] the demo source
def demo(arg = :absent)
  return @demo = arg unless arg == :absent

  # Keep an explicitly assigned demo, and read the file at most once.
  @demo ||= File.read(demo_file, encoding: 'utf-8')
end
97
+
98
+ # @return a list of all lexers.
99
def all
  # A lexer is registered once per tag/alias, so collapse the duplicates.
  registered = registry.values
  registered.uniq
end
102
+
103
+ # Guess which lexer to use based on a hash of info.
104
+ #
105
+ # This accepts the same arguments as Lexer.guess, but will never throw
106
+ # an error. It will return a (possibly empty) list of potential lexers
107
+ # to use.
108
def guesses(info = {})
  mimetype, filename, source = info.values_at(:mimetype, :filename, :source)
  candidates = registry.values.uniq
  total_size = candidates.size

  # Narrow by mimetype, then by filename, stopping as soon as exactly one
  # candidate is left.
  candidates = filter_by_mimetype(candidates, mimetype) if mimetype
  return candidates if candidates.size == 1

  candidates = filter_by_filename(candidates, filename) if filename
  return candidates if candidates.size == 1

  # Without source text there is nothing left to disambiguate with.
  return [] unless source

  # When still looking at *every* lexer, only a confident analyze_text
  # score counts; once the set has been narrowed any score is trusted.
  threshold = candidates.size < total_size ? 0 : 0.5
  [best_by_source(candidates, source, threshold)].compact
end
129
+
130
# Raised when several lexers remain equally plausible for one guess.
class AmbiguousGuess < StandardError
  # The lexers that could not be told apart.
  attr_reader :alternatives

  def initialize(alternatives)
    @alternatives = alternatives
  end

  # Lists the tags of the competing lexers.
  def message
    tags = alternatives.map { |lexer| lexer.tag }
    "Ambiguous guess: can't decide between #{tags.inspect}"
  end
end
141
+
142
+ # Guess which lexer to use based on a hash of info.
143
+ #
144
+ # @option info :mimetype
145
+ # A mimetype to guess by
146
+ # @option info :filename
147
+ # A filename to guess by
148
+ # @option info :source
149
+ # The source itself, which, if guessing by mimetype or filename
150
+ # fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
151
+ # other hints.
152
+ #
153
+ # @see Lexer.analyze_text
154
+ # @see Lexer.multi_guess
155
def guess(info = {})
  candidates = guesses(info)

  # No candidate falls back to plain text, one is the answer, and more
  # than one is an error the caller has to resolve.
  case candidates.size
  when 0 then Lexers::PlainText
  when 1 then candidates[0]
  else fail AmbiguousGuess.new(candidates)
  end
end
163
+
164
# Shorthand for {guess} with only a mimetype to go on.
def guess_by_mimetype(mt)
  guess(mimetype: mt)
end
167
+
168
# Shorthand for {guess} with only a filename to go on.
def guess_by_filename(fname)
  guess(filename: fname)
end
171
+
172
# Shorthand for {guess} with only the source text to go on.
def guess_by_source(source)
  guess(source: source)
end
175
+
176
+ private
177
+
178
# Keep the lexers that declare mimetype `mt`. When none does, the input
# list is returned unchanged so later filters still have candidates.
def filter_by_mimetype(lexers, mt)
  matching = lexers.select { |candidate| candidate.mimetypes.include?(mt) }
  return lexers unless matching.any?

  matching
end
182
+
183
+ # returns a list of lexers that match the given filename with
184
+ # equal specificity (i.e. number of wildcards in the pattern).
185
+ # This helps disambiguate between, e.g. the Nginx lexer, which
186
+ # matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
187
+ # In this case, nginx will win because the pattern has no wildcards,
188
+ # while `*.conf` has one.
189
def filter_by_filename(lexers, fname)
  basename = File.basename(fname)

  # Pair each matching lexer with the wildcard count of its most specific
  # matching pattern (fewer wildcards = more specific).
  scored = lexers.each_with_object([]) do |lexer, acc|
    counts = lexer.filenames
                  .select { |glob| File.fnmatch?(glob, basename, File::FNM_DOTMATCH) }
                  .map { |glob| glob.scan(/[*?\[]/).size }
    acc << [lexer, counts.min] unless counts.empty?
  end

  # Nothing matched: leave the candidate list untouched.
  return lexers if scored.empty?

  # Keep, in their original order, every lexer tied for the best score.
  best = scored.map(&:last).min
  scored.select { |_, score| score == best }.map(&:first)
end
214
+
215
# Pick the lexer whose analyze_text score for `source` is highest and
# strictly above `threshold`; a score of exactly 1 wins immediately.
# `source` may be a String or anything responding to `read`.
# Returns nil when no lexer beats the threshold.
def best_by_source(lexers, source, threshold = 0)
  text =
    if source.is_a?(String)
      source
    elsif source.respond_to?(:read)
      source.read
    else
      fail 'invalid source'
    end

  assert_utf8!(text)
  analyzer = TextAnalyzer.new(text)

  winner = nil
  high_score = threshold
  lexers.each do |candidate|
    # a lexer may return nil/false to mean "no opinion"
    score = candidate.analyze_text(analyzer) || 0
    return candidate if score == 1
    next unless score > high_score

    winner = candidate
    high_score = score
  end

  winner
end
243
+
244
+ protected
245
+
246
+ # @private
247
def register(name, lexer)
  # Names are stored as strings so lookups by symbol or string agree.
  registry.store(name.to_s, lexer)
end
250
+
251
+ public
252
+
253
+ # Used to specify or get the canonical name of this lexer class.
254
+ #
255
+ # @example
256
+ # class MyLexer < Lexer
257
+ # tag 'foo'
258
+ # end
259
+ #
260
+ # MyLexer.tag # => 'foo'
261
+ #
262
+ # Lexer.find('foo') # => MyLexer
263
def tag(t = nil)
  if t.nil?
    # reader form
    @tag
  else
    # writer form: remember the tag and make the class findable by it
    @tag = t.to_s
    Lexer.register(@tag, self)
  end
end
269
+
270
+ # Used to specify alternate names this lexer class may be found by.
271
+ #
272
+ # @example
273
+ # class Erb < Lexer
274
+ # tag 'erb'
275
+ # aliases 'eruby', 'rhtml'
276
+ # end
277
+ #
278
+ # Lexer.find('eruby') # => Erb
279
def aliases(*args)
  names = args.map(&:to_s)

  # every alias resolves to this lexer class
  names.each { |name| Lexer.register(name, self) }

  # With no arguments this simply returns the aliases declared so far.
  @aliases ||= []
  @aliases.concat(names)
end
284
+
285
+ # Specify a list of filename globs associated with this lexer.
286
+ #
287
+ # @example
288
+ # class Ruby < Lexer
289
+ # filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
290
+ # end
291
def filenames(*fnames)
  # Appends to the globs declared so far; with no arguments it is a
  # plain reader.
  @filenames ||= []
  @filenames.concat(fnames)
end
294
+
295
+ # Specify a list of mimetypes associated with this lexer.
296
+ #
297
+ # @example
298
+ # class Html < Lexer
299
+ # mimetypes 'text/html', 'application/xhtml+xml'
300
+ # end
301
def mimetypes(*mts)
  # Appends to the mimetypes declared so far; with no arguments it is a
  # plain reader.
  @mimetypes ||= []
  @mimetypes.concat(mts)
end
304
+
305
+ # @private
306
def assert_utf8!(str)
  encoding = str.encoding
  accepted = %w(US-ASCII UTF-8 ASCII-8BIT)

  # Any other encoding is rejected, naming every alias of the offender.
  unless accepted.include?(encoding.name)
    fail EncodingError, "Bad encoding: #{encoding.names.join(',')}. " \
                        'Please convert your string to UTF-8.'
  end
end
313
+
314
+ private
315
+
316
# The name => lexer table, created empty on first use.
def registry
  return @registry if @registry

  @registry = {}
end
319
+ end
320
+
321
+ # -*- instance methods -*- #
322
+
323
+ # Create a new lexer with the given options. Individual lexers may
324
+ # specify extra options. The only current globally accepted option
325
+ # is `:debug`.
326
+ #
327
+ # @option opts :debug
328
+ # Prints debug information to stdout. The particular info depends
329
+ # on the lexer in question. In regex lexers, this will log the
330
+ # state stack at the beginning of each step, along with each regex
331
+ # tried and each stream consumed. Try it, it's pretty useful.
332
def initialize(opts = {})
  # Store the instance options first so the lookup below can see them.
  options(opts)

  debug_flag = option(:debug)
  @debug = debug_flag
end
337
+
338
+ # get and/or specify the options for this lexer.
339
def options(o = {})
  # Instance options accumulate across calls ...
  @options ||= {}
  @options.merge!(o)

  # ... and override the class-level defaults in the hash handed back.
  defaults = self.class.default_options
  defaults.merge(@options)
end
344
+
345
+ # get or specify one option for this lexer
346
def option(k, v = :absent)
  # reader form: look the key up in the effective options
  return options[k] if v == :absent

  # writer form: merge the single pair in (returns the merged options)
  options(k => v)
end
353
+
354
+ # @abstract
355
+ #
356
+ # Called after each lex is finished. The default implementation
357
+ # is a noop.
358
def reset!
  # Intentionally does nothing here.
  nil
end
360
+
361
+ # Given a string, yield [token, chunk] pairs. If no block is given,
362
+ # an enumerator is returned.
363
+ #
364
+ # @option opts :continue
365
+ # Continue the lex from the previous state (i.e. don't call #reset!)
366
# Tokenizes `string` via #stream_tokens, skips empty chunks, and merges
# consecutive chunks of the same token type into a single pair.
#
# @param string [String] the text to lex; its encoding is checked with
#   Lexer.assert_utf8!
# @param opts [Hash] `:continue` (truthy) skips the #reset! call
# @yield [token, chunk] once per merged run of same-typed chunks
# @return [Enumerator] when no block is given
# @raise [EncodingError] when Lexer.assert_utf8! rejects `string`
def lex(string, opts = {}, &b)
  # Forward opts too: otherwise an enumerator created with
  # `continue: true` would reset the lexer once it is iterated.
  return enum_for(:lex, string, opts) unless block_given?

  Lexer.assert_utf8!(string)

  reset! unless opts[:continue]

  # consolidate consecutive tokens of the same type
  last_token = nil
  last_val = nil
  stream_tokens(string) do |tok, val|
    next if val.empty?

    if tok == last_token
      last_val << val
      next
    end

    b.call(last_token, last_val) if last_token
    last_token = tok
    # Accumulate into a private copy: appending to the yielded chunk
    # would mutate a string owned by the tokenizer, and would raise if
    # that chunk is frozen.
    last_val = val.dup
  end

  # flush the final pending run
  b.call(last_token, last_val) if last_token
end
391
+
392
+ # delegated to {Lexer.tag}
393
def tag
  # the tag lives on the class; instances simply look it up there
  lexer_class = self.class
  lexer_class.tag
end
396
+
397
+ # @abstract
398
+ #
399
+ # Yield `[token, chunk]` pairs, given a prepared input stream. This
400
+ # must be implemented.
401
+ #
402
+ # @param [StringScanner] stream
403
+ # the stream
404
def stream_tokens(_stream, &_b)
  # This base implementation only raises.
  raise RuntimeError, 'abstract'
end
407
+
408
+ # @abstract
409
+ #
410
+ # Return a number between 0 and 1 indicating the likelihood that
411
+ # the text given should be lexed with this lexer. The default
412
+ # implementation returns 0. Values under 0.5 will only be used
413
+ # to disambiguate filename or mimetype matches.
414
+ #
415
+ # @param [TextAnalyzer] text
416
+ # the text to be analyzed, with a couple of handy methods on it,
417
+ # like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
418
def self.analyze_text(_text)
  # The argument is ignored; this implementation always scores 0.
  0
end
421
+ end
422
+
423
module Lexers
  # Load `relpath` (relative to the `lexers` directory next to this
  # file) unless the constant `const_name` is already defined in this
  # module.
  def self.load_const(const_name, relpath)
    unless const_defined?(const_name)
      lexers_dir = Pathname.new(__FILE__).dirname.join('lexers')
      load lexers_dir.join(relpath)
    end
  end
end
431
+ end