github-linguist 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/linguist +43 -0
- data/lib/linguist.rb +5 -0
- data/lib/linguist/blob_helper.rb +713 -0
- data/lib/linguist/file_blob.rb +56 -0
- data/lib/linguist/language.rb +474 -0
- data/lib/linguist/languages.yml +1379 -0
- data/lib/linguist/mime.rb +91 -0
- data/lib/linguist/mimes.yml +62 -0
- data/lib/linguist/pathname.rb +92 -0
- data/lib/linguist/popular.yml +29 -0
- data/lib/linguist/repository.rb +95 -0
- data/lib/linguist/vendor.yml +96 -0
- metadata +152 -0
data/bin/linguist
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line front end for Linguist.
#
# Usage: linguist <path>
#
# Given a directory, prints the language breakdown of the repository.
# Given a file, prints what Linguist detects about that single blob.
# With no argument the current working directory is used.

require 'linguist/file_blob'
require 'linguist/repository'

path = ARGV[0] || Dir.pwd

if File.directory?(path)
  repo = Linguist::Repository.from_directory(path)

  # Largest language first, each shown as a rounded share of the total size.
  repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
    percentage = ((size / repo.size.to_f) * 100).round
    puts "%-4s %s" % ["#{percentage}%", language]
  end
elsif File.file?(path)
  blob = Linguist::FileBlob.new(path, Dir.pwd)

  type = if blob.text?
    'Text'
  elsif blob.image?
    'Image'
  else
    'Binary'
  end

  puts "#{blob.name}: #{blob.loc} lines (#{blob.sloc} sloc)"
  puts "  type:      #{type}"
  puts "  extension: #{blob.pathname.extname}"
  puts "  mime type: #{blob.mime_type}"
  puts "  language:  #{blob.language}"

  if blob.large?
    puts "  blob is too large to be shown"
  end

  if blob.generated?
    puts "  appears to be generated source code"
  end

  if blob.vendored?
    puts "  appears to be a vendored file"
  end
else
  # Neither a directory nor a regular file: nothing to analyse.
  abort "usage: linguist <path>"
end
|
data/lib/linguist/blob_helper.rb
ADDED
@@ -0,0 +1,713 @@
|
|
1
|
+
require 'linguist/language'
|
2
|
+
require 'linguist/mime'
|
3
|
+
require 'linguist/pathname'
|
4
|
+
|
5
|
+
require 'charlock_holmes'
|
6
|
+
require 'escape_utils'
|
7
|
+
require 'pygments'
|
8
|
+
require 'yaml'
|
9
|
+
|
10
|
+
module Linguist
|
11
|
+
# BlobHelper is a mixin for Blobish classes that respond to "name",
|
12
|
+
# "data" and "size" such as Grit::Blob.
|
13
|
+
module BlobHelper
|
14
|
+
# Internal: Get a Pathname wrapper for Blob#name
|
15
|
+
#
|
16
|
+
# Returns a Pathname.
|
17
|
+
def pathname
  # A blob without a name still gets a (blank) path wrapper.
  path = name || ""
  Pathname.new(path)
end
|
20
|
+
|
21
|
+
# Public: Get the extname of the path
|
22
|
+
#
|
23
|
+
# Examples
|
24
|
+
#
|
25
|
+
# blob(name='foo.rb').extname
|
26
|
+
# # => '.rb'
|
27
|
+
#
|
28
|
+
# Returns a String
|
29
|
+
def extname
  # Delegates to the Pathname wrapper built from the blob name.
  path = pathname
  path.extname
end
|
32
|
+
|
33
|
+
# Public: Get the actual blob mime type
|
34
|
+
#
|
35
|
+
# Examples
|
36
|
+
#
|
37
|
+
# # => 'text/plain'
|
38
|
+
# # => 'text/html'
|
39
|
+
#
|
40
|
+
# Returns a mime type String.
|
41
|
+
def mime_type
  # Looked up once from the path, then cached on the blob.
  return @mime_type if @mime_type

  @mime_type = pathname.mime_type
end
|
44
|
+
|
45
|
+
# Public: Get the Content-Type header value
|
46
|
+
#
|
47
|
+
# This value is used when serving raw blobs.
|
48
|
+
#
|
49
|
+
# Examples
|
50
|
+
#
|
51
|
+
# # => 'text/plain; charset=utf-8'
|
52
|
+
# # => 'application/octet-stream'
|
53
|
+
#
|
54
|
+
# Returns a content type String.
|
55
|
+
def content_type
  return @content_type if @content_type

  @content_type =
    if binary_mime_type? || binary?
      # Binary content is served under its real mime type.
      mime_type
    elsif (charset = encoding)
      "text/plain; charset=#{charset.downcase}"
    else
      "text/plain"
    end
end
|
59
|
+
|
60
|
+
# Public: Get the Content-Disposition header value
|
61
|
+
#
|
62
|
+
# This value is used when serving raw blobs.
|
63
|
+
#
|
64
|
+
# # => "attachment; filename=file.tar"
|
65
|
+
# # => "inline"
|
66
|
+
#
|
67
|
+
# Returns a content disposition String.
|
68
|
+
def disposition
  # Text and images are shown inline.
  return 'inline' if text? || image?

  # Everything else is offered as a download, named when a name is known.
  return "attachment" if name.nil?

  "attachment; filename=#{EscapeUtils.escape_url(pathname.basename)}"
end
|
77
|
+
|
78
|
+
def encoding
  # Name of the detected encoding, or nil when detection produced nothing.
  detected = detect_encoding
  detected[:encoding] if detected
end
|
83
|
+
|
84
|
+
# Try to guess the encoding
|
85
|
+
#
|
86
|
+
# Returns: a Hash, with :encoding, :confidence, :type
|
87
|
+
# this will return nil if an error occurred during detection or
|
88
|
+
# no valid encoding could be found
|
89
|
+
def detect_encoding
  # Nothing to inspect when the blob data was never loaded.
  return unless data

  @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data)
end
|
92
|
+
|
93
|
+
# Public: Is the blob binary according to its mime type
|
94
|
+
#
|
95
|
+
# Return true or false
|
96
|
+
def binary_mime_type?
  # Look the extension up directly. The result is held in `type` so the
  # blob's own #mime_type method is not shadowed by a local.
  type = Mime.lookup_mime_type_for(pathname.extname)

  # Unknown extensions are reported as false (not nil) so the predicate
  # always answers with a strict boolean.
  type ? !!type.binary? : false
end
|
101
|
+
|
102
|
+
# Public: Is the blob binary?
|
103
|
+
#
|
104
|
+
# Return true or false
|
105
|
+
def binary?
  # Large blobs aren't even loaded into memory
  return true if data.nil?

  # Treat blank files as text
  return false if data == ""

  # Charlock doesn't know what to think
  return true if encoding.nil?

  # Otherwise go with what Charlock reported
  detect_encoding[:type] == :binary
end
|
123
|
+
|
124
|
+
# Public: Is the blob text?
|
125
|
+
#
|
126
|
+
# Return true or false
|
127
|
+
def text?
  # Text is simply "not binary".
  binary? ? false : true
end
|
130
|
+
|
131
|
+
# Public: Is the blob a supported image format?
|
132
|
+
#
|
133
|
+
# Return true or false
|
134
|
+
def image?
  # Extension match is exact and case-sensitive.
  supported = %w[.png .jpg .jpeg .gif]
  supported.include?(extname)
end
|
137
|
+
|
138
|
+
# Public: Is the blob a possible drupal php file?
|
139
|
+
#
|
140
|
+
# Return true or false
|
141
|
+
def drupal_extname?
  # Extensions Drupal uses for files that may contain PHP.
  candidates = %w[.module .install .test .inc]
  candidates.include?(extname)
end
|
144
|
+
|
145
|
+
# Public: Is the blob likely to have a shebang?
|
146
|
+
#
|
147
|
+
# Return true or false
|
148
|
+
def shebang_extname?
  # Only extension-less files are candidates.
  return false unless extname.empty?

  # Without a mode we cannot tell; answer false (not nil) so the
  # predicate always returns a strict boolean.
  permissions = mode
  return false unless permissions

  # mode is an octal string; require both bits of mask 05 in the last digit.
  (permissions.to_i(8) & 05) == 05
end
|
153
|
+
|
154
|
+
MEGABYTE = 1024 * 1024
|
155
|
+
|
156
|
+
# Public: Is the blob too big to load?
|
157
|
+
#
|
158
|
+
# Return true or false
|
159
|
+
def large?
  # size is coerced with to_i, so nil and numeric strings are accepted.
  MEGABYTE < size.to_i
end
|
162
|
+
|
163
|
+
# Public: Is the blob safe to colorize?
|
164
|
+
#
|
165
|
+
# We use Pygments.rb for syntax highlighting blobs, which
|
166
|
+
# has some quirks and also is essentially 'un-killable' via
|
167
|
+
# normal timeout. To work around this we try to
# carefully avoid handing Pygments.rb anything it can't handle.
|
169
|
+
#
|
170
|
+
# Return true or false
|
171
|
+
def safe_to_colorize?
  # Each guard rules out input we do not want to hand to the highlighter.
  return false unless text?
  return false if large?

  !high_ratio_of_long_lines?
end
|
174
|
+
|
175
|
+
# Internal: Does the blob have a ratio of long lines?
|
176
|
+
#
|
177
|
+
# These types of files are usually going to make Pygments.rb
|
178
|
+
# angry if we try to colorize them.
|
179
|
+
#
|
180
|
+
# Return true or false
|
181
|
+
def high_ratio_of_long_lines?
  line_count = loc
  # No lines means no ratio to compute (and avoids dividing by zero).
  return false if line_count == 0

  # Average bytes per line above 5000 counts as "long".
  size / line_count > 5000
end
|
185
|
+
|
186
|
+
# Public: Is the blob viewable?
|
187
|
+
#
|
188
|
+
# Non-viewable blobs will just show a "View Raw" link
|
189
|
+
#
|
190
|
+
# Return true or false
|
191
|
+
def viewable?
  # Size is checked first, before asking whether the blob is text.
  return false if large?

  text?
end
|
194
|
+
|
195
|
+
# Join every pattern listed in vendor.yml (next to this file) into a single
# alternation used by #vendored?.
vendor_config = File.expand_path("../vendor.yml", __FILE__)
VendoredRegexp = Regexp.new(YAML.load_file(vendor_config).join('|'))
|
197
|
+
|
198
|
+
# Public: Is the blob in a vendored directory?
|
199
|
+
#
|
200
|
+
# Vendored files are ignored by language statistics.
|
201
|
+
#
|
202
|
+
# See "vendor.yml" for a list of vendored conventions that match
|
203
|
+
# this pattern.
|
204
|
+
#
|
205
|
+
# Return true or false
|
206
|
+
def vendored?
  # Coerce the match position (or nil) into a strict boolean.
  !!(name =~ VendoredRegexp)
end
|
209
|
+
|
210
|
+
# Public: Get each line of data
|
211
|
+
#
|
212
|
+
# Requires Blob#data
|
213
|
+
#
|
214
|
+
# Returns an Array of lines
|
215
|
+
def lines
  return @lines if @lines

  @lines =
    if viewable? && data
      # A limit of -1 keeps trailing empty strings, so a final newline
      # produces a last "" element.
      data.split("\n", -1)
    else
      []
    end
end
|
218
|
+
|
219
|
+
# Public: Get number of lines of code
|
220
|
+
#
|
221
|
+
# Requires Blob#data
|
222
|
+
#
|
223
|
+
# Returns Integer
|
224
|
+
def loc
  # Counts every element of #lines, blank ones included.
  lines.length
end
|
227
|
+
|
228
|
+
# Public: Get number of source lines of code
|
229
|
+
#
|
230
|
+
# Requires Blob#data
|
231
|
+
#
|
232
|
+
# Returns Integer
|
233
|
+
def sloc
  # A source line is any line with at least one non-whitespace character.
  lines.count { |line| line =~ /\S/ }
end
|
236
|
+
|
237
|
+
# Internal: Compute average line length.
|
238
|
+
#
|
239
|
+
# Returns Integer.
|
240
|
+
def average_line_length
  # No lines: report 0 rather than dividing by zero.
  return 0 unless lines.any?

  total = lines.inject(0) { |sum, line| sum + line.length }
  # Integer division: the average is truncated.
  total / lines.length
end
|
247
|
+
|
248
|
+
# Public: Is the blob a generated file?
|
249
|
+
#
|
250
|
+
# Generated source code is suppressed in diffs and is ignored by
|
251
|
+
# language statistics.
|
252
|
+
#
|
253
|
+
# Requires Blob#data
|
254
|
+
#
|
255
|
+
# Includes:
|
256
|
+
# - XCode project XML files
|
257
|
+
# - Minified JavaScript
|
258
|
+
# - Compiled CoffeeScript
|
259
|
+
# - PEG.js-generated parsers
|
260
|
+
#
|
261
|
+
# Please add additional test coverage to
|
262
|
+
# `test/test_blob.rb#test_generated` if you make any changes.
|
263
|
+
#
|
264
|
+
# Return true or false
|
265
|
+
def generated?
  # Any single positive signal is enough. The helpers may answer nil, so
  # the combined result is coerced to a strict boolean.
  !!(name == 'Gemfile.lock' ||
     minified_javascript? ||
     compiled_coffeescript? ||
     xcode_project_file? ||
     generated_net_docfile? ||
     generated_parser?)
end
|
273
|
+
|
274
|
+
# Internal: Is the blob an XCode project file?
|
275
|
+
#
|
276
|
+
# Generated if the file extension is an XCode project
|
277
|
+
# file extension.
|
278
|
+
#
|
279
|
+
# Returns true or false.
|
280
|
+
def xcode_project_file?
  # Decided purely by extension; the blob contents are never inspected.
  xcode_extensions = %w[.xib .nib .storyboard .pbxproj .xcworkspacedata .xcuserstate]
  xcode_extensions.include?(extname)
end
|
283
|
+
|
284
|
+
# Internal: Is the blob minified JS?
|
285
|
+
#
|
286
|
+
# Consider JS minified if the average line length is
|
287
|
+
# greater than 100 characters.
|
288
|
+
#
|
289
|
+
# Returns true or false.
|
290
|
+
def minified_javascript?
  # Non-JS blobs answer false (previously nil) so the predicate always
  # returns a strict boolean.
  return false unless extname == '.js'

  # An average line longer than 100 characters is treated as minified.
  average_line_length > 100
end
|
294
|
+
|
295
|
+
# Internal: Is the blob of JS a parser generated by PEG.js?
|
296
|
+
#
|
297
|
+
# Requires Blob#data
|
298
|
+
#
|
299
|
+
# PEG.js-generated parsers are not meant to be consumed by humans.
|
300
|
+
#
|
301
|
+
# Return true or false
|
302
|
+
def generated_parser?
  return false unless extname == '.js'

  # PEG.js-generated parsers include a comment near the top of the file
  # that marks them as such. Only the first five lines are inspected; they
  # are joined without separators, so the leading ^ anchors at the very
  # start of the header.
  header = lines.first(5).join('')

  # The dot in "PEG.js" is escaped so only the literal name matches; left
  # unescaped it would accept any character in that position.
  marker = /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG\.js/

  header =~ marker ? true : false
end
|
313
|
+
|
314
|
+
# Internal: Is the blob of JS generated by CoffeeScript?
|
315
|
+
#
|
316
|
+
# Requires Blob#data
|
317
|
+
#
|
318
|
+
# CoffeeScript is meant to output JS that would be difficult to
|
319
|
+
# tell if it was generated or not. Look for a number of patterns
|
320
|
+
# output by the CS compiler.
|
321
|
+
#
|
322
|
+
# Return true or false
|
323
|
+
def compiled_coffeescript?
  return false unless extname == '.js'

  # CoffeeScript generated by > 1.2 include a comment on the first line
  return true if lines[0] =~ /^\/\/ Generated by /

  # Without that marker, the file must at least be wrapped in the module
  # closure: opening on the first line, closing on the second to last
  # line, followed by a blank last line.
  wrapped = lines[0] == '(function() {' &&
            lines[-2] == '}).call(this);' &&
            lines[-1] == ''
  return false unless wrapped

  # Score every line that contains "var ".
  score = lines.inject(0) do |sum, line|
    next sum unless line =~ /var /

    # Underscored temp vars are likely to be Coffee (1 point each)
    temps = line.scan(/(_fn|_i|_len|_ref|_results)/).size

    # bind and extend functions are very Coffee specific (3 points each)
    helpers = line.scan(/(__bind|__extends|__hasProp|__indexOf|__slice)/).size

    sum + temps + 3 * helpers
  end

  # Require a score of 3. This is fairly arbitrary.
  score >= 3
end
|
354
|
+
|
355
|
+
# Internal: Is this a generated documentation file for a .NET assembly?
|
356
|
+
#
|
357
|
+
# Requires Blob#data
|
358
|
+
#
|
359
|
+
# .NET developers often check in the XML Intellisense file along with an
|
360
|
+
# assembly - however, these don't have a special extension, so we have to
|
361
|
+
# dig into the contents to determine if it's a docfile. Luckily, these files
|
362
|
+
# are extremely structured, so recognizing them is easy.
|
363
|
+
#
|
364
|
+
# Returns true or false
|
365
|
+
def generated_net_docfile?
  return false unless extname.downcase == ".xml"
  return false unless lines.count > 3

  # .NET Docfiles always open with <doc> and their first tag is an
  # <assembly> tag. The closing </doc> is expected on the second to last
  # line.
  doc_open, first_tag, doc_close = lines[1], lines[2], lines[-2]

  doc_open.include?("<doc>") &&
    first_tag.include?("<assembly>") &&
    doc_close.include?("</doc>")
end
|
375
|
+
|
376
|
+
# Public: Should the blob be indexed for searching?
|
377
|
+
#
|
378
|
+
# Excluded:
|
379
|
+
# - Files over 0.1MB
|
380
|
+
# - Non-text files
|
381
|
+
# - Languages marked as not searchable
|
382
|
+
# - Generated source files
|
383
|
+
#
|
384
|
+
# Please add additional test coverage to
|
385
|
+
# `test/test_blob.rb#test_indexable` if you make any changes.
|
386
|
+
#
|
387
|
+
# Return true or false
|
388
|
+
def indexable?
  # Non-text files are never indexed.
  return false if binary?

  # The language must be known and marked as searchable.
  detected = language
  return false if detected.nil? || !detected.searchable?

  # Generated source files are excluded.
  return false if generated?

  # Files over 100KB are excluded. size is coerced with to_i, as #large?
  # does, so a nil or String size no longer raises here.
  size.to_i <= 100 * 1024
end
|
403
|
+
|
404
|
+
# Public: Detects the Language of the blob.
|
405
|
+
#
|
406
|
+
# May load Blob#data
|
407
|
+
#
|
408
|
+
# Returns a Language or nil if none is detected
|
409
|
+
def language
  # defined? is used instead of ||= so a nil result is cached too and the
  # guess is not repeated.
  @language = guess_language unless defined?(@language)
  @language
end
|
416
|
+
|
417
|
+
# Internal: Guess language
|
418
|
+
#
|
419
|
+
# Please add additional test coverage to
|
420
|
+
# `test/test_blob.rb#test_language` if you make any changes.
|
421
|
+
#
|
422
|
+
# Returns a Language or nil
|
423
|
+
def guess_language
  return if binary_mime_type?

  # Disambiguate between multiple language extensions
  detected = disambiguate_extension_language

  # See if there is a Language for the extension
  detected ||= pathname.language

  # Look for idioms in first line
  detected ||= first_line_language

  # Try to detect Language from shebang line
  detected ||= shebang_language

  detected
end
|
438
|
+
|
439
|
+
# Internal: Get the lexer of the blob.
|
440
|
+
#
|
441
|
+
# Returns a Lexer.
|
442
|
+
def lexer
  detected = language
  return detected.lexer if detected

  # No language detected: use the "Text only" lexer.
  Pygments::Lexer.find_by_name('Text only')
end
|
445
|
+
|
446
|
+
# Internal: Disambiguates between multiple language extensions.
|
447
|
+
#
|
448
|
+
# Delegates to "guess_EXTENSION_language".
|
449
|
+
#
|
450
|
+
# Please add additional test coverage to
|
451
|
+
# `test/test_blob.rb#test_language` if you add another method.
|
452
|
+
#
|
453
|
+
# Returns a Language or nil.
|
454
|
+
def disambiguate_extension_language
  return unless Language.ambiguous?(extname)

  # ".h" dispatches to guess_h_language, ".cls" to guess_cls_language, ...
  guesser = "guess_#{extname.sub(/^\./, '')}_language"
  respond_to?(guesser) ? send(guesser) : nil
end
|
460
|
+
|
461
|
+
# Internal: Guess language of .cls files
|
462
|
+
#
|
463
|
+
# Returns a Language.
|
464
|
+
def guess_cls_language
  # True when at least one line matches the given pattern.
  seen = lambda { |pattern| lines.any? { |line| line =~ pattern } }

  # Checks run in this order; the first hit wins.
  return Language['TeX'] if seen.call(/^(%|\\)/)

  if seen.call(/^\s*(CLASS|METHOD|INTERFACE).*:\s*/i) || seen.call(/^\s*(USING|DEFINE)/i)
    return Language['OpenEdge ABL']
  end

  return Language['Apex'] if seen.call(/\{$/) || seen.call(/\}$/)

  if seen.call(/^(\'\*|Attribute|Option|Sub|Private|Protected|Public|Friend)/i)
    return Language['Visual Basic']
  end

  # The most common language should be the fallback
  Language['TeX']
end
|
478
|
+
|
479
|
+
# Internal: Guess language of header files (.h).
|
480
|
+
#
|
481
|
+
# Returns a Language.
|
482
|
+
def guess_h_language
  # Objective-C directives at the start of a line are checked first.
  objc = lines.any? { |line| line =~ /^@(interface|property|private|public|end)/ }
  return Language['Objective-C'] if objc

  # A leading "class " or an indented access specifier means C++.
  cpp = lines.any? { |line| line =~ /^class |^\s+(public|protected|private):/ }
  cpp ? Language['C++'] : Language['C']
end
|
491
|
+
|
492
|
+
# Internal: Guess language of .m files.
|
493
|
+
#
|
494
|
+
# Objective-C heuristics:
|
495
|
+
# * Keywords ("#import", "#include", "#ifdef", "#define", "@end") or "//" and opening "/*" comments
|
496
|
+
#
|
497
|
+
# Matlab heuristics:
|
498
|
+
# * Leading "function " or "classdef " keyword
|
499
|
+
# * "%" comments
|
500
|
+
#
|
501
|
+
# Note: All "#" keywords, e.g., "#import", are guaranteed to be Objective-C. Because the at sign
|
502
|
+
# is used to create function handles and anonymous functions in Matlab, most "@" keywords are not
|
503
|
+
# safe heuristics. However, "end" is a reserved term in Matlab and can't be used to create a valid
|
504
|
+
# function handle. Because @end is required to close any @implementation, @property, @interface,
|
505
|
+
# @synthesize, etc. directive in Objective-C, only @end needs to be checked for.
|
506
|
+
#
|
507
|
+
# Returns a Language.
|
508
|
+
def guess_m_language
  # Objective-C keywords or comments
  objc_marker = lines.grep(/^#(import|include|ifdef|define)|@end/).any? ||
                lines.grep(/^\s*\/\//).any? ||
                lines.grep(/^\s*\/\*/).any?
  return Language['Objective-C'] if objc_marker

  # Matlab file function or class or comments
  matlab_marker = (lines.any? && lines.first.match(/^\s*(function |classdef )/)) ||
                  lines.grep(/^\s*%/).any?

  # Fallback to Objective-C, don't want any Matlab false positives
  Language[matlab_marker ? 'Matlab' : 'Objective-C']
end
|
522
|
+
|
523
|
+
# Internal: Guess language of .pl files
|
524
|
+
#
|
525
|
+
# The rules for disambiguation are:
|
526
|
+
#
|
527
|
+
# 1. Many perl files begin with a shebang
|
528
|
+
# 2. Most Prolog source files have a rule somewhere (marked by the :- operator)
|
529
|
+
# 3. Default to Perl, because it is more popular
|
530
|
+
#
|
531
|
+
# Returns a Language.
|
532
|
+
def guess_pl_language
  # A perl shebang settles it immediately.
  return Language['Perl'] if shebang_script == 'perl'

  # A ":-" anywhere is read as a Prolog rule; otherwise default to Perl.
  prolog_rule = lines.any? { |line| line =~ /:-/ }
  prolog_rule ? Language['Prolog'] : Language['Perl']
end
|
541
|
+
|
542
|
+
# Internal: Guess language of .r files.
|
543
|
+
#
|
544
|
+
# Returns a Language.
|
545
|
+
def guess_r_language
  # One case-insensitive pattern covers all Rebol markers.
  rebol = lines.any? { |line| line =~ /(rebol|(:\s+func|make\s+object!|^\s*context)\s*\[)/i }
  rebol ? Language['Rebol'] : Language['R']
end
|
552
|
+
|
553
|
+
# Internal: Guess language of .t files.
|
554
|
+
#
|
555
|
+
# Returns a Language.
|
556
|
+
def guess_t_language
  # data can be nil (see #binary?: large blobs are not loaded); coerce it
  # to a String so the scans below cannot raise NoMethodError.
  source = data.to_s

  # Accumulate evidence for Turing.
  score = 0
  score += 1 if lines.grep(/^% /).any?
  score += source.scan(/ := /).size
  score += source.scan(/proc |procedure |fcn |function /).size
  score += source.scan(/var \w+: \w+/).size

  # Tell-tale signs it's gotta be Perl: discard all Turing evidence.
  score = 0 if lines.grep(/^(my )?(sub |\$|@|%)\w+/).any?

  score >= 3 ? Language['Turing'] : Language['Perl']
end
|
574
|
+
|
575
|
+
# Internal: Guess language of .v files.
|
576
|
+
#
|
577
|
+
# Returns a Language
|
578
|
+
def guess_v_language
  # Any line starting with one of these tokens selects Verilog; everything
  # else falls through to Coq.
  verilog = lines.any? do |line|
    line =~ /^(\/\*|\/\/|module|parameter|input|output|wire|reg|always|initial|begin|\`)/
  end
  verilog ? Language['Verilog'] : Language['Coq']
end
|
585
|
+
|
586
|
+
# Internal: Guess language of .gsp files.
|
587
|
+
#
|
588
|
+
# Returns a Language.
|
589
|
+
def guess_gsp_language
  # Markers for Groovy Server Pages. The previous pattern listed "<%"
  # twice and also "<%@", which can never be reached because "<%" already
  # matches it; the redundant alternatives are removed and the accepted
  # input is unchanged.
  gsp_marker = /<%|\$\{|<g:|<meta name="layout"|<r:/

  if lines.grep(gsp_marker).any?
    Language['Groovy Server Pages']
  else
    Language['Gosu']
  end
end
|
596
|
+
|
597
|
+
# Internal: Guess language from the first line.
|
598
|
+
#
|
599
|
+
# Look for leading "<?php" in Drupal files
|
600
|
+
#
|
601
|
+
# Returns a Language.
|
602
|
+
def first_line_language
  # Only check files with drupal php extensions, and fail fast if the
  # blob isn't viewable.
  return unless drupal_extname? && viewable?

  # to_s covers an empty blob, where there is no first line.
  Language['PHP'] if lines.first.to_s =~ /^<\?php/
end
|
613
|
+
|
614
|
+
# Internal: Extract the script name from the shebang line
|
615
|
+
#
|
616
|
+
# Requires Blob#data
|
617
|
+
#
|
618
|
+
# Examples
|
619
|
+
#
|
620
|
+
# '#!/usr/bin/ruby'
|
621
|
+
# # => 'ruby'
|
622
|
+
#
|
623
|
+
# '#!/usr/bin/env ruby'
|
624
|
+
# # => 'ruby'
|
625
|
+
#
|
626
|
+
# '#!/usr/bash/python2.4'
|
627
|
+
# # => 'python'
|
628
|
+
#
|
629
|
+
# Please add additional test coverage to
|
630
|
+
# `test/test_blob.rb#test_shebang_script` if you make any changes.
|
631
|
+
#
|
632
|
+
# Returns a script name String or nil
|
633
|
+
def shebang_script
  # Fail fast if blob isn't viewable?
  return unless viewable?
  return unless lines.any?

  first_line = lines[0].match(/(.+)\n?/)
  return unless first_line

  bang = first_line[0]
  return unless bang =~ /^#!/

  # Allow a single space between "#!" and the interpreter path.
  bang.sub!(/^#! /, '#!')
  tokens = bang.split(' ')
  pieces = tokens.first.split('/')

  # With a path, the interpreter is its last component; otherwise strip
  # the "#!" prefix from the bare name.
  script = pieces.size > 1 ? pieces.last : pieces.first.sub('#!', '')

  # "#!/usr/bin/env ruby": the real interpreter is env's first argument.
  script = tokens[1] if script == 'env'

  # python2.4 => python
  version = script && script[/((?:\d+\.?)+)/, 1]
  script = script.sub(version, '') if version

  # Check for multiline shebang hacks that exec themselves
  #
  #   #!/bin/sh
  #   exec foo "$0" "$@"
  #
  if script == 'sh'
    lines[0...5].each do |line|
      exec_call = line.match(/exec (\w+).+\$0.+\$@/)
      next unless exec_call

      script = exec_call[1]
      break
    end
  end

  script
end
|
667
|
+
|
668
|
+
# Internal: Get Language for shebang script
|
669
|
+
#
|
670
|
+
# Returns the Language or nil
|
671
|
+
def shebang_language
  # Skip file extensions unlikely to have shebangs
  return unless shebang_extname?

  script = shebang_script
  Language[script] if script
end
|
679
|
+
|
680
|
+
# Public: Highlight syntax of blob
|
681
|
+
#
|
682
|
+
# options - A Hash of options (defaults to {})
|
683
|
+
#
|
684
|
+
# Returns html String
|
685
|
+
def colorize(options = {})
  return unless safe_to_colorize?

  # Work on copies so the caller's hash (and any nested :options hash) is
  # left untouched; previously both were modified in place.
  lexer_options = (options[:options] || {}).dup

  # Only supply the detected encoding when the caller did not set one.
  lexer_options[:encoding] ||= encoding

  lexer.highlight(data, options.merge(:options => lexer_options))
end
|
691
|
+
|
692
|
+
# Public: Highlight syntax of blob without the outer highlight div
|
693
|
+
# wrapper.
|
694
|
+
#
|
695
|
+
# options - A Hash of options (defaults to {})
|
696
|
+
#
|
697
|
+
# Returns html String
|
698
|
+
def colorize_without_wrapper(options = {})
  html = colorize(options)
  # Nothing was highlighted: answer with an empty string.
  return '' unless html

  # Keep only what sits inside the outer <div class="highlight"><pre>.
  html[%r{<div class="highlight"><pre>(.*?)</pre>\s*</div>}m, 1]
end
|
705
|
+
|
706
|
+
# Load-time sanity check: every extension returned by
# Language.overridden_extensions must have a matching
# guess_EXTENSION_language method defined above.
Language.overridden_extensions.each do |extension|
  guesser = "guess_#{extension.sub(/^\./, '')}_language".to_sym
  next if instance_methods.map(&:to_sym).include?(guesser)

  raise NotImplementedError, "Language##{guesser} was not defined"
end
|
712
|
+
end
|
713
|
+
end
|