github-linguist 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/linguist +43 -0
- data/lib/linguist.rb +5 -0
- data/lib/linguist/blob_helper.rb +713 -0
- data/lib/linguist/file_blob.rb +56 -0
- data/lib/linguist/language.rb +474 -0
- data/lib/linguist/languages.yml +1379 -0
- data/lib/linguist/mime.rb +91 -0
- data/lib/linguist/mimes.yml +62 -0
- data/lib/linguist/pathname.rb +92 -0
- data/lib/linguist/popular.yml +29 -0
- data/lib/linguist/repository.rb +95 -0
- data/lib/linguist/vendor.yml +96 -0
- metadata +152 -0
data/bin/linguist
ADDED
@@ -0,0 +1,43 @@
|
|
1
|
+
#!/usr/bin/env ruby

# Command-line front end for Linguist.
#
# usage: linguist <path>
#
# Given a directory, prints each detected language with its share of the
# repository size, largest first. Given a regular file, prints its line
# counts, type, extension, mime type and detected language, followed by
# notes when the blob is large, generated or vendored. When no argument is
# supplied the current working directory is used. Any other kind of path
# aborts with the usage message.

require 'linguist/file_blob'
require 'linguist/repository'

path = ARGV[0] || Dir.pwd

if File.directory?(path)
  repo = Linguist::Repository.from_directory(path)
  repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
    percentage = ((size / repo.size.to_f) * 100).round
    puts "%-4s %s" % ["#{percentage}%", language]
  end
elsif File.file?(path)
  blob = Linguist::FileBlob.new(path, Dir.pwd)
  type = if blob.text?
    'Text'
  elsif blob.image?
    'Image'
  else
    'Binary'
  end

  puts "#{blob.name}: #{blob.loc} lines (#{blob.sloc} sloc)"
  puts " type: #{type}"
  puts " extension: #{blob.pathname.extname}"
  puts " mime type: #{blob.mime_type}"
  puts " language: #{blob.language}"

  if blob.large?
    puts " blob is too large to be shown"
  end

  if blob.generated?
    puts " appears to be generated source code"
  end

  if blob.vendored?
    puts " appears to be a vendored file"
  end
else
  abort "usage: linguist <path>"
end
|
data/lib/linguist.rb
ADDED
@@ -0,0 +1,713 @@
|
|
1
|
+
require 'linguist/language'
|
2
|
+
require 'linguist/mime'
|
3
|
+
require 'linguist/pathname'
|
4
|
+
|
5
|
+
require 'charlock_holmes'
|
6
|
+
require 'escape_utils'
|
7
|
+
require 'pygments'
|
8
|
+
require 'yaml'
|
9
|
+
|
10
|
+
module Linguist
|
11
|
+
# BlobHelper is a mixin for Blobish classes that respond to "name",
|
12
|
+
# "data" and "size" such as Grit::Blob.
|
13
|
+
module BlobHelper
|
14
|
+
# Internal: Wrap Blob#name in a Pathname.
#
# A nil name is treated as the empty string.
#
# Returns a Pathname.
def pathname
  blob_name = name || ""
  Pathname.new(blob_name)
end
|
20
|
+
|
21
|
+
# Public: Get the file extension of the blob's path.
#
# Examples
#
#   blob(name='foo.rb').extname
#   # => '.rb'
#
# Returns a String.
def extname
  path = pathname
  path.extname
end
|
32
|
+
|
33
|
+
# Public: Get the blob's mime type as reported by its pathname.
#
# The value is computed once and cached on the instance.
#
# Examples
#
#   # => 'text/plain'
#   # => 'text/html'
#
# Returns a mime type String.
def mime_type
  return @mime_type if @mime_type

  @mime_type = pathname.mime_type
end
|
44
|
+
|
45
|
+
# Public: Get the Content-Type header value.
#
# This value is used when serving raw blobs. Binary blobs (by mime type
# or by content) report their mime type; everything else is served as
# text/plain, with a charset when an encoding was detected. The result
# is cached on the instance.
#
# Examples
#
#   # => 'text/plain; charset=utf-8'
#   # => 'application/octet-stream'
#
# Returns a content type String.
def content_type
  return @content_type if @content_type

  @content_type =
    if binary_mime_type? || binary?
      mime_type
    elsif encoding
      "text/plain; charset=#{encoding.downcase}"
    else
      "text/plain"
    end
end
|
59
|
+
|
60
|
+
# Public: Get the Content-Disposition header value.
#
# This value is used when serving raw blobs. Text and image blobs are
# shown inline; anything else is an attachment, with the URL-escaped
# basename as filename when the blob has a name.
#
#   # => "attachment; filename=file.tar"
#   # => "inline"
#
# Returns a content disposition String.
def disposition
  return 'inline' if text? || image?
  return "attachment" if name.nil?

  "attachment; filename=#{EscapeUtils.escape_url(pathname.basename)}"
end
|
77
|
+
|
78
|
+
# Public: Get the name of the detected encoding.
#
# Returns the :encoding entry of the detect_encoding result, or nil when
# detection produced nothing.
def encoding
  detected = detect_encoding
  detected ? detected[:encoding] : nil
end
|
83
|
+
|
84
|
+
# Try to guess the encoding of the blob's data.
#
# The detector result is cached on the instance. Nothing is attempted
# when the blob has no data.
#
# Returns a Hash with :encoding, :confidence and :type, or nil if an
# error occurred during detection or no valid encoding could be found.
def detect_encoding
  return nil unless data

  @detect_encoding ||= CharlockHolmes::EncodingDetector.new.detect(data)
end
|
92
|
+
|
93
|
+
# Public: Is the blob binary according to its mime type?
#
# Looks up the mime type registered for the blob's extension and asks it
# whether it is binary. An extension with no registered mime type is
# reported as not binary.
#
# Returns true or false.
def binary_mime_type?
  # Named `registered` so the local does not shadow the #mime_type method.
  registered = Mime.lookup_mime_type_for(pathname.extname)
  registered && registered.binary? ? true : false
end
|
101
|
+
|
102
|
+
# Public: Is the blob binary?
#
# Decided from the blob's data and the detected encoding.
#
# Returns true or false.
def binary?
  # Large blobs aren't even loaded into memory
  return true if data.nil?

  # Treat blank files as text
  return false if data == ""

  # Charlock doesn't know what to think
  return true if encoding.nil?

  # Otherwise trust Charlock's verdict
  detect_encoding[:type] == :binary
end
|
123
|
+
|
124
|
+
# Public: Is the blob text? This is the negation of binary?.
#
# Returns true or false.
def text?
  binary? ? false : true
end
|
130
|
+
|
131
|
+
# Public: Is the blob a supported image format?
#
# Decided purely from the extension (.png, .jpg, .jpeg or .gif).
#
# Returns true or false.
def image?
  case extname
  when '.png', '.jpg', '.jpeg', '.gif' then true
  else false
  end
end
|
137
|
+
|
138
|
+
# Public: Could the blob be a Drupal PHP file?
#
# Decided purely from the extension (.module, .install, .test or .inc).
#
# Returns true or false.
def drupal_extname?
  case extname
  when '.module', '.install', '.test', '.inc' then true
  else false
  end
end
|
144
|
+
|
145
|
+
# Public: Is the blob likely to have a shebang?
#
# True only for extensionless blobs whose mode, parsed as an octal
# string, has both bits of the 05 mask set.
#
# Returns true or false (or the falsy mode itself when mode is unset).
def shebang_extname?
  return false unless extname.empty?

  permissions = mode
  return permissions unless permissions

  (mode.to_i(8) & 05) == 05
end
|
153
|
+
|
154
|
+
# Number of bytes in one megabyte; the threshold used by large?.
MEGABYTE = 1024 * 1024

# Public: Is the blob too big to load?
#
# The size is coerced with to_i and must strictly exceed one megabyte.
#
# Returns true or false.
def large?
  MEGABYTE < size.to_i
end
|
162
|
+
|
163
|
+
# Public: Is the blob safe to colorize?
#
# We use Pygments.rb for syntax highlighting blobs, which has some
# quirks and is essentially 'un-killable' via a normal timeout. To work
# around this we avoid handing Pygments.rb anything it can't handle:
# only text blobs that are neither large nor dominated by very long
# lines qualify.
#
# Returns true or false.
def safe_to_colorize?
  return false unless text?
  return false if large?

  !high_ratio_of_long_lines?
end
|
174
|
+
|
175
|
+
# Internal: Does the blob have a high ratio of long lines?
#
# These types of files are usually going to make Pygments.rb angry if
# we try to colorize them. The test is size divided by line count
# exceeding 5000; a blob with no lines never qualifies.
#
# Returns true or false.
def high_ratio_of_long_lines?
  line_count = loc
  return false if line_count == 0

  size / line_count > 5000
end
|
185
|
+
|
186
|
+
# Public: Is the blob viewable?
#
# Non-viewable blobs will just show a "View Raw" link. A blob is
# viewable when it is not large and is text.
#
# Returns true or false.
def viewable?
  return false if large?

  text?
end
|
194
|
+
|
195
|
+
# The entries of vendor.yml (located next to this file) are joined with
# "|" and compiled into a single alternation regexp.
vendor_yml = File.expand_path("../vendor.yml", __FILE__)
vendored_paths = YAML.load_file(vendor_yml)
VendoredRegexp = Regexp.new(vendored_paths.join('|'))

# Public: Is the blob in a vendored directory?
#
# Vendored files are ignored by language statistics.
#
# See "vendor.yml" for a list of vendored conventions that match
# this pattern.
#
# Returns true or false.
def vendored?
  if name =~ VendoredRegexp
    true
  else
    false
  end
end
|
209
|
+
|
210
|
+
# Public: Get each line of data.
#
# Requires Blob#data
#
# Data is split on "\n" keeping trailing empty fields, so a file ending
# in a newline yields a final empty String. Blobs that are not viewable
# or have no data yield an empty Array. The result is cached.
#
# Returns an Array of lines.
def lines
  @lines ||=
    if viewable? && data
      data.split("\n", -1)
    else
      []
    end
end
|
218
|
+
|
219
|
+
# Public: Get the number of lines of code, i.e. the size of #lines.
#
# Requires Blob#data
#
# Returns Integer.
def loc
  lines.length
end
|
227
|
+
|
228
|
+
# Public: Get the number of source lines of code, i.e. the lines that
# contain at least one non-whitespace character.
#
# Requires Blob#data
#
# Returns Integer.
def sloc
  lines.count { |line| line =~ /\S/ }
end
|
236
|
+
|
237
|
+
# Internal: Compute the average line length.
#
# Uses integer division of the summed line lengths by the line count;
# a blob without lines averages 0.
#
# Returns Integer.
def average_line_length
  return 0 unless lines.any?

  total = 0
  lines.each { |line| total += line.length }
  total / lines.length
end
|
247
|
+
|
248
|
+
# Public: Is the blob a generated file?
#
# Generated source code is suppressed in diffs and is ignored by
# language statistics.
#
# Requires Blob#data
#
# Includes:
# - Gemfile.lock
# - XCode project XML files
# - Minified JavaScript
# - Compiled CoffeeScript
# - Generated .NET documentation files
# - PEG.js-generated parsers
#
# Please add additional test coverage to
# `test/test_blob.rb#test_generated` if you make any changes.
#
# Returns true or false.
def generated?
  return true if name == 'Gemfile.lock'
  return true if minified_javascript? || compiled_coffeescript?
  return true if xcode_project_file? || generated_net_docfile?

  generated_parser? ? true : false
end
|
273
|
+
|
274
|
+
# Internal: Is the blob an XCode project file?
#
# Generated if the file extension is an XCode project
# file extension.
#
# Returns true or false.
def xcode_project_file?
  xcode_extensions = %w[.xib .nib .storyboard .pbxproj .xcworkspacedata .xcuserstate]
  xcode_extensions.include?(extname)
end
|
283
|
+
|
284
|
+
# Internal: Is the blob minified JS?
#
# Consider JS minified if the average line length is
# greater than 100 characters. Only blobs with a .js extension are
# considered.
#
# Returns true or false.
def minified_javascript?
  # Return an explicit false (not nil) for non-JS blobs, matching the
  # documented contract and the other generated-file predicates.
  return false unless extname == '.js'

  average_line_length > 100
end
|
294
|
+
|
295
|
+
# Internal: Is the blob of JS a parser generated by PEG.js?
#
# Requires Blob#data
#
# PEG.js-generated parsers are not meant to be consumed by humans.
#
# Returns true or false.
def generated_parser?
  return false unless extname == '.js'

  # PEG.js-generated parsers include a comment near the top of the file
  # that marks them as such. The first five lines are concatenated and
  # searched for the marker inside the first block comment. The dot in
  # "PEG.js" is escaped so that it only matches a literal ".".
  header = lines[0..4].join('')
  header =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG\.js/ ? true : false
end
|
313
|
+
|
314
|
+
# Internal: Is the blob of JS generated by CoffeeScript?
#
# Requires Blob#data
#
# CoffeeScript is meant to output JS that would be difficult to
# tell if it was generated or not. Look for a number of patterns
# output by the CS compiler.
#
# Returns true or false.
def compiled_coffeescript?
  return false unless extname == '.js'

  # CoffeeScript generated by > 1.2 include a comment on the first line
  return true if lines[0] =~ /^\/\/ Generated by /

  closure_wrapped =
    lines[0] == '(function() {' &&    # First line is module closure opening
    lines[-2] == '}).call(this);' &&  # Second to last line closes module closure
    lines[-1] == ''                   # Last line is blank
  return false unless closure_wrapped

  # Only lines containing "var " contribute to the score.
  score = lines.inject(0) do |total, line|
    next total unless line =~ /var /

    # Underscored temp vars are likely to be Coffee (1 point each);
    # bind and extend functions are very Coffee specific (3 points each).
    temp_vars = line.scan(/(_fn|_i|_len|_ref|_results)/).size
    helpers   = line.scan(/(__bind|__extends|__hasProp|__indexOf|__slice)/).size
    total + 1 * temp_vars + 3 * helpers
  end

  # Require a score of 3. This is fairly arbitrary. Consider
  # tweaking later.
  score >= 3
end
|
354
|
+
|
355
|
+
# Internal: Is this a generated documentation file for a .NET assembly?
#
# Requires Blob#data
#
# .NET developers often check in the XML Intellisense file along with an
# assembly - however, these don't have a special extension, so we have to
# dig into the contents to determine if it's a docfile. Luckily, these files
# are extremely structured, so recognizing them is easy.
#
# Returns true or false.
def generated_net_docfile?
  return false unless extname.downcase == ".xml"
  return false unless lines.count > 3

  # .NET Docfiles always open with <doc> and their first tag is an
  # <assembly> tag. The closing </doc> is looked for on the
  # second-to-last line.
  second, third, penultimate = lines[1], lines[2], lines[-2]
  second.include?("<doc>") &&
    third.include?("<assembly>") &&
    penultimate.include?("</doc>")
end
|
375
|
+
|
376
|
+
# Public: Should the blob be indexed for searching?
#
# Excluded:
# - Files over 0.1MB
# - Non-text files
# - Languages marked as not searchable (or no detected language)
# - Generated source files
#
# Please add additional test coverage to
# `test/test_blob.rb#test_indexable` if you make any changes.
#
# Returns true or false.
def indexable?
  return false if binary?
  return false if language.nil?
  return false unless language.searchable?
  return false if generated?
  return false if size > 100 * 1024

  true
end
|
403
|
+
|
404
|
+
# Public: Detects the Language of the blob.
#
# May load Blob#data
#
# The guess is made once and cached, including a nil result.
#
# Returns a Language or nil if none is detected.
def language
  @language = guess_language unless defined?(@language)
  @language
end
|
416
|
+
|
417
|
+
# Internal: Guess language
#
# Please add additional test coverage to
# `test/test_blob.rb#test_language` if you make any changes.
#
# Returns a Language or nil.
def guess_language
  return nil if binary_mime_type?

  # Disambiguate between multiple language extensions
  guess = disambiguate_extension_language

  # See if there is a Language for the extension
  guess ||= pathname.language

  # Look for idioms in first line
  guess ||= first_line_language

  # Try to detect Language from shebang line
  guess ||= shebang_language

  guess
end
|
438
|
+
|
439
|
+
# Internal: Get the lexer of the blob.
#
# Uses the detected language's lexer, falling back to the Pygments
# 'Text only' lexer when no language is detected.
#
# Returns a Lexer.
def lexer
  detected = language
  return detected.lexer if detected

  Pygments::Lexer.find_by_name('Text only')
end
|
445
|
+
|
446
|
+
# Internal: Disambiguates between multiple language extensions.
#
# Delegates to "guess_EXTENSION_language" when the extension is
# ambiguous and such a method is available.
#
# Please add additional test coverage to
# `test/test_blob.rb#test_language` if you add another method.
#
# Returns a Language or nil.
def disambiguate_extension_language
  return nil unless Language.ambiguous?(extname)

  guesser = "guess_#{extname.sub(/^\./, '')}_language"
  respond_to?(guesser) ? send(guesser) : nil
end
|
460
|
+
|
461
|
+
# Internal: Guess language of .cls files
#
# Checks, in order, for TeX, OpenEdge ABL, Apex and Visual Basic line
# patterns and falls back to TeX.
#
# Returns a Language.
def guess_cls_language
  return Language['TeX'] if lines.grep(/^(%|\\)/).any?

  open_edge = lines.grep(/^\s*(CLASS|METHOD|INTERFACE).*:\s*/i).any? ||
              lines.grep(/^\s*(USING|DEFINE)/i).any?
  return Language['OpenEdge ABL'] if open_edge

  return Language['Apex'] if lines.grep(/\{$/).any? || lines.grep(/\}$/).any?

  if lines.grep(/^(\'\*|Attribute|Option|Sub|Private|Protected|Public|Friend)/i).any?
    return Language['Visual Basic']
  end

  # The most common language should be the fallback
  Language['TeX']
end
|
478
|
+
|
479
|
+
# Internal: Guess language of header files (.h).
#
# Objective-C directives win over C++ class markers; C is the fallback.
#
# Returns a Language.
def guess_h_language
  return Language['Objective-C'] if lines.grep(/^@(interface|property|private|public|end)/).any?
  return Language['C++'] if lines.grep(/^class |^\s+(public|protected|private):/).any?

  Language['C']
end
|
491
|
+
|
492
|
+
# Internal: Guess language of .m files.
#
# Objective-C heuristics:
# * Keywords ("#import", "#include", "#ifdef", #define, "@end") or "//" and opening "/*" comments
#
# Matlab heuristics:
# * Leading "function " or "classdef " keyword on the first line
# * "%" comments
#
# Note: All "#" keywords, e.g., "#import", are guaranteed to be Objective-C. Because the at sign
# is used to create function handles and anonymous functions in Matlab, most "@" keywords are not
# safe heuristics. However, "end" is a reserved term in Matlab and can't be used to create a valid
# function handle. Because @end is required to close any @implementation, @property, @interface,
# @synthesize, etc. directive in Objective-C, only @end needs to be checked for.
#
# Returns a Language.
def guess_m_language
  # Objective-C keywords or comments
  objc = lines.grep(/^#(import|include|ifdef|define)|@end/).any? ||
         lines.grep(/^\s*\/\//).any? ||
         lines.grep(/^\s*\/\*/).any?
  return Language['Objective-C'] if objc

  # Matlab file function or class or comments
  matlab = (lines.any? && lines.first.match(/^\s*(function |classdef )/)) ||
           lines.grep(/^\s*%/).any?
  return Language['Matlab'] if matlab

  # Fallback to Objective-C, don't want any Matlab false positives
  Language['Objective-C']
end
|
522
|
+
|
523
|
+
# Internal: Guess language of .pl files
#
# The rules for disambiguation are:
#
# 1. Many perl files begin with a shebang
# 2. Most Prolog source files have a rule somewhere (marked by the :- operator)
# 3. Default to Perl, because it is more popular
#
# Returns a Language.
def guess_pl_language
  return Language['Perl'] if shebang_script == 'perl'
  return Language['Prolog'] if lines.grep(/:-/).any?

  Language['Perl']
end
|
541
|
+
|
542
|
+
# Internal: Guess language of .r files.
#
# Any line matching the Rebol pattern selects Rebol; otherwise R.
#
# Returns a Language.
def guess_r_language
  rebol = lines.grep(/(rebol|(:\s+func|make\s+object!|^\s*context)\s*\[)/i).any?
  rebol ? Language['Rebol'] : Language['R']
end
|
552
|
+
|
553
|
+
# Internal: Guess language of .t files.
#
# Scores Turing idioms ("% " comments, " := " assignments, procedure and
# function keywords, typed "var" declarations). Any Perl tell-tale line
# resets the score. A score of 3 or more selects Turing, otherwise Perl.
#
# Returns a Language.
def guess_t_language
  # Blob#data can be nil (e.g. large blobs that are never loaded); treat
  # that as empty text instead of raising NoMethodError.
  text = data.to_s

  score = 0
  score += 1 if lines.grep(/^% /).any?
  score += text.scan(/ := /).size
  score += text.scan(/proc |procedure |fcn |function /).size
  score += text.scan(/var \w+: \w+/).size

  # Tell-tale signs its gotta be Perl
  score = 0 if lines.grep(/^(my )?(sub |\$|@|%)\w+/).any?

  score >= 3 ? Language['Turing'] : Language['Perl']
end
|
574
|
+
|
575
|
+
# Internal: Guess language of .v files.
#
# Any line starting with a Verilog keyword, comment opener or backtick
# selects Verilog; otherwise Coq.
#
# Returns a Language.
def guess_v_language
  verilog = lines.grep(/^(\/\*|\/\/|module|parameter|input|output|wire|reg|always|initial|begin|\`)/).any?
  verilog ? Language['Verilog'] : Language['Coq']
end
|
585
|
+
|
586
|
+
# Internal: Guess language of .gsp files.
#
# Any line containing a scriptlet opener ("<%"), a "${" expression, a
# "<g:" or "<r:" tag, or a layout meta tag selects Groovy Server Pages;
# otherwise Gosu.
#
# Returns a Language.
def guess_gsp_language
  # "<%" already covers "<%@", so the redundant alternatives are omitted.
  if lines.grep(/<%|\$\{|<g:|<meta name="layout"|<r:/).any?
    Language['Groovy Server Pages']
  else
    Language['Gosu']
  end
end
|
596
|
+
|
597
|
+
# Internal: Guess language from the first line.
#
# Look for leading "<?php" in Drupal files
#
# Returns a Language, or nil when the blob has no Drupal extension, is
# not viewable, or does not open with "<?php".
def first_line_language
  # Only check files with drupal php extensions
  return nil unless drupal_extname?

  # Fail fast if blob isn't viewable?
  return nil unless viewable?

  opening = lines.first.to_s
  opening =~ /^<\?php/ ? Language['PHP'] : nil
end
|
613
|
+
|
614
|
+
# Internal: Extract the script name from the shebang line
#
# Requires Blob#data
#
# Examples
#
#   '#!/usr/bin/ruby'
#   # => 'ruby'
#
#   '#!/usr/bin/env ruby'
#   # => 'ruby'
#
#   '#!/usr/bash/python2.4'
#   # => 'python'
#
# Please add additional test coverage to
# `test/test_blob.rb#test_shebang_script` if you make any changes.
#
# Returns a script name String or nil (nil when the blob is not viewable
# or its first line is not a shebang).
def shebang_script
  # Fail fast if blob isn't viewable?
  return unless viewable?

  # Proceed only when the first line is non-empty and begins with "#!".
  if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
    # Normalize "#! /path" to "#!/path" (removes one space after "#!").
    bang.sub!(/^#! /, '#!')
    tokens = bang.split(' ')
    pieces = tokens.first.split('/')
    if pieces.size > 1
      # Interpreter given as a path: keep its last component.
      script = pieces.last
    else
      # No "/" in the first token: keep whatever follows "#!".
      script = pieces.first.sub('#!', '')
    end

    # With "env" the interpreter is the next token (nil if there is none).
    script = script == 'env' ? tokens[1] : script

    # python2.4 => python
    # ($1 is the first run of digits and dots; its first occurrence is
    # removed from script in place.)
    if script =~ /((?:\d+\.?)+)/
      script.sub! $1, ''
    end

    # Check for multiline shebang hacks that exec themselves
    #
    #   #!/bin/sh
    #   exec foo "$0" "$@"
    #
    # Only the first five lines are searched; $1 is the exec'd command
    # captured by the match inside the block.
    if script == 'sh' &&
        lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
      script = $1
    end

    script
  end
end
|
667
|
+
|
668
|
+
# Internal: Get Language for shebang script
#
# Looks the shebang script name up in Language, but only for blobs that
# pass shebang_extname?.
#
# Returns the Language or nil.
def shebang_language
  # Skip file extensions unlikely to have shebangs
  return nil unless shebang_extname?

  script = shebang_script
  script ? Language[script] : nil
end
|
679
|
+
|
680
|
+
# Public: Highlight syntax of blob
#
# options - A Hash of options (defaults to {}). Its nested :options
#           Hash is handed to the lexer; :encoding defaults to the
#           blob's detected encoding when not supplied.
#
# The caller's Hash (and its nested :options Hash) is left untouched;
# the lexer receives copies with the defaults filled in.
#
# Returns html String, or nil when the blob is not safe to colorize.
def colorize(options = {})
  return unless safe_to_colorize?

  lexer_options = (options[:options] || {}).dup
  lexer_options[:encoding] ||= encoding
  lexer.highlight(data, options.merge(:options => lexer_options))
end
|
691
|
+
|
692
|
+
# Public: Highlight syntax of blob without the outer highlight div
# wrapper.
#
# options - A Hash of options (defaults to {})
#
# Returns html String ('' when the blob could not be colorized).
def colorize_without_wrapper(options = {})
  html = colorize(options)
  return '' unless html

  html[%r{<div class="highlight"><pre>(.*?)</pre>\s*</div>}m, 1]
end
|
705
|
+
|
706
|
+
# Load-time sanity check: every extension listed by
# Language.overridden_extensions must have a matching
# "guess_EXTENSION_language" instance method defined above in this
# module, otherwise loading fails with NotImplementedError.
Language.overridden_extensions.each do |extension|
  guesser = "guess_#{extension.sub(/^\./, '')}_language".to_sym
  unless method_defined?(guesser)
    # The guess methods live on this module, so name it in the message.
    raise NotImplementedError, "BlobHelper##{guesser} was not defined"
  end
end
|
712
|
+
end
|
713
|
+
end
|