github-linguist 4.5.4 → 4.5.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/linguist/generated.rb +26 -0
- data/lib/linguist/heuristics.rb +10 -8
- data/lib/linguist/language.rb +1 -1
- data/lib/linguist/languages.json +1 -1
- data/lib/linguist/languages.yml +176 -71
- data/lib/linguist/samples.json +9299 -1454
- data/lib/linguist/samples.rb +2 -5
- data/lib/linguist/shebang.rb +13 -7
- data/lib/linguist/strategy/filename.rb +1 -11
- data/lib/linguist/strategy/modeline.rb +1 -1
- data/lib/linguist/tokenizer.rb +6 -0
- data/lib/linguist/version.rb +1 -1
- metadata +16 -2
data/lib/linguist/samples.rb
CHANGED
@@ -50,16 +50,13 @@ module Linguist
|
|
50
50
|
end
|
51
51
|
else
|
52
52
|
path = File.join(dirname, filename)
|
53
|
-
|
54
|
-
if File.extname(filename) == ""
|
55
|
-
raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
|
56
|
-
end
|
53
|
+
extname = File.extname(filename)
|
57
54
|
|
58
55
|
yield({
|
59
56
|
:path => path,
|
60
57
|
:language => category,
|
61
58
|
:interpreter => Shebang.interpreter(File.read(path)),
|
62
|
-
:extname =>
|
59
|
+
:extname => extname.empty? ? nil : extname
|
63
60
|
})
|
64
61
|
end
|
65
62
|
end
|
data/lib/linguist/shebang.rb
CHANGED
@@ -23,17 +23,20 @@ module Linguist
|
|
23
23
|
# First line must start with #!
|
24
24
|
return unless shebang && shebang.start_with?("#!")
|
25
25
|
|
26
|
-
|
27
|
-
tokens = shebang.sub(/^#!\s*/, '').strip.split(' ')
|
26
|
+
s = StringScanner.new(shebang)
|
28
27
|
|
29
28
|
# There was nothing after the #!
|
30
|
-
return
|
29
|
+
return unless path = s.scan(/^#!\s*\S+/)
|
31
30
|
|
32
|
-
#
|
33
|
-
script =
|
31
|
+
# Keep going
|
32
|
+
script = path.split('/').last
|
34
33
|
|
35
|
-
#
|
36
|
-
|
34
|
+
# if /usr/bin/env type shebang then walk the string
|
35
|
+
if script == 'env'
|
36
|
+
s.scan(/\s+/)
|
37
|
+
s.scan(/.*=[^\s]+\s+/) # skip over variable arguments e.g. foo=bar
|
38
|
+
script = s.scan(/\S+/)
|
39
|
+
end
|
37
40
|
|
38
41
|
# Interpreter was /usr/bin/env with no arguments
|
39
42
|
return unless script
|
@@ -41,6 +44,9 @@ module Linguist
|
|
41
44
|
# "python2.6" -> "python2"
|
42
45
|
script.sub! /(\.\d+)$/, ''
|
43
46
|
|
47
|
+
# #! perl -> perl
|
48
|
+
script.sub! /^#!\s*/, ''
|
49
|
+
|
44
50
|
# Check for multiline shebang hacks that call `exec`
|
45
51
|
if script == 'sh' &&
|
46
52
|
data.lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
|
data/lib/linguist/strategy/filename.rb
CHANGED
@@ -3,17 +3,7 @@ module Linguist
|
|
3
3
|
# Detects language based on filename and/or extension
|
4
4
|
class Filename
|
5
5
|
def self.call(blob, _)
|
6
|
-
|
7
|
-
|
8
|
-
# A bit of an elegant hack. If the file is executable but extensionless,
|
9
|
-
# append a "magic" extension so it can be classified with other
|
10
|
-
# languages that have shebang scripts.
|
11
|
-
extensions = FileBlob.new(name).extensions
|
12
|
-
if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
|
13
|
-
name += ".script!"
|
14
|
-
end
|
15
|
-
|
16
|
-
Language.find_by_filename(name)
|
6
|
+
Language.find_by_filename(blob.name.to_s)
|
17
7
|
end
|
18
8
|
end
|
19
9
|
end
|
data/lib/linguist/strategy/modeline.rb
CHANGED
@@ -2,7 +2,7 @@ module Linguist
|
|
2
2
|
module Strategy
|
3
3
|
class Modeline
|
4
4
|
EmacsModeline = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
|
5
|
-
VimModeline =
|
5
|
+
VimModeline = /vim:\s*set\s*(?:ft|filetype)=(\w+):/i
|
6
6
|
|
7
7
|
# Public: Detects language based on Vim and Emacs modelines
|
8
8
|
#
|
data/lib/linguist/tokenizer.rb
CHANGED
@@ -22,8 +22,10 @@ module Linguist
|
|
22
22
|
# Start state on token, ignore anything till the next newline
|
23
23
|
SINGLE_LINE_COMMENTS = [
|
24
24
|
'//', # C
|
25
|
+
'--', # Ada, Haskell, AppleScript
|
25
26
|
'#', # Ruby
|
26
27
|
'%', # Tex
|
28
|
+
'"', # Vim
|
27
29
|
]
|
28
30
|
|
29
31
|
# Start state on opening token, ignore anything until the closing
|
@@ -130,6 +132,9 @@ module Linguist
|
|
130
132
|
# extract_shebang("#!/usr/bin/env node")
|
131
133
|
# # => "node"
|
132
134
|
#
|
135
|
+
# extract_shebang("#!/usr/bin/env A=B foo=bar awk -f")
|
136
|
+
# # => "awk"
|
137
|
+
#
|
133
138
|
# Returns String token or nil if it couldn't be parsed.
|
134
139
|
def extract_shebang(data)
|
135
140
|
s = StringScanner.new(data)
|
@@ -138,6 +143,7 @@ module Linguist
|
|
138
143
|
script = path.split('/').last
|
139
144
|
if script == 'env'
|
140
145
|
s.scan(/\s+/)
|
146
|
+
s.scan(/.*=[^\s]+\s+/)
|
141
147
|
script = s.scan(/\S+/)
|
142
148
|
end
|
143
149
|
script = script[/[^\d]+/, 0] if script
|
data/lib/linguist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.5.4
|
4
|
+
version: 4.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GitHub
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: charlock_holmes
|
@@ -136,6 +136,20 @@ dependencies:
|
|
136
136
|
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: color-proximity
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: 0.2.1
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 0.2.1
|
139
153
|
description: We use this library at GitHub to detect blob languages, highlight code,
|
140
154
|
ignore binary files, suppress generated files in diffs, and generate language breakdown
|
141
155
|
graphs.
|