github-linguist 4.5.4 → 4.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/linguist/generated.rb +26 -0
- data/lib/linguist/heuristics.rb +10 -8
- data/lib/linguist/language.rb +1 -1
- data/lib/linguist/languages.json +1 -1
- data/lib/linguist/languages.yml +176 -71
- data/lib/linguist/samples.json +9299 -1454
- data/lib/linguist/samples.rb +2 -5
- data/lib/linguist/shebang.rb +13 -7
- data/lib/linguist/strategy/filename.rb +1 -11
- data/lib/linguist/strategy/modeline.rb +1 -1
- data/lib/linguist/tokenizer.rb +6 -0
- data/lib/linguist/version.rb +1 -1
- metadata +16 -2
data/lib/linguist/samples.rb
CHANGED
@@ -50,16 +50,13 @@ module Linguist
|
|
50
50
|
end
|
51
51
|
else
|
52
52
|
path = File.join(dirname, filename)
|
53
|
-
|
54
|
-
if File.extname(filename) == ""
|
55
|
-
raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
|
56
|
-
end
|
53
|
+
extname = File.extname(filename)
|
57
54
|
|
58
55
|
yield({
|
59
56
|
:path => path,
|
60
57
|
:language => category,
|
61
58
|
:interpreter => Shebang.interpreter(File.read(path)),
|
62
|
-
:extname => File.extname(filename)
|
59
|
+
:extname => extname.empty? ? nil : extname
|
63
60
|
})
|
64
61
|
end
|
65
62
|
end
|
data/lib/linguist/shebang.rb
CHANGED
@@ -23,17 +23,20 @@ module Linguist
|
|
23
23
|
# First line must start with #!
|
24
24
|
return unless shebang && shebang.start_with?("#!")
|
25
25
|
|
26
|
-
|
27
|
-
tokens = shebang.sub(/^#!\s*/, '').strip.split(' ')
|
26
|
+
s = StringScanner.new(shebang)
|
28
27
|
|
29
28
|
# There was nothing after the #!
|
30
|
-
return
|
29
|
+
return unless path = s.scan(/^#!\s*\S+/)
|
31
30
|
|
32
|
-
#
|
33
|
-
script =
|
31
|
+
# Keep going
|
32
|
+
script = path.split('/').last
|
34
33
|
|
35
|
-
#
|
36
|
-
|
34
|
+
# if /usr/bin/env type shebang then walk the string
|
35
|
+
if script == 'env'
|
36
|
+
s.scan(/\s+/)
|
37
|
+
s.scan(/.*=[^\s]+\s+/) # skip over variable arguments e.g. foo=bar
|
38
|
+
script = s.scan(/\S+/)
|
39
|
+
end
|
37
40
|
|
38
41
|
# Interpreter was /usr/bin/env with no arguments
|
39
42
|
return unless script
|
@@ -41,6 +44,9 @@ module Linguist
|
|
41
44
|
# "python2.6" -> "python2"
|
42
45
|
script.sub! /(\.\d+)$/, ''
|
43
46
|
|
47
|
+
# #! perl -> perl
|
48
|
+
script.sub! /^#!\s*/, ''
|
49
|
+
|
44
50
|
# Check for multiline shebang hacks that call `exec`
|
45
51
|
if script == 'sh' &&
|
46
52
|
data.lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
|
@@ -3,17 +3,7 @@ module Linguist
|
|
3
3
|
# Detects language based on filename and/or extension
|
4
4
|
class Filename
|
5
5
|
def self.call(blob, _)
|
6
|
-
|
7
|
-
|
8
|
-
# A bit of an elegant hack. If the file is executable but extensionless,
|
9
|
-
# append a "magic" extension so it can be classified with other
|
10
|
-
# languages that have shebang scripts.
|
11
|
-
extensions = FileBlob.new(name).extensions
|
12
|
-
if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
|
13
|
-
name += ".script!"
|
14
|
-
end
|
15
|
-
|
16
|
-
Language.find_by_filename(name)
|
6
|
+
Language.find_by_filename(blob.name.to_s)
|
17
7
|
end
|
18
8
|
end
|
19
9
|
end
|
@@ -2,7 +2,7 @@ module Linguist
|
|
2
2
|
module Strategy
|
3
3
|
class Modeline
|
4
4
|
EmacsModeline = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
|
5
|
-
VimModeline =
|
5
|
+
VimModeline = /vim:\s*set\s*(?:ft|filetype)=(\w+):/i
|
6
6
|
|
7
7
|
# Public: Detects language based on Vim and Emacs modelines
|
8
8
|
#
|
data/lib/linguist/tokenizer.rb
CHANGED
@@ -22,8 +22,10 @@ module Linguist
|
|
22
22
|
# Start state on token, ignore anything till the next newline
|
23
23
|
SINGLE_LINE_COMMENTS = [
|
24
24
|
'//', # C
|
25
|
+
'--', # Ada, Haskell, AppleScript
|
25
26
|
'#', # Ruby
|
26
27
|
'%', # Tex
|
28
|
+
'"', # Vim
|
27
29
|
]
|
28
30
|
|
29
31
|
# Start state on opening token, ignore anything until the closing
|
@@ -130,6 +132,9 @@ module Linguist
|
|
130
132
|
# extract_shebang("#!/usr/bin/env node")
|
131
133
|
# # => "node"
|
132
134
|
#
|
135
|
+
# extract_shebang("#!/usr/bin/env A=B foo=bar awk -f")
|
136
|
+
# # => "awk"
|
137
|
+
#
|
133
138
|
# Returns String token or nil it couldn't be parsed.
|
134
139
|
def extract_shebang(data)
|
135
140
|
s = StringScanner.new(data)
|
@@ -138,6 +143,7 @@ module Linguist
|
|
138
143
|
script = path.split('/').last
|
139
144
|
if script == 'env'
|
140
145
|
s.scan(/\s+/)
|
146
|
+
s.scan(/.*=[^\s]+\s+/)
|
141
147
|
script = s.scan(/\S+/)
|
142
148
|
end
|
143
149
|
script = script[/[^\d]+/, 0] if script
|
data/lib/linguist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.5.4
|
4
|
+
version: 4.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GitHub
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-05-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: charlock_holmes
|
@@ -136,6 +136,20 @@ dependencies:
|
|
136
136
|
- - ">="
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: color-proximity
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: 0.2.1
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 0.2.1
|
139
153
|
description: We use this library at GitHub to detect blob languages, highlight code,
|
140
154
|
ignore binary files, suppress generated files in diffs, and generate language breakdown
|
141
155
|
graphs.
|