github-linguist 4.0.3 → 4.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/linguist.rb +1 -0
- data/lib/linguist/classifier.rb +19 -0
- data/lib/linguist/file_blob.rb +14 -8
- data/lib/linguist/heuristics.rb +112 -110
- data/lib/linguist/language.rb +39 -46
- data/lib/linguist/languages.json +1 -1
- data/lib/linguist/languages.yml +77 -6
- data/lib/linguist/samples.json +3292 -454
- data/lib/linguist/samples.rb +6 -39
- data/lib/linguist/shebang.rb +44 -0
- data/lib/linguist/strategy/filename.rb +20 -0
- data/lib/linguist/vendor.yml +0 -3
- data/lib/linguist/version.rb +1 -1
- metadata +6 -4
data/lib/linguist/samples.rb
CHANGED
@@ -6,6 +6,7 @@ end
|
|
6
6
|
|
7
7
|
require 'linguist/md5'
|
8
8
|
require 'linguist/classifier'
|
9
|
+
require 'linguist/shebang'
|
9
10
|
|
10
11
|
module Linguist
|
11
12
|
# Model for accessing classifier training data.
|
@@ -52,14 +53,16 @@ module Linguist
|
|
52
53
|
})
|
53
54
|
end
|
54
55
|
else
|
56
|
+
path = File.join(dirname, filename)
|
57
|
+
|
55
58
|
if File.extname(filename) == ""
|
56
|
-
raise "#{
|
59
|
+
raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
|
57
60
|
end
|
58
61
|
|
59
62
|
yield({
|
60
|
-
:path =>
|
63
|
+
:path => path,
|
61
64
|
:language => category,
|
62
|
-
:interpreter =>
|
65
|
+
:interpreter => Shebang.interpreter(File.read(path)),
|
63
66
|
:extname => File.extname(filename)
|
64
67
|
})
|
65
68
|
end
|
@@ -112,40 +115,4 @@ module Linguist
|
|
112
115
|
db
|
113
116
|
end
|
114
117
|
end
|
115
|
-
|
116
|
-
# Used to retrieve the interpreter from the shebang line of a file's
|
117
|
-
# data.
|
118
|
-
def self.interpreter_from_shebang(data)
|
119
|
-
lines = data.lines.to_a
|
120
|
-
|
121
|
-
if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
|
122
|
-
bang.sub!(/^#! /, '#!')
|
123
|
-
tokens = bang.split(' ')
|
124
|
-
pieces = tokens.first.split('/')
|
125
|
-
|
126
|
-
if pieces.size > 1
|
127
|
-
script = pieces.last
|
128
|
-
else
|
129
|
-
script = pieces.first.sub('#!', '')
|
130
|
-
end
|
131
|
-
|
132
|
-
script = script == 'env' ? tokens[1] : script
|
133
|
-
|
134
|
-
# "python2.6" -> "python"
|
135
|
-
if script =~ /((?:\d+\.?)+)/
|
136
|
-
script.sub! $1, ''
|
137
|
-
end
|
138
|
-
|
139
|
-
# Check for multiline shebang hacks that call `exec`
|
140
|
-
if script == 'sh' &&
|
141
|
-
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
|
142
|
-
script = $1
|
143
|
-
end
|
144
|
-
|
145
|
-
script
|
146
|
-
else
|
147
|
-
nil
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
118
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Linguist
  # Extracts the interpreter named on a script's shebang ("#!") line.
  class Shebang
    # Public: Use shebang to detect language of the blob.
    #
    # blob - An object that quacks like a blob (must respond to #data).
    # _    - Unused second argument; accepted and ignored.
    #
    # Examples
    #
    #   Shebang.call(FileBlob.new("path/to/file"))
    #
    # Returns the result of Language.find_by_interpreter for the detected
    # interpreter name (nil is passed through when no interpreter is found).
    def self.call(blob, _ = nil)
      Language.find_by_interpreter interpreter(blob.data)
    end

    # Public: Get the interpreter from the shebang
    #
    # data - The String contents of the file.
    #
    # Examples
    #
    #   Shebang.interpreter("#!/usr/bin/env ruby\n")   # => "ruby"
    #   Shebang.interpreter("#!/usr/bin/python2.6\n")  # => "python2"
    #   Shebang.interpreter("#!\n")                    # => nil
    #
    # Returns a String or nil
    def self.interpreter(data)
      lines = data.lines
      first_line = lines.first
      return unless first_line

      match = /\A#! ?(.*)$/.match(first_line)
      return unless match

      tokens = match[1].split(' ')

      # A bare "#!" (optionally followed by whitespace) names no interpreter.
      return if tokens.empty?

      script = tokens.first.split('/').last

      # "#!/usr/bin/env ruby" -> the interpreter is env's first argument.
      script = tokens[1] if script == 'env'

      # If script has an invalid shebang ("#!/", "#!/usr/bin/env"), we get here
      return unless script

      # "python2.6" -> "python2". Anchored so only the trailing ".N" is
      # removed, and non-destructive so the token itself is left untouched.
      script = script.sub(/\.\d+\z/, '')

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh'
        lines.first(5).each do |line|
          exec_match = line.match(/exec (\w+).+\$0.+\$@/)
          next unless exec_match

          script = exec_match[1]
          break
        end
      end

      File.basename(script)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Linguist
  module Strategy
    # Detects language based on filename and/or extension
    class Filename
      # Public: Look up candidate languages for a blob by its file name.
      #
      # blob - An object responding to #name and #mode.
      # _    - Unused second argument; ignored.
      #
      # Returns the result of Language.find_by_filename.
      def self.call(blob, _)
        lookup_name = blob.name.to_s

        # Files without any extension whose mode has both bits of octal 05
        # set get a "magic" extension appended, so that they are looked up
        # alongside the other shebang-script entries.
        has_no_extension = FileBlob.new(lookup_name).extensions.empty?
        if has_no_extension && blob.mode && (blob.mode.to_i(8) & 05) == 05
          lookup_name = lookup_name + ".script!"
        end

        Language.find_by_filename(lookup_name)
      end
    end
  end
end
|
data/lib/linguist/vendor.yml
CHANGED
data/lib/linguist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.3
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GitHub
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: charlock_holmes
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.22.
|
61
|
+
version: 0.22.0b4
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.22.
|
68
|
+
version: 0.22.0b4
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: mocha
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -147,6 +147,8 @@ files:
|
|
147
147
|
- lib/linguist/repository.rb
|
148
148
|
- lib/linguist/samples.json
|
149
149
|
- lib/linguist/samples.rb
|
150
|
+
- lib/linguist/shebang.rb
|
151
|
+
- lib/linguist/strategy/filename.rb
|
150
152
|
- lib/linguist/tokenizer.rb
|
151
153
|
- lib/linguist/vendor.yml
|
152
154
|
- lib/linguist/version.rb
|