github-linguist 4.0.3 → 4.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/linguist.rb +1 -0
- data/lib/linguist/classifier.rb +19 -0
- data/lib/linguist/file_blob.rb +14 -8
- data/lib/linguist/heuristics.rb +112 -110
- data/lib/linguist/language.rb +39 -46
- data/lib/linguist/languages.json +1 -1
- data/lib/linguist/languages.yml +77 -6
- data/lib/linguist/samples.json +3292 -454
- data/lib/linguist/samples.rb +6 -39
- data/lib/linguist/shebang.rb +44 -0
- data/lib/linguist/strategy/filename.rb +20 -0
- data/lib/linguist/vendor.yml +0 -3
- data/lib/linguist/version.rb +1 -1
- metadata +6 -4
data/lib/linguist/samples.rb
CHANGED
@@ -6,6 +6,7 @@ end
|
|
6
6
|
|
7
7
|
require 'linguist/md5'
|
8
8
|
require 'linguist/classifier'
|
9
|
+
require 'linguist/shebang'
|
9
10
|
|
10
11
|
module Linguist
|
11
12
|
# Model for accessing classifier training data.
|
@@ -52,14 +53,16 @@ module Linguist
|
|
52
53
|
})
|
53
54
|
end
|
54
55
|
else
|
56
|
+
path = File.join(dirname, filename)
|
57
|
+
|
55
58
|
if File.extname(filename) == ""
|
56
|
-
raise "#{
|
59
|
+
raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
|
57
60
|
end
|
58
61
|
|
59
62
|
yield({
|
60
|
-
:path =>
|
63
|
+
:path => path,
|
61
64
|
:language => category,
|
62
|
-
:interpreter =>
|
65
|
+
:interpreter => Shebang.interpreter(File.read(path)),
|
63
66
|
:extname => File.extname(filename)
|
64
67
|
})
|
65
68
|
end
|
@@ -112,40 +115,4 @@ module Linguist
|
|
112
115
|
db
|
113
116
|
end
|
114
117
|
end
|
115
|
-
|
116
|
-
# Used to retrieve the interpreter from the shebang line of a file's
|
117
|
-
# data.
|
118
|
-
def self.interpreter_from_shebang(data)
|
119
|
-
lines = data.lines.to_a
|
120
|
-
|
121
|
-
if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
|
122
|
-
bang.sub!(/^#! /, '#!')
|
123
|
-
tokens = bang.split(' ')
|
124
|
-
pieces = tokens.first.split('/')
|
125
|
-
|
126
|
-
if pieces.size > 1
|
127
|
-
script = pieces.last
|
128
|
-
else
|
129
|
-
script = pieces.first.sub('#!', '')
|
130
|
-
end
|
131
|
-
|
132
|
-
script = script == 'env' ? tokens[1] : script
|
133
|
-
|
134
|
-
# "python2.6" -> "python"
|
135
|
-
if script =~ /((?:\d+\.?)+)/
|
136
|
-
script.sub! $1, ''
|
137
|
-
end
|
138
|
-
|
139
|
-
# Check for multiline shebang hacks that call `exec`
|
140
|
-
if script == 'sh' &&
|
141
|
-
lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
|
142
|
-
script = $1
|
143
|
-
end
|
144
|
-
|
145
|
-
script
|
146
|
-
else
|
147
|
-
nil
|
148
|
-
end
|
149
|
-
end
|
150
|
-
|
151
118
|
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Linguist
  # Extracts the interpreter named on a file's shebang ("#!") line and looks
  # up the matching language.
  class Shebang
    # Public: Use shebang to detect language of the blob.
    #
    # blob - An object that quacks like a blob; only #data is read here.
    # _    - Unused; accepted (defaulting to nil) and ignored.
    #
    # Examples
    #
    #   Shebang.call(FileBlob.new("path/to/file"))
    #
    # Returns the result of Language.find_by_interpreter for the detected
    # interpreter: an Array with one Language if the blob has a shebang with a
    # valid interpreter, or empty if there is no shebang.
    def self.call(blob, _ = nil)
      Language.find_by_interpreter interpreter(blob.data)
    end

    # Public: Get the interpreter from the shebang
    #
    # data - The String contents of a file.
    #
    # Examples
    #
    #   Shebang.interpreter("#!/usr/bin/env python2.6\n")
    #   # => "python2"
    #
    #   Shebang.interpreter("#!\n")
    #   # => nil
    #
    # Returns a String or nil
    def self.interpreter(data)
      # Only the first five lines are ever inspected, so avoid splitting the
      # whole file into lines.
      head = data.each_line.first(5)
      return unless (shebang = /^#! ?(.*)$/.match(head.first))

      tokens = shebang[1].split(' ')

      # A bare "#!" (nothing but whitespace after it) names no interpreter.
      return if tokens.empty?

      script = tokens.first.split('/').last

      # "#!/usr/bin/env ruby" -> the interpreter is env's first argument.
      script = tokens[1] if script == 'env'

      # If script has an invalid shebang, we might get here
      return unless script

      # "python2.6" -> "python2" (only the trailing ".<digits>" is dropped)
      script = script.sub(/\.\d+$/, '')

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh'
        head.each do |line|
          next unless (exec_call = /exec (\w+).+\$0.+\$@/.match(line))

          script = exec_call[1]
          break
        end
      end

      File.basename(script)
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Linguist
  module Strategy
    # Language detection strategy driven by the blob's filename and/or
    # extension.
    class Filename
      # Looks up languages for a blob by its name.
      #
      # blob - An object responding to #name and #mode. The mode, when
      #        present, is parsed as an octal number.
      # _    - Ignored.
      #
      # Returns whatever Language.find_by_filename returns for the (possibly
      # suffixed) name.
      def self.call(blob, _)
        filename = blob.name.to_s

        # Extensionless executables get a "magic" ".script!" suffix so they
        # can be classified together with other shebang scripts.
        if FileBlob.new(filename).extensions.empty?
          mode = blob.mode
          # Octal 05: both the read (4) and execute (1) bits of the lowest
          # permission triad must be set.
          filename = "#{filename}.script!" if mode && (mode.to_i(8) & 05) == 05
        end

        Language.find_by_filename(filename)
      end
    end
  end
end
|
data/lib/linguist/vendor.yml
CHANGED
data/lib/linguist/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.0.3
|
4
|
+
version: 4.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GitHub
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-12-02 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: charlock_holmes
|
@@ -58,14 +58,14 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: 0.22.
|
61
|
+
version: 0.22.0b4
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: 0.22.
|
68
|
+
version: 0.22.0b4
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: mocha
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
@@ -147,6 +147,8 @@ files:
|
|
147
147
|
- lib/linguist/repository.rb
|
148
148
|
- lib/linguist/samples.json
|
149
149
|
- lib/linguist/samples.rb
|
150
|
+
- lib/linguist/shebang.rb
|
151
|
+
- lib/linguist/strategy/filename.rb
|
150
152
|
- lib/linguist/tokenizer.rb
|
151
153
|
- lib/linguist/vendor.yml
|
152
154
|
- lib/linguist/version.rb
|