github-linguist 4.0.3 → 4.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -6,6 +6,7 @@ end
6
6
 
7
7
  require 'linguist/md5'
8
8
  require 'linguist/classifier'
9
+ require 'linguist/shebang'
9
10
 
10
11
  module Linguist
11
12
  # Model for accessing classifier training data.
@@ -52,14 +53,16 @@ module Linguist
52
53
  })
53
54
  end
54
55
  else
56
+ path = File.join(dirname, filename)
57
+
55
58
  if File.extname(filename) == ""
56
- raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
59
+ raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
57
60
  end
58
61
 
59
62
  yield({
60
- :path => File.join(dirname, filename),
63
+ :path => path,
61
64
  :language => category,
62
- :interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
65
+ :interpreter => Shebang.interpreter(File.read(path)),
63
66
  :extname => File.extname(filename)
64
67
  })
65
68
  end
@@ -112,40 +115,4 @@ module Linguist
112
115
  db
113
116
  end
114
117
  end
115
-
116
- # Used to retrieve the interpreter from the shebang line of a file's
117
- # data.
118
- def self.interpreter_from_shebang(data)
119
- lines = data.lines.to_a
120
-
121
- if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
122
- bang.sub!(/^#! /, '#!')
123
- tokens = bang.split(' ')
124
- pieces = tokens.first.split('/')
125
-
126
- if pieces.size > 1
127
- script = pieces.last
128
- else
129
- script = pieces.first.sub('#!', '')
130
- end
131
-
132
- script = script == 'env' ? tokens[1] : script
133
-
134
- # "python2.6" -> "python"
135
- if script =~ /((?:\d+\.?)+)/
136
- script.sub! $1, ''
137
- end
138
-
139
- # Check for multiline shebang hacks that call `exec`
140
- if script == 'sh' &&
141
- lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
142
- script = $1
143
- end
144
-
145
- script
146
- else
147
- nil
148
- end
149
- end
150
-
151
118
  end
@@ -0,0 +1,44 @@
1
module Linguist
  # Detects a script's interpreter from its shebang ("#!") line.
  class Shebang
    # Matches a shebang line, tolerating a single space after "#!".
    # Capture 1 is the remainder of the line (interpreter path and arguments).
    SHEBANG_LINE = /\A#! ?(.*)$/

    # Matches the multiline `sh` trick that re-executes the file under another
    # interpreter, e.g. `exec scala "$0" "$@"`. Capture 1 is that interpreter.
    EXEC_HACK = /exec (\w+).+\$0.+\$@/

    # Number of leading lines inspected when looking for the exec hack.
    EXEC_HACK_WINDOW = 5

    # Public: Use shebang to detect language of the blob.
    #
    # blob - An object that quacks like a blob (must respond to #data).
    # _    - Ignored; accepted so the method can be called with two arguments.
    #
    # Examples
    #
    #   Shebang.call(FileBlob.new("path/to/file"))
    #
    # Returns the result of Language.find_by_interpreter for the detected
    # interpreter (nil is passed through when no interpreter is found).
    def self.call(blob, _ = nil)
      Language.find_by_interpreter interpreter(blob.data)
    end

    # Public: Get the interpreter from the shebang.
    #
    # data - The String contents of a file (nil is tolerated).
    #
    # Examples
    #
    #   Shebang.interpreter("#!/usr/bin/env ruby\n")  # => "ruby"
    #   Shebang.interpreter("#!/usr/bin/python2.6\n") # => "python2"
    #   Shebang.interpreter("#!\n")                   # => nil
    #
    # Returns a String or nil.
    def self.interpreter(data)
      return if data.nil? || data.empty?

      # Only the first few lines are ever needed, so stop reading early
      # instead of splitting the whole file into lines.
      head = data.each_line.first(EXEC_HACK_WINDOW)
      return unless (shebang = SHEBANG_LINE.match(head.first))

      tokens = shebang[1].split(' ')
      # A bare "#!" (or "#!" followed only by whitespace) names no interpreter.
      return if tokens.empty?

      script = tokens.first.split('/').last
      script = tokens[1] if script == 'env'

      # "#!/" or "#!/usr/bin/env" with no argument leaves nothing to report.
      return unless script

      # "python2.6" -> "python2". The pattern is anchored so only the trailing
      # minor version is removed, never an earlier identical substring.
      script = script.sub(/\.\d+\z/, '')

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh'
        hack_line = head.find { |line| line =~ EXEC_HACK }
        script = hack_line[EXEC_HACK, 1] if hack_line
      end

      File.basename(script)
    end
  end
end
@@ -0,0 +1,20 @@
1
module Linguist
  module Strategy
    # Strategy that looks a blob's language up by its file name and/or
    # extension.
    class Filename
      # Public: Find languages for a blob using its name.
      #
      # blob - An object that responds to #name and #mode.
      # _    - Ignored second argument.
      #
      # Returns the result of Language.find_by_filename.
      def self.call(blob, _)
        filename = blob.name.to_s

        # Extensionless files whose octal mode has both bits of 05 set are
        # given a synthetic ".script!" extension, so they are looked up
        # alongside other shebang scripts.
        if FileBlob.new(filename).extensions.empty?
          mode = blob.mode
          filename = "#{filename}.script!" if mode && (mode.to_i(8) & 05) == 05
        end

        Language.find_by_filename(filename)
      end
    end
  end
end
@@ -232,9 +232,6 @@
232
232
  # .DS_Store's
233
233
  - .[Dd][Ss]_[Ss]tore$
234
234
 
235
- # Mercury --use-subdirs
236
- - Mercury/
237
-
238
235
  # R packages
239
236
  - ^vignettes/
240
237
  - ^inst/extdata/
@@ -1,3 +1,3 @@
1
1
  module Linguist
2
- VERSION = "4.0.3"
2
+ VERSION = "4.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: github-linguist
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.3
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GitHub
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-21 00:00:00.000000000 Z
11
+ date: 2014-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: charlock_holmes
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 0.22.0b1
61
+ version: 0.22.0b4
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 0.22.0b1
68
+ version: 0.22.0b4
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: mocha
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -147,6 +147,8 @@ files:
147
147
  - lib/linguist/repository.rb
148
148
  - lib/linguist/samples.json
149
149
  - lib/linguist/samples.rb
150
+ - lib/linguist/shebang.rb
151
+ - lib/linguist/strategy/filename.rb
150
152
  - lib/linguist/tokenizer.rb
151
153
  - lib/linguist/vendor.yml
152
154
  - lib/linguist/version.rb