github-linguist 4.0.3 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,6 +6,7 @@ end
6
6
 
7
7
  require 'linguist/md5'
8
8
  require 'linguist/classifier'
9
+ require 'linguist/shebang'
9
10
 
10
11
  module Linguist
11
12
  # Model for accessing classifier training data.
@@ -52,14 +53,16 @@ module Linguist
52
53
  })
53
54
  end
54
55
  else
56
+ path = File.join(dirname, filename)
57
+
55
58
  if File.extname(filename) == ""
56
- raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
59
+ raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
57
60
  end
58
61
 
59
62
  yield({
60
- :path => File.join(dirname, filename),
63
+ :path => path,
61
64
  :language => category,
62
- :interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
65
+ :interpreter => Shebang.interpreter(File.read(path)),
63
66
  :extname => File.extname(filename)
64
67
  })
65
68
  end
@@ -112,40 +115,4 @@ module Linguist
112
115
  db
113
116
  end
114
117
  end
115
-
116
- # Used to retrieve the interpreter from the shebang line of a file's
117
- # data.
118
- def self.interpreter_from_shebang(data)
119
- lines = data.lines.to_a
120
-
121
- if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
122
- bang.sub!(/^#! /, '#!')
123
- tokens = bang.split(' ')
124
- pieces = tokens.first.split('/')
125
-
126
- if pieces.size > 1
127
- script = pieces.last
128
- else
129
- script = pieces.first.sub('#!', '')
130
- end
131
-
132
- script = script == 'env' ? tokens[1] : script
133
-
134
- # "python2.6" -> "python"
135
- if script =~ /((?:\d+\.?)+)/
136
- script.sub! $1, ''
137
- end
138
-
139
- # Check for multiline shebang hacks that call `exec`
140
- if script == 'sh' &&
141
- lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
142
- script = $1
143
- end
144
-
145
- script
146
- else
147
- nil
148
- end
149
- end
150
-
151
118
  end
@@ -0,0 +1,44 @@
module Linguist
  class Shebang
    # Matches a first line that starts with "#!" (optionally followed by one
    # space); capture 1 is the remainder of that line.
    SHEBANG_LINE = /\A#! ?(.*)$/

    # Matches the multiline shebang hack where an `sh` script immediately
    # re-executes itself under another interpreter; capture 1 is that
    # interpreter's name.
    EXEC_HACK = /exec (\w+).+\$0.+\$@/

    # Only this many leading lines are ever inspected.
    MAX_LINES = 5

    # Public: Use shebang to detect language of the blob.
    #
    # blob - An object that quacks like a blob (must respond to `data`).
    # _    - Ignored.
    #
    # Examples
    #
    #   Shebang.call(FileBlob.new("path/to/file"))
    #
    # Returns the result of Language.find_by_interpreter for the interpreter
    # named in the blob's shebang (nil is passed when there is no usable
    # shebang).
    def self.call(blob, _ = nil)
      Language.find_by_interpreter interpreter(blob.data)
    end

    # Public: Get the interpreter from the shebang
    #
    # data - The String contents of a file (nil is tolerated).
    #
    # Examples
    #
    #   Shebang.interpreter("#!/usr/bin/env python2.6\n")
    #   # => "python2"
    #
    # Returns a String or nil
    def self.interpreter(data)
      return unless data

      # Only the leading lines matter, so avoid splitting the whole blob.
      lines = data.each_line.first(MAX_LINES)
      first_line = lines.first
      return unless first_line && (match = SHEBANG_LINE.match(first_line))

      tokens = match[1].split(' ')
      # A bare "#!" carries no interpreter at all.
      return if tokens.empty?

      script = tokens.first.split('/').last
      script = tokens[1] if script == 'env'

      # If script has an invalid shebang ("#!/" or "#!/usr/bin/env" with no
      # argument), we might get here
      return unless script

      # "python2.6" -> "python2". Anchored so that only the trailing minor
      # version is dropped, and non-mutating so the token is left untouched.
      script = script.sub(/\.\d+\z/, '')

      # Check for multiline shebang hacks that call `exec`
      if script == 'sh'
        exec_line = lines.find { |line| line =~ EXEC_HACK }
        script = exec_line[EXEC_HACK, 1] if exec_line
      end

      File.basename(script)
    end
  end
end
@@ -0,0 +1,20 @@
module Linguist
  module Strategy
    # Detects language based on filename and/or extension
    class Filename
      # Public: Use the filename to detect the language of the blob.
      #
      # blob - An object that quacks like a blob (must respond to `name` and
      #        `mode`).
      # _    - Ignored. Optional, matching the signature of Shebang.call, so
      #        the strategy can be invoked with just a blob.
      #
      # Returns whatever Language.find_by_filename returns for the blob's
      # name (with ".script!" appended in the case described below).
      def self.call(blob, _ = nil)
        name = blob.name.to_s

        # A bit of an elegant hack. If the file is executable but extensionless,
        # append a "magic" extension so it can be classified with other
        # languages that have shebang scripts.
        #
        # The mode is parsed as an octal string and must have both the read
        # (04) and execute (01) bits set in its lowest octal digit.
        # NOTE(review): presumably mode is a String such as "100755" — confirm
        # against the blob implementations.
        extensions = FileBlob.new(name).extensions
        if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
          name += ".script!"
        end

        Language.find_by_filename(name)
      end
    end
  end
end
@@ -232,9 +232,6 @@
232
232
  # .DS_Store's
233
233
  - .[Dd][Ss]_[Ss]tore$
234
234
 
235
- # Mercury --use-subdirs
236
- - Mercury/
237
-
238
235
  # R packages
239
236
  - ^vignettes/
240
237
  - ^inst/extdata/
@@ -1,3 +1,3 @@
1
1
  module Linguist
2
- VERSION = "4.0.3"
2
+ VERSION = "4.2.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: github-linguist
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.3
4
+ version: 4.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - GitHub
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-21 00:00:00.000000000 Z
11
+ date: 2014-12-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: charlock_holmes
@@ -58,14 +58,14 @@ dependencies:
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 0.22.0b1
61
+ version: 0.22.0b4
62
62
  type: :runtime
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 0.22.0b1
68
+ version: 0.22.0b4
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: mocha
71
71
  requirement: !ruby/object:Gem::Requirement
@@ -147,6 +147,8 @@ files:
147
147
  - lib/linguist/repository.rb
148
148
  - lib/linguist/samples.json
149
149
  - lib/linguist/samples.rb
150
+ - lib/linguist/shebang.rb
151
+ - lib/linguist/strategy/filename.rb
150
152
  - lib/linguist/tokenizer.rb
151
153
  - lib/linguist/vendor.yml
152
154
  - lib/linguist/version.rb