RubyGems - github-linguist - Versions diffs - 4.5.4 → 4.5.5 - Mend

github-linguist 4.5.4 → 4.5.5

This diff shows the contents of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (14)

checksums.yaml +4 -4
data/lib/linguist/generated.rb +26 -0
data/lib/linguist/heuristics.rb +10 -8
data/lib/linguist/language.rb +1 -1
data/lib/linguist/languages.json +1 -1
data/lib/linguist/languages.yml +176 -71
data/lib/linguist/samples.json +9299 -1454
data/lib/linguist/samples.rb +2 -5
data/lib/linguist/shebang.rb +13 -7
data/lib/linguist/strategy/filename.rb +1 -11
data/lib/linguist/strategy/modeline.rb +1 -1
data/lib/linguist/tokenizer.rb +6 -0
data/lib/linguist/version.rb +1 -1
metadata +16 -2

data/lib/linguist/samples.rb CHANGED Viewed

@@ -50,16 +50,13 @@ module Linguist
             end
           else
             path = File.join(dirname, filename)
-            if File.extname(filename) == ""
-              raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
-            end
+            extname = File.extname(filename)
             yield({
               :path     => path,
               :language => category,
               :interpreter => Shebang.interpreter(File.read(path)),
-              :extname  => File.extname(filename)
+              :extname  => extname.empty? ? nil : extname
             })
           end
         end

data/lib/linguist/shebang.rb CHANGED Viewed

@@ -23,17 +23,20 @@ module Linguist
       # First line must start with #!
       return unless shebang && shebang.start_with?("#!")
-      # Get the parts of the shebang without the #!
-      tokens = shebang.sub(/^#!\s*/, '').strip.split(' ')
+      s = StringScanner.new(shebang)
       # There was nothing after the #!
-      return if tokens.empty?
+      return unless path = s.scan(/^#!\s*\S+/)
-      # Get the name of the interpreter
-      script = File.basename(tokens.first)
+      # Keep going
+      script = path.split('/').last
-      # Get next argument if interpreter was /usr/bin/env
-      script = tokens[1] if script == 'env'
+      # if /usr/bin/env type shebang then walk the string
+      if script == 'env'
+        s.scan(/\s+/)
+        s.scan(/.*=[^\s]+\s+/) # skip over variable arguments e.g. foo=bar
+        script = s.scan(/\S+/)
+      end
       # Interpreter was /usr/bin/env with no arguments
       return unless script
@@ -41,6 +44,9 @@ module Linguist
       # "python2.6" -> "python2"
       script.sub! /(\.\d+)$/, ''
+      # #! perl -> perl
+      script.sub! /^#!\s*/, ''
       # Check for multiline shebang hacks that call `exec`
       if script == 'sh' &&
         data.lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }

data/lib/linguist/strategy/filename.rb CHANGED Viewed

@@ -3,17 +3,7 @@ module Linguist
     # Detects language based on filename and/or extension
     class Filename
       def self.call(blob, _)
-        name = blob.name.to_s
-        # A bit of an elegant hack. If the file is executable but extensionless,
-        # append a "magic" extension so it can be classified with other
-        # languages that have shebang scripts.
-        extensions = FileBlob.new(name).extensions
-        if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
-          name += ".script!"
-        end
-        Language.find_by_filename(name)
+        Language.find_by_filename(blob.name.to_s)
       end
     end
   end

data/lib/linguist/strategy/modeline.rb CHANGED Viewed

@@ -2,7 +2,7 @@ module Linguist
   module Strategy
     class Modeline
       EmacsModeline = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
-      VimModeline = /\/\*\s*vim:\s*set\s*(?:ft|filetype)=(\w+):\s*\*\//i
+      VimModeline = /vim:\s*set\s*(?:ft|filetype)=(\w+):/i
       # Public: Detects language based on Vim and Emacs modelines
       #

data/lib/linguist/tokenizer.rb CHANGED Viewed

@@ -22,8 +22,10 @@ module Linguist
     # Start state on token, ignore anything till the next newline
     SINGLE_LINE_COMMENTS = [
       '//', # C
+      '--', # Ada, Haskell, AppleScript
       '#',  # Ruby
       '%',  # Tex
+      '"',  # Vim
     ]
     # Start state on opening token, ignore anything until the closing
@@ -130,6 +132,9 @@ module Linguist
     #   extract_shebang("#!/usr/bin/env node")
     #   # => "node"
     #
+    #   extract_shebang("#!/usr/bin/env A=B foo=bar awk -f")
+    #   # => "awk"
+    #
     # Returns String token or nil it couldn't be parsed.
     def extract_shebang(data)
       s = StringScanner.new(data)
@@ -138,6 +143,7 @@ module Linguist
         script = path.split('/').last
         if script == 'env'
           s.scan(/\s+/)
+          s.scan(/.*=[^\s]+\s+/)
           script = s.scan(/\S+/)
         end
         script = script[/[^\d]+/, 0] if script

data/lib/linguist/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Linguist
-  VERSION = "4.5.4"
+  VERSION = "4.5.5"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: github-linguist
 version: !ruby/object:Gem::Version
-  version: 4.5.4
+  version: 4.5.5
 platform: ruby
 authors:
 - GitHub
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-04-01 00:00:00.000000000 Z
+date: 2015-05-18 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: charlock_holmes
@@ -136,6 +136,20 @@ dependencies:
     - - ">="
       - !ruby/object:Gem::Version
         version: '0'
+- !ruby/object:Gem::Dependency
+  name: color-proximity
+  requirement: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
+  type: :development
+  prerelease: false
+  version_requirements: !ruby/object:Gem::Requirement
+    requirements:
+    - - "~>"
+      - !ruby/object:Gem::Version
+        version: 0.2.1
 description: We use this library at GitHub to detect blob languages, highlight code,
   ignore binary files, suppress generated files in diffs, and generate language breakdown
   graphs.