RubyGems - github-linguist - Versions diffs - 4.0.3 → 4.2.0 - Mend

github-linguist 4.0.3 → 4.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (15)

checksums.yaml +4 -4
data/lib/linguist.rb +1 -0
data/lib/linguist/classifier.rb +19 -0
data/lib/linguist/file_blob.rb +14 -8
data/lib/linguist/heuristics.rb +112 -110
data/lib/linguist/language.rb +39 -46
data/lib/linguist/languages.json +1 -1
data/lib/linguist/languages.yml +77 -6
data/lib/linguist/samples.json +3292 -454
data/lib/linguist/samples.rb +6 -39
data/lib/linguist/shebang.rb +44 -0
data/lib/linguist/strategy/filename.rb +20 -0
data/lib/linguist/vendor.yml +0 -3
data/lib/linguist/version.rb +1 -1
metadata +6 -4

data/lib/linguist/samples.rb CHANGED

@@ -6,6 +6,7 @@ end
 require 'linguist/md5'
 require 'linguist/classifier'
+require 'linguist/shebang'
 module Linguist
   # Model for accessing classifier training data.
@@ -52,14 +53,16 @@ module Linguist
               })
             end
           else
+            path = File.join(dirname, filename)
             if File.extname(filename) == ""
-              raise "#{File.join(dirname, filename)} is missing an extension, maybe it belongs in filenames/ subdir"
+              raise "#{path} is missing an extension, maybe it belongs in filenames/ subdir"
             end
             yield({
-              :path     => File.join(dirname, filename),
+              :path     => path,
               :language => category,
-              :interpreter => File.exist?(filename) ? Linguist.interpreter_from_shebang(File.read(filename)) : nil,
+              :interpreter => Shebang.interpreter(File.read(path)),
               :extname  => File.extname(filename)
             })
           end
@@ -112,40 +115,4 @@ module Linguist
       db
     end
   end
-  # Used to retrieve the interpreter from the shebang line of a file's
-  # data.
-  def self.interpreter_from_shebang(data)
-    lines = data.lines.to_a
-    if lines.any? && (match = lines[0].match(/(.+)\n?/)) && (bang = match[0]) =~ /^#!/
-      bang.sub!(/^#! /, '#!')
-      tokens = bang.split(' ')
-      pieces = tokens.first.split('/')
-      if pieces.size > 1
-        script = pieces.last
-      else
-        script = pieces.first.sub('#!', '')
-      end
-      script = script == 'env' ? tokens[1] : script
-      # "python2.6" -> "python"
-      if script =~ /((?:\d+\.?)+)/
-        script.sub! $1, ''
-      end
-      # Check for multiline shebang hacks that call `exec`
-      if script == 'sh' &&
-        lines[0...5].any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
-        script = $1
-      end
-      script
-    else
-      nil
-    end
-  end
 end

data/lib/linguist/shebang.rb ADDED

@@ -0,0 +1,44 @@
+module Linguist
+  class Shebang
+    # Public: Use shebang to detect language of the blob.
+    #
+    # blob               - An object that quacks like a blob.
+    #
+    # Examples
+    #
+    #   Shebang.call(FileBlob.new("path/to/file"))
+    #
+    # Returns an Array with one Language if the blob has a shebang with a valid
+    # interpreter, or empty if there is no shebang.
+    def self.call(blob, _ = nil)
+      Language.find_by_interpreter interpreter(blob.data)
+    end
+    # Public: Get the interpreter from the shebang
+    #
+    # Returns a String or nil
+    def self.interpreter(data)
+      lines = data.lines
+      return unless match = /^#! ?(.*)$/.match(lines.first)
+      tokens = match[1].split(' ')
+      script = tokens.first.split('/').last
+      script = tokens[1] if script == 'env'
+      # If script has an invalid shebang, we might get here
+      return unless script
+      # "python2.6" -> "python2"
+      script.sub! $1, '' if script =~ /(\.\d+)$/
+      # Check for multiline shebang hacks that call `exec`
+      if script == 'sh' &&
+        lines.first(5).any? { |l| l.match(/exec (\w+).+\$0.+\$@/) }
+        script = $1
+      end
+      File.basename(script)
+    end
+  end
+end

data/lib/linguist/strategy/filename.rb ADDED

@@ -0,0 +1,20 @@
+module Linguist
+  module Strategy
+    # Detects language based on filename and/or extension
+    class Filename
+      def self.call(blob, _)
+        name = blob.name.to_s
+        # A bit of an elegant hack. If the file is executable but extensionless,
+        # append a "magic" extension so it can be classified with other
+        # languages that have shebang scripts.
+        extensions = FileBlob.new(name).extensions
+        if extensions.empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
+          name += ".script!"
+        end
+        Language.find_by_filename(name)
+      end
+    end
+  end
+end

data/lib/linguist/vendor.yml CHANGED

@@ -232,9 +232,6 @@
 # .DS_Store's
 - .[Dd][Ss]_[Ss]tore$
-# Mercury --use-subdirs
-- Mercury/
 # R packages
 - ^vignettes/
 - ^inst/extdata/

data/lib/linguist/version.rb CHANGED

@@ -1,3 +1,3 @@
 module Linguist
-  VERSION = "4.0.3"
+  VERSION = "4.2.0"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: github-linguist
 version: !ruby/object:Gem::Version
-  version: 4.0.3
+  version: 4.2.0
 platform: ruby
 authors:
 - GitHub
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2014-11-21 00:00:00.000000000 Z
+date: 2014-12-02 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: charlock_holmes
@@ -58,14 +58,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.22.0b1
+        version: 0.22.0b4
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: 0.22.0b1
+        version: 0.22.0b4
 - !ruby/object:Gem::Dependency
   name: mocha
   requirement: !ruby/object:Gem::Requirement
@@ -147,6 +147,8 @@ files:
 - lib/linguist/repository.rb
 - lib/linguist/samples.json
 - lib/linguist/samples.rb
+- lib/linguist/shebang.rb
+- lib/linguist/strategy/filename.rb
 - lib/linguist/tokenizer.rb
 - lib/linguist/vendor.yml
 - lib/linguist/version.rb