RubyGems - github-linguist - Versions diffs - 2.12.1 → 3.0.0b0 - Mend

github-linguist 2.12.1 → 3.0.0b0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

checksums.yaml +4 -4
data/bin/linguist +3 -1
data/lib/linguist/blob_helper.rb +1 -9
data/lib/linguist/generated.rb +9 -1
data/lib/linguist/language.rb +5 -9
data/lib/linguist/languages.yml +43 -12
data/lib/linguist/lazy_blob.rb +37 -0
data/lib/linguist/repository.rb +97 -53
data/lib/linguist/samples.json +54290 -51669
data/lib/linguist/samples.rb +1 -1
data/lib/linguist/vendor.yml +5 -0
data/lib/linguist/version.rb +1 -1
metadata +25 -25
data/lib/linguist/languages.json +0 -1

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 40a66993dd15837627385790ababecef29990748
-  data.tar.gz: 8fbf44c3c01b92fd5da157e2c5bdd8ca5c6ad620
+  metadata.gz: 5cd8c69614aa4a6bf20c79737e27aaac60ace18c
+  data.tar.gz: 1e3a64bf355a0b72821c88f09b75dcbeffd3a614
 SHA512:
-  metadata.gz: 52c1e96004e19c3f7b1588218c2c4e4f522926cf944f96bf2131c015e232bce0b54111435bd5cccd484fada0d13a6fc6e25e2247325c4fb66e59fd74fa6a71a1
-  data.tar.gz: 817b2cb650828e548596dc8376f9d8cacfcf32977ae13a340bd7f7e0d635e8eb83b590edc3eff73d10ae96f4e2e1fab6b7de44970130612922264d13e0a17f43
+  metadata.gz: d30b9264ca2e44ae46391e86df8fe4f4835ccbe9f4688f1d65d8fd110f0b689370392a17fffa78ea7a89034e2e9acd09612ea4b1611af1f6fae168c90080424e
+  data.tar.gz: 2ccfafb26afd642f7146b7c892a499aab136f7b12b8c703ac2231512a768e8da3e991c0d5fbf9fcd204a896f95379aef799cff18e470d4a4912f10c33ec53ae7

data/bin/linguist CHANGED Viewed

@@ -5,6 +5,7 @@
 require 'linguist/file_blob'
 require 'linguist/repository'
+require 'rugged'
 path = ARGV[0] || Dir.pwd
@@ -18,7 +19,8 @@ ARGV.shift
 breakdown = true if ARGV[0] == "--breakdown"
 if File.directory?(path)
-  repo = Linguist::Repository.from_directory(path)
+  rugged = Rugged::Repository.new(path)
+  repo = Linguist::Repository.new(rugged, rugged.head.target_id)
   repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
     percentage = ((size / repo.size.to_f) * 100)
     percentage = sprintf '%.2f' % percentage

data/lib/linguist/blob_helper.rb CHANGED Viewed

@@ -313,15 +313,7 @@ module Linguist
     #
     # Returns a Language or nil if none is detected
     def language
-      return @language if defined? @language
-      if defined?(@data) && @data.is_a?(String)
-        data = @data
-      else
-        data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
-      end
-      @language = Language.detect(name.to_s, data, mode)
+      @language ||= Language.detect(self)
     end
     # Internal: Get the lexer of the blob.

data/lib/linguist/generated.rb CHANGED Viewed

@@ -63,7 +63,8 @@ module Linguist
         generated_jni_header? ||
         composer_lock? ||
         node_modules? ||
-        vcr_cassette?
+        vcr_cassette? ||
+        generated_by_zephir?
     end
     # Internal: Is the blob an XCode project file?
@@ -237,6 +238,13 @@ module Linguist
       !!name.match(/composer.lock/)
     end
+    # Internal: Is the blob generated by Zephir?
+    #
+    # Returns true or false.
+    def generated_by_zephir?
+      !!name.match(/.\.zep\.(?:c|h|php)$/)
+    end
     # Is the blob a VCR Cassette file?
     #
     # Returns true or false

data/lib/linguist/language.rb CHANGED Viewed

@@ -92,18 +92,14 @@ module Linguist
     # Public: Detects the Language of the blob.
     #
-    # name - String filename
-    # data - String blob data. A block also maybe passed in for lazy
-    #        loading. This behavior is deprecated and you should always
-    #        pass in a String.
-    # mode - Optional String mode (defaults to nil)
-    #
     # Returns Language or nil.
-    def self.detect(name, data, mode = nil)
+    def self.detect(blob)
+      name = blob.name.to_s
       # A bit of an elegant hack. If the file is executable but extensionless,
       # append a "magic" extension so it can be classified with other
       # languages that have shebang scripts.
-      if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
+      if File.extname(name).empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
         name += ".script!"
       end
@@ -114,7 +110,7 @@ module Linguist
       # extension at all, in the case of extensionless scripts), we need to continue
       # our detection work
       if possible_languages.length > 1
-        data = data.call() if data.respond_to?(:call)
+        data = blob.data
         possible_language_names = possible_languages.map(&:name)
         # Don't bother with emptiness

data/lib/linguist/languages.yml CHANGED Viewed

@@ -157,6 +157,7 @@ Assembly:
   - nasm
   extensions:
   - .asm
+  - .inc
 Augeas:
   type: programming
@@ -528,15 +529,6 @@ Dart:
   extensions:
   - .dart
-DCPU-16 ASM:
-  type: programming
-  lexer: dasm16
-  extensions:
-  - .dasm16
-  - .dasm
-  aliases:
-  - dasm16
 Diff:
   extensions:
   - .diff
@@ -940,7 +932,7 @@ Hy:
 IDL:
   type: programming
-  lexer: Text only
+  lexer: IDL
   color: "#e3592c"
   extensions:
   - .pro
@@ -959,7 +951,7 @@ Inno Setup:
 Idris:
   type: programming
-  lexer: Text only
+  lexer: Idris
   extensions:
   - .idr
   - .lidr
@@ -998,6 +990,13 @@ Ioke:
   extensions:
   - .ik
+Isabelle:
+  type: programming
+  lexer: Text only
+  color: "#fdcd00"
+  extensions:
+  - .thy
 J:
   type: programming
   lexer: Text only
@@ -1087,6 +1086,8 @@ JavaScript:
   - .pac
   - .sjs
   - .ssjs
+  - .xsjs
+  - .xsjslib
   filenames:
   - Jakefile
   interpreters:
@@ -1285,6 +1286,8 @@ Mathematica:
   type: programming
   extensions:
   - .mathematica
+  - .m
+  - .nb
   lexer: Text only
 Matlab:
@@ -1392,6 +1395,12 @@ Nimrod:
   - .nim
   - .nimrod
+Nix:
+  type: programming
+  lexer: Nix
+  extensions:
+  - .nix
 Nu:
   type: programming
   lexer: Scheme
@@ -1601,7 +1610,7 @@ Perl6:
 Pike:
   type: programming
   color: "#066ab2"
-  lexer: C
+  lexer: Pike
   extensions:
   - .pike
   - .pmod
@@ -1698,6 +1707,7 @@ Python:
   - .gyp
   - .lmi
   - .pyde
+  - .pyp
   - .pyt
   - .pyw
   - .wsgi
@@ -1723,6 +1733,12 @@ QML:
   extensions:
   - .qml
+QMake:
+  lexer: Text only
+  extensions:
+  - .pro
+  - .pri
 R:
   type: programming
   color: "#198ce7"
@@ -1862,12 +1878,15 @@ Ruby:
   interpreters:
   - ruby
   filenames:
+  - .pryrc
   - Appraisals
   - Berksfile
   - Buildfile
   - Gemfile
   - Gemfile.lock
   - Guardfile
+  - Jarfile
+  - Mavenfile
   - Podfile
   - Thorfile
   - Vagrantfile
@@ -2288,6 +2307,7 @@ XML:
   - .launch
   - .mxml
   - .nproj
+  - .nuspec
   - .osm
   - .plist
   - .pluginspec
@@ -2359,6 +2379,17 @@ XSLT:
   - .xslt
   - .xsl
+Xojo:
+  type: programming
+  lexer: VB.net
+  extensions:
+  - .xojo_code
+  - .xojo_menu
+  - .xojo_report
+  - .xojo_script
+  - .xojo_toolbar
+  - .xojo_window
 Xtend:
   type: programming
   extensions:

data/lib/linguist/lazy_blob.rb ADDED Viewed

@@ -0,0 +1,37 @@
+require 'linguist/blob_helper'
+require 'rugged'
+module Linguist
+  class LazyBlob
+    include BlobHelper
+    MAX_SIZE = 128 * 1024
+    attr_reader :repository
+    attr_reader :oid
+    attr_reader :name
+    attr_reader :mode
+    def initialize(repo, oid, name, mode = nil)
+      @repository = repo
+      @oid = oid
+      @name = name
+      @mode = mode
+    end
+    def data
+      load_blob!
+      @data
+    end
+    def size
+      load_blob!
+      @size
+    end
+    protected
+    def load_blob!
+      @data, @size = Rugged::Blob.to_buffer(repository, oid, MAX_SIZE) if @data.nil?
+    end
+  end
+end

data/lib/linguist/repository.rb CHANGED Viewed

@@ -1,4 +1,5 @@
-require 'linguist/file_blob'
+require 'linguist/lazy_blob'
+require 'rugged'
 module Linguist
   # A Repository is an abstraction of a Grit::Repo or a basic file
@@ -7,100 +8,143 @@ module Linguist
   # Its primary purpose is for gathering language statistics across
   # the entire project.
   class Repository
-    # Public: Initialize a new Repository from a File directory
+    attr_reader :repository
+    # Public: Create a new Repository based on the stats of
+    # an existing one
+    def self.incremental(repo, commit_oid, old_commit_oid, old_stats)
+      repo = self.new(repo, commit_oid)
+      repo.load_existing_stats(old_commit_oid, old_stats)
+      repo
+    end
+    # Public: Initialize a new Repository to be analyzed for language
+    # data
     #
-    # base_path - A path String
+    # repo - a Rugged::Repository object
+    # commit_oid - the sha1 of the commit that will be analyzed;
+    #              this is usually the master branch
     #
     # Returns a Repository
-    def self.from_directory(base_path)
-      new Dir["#{base_path}/**/*"].
-        select { |f| File.file?(f) }.
-        map { |path| FileBlob.new(path, base_path) }
+    def initialize(repo, commit_oid)
+      @repository = repo
+      @commit_oid = commit_oid
+      raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
     end
-    # Public: Initialize a new Repository
+    # Public: Load the results of a previous analysis on this repository
+    # to speed up the new scan.
     #
-    # enum - Enumerator that responds to `each` and
-    #        yields Blob objects
+    # The new analysis will be performed incrementally so as to only take
+    # into account the file changes since the last time the repository
+    # was scanned
     #
-    # Returns a Repository
-    def initialize(enum)
-      @enum = enum
-      @computed_stats = false
-      @language = @size = nil
-      @sizes = Hash.new { 0 }
-      @file_breakdown = Hash.new { |h,k| h[k] = Array.new }
+    # old_commit_oid - the sha1 of the commit that was previously analyzed
+    # old_stats - the result of the previous analysis, obtained by calling
+    #             Repository#cache on the old repository
+    #
+    # Returns nothing
+    def load_existing_stats(old_commit_oid, old_stats)
+      @old_commit_oid = old_commit_oid
+      @old_stats = old_stats
+      nil
     end
     # Public: Returns a breakdown of language stats.
     #
     # Examples
     #
-    #   # => { Language['Ruby'] => 46319,
-    #          Language['JavaScript'] => 258 }
+    #   # => { 'Ruby' => 46319,
+    #          'JavaScript' => 258 }
     #
-    # Returns a Hash of Language keys and Integer size values.
+    # Returns a Hash of language names and Integer size values.
     def languages
-      compute_stats
-      @sizes
+      @sizes ||= begin
+        sizes = Hash.new { 0 }
+        cache.each do |_, (language, size)|
+          sizes[language] += size
+        end
+        sizes
+      end
     end
     # Public: Get primary Language of repository.
     #
-    # Returns a Language
+    # Returns a language name
     def language
-      compute_stats
-      @language
+      @language ||= begin
+        primary = languages.max_by { |(_, size)| size }
+        primary && primary[0]
+      end
     end
     # Public: Get the total size of the repository.
     #
     # Returns a byte size Integer
     def size
-      compute_stats
-      @size
+      @size ||= languages.inject(0) { |s,(_,v)| s + v }
     end
     # Public: Return the language breakdown of this repository by file
+    #
+    # Returns a map of language names => [filenames...]
     def breakdown_by_file
-      compute_stats
-      @file_breakdown
+      @file_breakdown ||= begin
+        breakdown = Hash.new { |h,k| h[k] = Array.new }
+        cache.each do |filename, (language, _)|
+          breakdown[language] << filename
+        end
+        breakdown
+      end
     end
-    # Internal: Compute language breakdown for each blob in the Repository.
+    # Public: Return the cached results of the analysis
     #
-    # Returns nothing
-    def compute_stats
-      return if @computed_stats
+    # This is a per-file breakdown that can be passed to other instances
+    # of Linguist::Repository to perform incremental scans
+    #
+    # Returns a map of filename => [language, size]
+    def cache
+      @cache ||= begin
+        if @old_commit_oid == @commit_oid
+          @old_stats
+        else
+          compute_stats(@old_commit_oid, @commit_oid, @old_stats)
+        end
+      end
+    end
-      @enum.each do |blob|
-        # Skip files that are likely binary
-        next if blob.likely_binary?
+    protected
+    def compute_stats(old_commit_oid, commit_oid, cache = nil)
+      file_map = cache ? cache.dup : {}
+      old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
+      new_tree = Rugged::Commit.lookup(repository, commit_oid).tree
-        # Skip vendored or generated blobs
-        next if blob.vendored? || blob.generated? || blob.language.nil?
+      diff = Rugged::Tree.diff(repository, old_tree, new_tree)
-        # Only include programming languages and acceptable markup languages
-        if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
+      diff.each_delta do |delta|
+        old = delta.old_file[:path]
+        new = delta.new_file[:path]
-          # Build up the per-file breakdown stats
-          @file_breakdown[blob.language.group.name] << blob.name
+        file_map.delete(old)
+        next if delta.binary
-          @sizes[blob.language.group] += blob.size
-        end
-      end
+        if [:added, :modified].include? delta.status
+          mode = delta.new_file[:mode].to_s(8)
+          blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode)
-      # Compute total size
-      @size = @sizes.inject(0) { |s,(_,v)| s + v }
+          # Skip vendored or generated blobs
+          next if blob.vendored? || blob.generated? || blob.language.nil?
-      # Get primary language
-      if primary = @sizes.max_by { |(_, size)| size }
-        @language = primary[0]
+          # Only include programming languages and acceptable markup languages
+          if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
+            file_map[new] = [blob.language.group.name, blob.size]
+          end
+        end
       end
-      @computed_stats = true
-      nil
+      file_map
     end
   end
 end