github-linguist 2.10.8 → 2.10.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/bin/linguist +17 -1
 - data/lib/linguist/classifier.rb +3 -3
 - data/lib/linguist/heuristics.rb +11 -3
 - data/lib/linguist/languages.yml +3 -0
 - data/lib/linguist/repository.rb +11 -0
 - data/lib/linguist/vendor.yml +6 -0
 - metadata +7 -4
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: cbb0e1dda522368ab523932eb54c49cd133e80ba
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: e76b8b1147c9f5ea2ec415f7178032b84288db2c
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: a7fe49b518c11ad84c9d1c83ab3288c15785189538b716e4cfa33634e6054b5111b1b2d5b3588f7630a2509619a086d910b1ff4b8f5e1e609af9770c46a0abbe
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: aa1d301167462211d21e40db3747abfd64510f7555c47d03bc56f499feac13476f9a01a13364545b445a2ec8aa64ae89792bec862c48ab276c03e4d31983d32a
         
     | 
    
        data/bin/linguist
    CHANGED
    
    | 
         @@ -1,13 +1,22 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            #!/usr/bin/env ruby
         
     | 
| 
       2 
2 
     | 
    
         | 
| 
       3 
3 
     | 
    
         
             
            # linguist — detect language type for a file, or, given a directory, determine language breakdown
         
     | 
| 
       4 
     | 
    
         
            -
            #     usage: linguist <path>
         
     | 
| 
      
 4 
     | 
    
         
            +
            #     usage: linguist <path> [<--breakdown>]
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            require 'linguist/file_blob'
         
     | 
| 
       7 
7 
     | 
    
         
             
            require 'linguist/repository'
         
     | 
| 
       8 
8 
     | 
    
         | 
| 
       9 
9 
     | 
    
         
             
            path = ARGV[0] || Dir.pwd
         
     | 
| 
       10 
10 
     | 
    
         | 
| 
      
 11 
     | 
    
         
            +
            # special case if not given a directory but still given the --breakdown option
         
     | 
| 
      
 12 
     | 
    
         
            +
            if path == "--breakdown"
         
     | 
| 
      
 13 
     | 
    
         
            +
              path = Dir.pwd
         
     | 
| 
      
 14 
     | 
    
         
            +
              breakdown = true
         
     | 
| 
      
 15 
     | 
    
         
            +
            end
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            ARGV.shift
         
     | 
| 
      
 18 
     | 
    
         
            +
            breakdown = true if ARGV[0] == "--breakdown"
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
       11 
20 
     | 
    
         
             
            if File.directory?(path)
         
     | 
| 
       12 
21 
     | 
    
         
             
              repo = Linguist::Repository.from_directory(path)
         
     | 
| 
       13 
22 
     | 
    
         
             
              repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
         
     | 
| 
         @@ -15,6 +24,13 @@ if File.directory?(path) 
     | 
|
| 
       15 
24 
     | 
    
         
             
                percentage = sprintf '%.2f' % percentage
         
     | 
| 
       16 
25 
     | 
    
         
             
                puts "%-7s %s" % ["#{percentage}%", language]
         
     | 
| 
       17 
26 
     | 
    
         
             
              end
         
     | 
| 
      
 27 
     | 
    
         
            +
              if breakdown
         
     | 
| 
      
 28 
     | 
    
         
            +
                puts
         
     | 
| 
      
 29 
     | 
    
         
            +
                file_breakdown = repo.breakdown_by_file
         
     | 
| 
      
 30 
     | 
    
         
            +
                file_breakdown.each do |lang, files|
         
     | 
| 
      
 31 
     | 
    
         
            +
                  puts "#{lang}: #{files}"
         
     | 
| 
      
 32 
     | 
    
         
            +
                end
         
     | 
| 
      
 33 
     | 
    
         
            +
              end
         
     | 
| 
       18 
34 
     | 
    
         
             
            elsif File.file?(path)
         
     | 
| 
       19 
35 
     | 
    
         
             
              blob = Linguist::FileBlob.new(path, Dir.pwd)
         
     | 
| 
       20 
36 
     | 
    
         
             
              type = if blob.text?
         
     | 
    
        data/lib/linguist/classifier.rb
    CHANGED
    
    | 
         @@ -83,8 +83,8 @@ module Linguist 
     | 
|
| 
       83 
83 
     | 
    
         
             
                  debug_dump_all_tokens(tokens, languages) if verbosity >= 2
         
     | 
| 
       84 
84 
     | 
    
         | 
| 
       85 
85 
     | 
    
         
             
                  languages.each do |language|
         
     | 
| 
       86 
     | 
    
         
            -
                    debug_dump_probabilities(tokens, language) if verbosity >= 1
         
     | 
| 
       87 
86 
     | 
    
         
             
                    scores[language] = tokens_probability(tokens, language) + language_probability(language)
         
     | 
| 
      
 87 
     | 
    
         
            +
                    debug_dump_probabilities(tokens, language, scores[language]) if verbosity >= 1
         
     | 
| 
       88 
88 
     | 
    
         
             
                  end
         
     | 
| 
       89 
89 
     | 
    
         | 
| 
       90 
90 
     | 
    
         
             
                  scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [score[0], score[1]] }
         
     | 
| 
         @@ -130,9 +130,9 @@ module Linguist 
     | 
|
| 
       130 
130 
     | 
    
         
             
                    @verbosity ||= (ENV['LINGUIST_DEBUG'] || 0).to_i
         
     | 
| 
       131 
131 
     | 
    
         
             
                  end
         
     | 
| 
       132 
132 
     | 
    
         | 
| 
       133 
     | 
    
         
            -
                  def debug_dump_probabilities(tokens, language)
         
     | 
| 
      
 133 
     | 
    
         
            +
                  def debug_dump_probabilities(tokens, language, score)
         
     | 
| 
       134 
134 
     | 
    
         
             
                    printf("%10s = %10.3f + %7.3f = %10.3f\n",
         
     | 
| 
       135 
     | 
    
         
            -
                        language, tokens_probability(tokens, language), language_probability(language),  
     | 
| 
      
 135 
     | 
    
         
            +
                        language, tokens_probability(tokens, language), language_probability(language), score)
         
     | 
| 
       136 
136 
     | 
    
         
             
                  end
         
     | 
| 
       137 
137 
     | 
    
         | 
| 
       138 
138 
     | 
    
         
             
                  # Internal: show a table of probabilities for each <token,language> pair.
         
     | 
    
        data/lib/linguist/heuristics.rb
    CHANGED
    
    | 
         @@ -14,7 +14,10 @@ module Linguist 
     | 
|
| 
       14 
14 
     | 
    
         
             
                def self.find_by_heuristics(data, languages)
         
     | 
| 
       15 
15 
     | 
    
         
             
                  if active?
         
     | 
| 
       16 
16 
     | 
    
         
             
                    if languages.all? { |l| ["Objective-C", "C++"].include?(l) }
         
     | 
| 
       17 
     | 
    
         
            -
                       
     | 
| 
      
 17 
     | 
    
         
            +
                      disambiguate_c(data, languages)
         
     | 
| 
      
 18 
     | 
    
         
            +
                    end
         
     | 
| 
      
 19 
     | 
    
         
            +
                    if languages.all? { |l| ["Perl", "Prolog"].include?(l) }
         
     | 
| 
      
 20 
     | 
    
         
            +
                      disambiguate_pl(data, languages)
         
     | 
| 
       18 
21 
     | 
    
         
             
                    end
         
     | 
| 
       19 
22 
     | 
    
         
             
                  end
         
     | 
| 
       20 
23 
     | 
    
         
             
                end
         
     | 
| 
         @@ -23,14 +26,19 @@ module Linguist 
     | 
|
| 
       23 
26 
     | 
    
         
             
                # We want to shortcut look for Objective-C _and_ now C++ too!
         
     | 
| 
       24 
27 
     | 
    
         
             
                #
         
     | 
| 
       25 
28 
     | 
    
         
             
                # Returns an array of Languages or []
         
     | 
| 
       26 
     | 
    
         
            -
                 
     | 
| 
       27 
     | 
    
         
            -
                def self.disambiguate_h(data, languages)
         
     | 
| 
      
 29 
     | 
    
         
            +
                def self.disambiguate_c(data, languages)
         
     | 
| 
       28 
30 
     | 
    
         
             
                  matches = []
         
     | 
| 
       29 
31 
     | 
    
         
             
                  matches << Language["Objective-C"] if data.include?("@interface")
         
     | 
| 
       30 
32 
     | 
    
         
             
                  matches << Language["C++"] if data.include?("#include <cstdint>")
         
     | 
| 
       31 
33 
     | 
    
         
             
                  matches
         
     | 
| 
       32 
34 
     | 
    
         
             
                end
         
     | 
| 
       33 
35 
     | 
    
         | 
| 
      
 36 
     | 
    
         
            +
                def self_disambiguate_pl(data, languages)
         
     | 
| 
      
 37 
     | 
    
         
            +
                  matches = []
         
     | 
| 
      
 38 
     | 
    
         
            +
                  matches << Language["Prolog"] if data.include?(":-")
         
     | 
| 
      
 39 
     | 
    
         
            +
                  matches
         
     | 
| 
      
 40 
     | 
    
         
            +
                end
         
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
       34 
42 
     | 
    
         
             
                def self.active?
         
     | 
| 
       35 
43 
     | 
    
         
             
                  !!ACTIVE
         
     | 
| 
       36 
44 
     | 
    
         
             
                end
         
     | 
    
        data/lib/linguist/languages.yml
    CHANGED
    
    
    
        data/lib/linguist/repository.rb
    CHANGED
    
    | 
         @@ -29,6 +29,7 @@ module Linguist 
     | 
|
| 
       29 
29 
     | 
    
         
             
                  @computed_stats = false
         
     | 
| 
       30 
30 
     | 
    
         
             
                  @language = @size = nil
         
     | 
| 
       31 
31 
     | 
    
         
             
                  @sizes = Hash.new { 0 }
         
     | 
| 
      
 32 
     | 
    
         
            +
                  @file_breakdown = Hash.new { |h,k| h[k] = Array.new }
         
     | 
| 
       32 
33 
     | 
    
         
             
                end
         
     | 
| 
       33 
34 
     | 
    
         | 
| 
       34 
35 
     | 
    
         
             
                # Public: Returns a breakdown of language stats.
         
     | 
| 
         @@ -60,6 +61,12 @@ module Linguist 
     | 
|
| 
       60 
61 
     | 
    
         
             
                  @size
         
     | 
| 
       61 
62 
     | 
    
         
             
                end
         
     | 
| 
       62 
63 
     | 
    
         | 
| 
      
 64 
     | 
    
         
            +
                # Public: Return the language breakdown of this repository by file
         
     | 
| 
      
 65 
     | 
    
         
            +
                def breakdown_by_file
         
     | 
| 
      
 66 
     | 
    
         
            +
                  compute_stats
         
     | 
| 
      
 67 
     | 
    
         
            +
                  @file_breakdown
         
     | 
| 
      
 68 
     | 
    
         
            +
                end
         
     | 
| 
      
 69 
     | 
    
         
            +
             
     | 
| 
       63 
70 
     | 
    
         
             
                # Internal: Compute language breakdown for each blob in the Repository.
         
     | 
| 
       64 
71 
     | 
    
         
             
                #
         
     | 
| 
       65 
72 
     | 
    
         
             
                # Returns nothing
         
     | 
| 
         @@ -75,6 +82,10 @@ module Linguist 
     | 
|
| 
       75 
82 
     | 
    
         | 
| 
       76 
83 
     | 
    
         
             
                    # Only include programming languages and acceptable markup languages
         
     | 
| 
       77 
84 
     | 
    
         
             
                    if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
         
     | 
| 
      
 85 
     | 
    
         
            +
             
     | 
| 
      
 86 
     | 
    
         
            +
                      # Build up the per-file breakdown stats
         
     | 
| 
      
 87 
     | 
    
         
            +
                      @file_breakdown[blob.language.group.name] << blob.name
         
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
       78 
89 
     | 
    
         
             
                      @sizes[blob.language.group] += blob.size
         
     | 
| 
       79 
90 
     | 
    
         
             
                    end
         
     | 
| 
       80 
91 
     | 
    
         
             
                  end
         
     | 
    
        data/lib/linguist/vendor.yml
    CHANGED
    
    | 
         @@ -27,6 +27,9 @@ 
     | 
|
| 
       27 
27 
     | 
    
         
             
            # Node dependencies
         
     | 
| 
       28 
28 
     | 
    
         
             
            - node_modules/
         
     | 
| 
       29 
29 
     | 
    
         | 
| 
      
 30 
     | 
    
         
            +
            # Bower Components
         
     | 
| 
      
 31 
     | 
    
         
            +
            - bower_components/
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
       30 
33 
     | 
    
         
             
            # Erlang bundles
         
     | 
| 
       31 
34 
     | 
    
         
             
            - ^rebar$
         
     | 
| 
       32 
35 
     | 
    
         | 
| 
         @@ -82,6 +85,9 @@ 
     | 
|
| 
       82 
85 
     | 
    
         
             
            - (^|/)shCore\.js$
         
     | 
| 
       83 
86 
     | 
    
         
             
            - (^|/)shLegacy\.js$
         
     | 
| 
       84 
87 
     | 
    
         | 
| 
      
 88 
     | 
    
         
            +
            # AngularJS
         
     | 
| 
      
 89 
     | 
    
         
            +
            - (^|/)angular([^.]*)(\.min)?\.js$
         
     | 
| 
      
 90 
     | 
    
         
            +
             
     | 
| 
       85 
91 
     | 
    
         
             
            ## Python ##
         
     | 
| 
       86 
92 
     | 
    
         | 
| 
       87 
93 
     | 
    
         
             
            # django
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: github-linguist
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 2.10.8
     | 
| 
      
 4 
     | 
    
         
            +
              version: 2.10.9
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - GitHub
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2013-12- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2013-12-29 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: charlock_holmes
         
     | 
| 
         @@ -122,7 +122,9 @@ dependencies: 
     | 
|
| 
       122 
122 
     | 
    
         
             
                - - '>='
         
     | 
| 
       123 
123 
     | 
    
         
             
                  - !ruby/object:Gem::Version
         
     | 
| 
       124 
124 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       125 
     | 
    
         
            -
            description: 
         
     | 
| 
      
 125 
     | 
    
         
            +
            description: We use this library at GitHub to detect blob languages, highlight code,
         
     | 
| 
      
 126 
     | 
    
         
            +
              ignore binary files, suppress generated files in diffs, and generate language breakdown
         
     | 
| 
      
 127 
     | 
    
         
            +
              graphs.
         
     | 
| 
       126 
128 
     | 
    
         
             
            email: 
         
     | 
| 
       127 
129 
     | 
    
         
             
            executables:
         
     | 
| 
       128 
130 
     | 
    
         
             
            - linguist
         
     | 
| 
         @@ -146,7 +148,8 @@ files: 
     | 
|
| 
       146 
148 
     | 
    
         
             
            - lib/linguist.rb
         
     | 
| 
       147 
149 
     | 
    
         
             
            - bin/linguist
         
     | 
| 
       148 
150 
     | 
    
         
             
            homepage: https://github.com/github/linguist
         
     | 
| 
       149 
     | 
    
         
            -
            licenses: 
     | 
| 
      
 151 
     | 
    
         
            +
            licenses:
         
     | 
| 
      
 152 
     | 
    
         
            +
            - MIT
         
     | 
| 
       150 
153 
     | 
    
         
             
            metadata: {}
         
     | 
| 
       151 
154 
     | 
    
         
             
            post_install_message: 
         
     | 
| 
       152 
155 
     | 
    
         
             
            rdoc_options: []
         
     |