github-linguist 2.10.8 → 2.10.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6f0906667498f0302124bfafe4df703d167e1aa7
4
- data.tar.gz: 8b1e758e4b3b18d1a656925248263c74fc9d5b6e
3
+ metadata.gz: cbb0e1dda522368ab523932eb54c49cd133e80ba
4
+ data.tar.gz: e76b8b1147c9f5ea2ec415f7178032b84288db2c
5
5
  SHA512:
6
- metadata.gz: 8b6fac3c2976fde426d9f52bccbefb12a321364ad821f2b6452f05a7b0e0a5892b33dad3a4b5e5dfc26f8f2462f68954e02718995d1fac0442678317a4b52426
7
- data.tar.gz: aab80deaa25c0051500426ae08a88ce5920e249de0e82cc53db0da35c07005dac94fc74336c0effad6774be6aa21c80a80a9626f3264054035ef07cb8c6d6298
6
+ metadata.gz: a7fe49b518c11ad84c9d1c83ab3288c15785189538b716e4cfa33634e6054b5111b1b2d5b3588f7630a2509619a086d910b1ff4b8f5e1e609af9770c46a0abbe
7
+ data.tar.gz: aa1d301167462211d21e40db3747abfd64510f7555c47d03bc56f499feac13476f9a01a13364545b445a2ec8aa64ae89792bec862c48ab276c03e4d31983d32a
data/bin/linguist CHANGED
@@ -1,13 +1,22 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  # linguist — detect language type for a file, or, given a directory, determine language breakdown
4
- # usage: linguist <path>
4
+ # usage: linguist <path> [<--breakdown>]
5
5
 
6
6
  require 'linguist/file_blob'
7
7
  require 'linguist/repository'
8
8
 
9
9
  path = ARGV[0] || Dir.pwd
10
10
 
11
+ # special case if not given a directory but still given the --breakdown option
12
+ if path == "--breakdown"
13
+ path = Dir.pwd
14
+ breakdown = true
15
+ end
16
+
17
+ ARGV.shift
18
+ breakdown = true if ARGV[0] == "--breakdown"
19
+
11
20
  if File.directory?(path)
12
21
  repo = Linguist::Repository.from_directory(path)
13
22
  repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
@@ -15,6 +24,13 @@ if File.directory?(path)
15
24
  percentage = sprintf '%.2f' % percentage
16
25
  puts "%-7s %s" % ["#{percentage}%", language]
17
26
  end
27
+ if breakdown
28
+ puts
29
+ file_breakdown = repo.breakdown_by_file
30
+ file_breakdown.each do |lang, files|
31
+ puts "#{lang}: #{files}"
32
+ end
33
+ end
18
34
  elsif File.file?(path)
19
35
  blob = Linguist::FileBlob.new(path, Dir.pwd)
20
36
  type = if blob.text?
@@ -83,8 +83,8 @@ module Linguist
83
83
  debug_dump_all_tokens(tokens, languages) if verbosity >= 2
84
84
 
85
85
  languages.each do |language|
86
- debug_dump_probabilities(tokens, language) if verbosity >= 1
87
86
  scores[language] = tokens_probability(tokens, language) + language_probability(language)
87
+ debug_dump_probabilities(tokens, language, scores[language]) if verbosity >= 1
88
88
  end
89
89
 
90
90
  scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [score[0], score[1]] }
@@ -130,9 +130,9 @@ module Linguist
130
130
  @verbosity ||= (ENV['LINGUIST_DEBUG'] || 0).to_i
131
131
  end
132
132
 
133
- def debug_dump_probabilities(tokens, language)
133
+ def debug_dump_probabilities(tokens, language, score)
134
134
  printf("%10s = %10.3f + %7.3f = %10.3f\n",
135
- language, tokens_probability(tokens, language), language_probability(language), scores[language])
135
+ language, tokens_probability(tokens, language), language_probability(language), score)
136
136
  end
137
137
 
138
138
  # Internal: show a table of probabilities for each <token,language> pair.
@@ -14,7 +14,10 @@ module Linguist
14
14
  def self.find_by_heuristics(data, languages)
15
15
  if active?
16
16
  if languages.all? { |l| ["Objective-C", "C++"].include?(l) }
17
- disambiguate_h(data, languages)
17
+ disambiguate_c(data, languages)
18
+ end
19
+ if languages.all? { |l| ["Perl", "Prolog"].include?(l) }
20
+ disambiguate_pl(data, languages)
18
21
  end
19
22
  end
20
23
  end
@@ -23,14 +26,19 @@ module Linguist
23
26
  # We want to shortcut look for Objective-C _and_ now C++ too!
24
27
  #
25
28
  # Returns an array of Languages or []
26
- # TODO rename this method as we're not strictly disambiguating between .h files here.
27
- def self.disambiguate_h(data, languages)
29
+ def self.disambiguate_c(data, languages)
28
30
  matches = []
29
31
  matches << Language["Objective-C"] if data.include?("@interface")
30
32
  matches << Language["C++"] if data.include?("#include <cstdint>")
31
33
  matches
32
34
  end
33
35
 
36
+ def self_disambiguate_pl(data, languages)
37
+ matches = []
38
+ matches << Language["Prolog"] if data.include?(":-")
39
+ matches
40
+ end
41
+
34
42
  def self.active?
35
43
  !!ACTIVE
36
44
  end
@@ -839,6 +839,7 @@ JavaScript:
839
839
  - .jsm
840
840
  - .jss
841
841
  - .jsx
842
+ - .njs
842
843
  - .pac
843
844
  - .sjs
844
845
  - .ssjs
@@ -947,6 +948,8 @@ Lua:
947
948
  extensions:
948
949
  - .nse
949
950
  - .rbxs
951
+ interpreters:
952
+ - lua
950
953
 
951
954
  M:
952
955
  type: programming
@@ -29,6 +29,7 @@ module Linguist
29
29
  @computed_stats = false
30
30
  @language = @size = nil
31
31
  @sizes = Hash.new { 0 }
32
+ @file_breakdown = Hash.new { |h,k| h[k] = Array.new }
32
33
  end
33
34
 
34
35
  # Public: Returns a breakdown of language stats.
@@ -60,6 +61,12 @@ module Linguist
60
61
  @size
61
62
  end
62
63
 
64
+ # Public: Return the language breakdown of this repository by file
65
+ def breakdown_by_file
66
+ compute_stats
67
+ @file_breakdown
68
+ end
69
+
63
70
  # Internal: Compute language breakdown for each blob in the Repository.
64
71
  #
65
72
  # Returns nothing
@@ -75,6 +82,10 @@ module Linguist
75
82
 
76
83
  # Only include programming languages and acceptable markup languages
77
84
  if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
85
+
86
+ # Build up the per-file breakdown stats
87
+ @file_breakdown[blob.language.group.name] << blob.name
88
+
78
89
  @sizes[blob.language.group] += blob.size
79
90
  end
80
91
  end
@@ -27,6 +27,9 @@
27
27
  # Node dependencies
28
28
  - node_modules/
29
29
 
30
+ # Bower Components
31
+ - bower_components/
32
+
30
33
  # Erlang bundles
31
34
  - ^rebar$
32
35
 
@@ -82,6 +85,9 @@
82
85
  - (^|/)shCore\.js$
83
86
  - (^|/)shLegacy\.js$
84
87
 
88
+ # AngularJS
89
+ - (^|/)angular([^.]*)(\.min)?\.js$
90
+
85
91
  ## Python ##
86
92
 
87
93
  # django
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: github-linguist
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.10.8
4
+ version: 2.10.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - GitHub
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-12-27 00:00:00.000000000 Z
11
+ date: 2013-12-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: charlock_holmes
@@ -122,7 +122,9 @@ dependencies:
122
122
  - - '>='
123
123
  - !ruby/object:Gem::Version
124
124
  version: '0'
125
- description:
125
+ description: We use this library at GitHub to detect blob languages, highlight code,
126
+ ignore binary files, suppress generated files in diffs, and generate language breakdown
127
+ graphs.
126
128
  email:
127
129
  executables:
128
130
  - linguist
@@ -146,7 +148,8 @@ files:
146
148
  - lib/linguist.rb
147
149
  - bin/linguist
148
150
  homepage: https://github.com/github/linguist
149
- licenses: []
151
+ licenses:
152
+ - MIT
150
153
  metadata: {}
151
154
  post_install_message:
152
155
  rdoc_options: []