github-linguist 2.10.8 → 2.10.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/linguist +17 -1
- data/lib/linguist/classifier.rb +3 -3
- data/lib/linguist/heuristics.rb +11 -3
- data/lib/linguist/languages.yml +3 -0
- data/lib/linguist/repository.rb +11 -0
- data/lib/linguist/vendor.yml +6 -0
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cbb0e1dda522368ab523932eb54c49cd133e80ba
|
4
|
+
data.tar.gz: e76b8b1147c9f5ea2ec415f7178032b84288db2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7fe49b518c11ad84c9d1c83ab3288c15785189538b716e4cfa33634e6054b5111b1b2d5b3588f7630a2509619a086d910b1ff4b8f5e1e609af9770c46a0abbe
|
7
|
+
data.tar.gz: aa1d301167462211d21e40db3747abfd64510f7555c47d03bc56f499feac13476f9a01a13364545b445a2ec8aa64ae89792bec862c48ab276c03e4d31983d32a
|
data/bin/linguist
CHANGED
@@ -1,13 +1,22 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
# linguist — detect language type for a file, or, given a directory, determine language breakdown
|
4
|
-
# usage: linguist <path>
|
4
|
+
# usage: linguist <path> [<--breakdown>]
|
5
5
|
|
6
6
|
require 'linguist/file_blob'
|
7
7
|
require 'linguist/repository'
|
8
8
|
|
9
9
|
path = ARGV[0] || Dir.pwd
|
10
10
|
|
11
|
+
# special case if not given a directory but still given the --breakdown option
|
12
|
+
if path == "--breakdown"
|
13
|
+
path = Dir.pwd
|
14
|
+
breakdown = true
|
15
|
+
end
|
16
|
+
|
17
|
+
ARGV.shift
|
18
|
+
breakdown = true if ARGV[0] == "--breakdown"
|
19
|
+
|
11
20
|
if File.directory?(path)
|
12
21
|
repo = Linguist::Repository.from_directory(path)
|
13
22
|
repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
|
@@ -15,6 +24,13 @@ if File.directory?(path)
|
|
15
24
|
percentage = sprintf '%.2f' % percentage
|
16
25
|
puts "%-7s %s" % ["#{percentage}%", language]
|
17
26
|
end
|
27
|
+
if breakdown
|
28
|
+
puts
|
29
|
+
file_breakdown = repo.breakdown_by_file
|
30
|
+
file_breakdown.each do |lang, files|
|
31
|
+
puts "#{lang}: #{files}"
|
32
|
+
end
|
33
|
+
end
|
18
34
|
elsif File.file?(path)
|
19
35
|
blob = Linguist::FileBlob.new(path, Dir.pwd)
|
20
36
|
type = if blob.text?
|
data/lib/linguist/classifier.rb
CHANGED
@@ -83,8 +83,8 @@ module Linguist
|
|
83
83
|
debug_dump_all_tokens(tokens, languages) if verbosity >= 2
|
84
84
|
|
85
85
|
languages.each do |language|
|
86
|
-
debug_dump_probabilities(tokens, language) if verbosity >= 1
|
87
86
|
scores[language] = tokens_probability(tokens, language) + language_probability(language)
|
87
|
+
debug_dump_probabilities(tokens, language, scores[language]) if verbosity >= 1
|
88
88
|
end
|
89
89
|
|
90
90
|
scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [score[0], score[1]] }
|
@@ -130,9 +130,9 @@ module Linguist
|
|
130
130
|
@verbosity ||= (ENV['LINGUIST_DEBUG'] || 0).to_i
|
131
131
|
end
|
132
132
|
|
133
|
-
def debug_dump_probabilities(tokens, language)
|
133
|
+
def debug_dump_probabilities(tokens, language, score)
|
134
134
|
printf("%10s = %10.3f + %7.3f = %10.3f\n",
|
135
|
-
language, tokens_probability(tokens, language), language_probability(language),
|
135
|
+
language, tokens_probability(tokens, language), language_probability(language), score)
|
136
136
|
end
|
137
137
|
|
138
138
|
# Internal: show a table of probabilities for each <token,language> pair.
|
data/lib/linguist/heuristics.rb
CHANGED
@@ -14,7 +14,10 @@ module Linguist
|
|
14
14
|
def self.find_by_heuristics(data, languages)
|
15
15
|
if active?
|
16
16
|
if languages.all? { |l| ["Objective-C", "C++"].include?(l) }
|
17
|
-
|
17
|
+
disambiguate_c(data, languages)
|
18
|
+
end
|
19
|
+
if languages.all? { |l| ["Perl", "Prolog"].include?(l) }
|
20
|
+
disambiguate_pl(data, languages)
|
18
21
|
end
|
19
22
|
end
|
20
23
|
end
|
@@ -23,14 +26,19 @@ module Linguist
|
|
23
26
|
# We want to shortcut look for Objective-C _and_ now C++ too!
|
24
27
|
#
|
25
28
|
# Returns an array of Languages or []
|
26
|
-
|
27
|
-
def self.disambiguate_h(data, languages)
|
29
|
+
def self.disambiguate_c(data, languages)
|
28
30
|
matches = []
|
29
31
|
matches << Language["Objective-C"] if data.include?("@interface")
|
30
32
|
matches << Language["C++"] if data.include?("#include <cstdint>")
|
31
33
|
matches
|
32
34
|
end
|
33
35
|
|
36
|
+
def self_disambiguate_pl(data, languages)
|
37
|
+
matches = []
|
38
|
+
matches << Language["Prolog"] if data.include?(":-")
|
39
|
+
matches
|
40
|
+
end
|
41
|
+
|
34
42
|
def self.active?
|
35
43
|
!!ACTIVE
|
36
44
|
end
|
data/lib/linguist/languages.yml
CHANGED
data/lib/linguist/repository.rb
CHANGED
@@ -29,6 +29,7 @@ module Linguist
|
|
29
29
|
@computed_stats = false
|
30
30
|
@language = @size = nil
|
31
31
|
@sizes = Hash.new { 0 }
|
32
|
+
@file_breakdown = Hash.new { |h,k| h[k] = Array.new }
|
32
33
|
end
|
33
34
|
|
34
35
|
# Public: Returns a breakdown of language stats.
|
@@ -60,6 +61,12 @@ module Linguist
|
|
60
61
|
@size
|
61
62
|
end
|
62
63
|
|
64
|
+
# Public: Return the language breakdown of this repository by file
|
65
|
+
def breakdown_by_file
|
66
|
+
compute_stats
|
67
|
+
@file_breakdown
|
68
|
+
end
|
69
|
+
|
63
70
|
# Internal: Compute language breakdown for each blob in the Repository.
|
64
71
|
#
|
65
72
|
# Returns nothing
|
@@ -75,6 +82,10 @@ module Linguist
|
|
75
82
|
|
76
83
|
# Only include programming languages and acceptable markup languages
|
77
84
|
if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
|
85
|
+
|
86
|
+
# Build up the per-file breakdown stats
|
87
|
+
@file_breakdown[blob.language.group.name] << blob.name
|
88
|
+
|
78
89
|
@sizes[blob.language.group] += blob.size
|
79
90
|
end
|
80
91
|
end
|
data/lib/linguist/vendor.yml
CHANGED
@@ -27,6 +27,9 @@
|
|
27
27
|
# Node dependencies
|
28
28
|
- node_modules/
|
29
29
|
|
30
|
+
# Bower Components
|
31
|
+
- bower_components/
|
32
|
+
|
30
33
|
# Erlang bundles
|
31
34
|
- ^rebar$
|
32
35
|
|
@@ -82,6 +85,9 @@
|
|
82
85
|
- (^|/)shCore\.js$
|
83
86
|
- (^|/)shLegacy\.js$
|
84
87
|
|
88
|
+
# AngularJS
|
89
|
+
- (^|/)angular([^.]*)(\.min)?\.js$
|
90
|
+
|
85
91
|
## Python ##
|
86
92
|
|
87
93
|
# django
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.10.8
|
4
|
+
version: 2.10.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GitHub
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: charlock_holmes
|
@@ -122,7 +122,9 @@ dependencies:
|
|
122
122
|
- - '>='
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
-
description:
|
125
|
+
description: We use this library at GitHub to detect blob languages, highlight code,
|
126
|
+
ignore binary files, suppress generated files in diffs, and generate language breakdown
|
127
|
+
graphs.
|
126
128
|
email:
|
127
129
|
executables:
|
128
130
|
- linguist
|
@@ -146,7 +148,8 @@ files:
|
|
146
148
|
- lib/linguist.rb
|
147
149
|
- bin/linguist
|
148
150
|
homepage: https://github.com/github/linguist
|
149
|
-
licenses:
|
151
|
+
licenses:
|
152
|
+
- MIT
|
150
153
|
metadata: {}
|
151
154
|
post_install_message:
|
152
155
|
rdoc_options: []
|