github-linguist 2.10.8 → 2.10.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/linguist +17 -1
- data/lib/linguist/classifier.rb +3 -3
- data/lib/linguist/heuristics.rb +11 -3
- data/lib/linguist/languages.yml +3 -0
- data/lib/linguist/repository.rb +11 -0
- data/lib/linguist/vendor.yml +6 -0
- metadata +7 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cbb0e1dda522368ab523932eb54c49cd133e80ba
|
4
|
+
data.tar.gz: e76b8b1147c9f5ea2ec415f7178032b84288db2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a7fe49b518c11ad84c9d1c83ab3288c15785189538b716e4cfa33634e6054b5111b1b2d5b3588f7630a2509619a086d910b1ff4b8f5e1e609af9770c46a0abbe
|
7
|
+
data.tar.gz: aa1d301167462211d21e40db3747abfd64510f7555c47d03bc56f499feac13476f9a01a13364545b445a2ec8aa64ae89792bec862c48ab276c03e4d31983d32a
|
data/bin/linguist
CHANGED
@@ -1,13 +1,22 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
# linguist — detect language type for a file, or, given a directory, determine language breakdown
|
4
|
-
# usage: linguist <path>
|
4
|
+
# usage: linguist <path> [<--breakdown>]
|
5
5
|
|
6
6
|
require 'linguist/file_blob'
|
7
7
|
require 'linguist/repository'
|
8
8
|
|
9
9
|
path = ARGV[0] || Dir.pwd
|
10
10
|
|
11
|
+
# special case if not given a directory but still given the --breakdown option
|
12
|
+
if path == "--breakdown"
|
13
|
+
path = Dir.pwd
|
14
|
+
breakdown = true
|
15
|
+
end
|
16
|
+
|
17
|
+
ARGV.shift
|
18
|
+
breakdown = true if ARGV[0] == "--breakdown"
|
19
|
+
|
11
20
|
if File.directory?(path)
|
12
21
|
repo = Linguist::Repository.from_directory(path)
|
13
22
|
repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
|
@@ -15,6 +24,13 @@ if File.directory?(path)
|
|
15
24
|
percentage = sprintf '%.2f' % percentage
|
16
25
|
puts "%-7s %s" % ["#{percentage}%", language]
|
17
26
|
end
|
27
|
+
if breakdown
|
28
|
+
puts
|
29
|
+
file_breakdown = repo.breakdown_by_file
|
30
|
+
file_breakdown.each do |lang, files|
|
31
|
+
puts "#{lang}: #{files}"
|
32
|
+
end
|
33
|
+
end
|
18
34
|
elsif File.file?(path)
|
19
35
|
blob = Linguist::FileBlob.new(path, Dir.pwd)
|
20
36
|
type = if blob.text?
|
data/lib/linguist/classifier.rb
CHANGED
@@ -83,8 +83,8 @@ module Linguist
|
|
83
83
|
debug_dump_all_tokens(tokens, languages) if verbosity >= 2
|
84
84
|
|
85
85
|
languages.each do |language|
|
86
|
-
debug_dump_probabilities(tokens, language) if verbosity >= 1
|
87
86
|
scores[language] = tokens_probability(tokens, language) + language_probability(language)
|
87
|
+
debug_dump_probabilities(tokens, language, scores[language]) if verbosity >= 1
|
88
88
|
end
|
89
89
|
|
90
90
|
scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [score[0], score[1]] }
|
@@ -130,9 +130,9 @@ module Linguist
|
|
130
130
|
@verbosity ||= (ENV['LINGUIST_DEBUG'] || 0).to_i
|
131
131
|
end
|
132
132
|
|
133
|
-
def debug_dump_probabilities(tokens, language)
|
133
|
+
def debug_dump_probabilities(tokens, language, score)
|
134
134
|
printf("%10s = %10.3f + %7.3f = %10.3f\n",
|
135
|
-
language, tokens_probability(tokens, language), language_probability(language),
|
135
|
+
language, tokens_probability(tokens, language), language_probability(language), score)
|
136
136
|
end
|
137
137
|
|
138
138
|
# Internal: show a table of probabilities for each <token,language> pair.
|
data/lib/linguist/heuristics.rb
CHANGED
@@ -14,7 +14,10 @@ module Linguist
|
|
14
14
|
def self.find_by_heuristics(data, languages)
|
15
15
|
if active?
|
16
16
|
if languages.all? { |l| ["Objective-C", "C++"].include?(l) }
|
17
|
-
|
17
|
+
disambiguate_c(data, languages)
|
18
|
+
end
|
19
|
+
if languages.all? { |l| ["Perl", "Prolog"].include?(l) }
|
20
|
+
disambiguate_pl(data, languages)
|
18
21
|
end
|
19
22
|
end
|
20
23
|
end
|
@@ -23,14 +26,19 @@ module Linguist
|
|
23
26
|
# We want to shortcut look for Objective-C _and_ now C++ too!
|
24
27
|
#
|
25
28
|
# Returns an array of Languages or []
|
26
|
-
|
27
|
-
def self.disambiguate_h(data, languages)
|
29
|
+
def self.disambiguate_c(data, languages)
|
28
30
|
matches = []
|
29
31
|
matches << Language["Objective-C"] if data.include?("@interface")
|
30
32
|
matches << Language["C++"] if data.include?("#include <cstdint>")
|
31
33
|
matches
|
32
34
|
end
|
33
35
|
|
36
|
+
def self_disambiguate_pl(data, languages)
|
37
|
+
matches = []
|
38
|
+
matches << Language["Prolog"] if data.include?(":-")
|
39
|
+
matches
|
40
|
+
end
|
41
|
+
|
34
42
|
def self.active?
|
35
43
|
!!ACTIVE
|
36
44
|
end
|
data/lib/linguist/languages.yml
CHANGED
data/lib/linguist/repository.rb
CHANGED
@@ -29,6 +29,7 @@ module Linguist
|
|
29
29
|
@computed_stats = false
|
30
30
|
@language = @size = nil
|
31
31
|
@sizes = Hash.new { 0 }
|
32
|
+
@file_breakdown = Hash.new { |h,k| h[k] = Array.new }
|
32
33
|
end
|
33
34
|
|
34
35
|
# Public: Returns a breakdown of language stats.
|
@@ -60,6 +61,12 @@ module Linguist
|
|
60
61
|
@size
|
61
62
|
end
|
62
63
|
|
64
|
+
# Public: Return the language breakdown of this repository by file
|
65
|
+
def breakdown_by_file
|
66
|
+
compute_stats
|
67
|
+
@file_breakdown
|
68
|
+
end
|
69
|
+
|
63
70
|
# Internal: Compute language breakdown for each blob in the Repository.
|
64
71
|
#
|
65
72
|
# Returns nothing
|
@@ -75,6 +82,10 @@ module Linguist
|
|
75
82
|
|
76
83
|
# Only include programming languages and acceptable markup languages
|
77
84
|
if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
|
85
|
+
|
86
|
+
# Build up the per-file breakdown stats
|
87
|
+
@file_breakdown[blob.language.group.name] << blob.name
|
88
|
+
|
78
89
|
@sizes[blob.language.group] += blob.size
|
79
90
|
end
|
80
91
|
end
|
data/lib/linguist/vendor.yml
CHANGED
@@ -27,6 +27,9 @@
|
|
27
27
|
# Node dependencies
|
28
28
|
- node_modules/
|
29
29
|
|
30
|
+
# Bower Components
|
31
|
+
- bower_components/
|
32
|
+
|
30
33
|
# Erlang bundles
|
31
34
|
- ^rebar$
|
32
35
|
|
@@ -82,6 +85,9 @@
|
|
82
85
|
- (^|/)shCore\.js$
|
83
86
|
- (^|/)shLegacy\.js$
|
84
87
|
|
88
|
+
# AngularJS
|
89
|
+
- (^|/)angular([^.]*)(\.min)?\.js$
|
90
|
+
|
85
91
|
## Python ##
|
86
92
|
|
87
93
|
# django
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.10.8
|
4
|
+
version: 2.10.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- GitHub
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-12-
|
11
|
+
date: 2013-12-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: charlock_holmes
|
@@ -122,7 +122,9 @@ dependencies:
|
|
122
122
|
- - '>='
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
|
-
description:
|
125
|
+
description: We use this library at GitHub to detect blob languages, highlight code,
|
126
|
+
ignore binary files, suppress generated files in diffs, and generate language breakdown
|
127
|
+
graphs.
|
126
128
|
email:
|
127
129
|
executables:
|
128
130
|
- linguist
|
@@ -146,7 +148,8 @@ files:
|
|
146
148
|
- lib/linguist.rb
|
147
149
|
- bin/linguist
|
148
150
|
homepage: https://github.com/github/linguist
|
149
|
-
licenses:
|
151
|
+
licenses:
|
152
|
+
- MIT
|
150
153
|
metadata: {}
|
151
154
|
post_install_message:
|
152
155
|
rdoc_options: []
|