github-linguist 2.12.1 → 3.0.0b0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/linguist +3 -1
- data/lib/linguist/blob_helper.rb +1 -9
- data/lib/linguist/generated.rb +9 -1
- data/lib/linguist/language.rb +5 -9
- data/lib/linguist/languages.yml +43 -12
- data/lib/linguist/lazy_blob.rb +37 -0
- data/lib/linguist/repository.rb +97 -53
- data/lib/linguist/samples.json +54290 -51669
- data/lib/linguist/samples.rb +1 -1
- data/lib/linguist/vendor.yml +5 -0
- data/lib/linguist/version.rb +1 -1
- metadata +25 -25
- data/lib/linguist/languages.json +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5cd8c69614aa4a6bf20c79737e27aaac60ace18c
|
4
|
+
data.tar.gz: 1e3a64bf355a0b72821c88f09b75dcbeffd3a614
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d30b9264ca2e44ae46391e86df8fe4f4835ccbe9f4688f1d65d8fd110f0b689370392a17fffa78ea7a89034e2e9acd09612ea4b1611af1f6fae168c90080424e
|
7
|
+
data.tar.gz: 2ccfafb26afd642f7146b7c892a499aab136f7b12b8c703ac2231512a768e8da3e991c0d5fbf9fcd204a896f95379aef799cff18e470d4a4912f10c33ec53ae7
|
data/bin/linguist
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
|
6
6
|
require 'linguist/file_blob'
|
7
7
|
require 'linguist/repository'
|
8
|
+
require 'rugged'
|
8
9
|
|
9
10
|
path = ARGV[0] || Dir.pwd
|
10
11
|
|
@@ -18,7 +19,8 @@ ARGV.shift
|
|
18
19
|
breakdown = true if ARGV[0] == "--breakdown"
|
19
20
|
|
20
21
|
if File.directory?(path)
|
21
|
-
|
22
|
+
rugged = Rugged::Repository.new(path)
|
23
|
+
repo = Linguist::Repository.new(rugged, rugged.head.target_id)
|
22
24
|
repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
|
23
25
|
percentage = ((size / repo.size.to_f) * 100)
|
24
26
|
percentage = sprintf '%.2f' % percentage
|
data/lib/linguist/blob_helper.rb
CHANGED
@@ -313,15 +313,7 @@ module Linguist
|
|
313
313
|
#
|
314
314
|
# Returns a Language or nil if none is detected
|
315
315
|
def language
|
316
|
-
|
317
|
-
|
318
|
-
if defined?(@data) && @data.is_a?(String)
|
319
|
-
data = @data
|
320
|
-
else
|
321
|
-
data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
|
322
|
-
end
|
323
|
-
|
324
|
-
@language = Language.detect(name.to_s, data, mode)
|
316
|
+
@language ||= Language.detect(self)
|
325
317
|
end
|
326
318
|
|
327
319
|
# Internal: Get the lexer of the blob.
|
data/lib/linguist/generated.rb
CHANGED
@@ -63,7 +63,8 @@ module Linguist
|
|
63
63
|
generated_jni_header? ||
|
64
64
|
composer_lock? ||
|
65
65
|
node_modules? ||
|
66
|
-
vcr_cassette?
|
66
|
+
vcr_cassette? ||
|
67
|
+
generated_by_zephir?
|
67
68
|
end
|
68
69
|
|
69
70
|
# Internal: Is the blob an XCode project file?
|
@@ -237,6 +238,13 @@ module Linguist
|
|
237
238
|
!!name.match(/composer.lock/)
|
238
239
|
end
|
239
240
|
|
241
|
+
# Internal: Is the blob generated by Zephir?
|
242
|
+
#
|
243
|
+
# Returns true or false.
|
244
|
+
def generated_by_zephir?
|
245
|
+
!!name.match(/.\.zep\.(?:c|h|php)$/)
|
246
|
+
end
|
247
|
+
|
240
248
|
# Is the blob a VCR Cassette file?
|
241
249
|
#
|
242
250
|
# Returns true or false
|
data/lib/linguist/language.rb
CHANGED
@@ -92,18 +92,14 @@ module Linguist
|
|
92
92
|
|
93
93
|
# Public: Detects the Language of the blob.
|
94
94
|
#
|
95
|
-
# name - String filename
|
96
|
-
# data - String blob data. A block may also be passed in for lazy
|
97
|
-
# loading. This behavior is deprecated and you should always
|
98
|
-
# pass in a String.
|
99
|
-
# mode - Optional String mode (defaults to nil)
|
100
|
-
#
|
101
95
|
# Returns Language or nil.
|
102
|
-
def self.detect(name, data, mode = nil)
|
96
|
+
def self.detect(blob)
|
97
|
+
name = blob.name.to_s
|
98
|
+
|
103
99
|
# A bit of an elegant hack. If the file is executable but extensionless,
|
104
100
|
# append a "magic" extension so it can be classified with other
|
105
101
|
# languages that have shebang scripts.
|
106
|
-
if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
|
102
|
+
if File.extname(name).empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
|
107
103
|
name += ".script!"
|
108
104
|
end
|
109
105
|
|
@@ -114,7 +110,7 @@ module Linguist
|
|
114
110
|
# extension at all, in the case of extensionless scripts), we need to continue
|
115
111
|
# our detection work
|
116
112
|
if possible_languages.length > 1
|
117
|
-
data =
|
113
|
+
data = blob.data
|
118
114
|
possible_language_names = possible_languages.map(&:name)
|
119
115
|
|
120
116
|
# Don't bother with emptiness
|
data/lib/linguist/languages.yml
CHANGED
@@ -157,6 +157,7 @@ Assembly:
|
|
157
157
|
- nasm
|
158
158
|
extensions:
|
159
159
|
- .asm
|
160
|
+
- .inc
|
160
161
|
|
161
162
|
Augeas:
|
162
163
|
type: programming
|
@@ -528,15 +529,6 @@ Dart:
|
|
528
529
|
extensions:
|
529
530
|
- .dart
|
530
531
|
|
531
|
-
DCPU-16 ASM:
|
532
|
-
type: programming
|
533
|
-
lexer: dasm16
|
534
|
-
extensions:
|
535
|
-
- .dasm16
|
536
|
-
- .dasm
|
537
|
-
aliases:
|
538
|
-
- dasm16
|
539
|
-
|
540
532
|
Diff:
|
541
533
|
extensions:
|
542
534
|
- .diff
|
@@ -940,7 +932,7 @@ Hy:
|
|
940
932
|
|
941
933
|
IDL:
|
942
934
|
type: programming
|
943
|
-
lexer:
|
935
|
+
lexer: IDL
|
944
936
|
color: "#e3592c"
|
945
937
|
extensions:
|
946
938
|
- .pro
|
@@ -959,7 +951,7 @@ Inno Setup:
|
|
959
951
|
|
960
952
|
Idris:
|
961
953
|
type: programming
|
962
|
-
lexer:
|
954
|
+
lexer: Idris
|
963
955
|
extensions:
|
964
956
|
- .idr
|
965
957
|
- .lidr
|
@@ -998,6 +990,13 @@ Ioke:
|
|
998
990
|
extensions:
|
999
991
|
- .ik
|
1000
992
|
|
993
|
+
Isabelle:
|
994
|
+
type: programming
|
995
|
+
lexer: Text only
|
996
|
+
color: "#fdcd00"
|
997
|
+
extensions:
|
998
|
+
- .thy
|
999
|
+
|
1001
1000
|
J:
|
1002
1001
|
type: programming
|
1003
1002
|
lexer: Text only
|
@@ -1087,6 +1086,8 @@ JavaScript:
|
|
1087
1086
|
- .pac
|
1088
1087
|
- .sjs
|
1089
1088
|
- .ssjs
|
1089
|
+
- .xsjs
|
1090
|
+
- .xsjslib
|
1090
1091
|
filenames:
|
1091
1092
|
- Jakefile
|
1092
1093
|
interpreters:
|
@@ -1285,6 +1286,8 @@ Mathematica:
|
|
1285
1286
|
type: programming
|
1286
1287
|
extensions:
|
1287
1288
|
- .mathematica
|
1289
|
+
- .m
|
1290
|
+
- .nb
|
1288
1291
|
lexer: Text only
|
1289
1292
|
|
1290
1293
|
Matlab:
|
@@ -1392,6 +1395,12 @@ Nimrod:
|
|
1392
1395
|
- .nim
|
1393
1396
|
- .nimrod
|
1394
1397
|
|
1398
|
+
Nix:
|
1399
|
+
type: programming
|
1400
|
+
lexer: Nix
|
1401
|
+
extensions:
|
1402
|
+
- .nix
|
1403
|
+
|
1395
1404
|
Nu:
|
1396
1405
|
type: programming
|
1397
1406
|
lexer: Scheme
|
@@ -1601,7 +1610,7 @@ Perl6:
|
|
1601
1610
|
Pike:
|
1602
1611
|
type: programming
|
1603
1612
|
color: "#066ab2"
|
1604
|
-
lexer:
|
1613
|
+
lexer: Pike
|
1605
1614
|
extensions:
|
1606
1615
|
- .pike
|
1607
1616
|
- .pmod
|
@@ -1698,6 +1707,7 @@ Python:
|
|
1698
1707
|
- .gyp
|
1699
1708
|
- .lmi
|
1700
1709
|
- .pyde
|
1710
|
+
- .pyp
|
1701
1711
|
- .pyt
|
1702
1712
|
- .pyw
|
1703
1713
|
- .wsgi
|
@@ -1723,6 +1733,12 @@ QML:
|
|
1723
1733
|
extensions:
|
1724
1734
|
- .qml
|
1725
1735
|
|
1736
|
+
QMake:
|
1737
|
+
lexer: Text only
|
1738
|
+
extensions:
|
1739
|
+
- .pro
|
1740
|
+
- .pri
|
1741
|
+
|
1726
1742
|
R:
|
1727
1743
|
type: programming
|
1728
1744
|
color: "#198ce7"
|
@@ -1862,12 +1878,15 @@ Ruby:
|
|
1862
1878
|
interpreters:
|
1863
1879
|
- ruby
|
1864
1880
|
filenames:
|
1881
|
+
- .pryrc
|
1865
1882
|
- Appraisals
|
1866
1883
|
- Berksfile
|
1867
1884
|
- Buildfile
|
1868
1885
|
- Gemfile
|
1869
1886
|
- Gemfile.lock
|
1870
1887
|
- Guardfile
|
1888
|
+
- Jarfile
|
1889
|
+
- Mavenfile
|
1871
1890
|
- Podfile
|
1872
1891
|
- Thorfile
|
1873
1892
|
- Vagrantfile
|
@@ -2288,6 +2307,7 @@ XML:
|
|
2288
2307
|
- .launch
|
2289
2308
|
- .mxml
|
2290
2309
|
- .nproj
|
2310
|
+
- .nuspec
|
2291
2311
|
- .osm
|
2292
2312
|
- .plist
|
2293
2313
|
- .pluginspec
|
@@ -2359,6 +2379,17 @@ XSLT:
|
|
2359
2379
|
- .xslt
|
2360
2380
|
- .xsl
|
2361
2381
|
|
2382
|
+
Xojo:
|
2383
|
+
type: programming
|
2384
|
+
lexer: VB.net
|
2385
|
+
extensions:
|
2386
|
+
- .xojo_code
|
2387
|
+
- .xojo_menu
|
2388
|
+
- .xojo_report
|
2389
|
+
- .xojo_script
|
2390
|
+
- .xojo_toolbar
|
2391
|
+
- .xojo_window
|
2392
|
+
|
2362
2393
|
Xtend:
|
2363
2394
|
type: programming
|
2364
2395
|
extensions:
|
data/lib/linguist/lazy_blob.rb
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'linguist/blob_helper'
|
2
|
+
require 'rugged'
|
3
|
+
|
4
|
+
module Linguist
|
5
|
+
class LazyBlob
|
6
|
+
include BlobHelper
|
7
|
+
|
8
|
+
MAX_SIZE = 128 * 1024
|
9
|
+
|
10
|
+
attr_reader :repository
|
11
|
+
attr_reader :oid
|
12
|
+
attr_reader :name
|
13
|
+
attr_reader :mode
|
14
|
+
|
15
|
+
def initialize(repo, oid, name, mode = nil)
|
16
|
+
@repository = repo
|
17
|
+
@oid = oid
|
18
|
+
@name = name
|
19
|
+
@mode = mode
|
20
|
+
end
|
21
|
+
|
22
|
+
def data
|
23
|
+
load_blob!
|
24
|
+
@data
|
25
|
+
end
|
26
|
+
|
27
|
+
def size
|
28
|
+
load_blob!
|
29
|
+
@size
|
30
|
+
end
|
31
|
+
|
32
|
+
protected
|
33
|
+
def load_blob!
|
34
|
+
@data, @size = Rugged::Blob.to_buffer(repository, oid, MAX_SIZE) if @data.nil?
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/linguist/repository.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
require 'linguist/
|
1
|
+
require 'linguist/lazy_blob'
|
2
|
+
require 'rugged'
|
2
3
|
|
3
4
|
module Linguist
|
4
5
|
# A Repository is an abstraction of a Grit::Repo or a basic file
|
@@ -7,100 +8,143 @@ module Linguist
|
|
7
8
|
# Its primary purpose is for gathering language statistics across
|
8
9
|
# the entire project.
|
9
10
|
class Repository
|
10
|
-
|
11
|
+
attr_reader :repository
|
12
|
+
|
13
|
+
# Public: Create a new Repository based on the stats of
|
14
|
+
# an existing one
|
15
|
+
def self.incremental(repo, commit_oid, old_commit_oid, old_stats)
|
16
|
+
repo = self.new(repo, commit_oid)
|
17
|
+
repo.load_existing_stats(old_commit_oid, old_stats)
|
18
|
+
repo
|
19
|
+
end
|
20
|
+
|
21
|
+
# Public: Initialize a new Repository to be analyzed for language
|
22
|
+
# data
|
11
23
|
#
|
12
|
-
#
|
24
|
+
# repo - a Rugged::Repository object
|
25
|
+
# commit_oid - the sha1 of the commit that will be analyzed;
|
26
|
+
# this is usually the master branch
|
13
27
|
#
|
14
28
|
# Returns a Repository
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
29
|
+
def initialize(repo, commit_oid)
|
30
|
+
@repository = repo
|
31
|
+
@commit_oid = commit_oid
|
32
|
+
|
33
|
+
raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
|
19
34
|
end
|
20
35
|
|
21
|
-
# Public:
|
36
|
+
# Public: Load the results of a previous analysis on this repository
|
37
|
+
# to speed up the new scan.
|
22
38
|
#
|
23
|
-
#
|
24
|
-
#
|
39
|
+
# The new analysis will be performed incrementally so as to only take
|
40
|
+
# into account the file changes since the last time the repository
|
41
|
+
# was scanned
|
25
42
|
#
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
@
|
43
|
+
# old_commit_oid - the sha1 of the commit that was previously analyzed
|
44
|
+
# old_stats - the result of the previous analysis, obtained by calling
|
45
|
+
# Repository#cache on the old repository
|
46
|
+
#
|
47
|
+
# Returns nothing
|
48
|
+
def load_existing_stats(old_commit_oid, old_stats)
|
49
|
+
@old_commit_oid = old_commit_oid
|
50
|
+
@old_stats = old_stats
|
51
|
+
nil
|
33
52
|
end
|
34
53
|
|
35
54
|
# Public: Returns a breakdown of language stats.
|
36
55
|
#
|
37
56
|
# Examples
|
38
57
|
#
|
39
|
-
# # => {
|
40
|
-
#
|
58
|
+
# # => { 'Ruby' => 46319,
|
59
|
+
# 'JavaScript' => 258 }
|
41
60
|
#
|
42
|
-
# Returns a Hash of
|
61
|
+
# Returns a Hash of language names and Integer size values.
|
43
62
|
def languages
|
44
|
-
|
45
|
-
|
63
|
+
@sizes ||= begin
|
64
|
+
sizes = Hash.new { 0 }
|
65
|
+
cache.each do |_, (language, size)|
|
66
|
+
sizes[language] += size
|
67
|
+
end
|
68
|
+
sizes
|
69
|
+
end
|
46
70
|
end
|
47
71
|
|
48
72
|
# Public: Get primary Language of repository.
|
49
73
|
#
|
50
|
-
# Returns a
|
74
|
+
# Returns a language name
|
51
75
|
def language
|
52
|
-
|
53
|
-
|
76
|
+
@language ||= begin
|
77
|
+
primary = languages.max_by { |(_, size)| size }
|
78
|
+
primary && primary[0]
|
79
|
+
end
|
54
80
|
end
|
55
81
|
|
56
82
|
# Public: Get the total size of the repository.
|
57
83
|
#
|
58
84
|
# Returns a byte size Integer
|
59
85
|
def size
|
60
|
-
|
61
|
-
@size
|
86
|
+
@size ||= languages.inject(0) { |s,(_,v)| s + v }
|
62
87
|
end
|
63
88
|
|
64
89
|
# Public: Return the language breakdown of this repository by file
|
90
|
+
#
|
91
|
+
# Returns a map of language names => [filenames...]
|
65
92
|
def breakdown_by_file
|
66
|
-
|
67
|
-
|
93
|
+
@file_breakdown ||= begin
|
94
|
+
breakdown = Hash.new { |h,k| h[k] = Array.new }
|
95
|
+
cache.each do |filename, (language, _)|
|
96
|
+
breakdown[language] << filename
|
97
|
+
end
|
98
|
+
breakdown
|
99
|
+
end
|
68
100
|
end
|
69
101
|
|
70
|
-
#
|
102
|
+
# Public: Return the cached results of the analysis
|
71
103
|
#
|
72
|
-
#
|
73
|
-
|
74
|
-
|
104
|
+
# This is a per-file breakdown that can be passed to other instances
|
105
|
+
# of Linguist::Repository to perform incremental scans
|
106
|
+
#
|
107
|
+
# Returns a map of filename => [language, size]
|
108
|
+
def cache
|
109
|
+
@cache ||= begin
|
110
|
+
if @old_commit_oid == @commit_oid
|
111
|
+
@old_stats
|
112
|
+
else
|
113
|
+
compute_stats(@old_commit_oid, @commit_oid, @old_stats)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
75
117
|
|
76
|
-
|
77
|
-
|
78
|
-
|
118
|
+
protected
|
119
|
+
def compute_stats(old_commit_oid, commit_oid, cache = nil)
|
120
|
+
file_map = cache ? cache.dup : {}
|
121
|
+
old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
|
122
|
+
new_tree = Rugged::Commit.lookup(repository, commit_oid).tree
|
79
123
|
|
80
|
-
|
81
|
-
next if blob.vendored? || blob.generated? || blob.language.nil?
|
124
|
+
diff = Rugged::Tree.diff(repository, old_tree, new_tree)
|
82
125
|
|
83
|
-
|
84
|
-
|
126
|
+
diff.each_delta do |delta|
|
127
|
+
old = delta.old_file[:path]
|
128
|
+
new = delta.new_file[:path]
|
85
129
|
|
86
|
-
|
87
|
-
|
130
|
+
file_map.delete(old)
|
131
|
+
next if delta.binary
|
88
132
|
|
89
|
-
|
90
|
-
|
91
|
-
|
133
|
+
if [:added, :modified].include? delta.status
|
134
|
+
mode = delta.new_file[:mode].to_s(8)
|
135
|
+
blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode)
|
92
136
|
|
93
|
-
|
94
|
-
|
137
|
+
# Skip vendored or generated blobs
|
138
|
+
next if blob.vendored? || blob.generated? || blob.language.nil?
|
95
139
|
|
96
|
-
|
97
|
-
|
98
|
-
|
140
|
+
# Only include programming languages and acceptable markup languages
|
141
|
+
if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
|
142
|
+
file_map[new] = [blob.language.group.name, blob.size]
|
143
|
+
end
|
144
|
+
end
|
99
145
|
end
|
100
146
|
|
101
|
-
|
102
|
-
|
103
|
-
nil
|
147
|
+
file_map
|
104
148
|
end
|
105
149
|
end
|
106
150
|
end
|