github-linguist 2.12.1 → 3.0.0b0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/linguist +3 -1
- data/lib/linguist/blob_helper.rb +1 -9
- data/lib/linguist/generated.rb +9 -1
- data/lib/linguist/language.rb +5 -9
- data/lib/linguist/languages.yml +43 -12
- data/lib/linguist/lazy_blob.rb +37 -0
- data/lib/linguist/repository.rb +97 -53
- data/lib/linguist/samples.json +54290 -51669
- data/lib/linguist/samples.rb +1 -1
- data/lib/linguist/vendor.yml +5 -0
- data/lib/linguist/version.rb +1 -1
- metadata +25 -25
- data/lib/linguist/languages.json +0 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5cd8c69614aa4a6bf20c79737e27aaac60ace18c
|
4
|
+
data.tar.gz: 1e3a64bf355a0b72821c88f09b75dcbeffd3a614
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d30b9264ca2e44ae46391e86df8fe4f4835ccbe9f4688f1d65d8fd110f0b689370392a17fffa78ea7a89034e2e9acd09612ea4b1611af1f6fae168c90080424e
|
7
|
+
data.tar.gz: 2ccfafb26afd642f7146b7c892a499aab136f7b12b8c703ac2231512a768e8da3e991c0d5fbf9fcd204a896f95379aef799cff18e470d4a4912f10c33ec53ae7
|
data/bin/linguist
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
|
6
6
|
require 'linguist/file_blob'
|
7
7
|
require 'linguist/repository'
|
8
|
+
require 'rugged'
|
8
9
|
|
9
10
|
path = ARGV[0] || Dir.pwd
|
10
11
|
|
@@ -18,7 +19,8 @@ ARGV.shift
|
|
18
19
|
breakdown = true if ARGV[0] == "--breakdown"
|
19
20
|
|
20
21
|
if File.directory?(path)
|
21
|
-
|
22
|
+
rugged = Rugged::Repository.new(path)
|
23
|
+
repo = Linguist::Repository.new(rugged, rugged.head.target_id)
|
22
24
|
repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
|
23
25
|
percentage = ((size / repo.size.to_f) * 100)
|
24
26
|
percentage = sprintf '%.2f' % percentage
|
data/lib/linguist/blob_helper.rb
CHANGED
@@ -313,15 +313,7 @@ module Linguist
|
|
313
313
|
#
|
314
314
|
# Returns a Language or nil if none is detected
|
315
315
|
def language
|
316
|
-
|
317
|
-
|
318
|
-
if defined?(@data) && @data.is_a?(String)
|
319
|
-
data = @data
|
320
|
-
else
|
321
|
-
data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
|
322
|
-
end
|
323
|
-
|
324
|
-
@language = Language.detect(name.to_s, data, mode)
|
316
|
+
@language ||= Language.detect(self)
|
325
317
|
end
|
326
318
|
|
327
319
|
# Internal: Get the lexer of the blob.
|
data/lib/linguist/generated.rb
CHANGED
@@ -63,7 +63,8 @@ module Linguist
|
|
63
63
|
generated_jni_header? ||
|
64
64
|
composer_lock? ||
|
65
65
|
node_modules? ||
|
66
|
-
vcr_cassette?
|
66
|
+
vcr_cassette? ||
|
67
|
+
generated_by_zephir?
|
67
68
|
end
|
68
69
|
|
69
70
|
# Internal: Is the blob an XCode project file?
|
@@ -237,6 +238,13 @@ module Linguist
|
|
237
238
|
!!name.match(/composer.lock/)
|
238
239
|
end
|
239
240
|
|
241
|
+
# Internal: Is the blob generated by Zephir?
|
242
|
+
#
|
243
|
+
# Returns true or false.
|
244
|
+
def generated_by_zephir?
|
245
|
+
!!name.match(/.\.zep\.(?:c|h|php)$/)
|
246
|
+
end
|
247
|
+
|
240
248
|
# Is the blob a VCR Cassette file?
|
241
249
|
#
|
242
250
|
# Returns true or false
|
data/lib/linguist/language.rb
CHANGED
@@ -92,18 +92,14 @@ module Linguist
|
|
92
92
|
|
93
93
|
# Public: Detects the Language of the blob.
|
94
94
|
#
|
95
|
-
# name - String filename
|
96
|
-
# data - String blob data. A block also maybe passed in for lazy
|
97
|
-
# loading. This behavior is deprecated and you should always
|
98
|
-
# pass in a String.
|
99
|
-
# mode - Optional String mode (defaults to nil)
|
100
|
-
#
|
101
95
|
# Returns Language or nil.
|
102
|
-
def self.detect(
|
96
|
+
def self.detect(blob)
|
97
|
+
name = blob.name.to_s
|
98
|
+
|
103
99
|
# A bit of an elegant hack. If the file is executable but extensionless,
|
104
100
|
# append a "magic" extension so it can be classified with other
|
105
101
|
# languages that have shebang scripts.
|
106
|
-
if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
|
102
|
+
if File.extname(name).empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
|
107
103
|
name += ".script!"
|
108
104
|
end
|
109
105
|
|
@@ -114,7 +110,7 @@ module Linguist
|
|
114
110
|
# extension at all, in the case of extensionless scripts), we need to continue
|
115
111
|
# our detection work
|
116
112
|
if possible_languages.length > 1
|
117
|
-
data =
|
113
|
+
data = blob.data
|
118
114
|
possible_language_names = possible_languages.map(&:name)
|
119
115
|
|
120
116
|
# Don't bother with emptiness
|
data/lib/linguist/languages.yml
CHANGED
@@ -157,6 +157,7 @@ Assembly:
|
|
157
157
|
- nasm
|
158
158
|
extensions:
|
159
159
|
- .asm
|
160
|
+
- .inc
|
160
161
|
|
161
162
|
Augeas:
|
162
163
|
type: programming
|
@@ -528,15 +529,6 @@ Dart:
|
|
528
529
|
extensions:
|
529
530
|
- .dart
|
530
531
|
|
531
|
-
DCPU-16 ASM:
|
532
|
-
type: programming
|
533
|
-
lexer: dasm16
|
534
|
-
extensions:
|
535
|
-
- .dasm16
|
536
|
-
- .dasm
|
537
|
-
aliases:
|
538
|
-
- dasm16
|
539
|
-
|
540
532
|
Diff:
|
541
533
|
extensions:
|
542
534
|
- .diff
|
@@ -940,7 +932,7 @@ Hy:
|
|
940
932
|
|
941
933
|
IDL:
|
942
934
|
type: programming
|
943
|
-
lexer:
|
935
|
+
lexer: IDL
|
944
936
|
color: "#e3592c"
|
945
937
|
extensions:
|
946
938
|
- .pro
|
@@ -959,7 +951,7 @@ Inno Setup:
|
|
959
951
|
|
960
952
|
Idris:
|
961
953
|
type: programming
|
962
|
-
lexer:
|
954
|
+
lexer: Idris
|
963
955
|
extensions:
|
964
956
|
- .idr
|
965
957
|
- .lidr
|
@@ -998,6 +990,13 @@ Ioke:
|
|
998
990
|
extensions:
|
999
991
|
- .ik
|
1000
992
|
|
993
|
+
Isabelle:
|
994
|
+
type: programming
|
995
|
+
lexer: Text only
|
996
|
+
color: "#fdcd00"
|
997
|
+
extensions:
|
998
|
+
- .thy
|
999
|
+
|
1001
1000
|
J:
|
1002
1001
|
type: programming
|
1003
1002
|
lexer: Text only
|
@@ -1087,6 +1086,8 @@ JavaScript:
|
|
1087
1086
|
- .pac
|
1088
1087
|
- .sjs
|
1089
1088
|
- .ssjs
|
1089
|
+
- .xsjs
|
1090
|
+
- .xsjslib
|
1090
1091
|
filenames:
|
1091
1092
|
- Jakefile
|
1092
1093
|
interpreters:
|
@@ -1285,6 +1286,8 @@ Mathematica:
|
|
1285
1286
|
type: programming
|
1286
1287
|
extensions:
|
1287
1288
|
- .mathematica
|
1289
|
+
- .m
|
1290
|
+
- .nb
|
1288
1291
|
lexer: Text only
|
1289
1292
|
|
1290
1293
|
Matlab:
|
@@ -1392,6 +1395,12 @@ Nimrod:
|
|
1392
1395
|
- .nim
|
1393
1396
|
- .nimrod
|
1394
1397
|
|
1398
|
+
Nix:
|
1399
|
+
type: programming
|
1400
|
+
lexer: Nix
|
1401
|
+
extensions:
|
1402
|
+
- .nix
|
1403
|
+
|
1395
1404
|
Nu:
|
1396
1405
|
type: programming
|
1397
1406
|
lexer: Scheme
|
@@ -1601,7 +1610,7 @@ Perl6:
|
|
1601
1610
|
Pike:
|
1602
1611
|
type: programming
|
1603
1612
|
color: "#066ab2"
|
1604
|
-
lexer:
|
1613
|
+
lexer: Pike
|
1605
1614
|
extensions:
|
1606
1615
|
- .pike
|
1607
1616
|
- .pmod
|
@@ -1698,6 +1707,7 @@ Python:
|
|
1698
1707
|
- .gyp
|
1699
1708
|
- .lmi
|
1700
1709
|
- .pyde
|
1710
|
+
- .pyp
|
1701
1711
|
- .pyt
|
1702
1712
|
- .pyw
|
1703
1713
|
- .wsgi
|
@@ -1723,6 +1733,12 @@ QML:
|
|
1723
1733
|
extensions:
|
1724
1734
|
- .qml
|
1725
1735
|
|
1736
|
+
QMake:
|
1737
|
+
lexer: Text only
|
1738
|
+
extensions:
|
1739
|
+
- .pro
|
1740
|
+
- .pri
|
1741
|
+
|
1726
1742
|
R:
|
1727
1743
|
type: programming
|
1728
1744
|
color: "#198ce7"
|
@@ -1862,12 +1878,15 @@ Ruby:
|
|
1862
1878
|
interpreters:
|
1863
1879
|
- ruby
|
1864
1880
|
filenames:
|
1881
|
+
- .pryrc
|
1865
1882
|
- Appraisals
|
1866
1883
|
- Berksfile
|
1867
1884
|
- Buildfile
|
1868
1885
|
- Gemfile
|
1869
1886
|
- Gemfile.lock
|
1870
1887
|
- Guardfile
|
1888
|
+
- Jarfile
|
1889
|
+
- Mavenfile
|
1871
1890
|
- Podfile
|
1872
1891
|
- Thorfile
|
1873
1892
|
- Vagrantfile
|
@@ -2288,6 +2307,7 @@ XML:
|
|
2288
2307
|
- .launch
|
2289
2308
|
- .mxml
|
2290
2309
|
- .nproj
|
2310
|
+
- .nuspec
|
2291
2311
|
- .osm
|
2292
2312
|
- .plist
|
2293
2313
|
- .pluginspec
|
@@ -2359,6 +2379,17 @@ XSLT:
|
|
2359
2379
|
- .xslt
|
2360
2380
|
- .xsl
|
2361
2381
|
|
2382
|
+
Xojo:
|
2383
|
+
type: programming
|
2384
|
+
lexer: VB.net
|
2385
|
+
extensions:
|
2386
|
+
- .xojo_code
|
2387
|
+
- .xojo_menu
|
2388
|
+
- .xojo_report
|
2389
|
+
- .xojo_script
|
2390
|
+
- .xojo_toolbar
|
2391
|
+
- .xojo_window
|
2392
|
+
|
2362
2393
|
Xtend:
|
2363
2394
|
type: programming
|
2364
2395
|
extensions:
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require 'linguist/blob_helper'
|
2
|
+
require 'rugged'
|
3
|
+
|
4
|
+
module Linguist
|
5
|
+
class LazyBlob
|
6
|
+
include BlobHelper
|
7
|
+
|
8
|
+
MAX_SIZE = 128 * 1024
|
9
|
+
|
10
|
+
attr_reader :repository
|
11
|
+
attr_reader :oid
|
12
|
+
attr_reader :name
|
13
|
+
attr_reader :mode
|
14
|
+
|
15
|
+
def initialize(repo, oid, name, mode = nil)
|
16
|
+
@repository = repo
|
17
|
+
@oid = oid
|
18
|
+
@name = name
|
19
|
+
@mode = mode
|
20
|
+
end
|
21
|
+
|
22
|
+
def data
|
23
|
+
load_blob!
|
24
|
+
@data
|
25
|
+
end
|
26
|
+
|
27
|
+
def size
|
28
|
+
load_blob!
|
29
|
+
@size
|
30
|
+
end
|
31
|
+
|
32
|
+
protected
|
33
|
+
def load_blob!
|
34
|
+
@data, @size = Rugged::Blob.to_buffer(repository, oid, MAX_SIZE) if @data.nil?
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
data/lib/linguist/repository.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
require 'linguist/
|
1
|
+
require 'linguist/lazy_blob'
|
2
|
+
require 'rugged'
|
2
3
|
|
3
4
|
module Linguist
|
4
5
|
# A Repository is an abstraction of a Grit::Repo or a basic file
|
@@ -7,100 +8,143 @@ module Linguist
|
|
7
8
|
# Its primary purpose is for gathering language statistics across
|
8
9
|
# the entire project.
|
9
10
|
class Repository
|
10
|
-
|
11
|
+
attr_reader :repository
|
12
|
+
|
13
|
+
# Public: Create a new Repository based on the stats of
|
14
|
+
# an existing one
|
15
|
+
def self.incremental(repo, commit_oid, old_commit_oid, old_stats)
|
16
|
+
repo = self.new(repo, commit_oid)
|
17
|
+
repo.load_existing_stats(old_commit_oid, old_stats)
|
18
|
+
repo
|
19
|
+
end
|
20
|
+
|
21
|
+
# Public: Initialize a new Repository to be analyzed for language
|
22
|
+
# data
|
11
23
|
#
|
12
|
-
#
|
24
|
+
# repo - a Rugged::Repository object
|
25
|
+
# commit_oid - the sha1 of the commit that will be analyzed;
|
26
|
+
# this is usually the master branch
|
13
27
|
#
|
14
28
|
# Returns a Repository
|
15
|
-
def
|
16
|
-
|
17
|
-
|
18
|
-
|
29
|
+
def initialize(repo, commit_oid)
|
30
|
+
@repository = repo
|
31
|
+
@commit_oid = commit_oid
|
32
|
+
|
33
|
+
raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
|
19
34
|
end
|
20
35
|
|
21
|
-
# Public:
|
36
|
+
# Public: Load the results of a previous analysis on this repository
|
37
|
+
# to speed up the new scan.
|
22
38
|
#
|
23
|
-
#
|
24
|
-
#
|
39
|
+
# The new analysis will be performed incrementally so as to only take
|
40
|
+
# into account the file changes since the last time the repository
|
41
|
+
# was scanned
|
25
42
|
#
|
26
|
-
#
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
@
|
43
|
+
# old_commit_oid - the sha1 of the commit that was previously analyzed
|
44
|
+
# old_stats - the result of the previous analysis, obtained by calling
|
45
|
+
# Repository#cache on the old repository
|
46
|
+
#
|
47
|
+
# Returns nothing
|
48
|
+
def load_existing_stats(old_commit_oid, old_stats)
|
49
|
+
@old_commit_oid = old_commit_oid
|
50
|
+
@old_stats = old_stats
|
51
|
+
nil
|
33
52
|
end
|
34
53
|
|
35
54
|
# Public: Returns a breakdown of language stats.
|
36
55
|
#
|
37
56
|
# Examples
|
38
57
|
#
|
39
|
-
# # => {
|
40
|
-
#
|
58
|
+
# # => { 'Ruby' => 46319,
|
59
|
+
# 'JavaScript' => 258 }
|
41
60
|
#
|
42
|
-
# Returns a Hash of
|
61
|
+
# Returns a Hash of language names and Integer size values.
|
43
62
|
def languages
|
44
|
-
|
45
|
-
|
63
|
+
@sizes ||= begin
|
64
|
+
sizes = Hash.new { 0 }
|
65
|
+
cache.each do |_, (language, size)|
|
66
|
+
sizes[language] += size
|
67
|
+
end
|
68
|
+
sizes
|
69
|
+
end
|
46
70
|
end
|
47
71
|
|
48
72
|
# Public: Get primary Language of repository.
|
49
73
|
#
|
50
|
-
# Returns a
|
74
|
+
# Returns a language name
|
51
75
|
def language
|
52
|
-
|
53
|
-
|
76
|
+
@language ||= begin
|
77
|
+
primary = languages.max_by { |(_, size)| size }
|
78
|
+
primary && primary[0]
|
79
|
+
end
|
54
80
|
end
|
55
81
|
|
56
82
|
# Public: Get the total size of the repository.
|
57
83
|
#
|
58
84
|
# Returns a byte size Integer
|
59
85
|
def size
|
60
|
-
|
61
|
-
@size
|
86
|
+
@size ||= languages.inject(0) { |s,(_,v)| s + v }
|
62
87
|
end
|
63
88
|
|
64
89
|
# Public: Return the language breakdown of this repository by file
|
90
|
+
#
|
91
|
+
# Returns a map of language names => [filenames...]
|
65
92
|
def breakdown_by_file
|
66
|
-
|
67
|
-
|
93
|
+
@file_breakdown ||= begin
|
94
|
+
breakdown = Hash.new { |h,k| h[k] = Array.new }
|
95
|
+
cache.each do |filename, (language, _)|
|
96
|
+
breakdown[language] << filename
|
97
|
+
end
|
98
|
+
breakdown
|
99
|
+
end
|
68
100
|
end
|
69
101
|
|
70
|
-
#
|
102
|
+
# Public: Return the cached results of the analysis
|
71
103
|
#
|
72
|
-
#
|
73
|
-
|
74
|
-
|
104
|
+
# This is a per-file breakdown that can be passed to other instances
|
105
|
+
# of Linguist::Repository to perform incremental scans
|
106
|
+
#
|
107
|
+
# Returns a map of filename => [language, size]
|
108
|
+
def cache
|
109
|
+
@cache ||= begin
|
110
|
+
if @old_commit_oid == @commit_oid
|
111
|
+
@old_stats
|
112
|
+
else
|
113
|
+
compute_stats(@old_commit_oid, @commit_oid, @old_stats)
|
114
|
+
end
|
115
|
+
end
|
116
|
+
end
|
75
117
|
|
76
|
-
|
77
|
-
|
78
|
-
|
118
|
+
protected
|
119
|
+
def compute_stats(old_commit_oid, commit_oid, cache = nil)
|
120
|
+
file_map = cache ? cache.dup : {}
|
121
|
+
old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
|
122
|
+
new_tree = Rugged::Commit.lookup(repository, commit_oid).tree
|
79
123
|
|
80
|
-
|
81
|
-
next if blob.vendored? || blob.generated? || blob.language.nil?
|
124
|
+
diff = Rugged::Tree.diff(repository, old_tree, new_tree)
|
82
125
|
|
83
|
-
|
84
|
-
|
126
|
+
diff.each_delta do |delta|
|
127
|
+
old = delta.old_file[:path]
|
128
|
+
new = delta.new_file[:path]
|
85
129
|
|
86
|
-
|
87
|
-
|
130
|
+
file_map.delete(old)
|
131
|
+
next if delta.binary
|
88
132
|
|
89
|
-
|
90
|
-
|
91
|
-
|
133
|
+
if [:added, :modified].include? delta.status
|
134
|
+
mode = delta.new_file[:mode].to_s(8)
|
135
|
+
blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode)
|
92
136
|
|
93
|
-
|
94
|
-
|
137
|
+
# Skip vendored or generated blobs
|
138
|
+
next if blob.vendored? || blob.generated? || blob.language.nil?
|
95
139
|
|
96
|
-
|
97
|
-
|
98
|
-
|
140
|
+
# Only include programming languages and acceptable markup languages
|
141
|
+
if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
|
142
|
+
file_map[new] = [blob.language.group.name, blob.size]
|
143
|
+
end
|
144
|
+
end
|
99
145
|
end
|
100
146
|
|
101
|
-
|
102
|
-
|
103
|
-
nil
|
147
|
+
file_map
|
104
148
|
end
|
105
149
|
end
|
106
150
|
end
|