github-linguist 2.12.1 → 3.0.0b0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 40a66993dd15837627385790ababecef29990748
4
- data.tar.gz: 8fbf44c3c01b92fd5da157e2c5bdd8ca5c6ad620
3
+ metadata.gz: 5cd8c69614aa4a6bf20c79737e27aaac60ace18c
4
+ data.tar.gz: 1e3a64bf355a0b72821c88f09b75dcbeffd3a614
5
5
  SHA512:
6
- metadata.gz: 52c1e96004e19c3f7b1588218c2c4e4f522926cf944f96bf2131c015e232bce0b54111435bd5cccd484fada0d13a6fc6e25e2247325c4fb66e59fd74fa6a71a1
7
- data.tar.gz: 817b2cb650828e548596dc8376f9d8cacfcf32977ae13a340bd7f7e0d635e8eb83b590edc3eff73d10ae96f4e2e1fab6b7de44970130612922264d13e0a17f43
6
+ metadata.gz: d30b9264ca2e44ae46391e86df8fe4f4835ccbe9f4688f1d65d8fd110f0b689370392a17fffa78ea7a89034e2e9acd09612ea4b1611af1f6fae168c90080424e
7
+ data.tar.gz: 2ccfafb26afd642f7146b7c892a499aab136f7b12b8c703ac2231512a768e8da3e991c0d5fbf9fcd204a896f95379aef799cff18e470d4a4912f10c33ec53ae7
data/bin/linguist CHANGED
@@ -5,6 +5,7 @@
5
5
 
6
6
  require 'linguist/file_blob'
7
7
  require 'linguist/repository'
8
+ require 'rugged'
8
9
 
9
10
  path = ARGV[0] || Dir.pwd
10
11
 
@@ -18,7 +19,8 @@ ARGV.shift
18
19
  breakdown = true if ARGV[0] == "--breakdown"
19
20
 
20
21
  if File.directory?(path)
21
- repo = Linguist::Repository.from_directory(path)
22
+ rugged = Rugged::Repository.new(path)
23
+ repo = Linguist::Repository.new(rugged, rugged.head.target_id)
22
24
  repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
23
25
  percentage = ((size / repo.size.to_f) * 100)
24
26
  percentage = sprintf '%.2f' % percentage
@@ -313,15 +313,7 @@ module Linguist
313
313
  #
314
314
  # Returns a Language or nil if none is detected
315
315
  def language
316
- return @language if defined? @language
317
-
318
- if defined?(@data) && @data.is_a?(String)
319
- data = @data
320
- else
321
- data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
322
- end
323
-
324
- @language = Language.detect(name.to_s, data, mode)
316
+ @language ||= Language.detect(self)
325
317
  end
326
318
 
327
319
  # Internal: Get the lexer of the blob.
@@ -63,7 +63,8 @@ module Linguist
63
63
  generated_jni_header? ||
64
64
  composer_lock? ||
65
65
  node_modules? ||
66
- vcr_cassette?
66
+ vcr_cassette? ||
67
+ generated_by_zephir?
67
68
  end
68
69
 
69
70
  # Internal: Is the blob an XCode project file?
@@ -237,6 +238,13 @@ module Linguist
237
238
  !!name.match(/composer.lock/)
238
239
  end
239
240
 
241
+ # Internal: Is the blob generated by Zephir?
242
+ #
243
+ # Returns true or false.
244
+ def generated_by_zephir?
245
+ !!name.match(/.\.zep\.(?:c|h|php)$/)
246
+ end
247
+
240
248
  # Is the blob a VCR Cassette file?
241
249
  #
242
250
  # Returns true or false
@@ -92,18 +92,14 @@ module Linguist
92
92
 
93
93
  # Public: Detects the Language of the blob.
94
94
  #
95
- # name - String filename
96
- # data - String blob data. A block also maybe passed in for lazy
97
- # loading. This behavior is deprecated and you should always
98
- # pass in a String.
99
- # mode - Optional String mode (defaults to nil)
100
- #
101
95
  # Returns Language or nil.
102
- def self.detect(name, data, mode = nil)
96
+ def self.detect(blob)
97
+ name = blob.name.to_s
98
+
103
99
  # A bit of an elegant hack. If the file is executable but extensionless,
104
100
  # append a "magic" extension so it can be classified with other
105
101
  # languages that have shebang scripts.
106
- if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
102
+ if File.extname(name).empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
107
103
  name += ".script!"
108
104
  end
109
105
 
@@ -114,7 +110,7 @@ module Linguist
114
110
  # extension at all, in the case of extensionless scripts), we need to continue
115
111
  # our detection work
116
112
  if possible_languages.length > 1
117
- data = data.call() if data.respond_to?(:call)
113
+ data = blob.data
118
114
  possible_language_names = possible_languages.map(&:name)
119
115
 
120
116
  # Don't bother with emptiness
@@ -157,6 +157,7 @@ Assembly:
157
157
  - nasm
158
158
  extensions:
159
159
  - .asm
160
+ - .inc
160
161
 
161
162
  Augeas:
162
163
  type: programming
@@ -528,15 +529,6 @@ Dart:
528
529
  extensions:
529
530
  - .dart
530
531
 
531
- DCPU-16 ASM:
532
- type: programming
533
- lexer: dasm16
534
- extensions:
535
- - .dasm16
536
- - .dasm
537
- aliases:
538
- - dasm16
539
-
540
532
  Diff:
541
533
  extensions:
542
534
  - .diff
@@ -940,7 +932,7 @@ Hy:
940
932
 
941
933
  IDL:
942
934
  type: programming
943
- lexer: Text only
935
+ lexer: IDL
944
936
  color: "#e3592c"
945
937
  extensions:
946
938
  - .pro
@@ -959,7 +951,7 @@ Inno Setup:
959
951
 
960
952
  Idris:
961
953
  type: programming
962
- lexer: Text only
954
+ lexer: Idris
963
955
  extensions:
964
956
  - .idr
965
957
  - .lidr
@@ -998,6 +990,13 @@ Ioke:
998
990
  extensions:
999
991
  - .ik
1000
992
 
993
+ Isabelle:
994
+ type: programming
995
+ lexer: Text only
996
+ color: "#fdcd00"
997
+ extensions:
998
+ - .thy
999
+
1001
1000
  J:
1002
1001
  type: programming
1003
1002
  lexer: Text only
@@ -1087,6 +1086,8 @@ JavaScript:
1087
1086
  - .pac
1088
1087
  - .sjs
1089
1088
  - .ssjs
1089
+ - .xsjs
1090
+ - .xsjslib
1090
1091
  filenames:
1091
1092
  - Jakefile
1092
1093
  interpreters:
@@ -1285,6 +1286,8 @@ Mathematica:
1285
1286
  type: programming
1286
1287
  extensions:
1287
1288
  - .mathematica
1289
+ - .m
1290
+ - .nb
1288
1291
  lexer: Text only
1289
1292
 
1290
1293
  Matlab:
@@ -1392,6 +1395,12 @@ Nimrod:
1392
1395
  - .nim
1393
1396
  - .nimrod
1394
1397
 
1398
+ Nix:
1399
+ type: programming
1400
+ lexer: Nix
1401
+ extensions:
1402
+ - .nix
1403
+
1395
1404
  Nu:
1396
1405
  type: programming
1397
1406
  lexer: Scheme
@@ -1601,7 +1610,7 @@ Perl6:
1601
1610
  Pike:
1602
1611
  type: programming
1603
1612
  color: "#066ab2"
1604
- lexer: C
1613
+ lexer: Pike
1605
1614
  extensions:
1606
1615
  - .pike
1607
1616
  - .pmod
@@ -1698,6 +1707,7 @@ Python:
1698
1707
  - .gyp
1699
1708
  - .lmi
1700
1709
  - .pyde
1710
+ - .pyp
1701
1711
  - .pyt
1702
1712
  - .pyw
1703
1713
  - .wsgi
@@ -1723,6 +1733,12 @@ QML:
1723
1733
  extensions:
1724
1734
  - .qml
1725
1735
 
1736
+ QMake:
1737
+ lexer: Text only
1738
+ extensions:
1739
+ - .pro
1740
+ - .pri
1741
+
1726
1742
  R:
1727
1743
  type: programming
1728
1744
  color: "#198ce7"
@@ -1862,12 +1878,15 @@ Ruby:
1862
1878
  interpreters:
1863
1879
  - ruby
1864
1880
  filenames:
1881
+ - .pryrc
1865
1882
  - Appraisals
1866
1883
  - Berksfile
1867
1884
  - Buildfile
1868
1885
  - Gemfile
1869
1886
  - Gemfile.lock
1870
1887
  - Guardfile
1888
+ - Jarfile
1889
+ - Mavenfile
1871
1890
  - Podfile
1872
1891
  - Thorfile
1873
1892
  - Vagrantfile
@@ -2288,6 +2307,7 @@ XML:
2288
2307
  - .launch
2289
2308
  - .mxml
2290
2309
  - .nproj
2310
+ - .nuspec
2291
2311
  - .osm
2292
2312
  - .plist
2293
2313
  - .pluginspec
@@ -2359,6 +2379,17 @@ XSLT:
2359
2379
  - .xslt
2360
2380
  - .xsl
2361
2381
 
2382
+ Xojo:
2383
+ type: programming
2384
+ lexer: VB.net
2385
+ extensions:
2386
+ - .xojo_code
2387
+ - .xojo_menu
2388
+ - .xojo_report
2389
+ - .xojo_script
2390
+ - .xojo_toolbar
2391
+ - .xojo_window
2392
+
2362
2393
  Xtend:
2363
2394
  type: programming
2364
2395
  extensions:
@@ -0,0 +1,37 @@
1
+ require 'linguist/blob_helper'
2
+ require 'rugged'
3
+
4
+ module Linguist
5
+ class LazyBlob
6
+ include BlobHelper
7
+
8
+ MAX_SIZE = 128 * 1024
9
+
10
+ attr_reader :repository
11
+ attr_reader :oid
12
+ attr_reader :name
13
+ attr_reader :mode
14
+
15
+ def initialize(repo, oid, name, mode = nil)
16
+ @repository = repo
17
+ @oid = oid
18
+ @name = name
19
+ @mode = mode
20
+ end
21
+
22
+ def data
23
+ load_blob!
24
+ @data
25
+ end
26
+
27
+ def size
28
+ load_blob!
29
+ @size
30
+ end
31
+
32
+ protected
33
+ def load_blob!
34
+ @data, @size = Rugged::Blob.to_buffer(repository, oid, MAX_SIZE) if @data.nil?
35
+ end
36
+ end
37
+ end
@@ -1,4 +1,5 @@
1
- require 'linguist/file_blob'
1
+ require 'linguist/lazy_blob'
2
+ require 'rugged'
2
3
 
3
4
  module Linguist
4
5
  # A Repository is an abstraction of a Grit::Repo or a basic file
@@ -7,100 +8,143 @@ module Linguist
7
8
  # Its primary purpose is for gathering language statistics across
8
9
  # the entire project.
9
10
  class Repository
10
- # Public: Initialize a new Repository from a File directory
11
+ attr_reader :repository
12
+
13
+ # Public: Create a new Repository based on the stats of
14
+ # an existing one
15
+ def self.incremental(repo, commit_oid, old_commit_oid, old_stats)
16
+ repo = self.new(repo, commit_oid)
17
+ repo.load_existing_stats(old_commit_oid, old_stats)
18
+ repo
19
+ end
20
+
21
+ # Public: Initialize a new Repository to be analyzed for language
22
+ # data
11
23
  #
12
- # base_path - A path String
24
+ # repo - a Rugged::Repository object
25
+ # commit_oid - the sha1 of the commit that will be analyzed;
26
+ # this is usually the master branch
13
27
  #
14
28
  # Returns a Repository
15
- def self.from_directory(base_path)
16
- new Dir["#{base_path}/**/*"].
17
- select { |f| File.file?(f) }.
18
- map { |path| FileBlob.new(path, base_path) }
29
+ def initialize(repo, commit_oid)
30
+ @repository = repo
31
+ @commit_oid = commit_oid
32
+
33
+ raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
19
34
  end
20
35
 
21
- # Public: Initialize a new Repository
36
+ # Public: Load the results of a previous analysis on this repository
37
+ # to speed up the new scan.
22
38
  #
23
- # enum - Enumerator that responds to `each` and
24
- # yields Blob objects
39
+ # The new analysis will be performed incrementally so as to only take
40
+ # into account the file changes since the last time the repository
41
+ # was scanned
25
42
  #
26
- # Returns a Repository
27
- def initialize(enum)
28
- @enum = enum
29
- @computed_stats = false
30
- @language = @size = nil
31
- @sizes = Hash.new { 0 }
32
- @file_breakdown = Hash.new { |h,k| h[k] = Array.new }
43
+ # old_commit_oid - the sha1 of the commit that was previously analyzed
44
+ # old_stats - the result of the previous analysis, obtained by calling
45
+ # Repository#cache on the old repository
46
+ #
47
+ # Returns nothing
48
+ def load_existing_stats(old_commit_oid, old_stats)
49
+ @old_commit_oid = old_commit_oid
50
+ @old_stats = old_stats
51
+ nil
33
52
  end
34
53
 
35
54
  # Public: Returns a breakdown of language stats.
36
55
  #
37
56
  # Examples
38
57
  #
39
- # # => { Language['Ruby'] => 46319,
40
- # Language['JavaScript'] => 258 }
58
+ # # => { 'Ruby' => 46319,
59
+ # 'JavaScript' => 258 }
41
60
  #
42
- # Returns a Hash of Language keys and Integer size values.
61
+ # Returns a Hash of language names and Integer size values.
43
62
  def languages
44
- compute_stats
45
- @sizes
63
+ @sizes ||= begin
64
+ sizes = Hash.new { 0 }
65
+ cache.each do |_, (language, size)|
66
+ sizes[language] += size
67
+ end
68
+ sizes
69
+ end
46
70
  end
47
71
 
48
72
  # Public: Get primary Language of repository.
49
73
  #
50
- # Returns a Language
74
+ # Returns a language name
51
75
  def language
52
- compute_stats
53
- @language
76
+ @language ||= begin
77
+ primary = languages.max_by { |(_, size)| size }
78
+ primary && primary[0]
79
+ end
54
80
  end
55
81
 
56
82
  # Public: Get the total size of the repository.
57
83
  #
58
84
  # Returns a byte size Integer
59
85
  def size
60
- compute_stats
61
- @size
86
+ @size ||= languages.inject(0) { |s,(_,v)| s + v }
62
87
  end
63
88
 
64
89
  # Public: Return the language breakdown of this repository by file
90
+ #
91
+ # Returns a map of language names => [filenames...]
65
92
  def breakdown_by_file
66
- compute_stats
67
- @file_breakdown
93
+ @file_breakdown ||= begin
94
+ breakdown = Hash.new { |h,k| h[k] = Array.new }
95
+ cache.each do |filename, (language, _)|
96
+ breakdown[language] << filename
97
+ end
98
+ breakdown
99
+ end
68
100
  end
69
101
 
70
- # Internal: Compute language breakdown for each blob in the Repository.
102
+ # Public: Return the cached results of the analysis
71
103
  #
72
- # Returns nothing
73
- def compute_stats
74
- return if @computed_stats
104
+ # This is a per-file breakdown that can be passed to other instances
105
+ # of Linguist::Repository to perform incremental scans
106
+ #
107
+ # Returns a map of filename => [language, size]
108
+ def cache
109
+ @cache ||= begin
110
+ if @old_commit_oid == @commit_oid
111
+ @old_stats
112
+ else
113
+ compute_stats(@old_commit_oid, @commit_oid, @old_stats)
114
+ end
115
+ end
116
+ end
75
117
 
76
- @enum.each do |blob|
77
- # Skip files that are likely binary
78
- next if blob.likely_binary?
118
+ protected
119
+ def compute_stats(old_commit_oid, commit_oid, cache = nil)
120
+ file_map = cache ? cache.dup : {}
121
+ old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
122
+ new_tree = Rugged::Commit.lookup(repository, commit_oid).tree
79
123
 
80
- # Skip vendored or generated blobs
81
- next if blob.vendored? || blob.generated? || blob.language.nil?
124
+ diff = Rugged::Tree.diff(repository, old_tree, new_tree)
82
125
 
83
- # Only include programming languages and acceptable markup languages
84
- if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
126
+ diff.each_delta do |delta|
127
+ old = delta.old_file[:path]
128
+ new = delta.new_file[:path]
85
129
 
86
- # Build up the per-file breakdown stats
87
- @file_breakdown[blob.language.group.name] << blob.name
130
+ file_map.delete(old)
131
+ next if delta.binary
88
132
 
89
- @sizes[blob.language.group] += blob.size
90
- end
91
- end
133
+ if [:added, :modified].include? delta.status
134
+ mode = delta.new_file[:mode].to_s(8)
135
+ blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode)
92
136
 
93
- # Compute total size
94
- @size = @sizes.inject(0) { |s,(_,v)| s + v }
137
+ # Skip vendored or generated blobs
138
+ next if blob.vendored? || blob.generated? || blob.language.nil?
95
139
 
96
- # Get primary language
97
- if primary = @sizes.max_by { |(_, size)| size }
98
- @language = primary[0]
140
+ # Only include programming languages and acceptable markup languages
141
+ if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
142
+ file_map[new] = [blob.language.group.name, blob.size]
143
+ end
144
+ end
99
145
  end
100
146
 
101
- @computed_stats = true
102
-
103
- nil
147
+ file_map
104
148
  end
105
149
  end
106
150
  end