github-linguist 2.12.1 → 3.0.0b0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 40a66993dd15837627385790ababecef29990748
4
- data.tar.gz: 8fbf44c3c01b92fd5da157e2c5bdd8ca5c6ad620
3
+ metadata.gz: 5cd8c69614aa4a6bf20c79737e27aaac60ace18c
4
+ data.tar.gz: 1e3a64bf355a0b72821c88f09b75dcbeffd3a614
5
5
  SHA512:
6
- metadata.gz: 52c1e96004e19c3f7b1588218c2c4e4f522926cf944f96bf2131c015e232bce0b54111435bd5cccd484fada0d13a6fc6e25e2247325c4fb66e59fd74fa6a71a1
7
- data.tar.gz: 817b2cb650828e548596dc8376f9d8cacfcf32977ae13a340bd7f7e0d635e8eb83b590edc3eff73d10ae96f4e2e1fab6b7de44970130612922264d13e0a17f43
6
+ metadata.gz: d30b9264ca2e44ae46391e86df8fe4f4835ccbe9f4688f1d65d8fd110f0b689370392a17fffa78ea7a89034e2e9acd09612ea4b1611af1f6fae168c90080424e
7
+ data.tar.gz: 2ccfafb26afd642f7146b7c892a499aab136f7b12b8c703ac2231512a768e8da3e991c0d5fbf9fcd204a896f95379aef799cff18e470d4a4912f10c33ec53ae7
data/bin/linguist CHANGED
@@ -5,6 +5,7 @@
5
5
 
6
6
  require 'linguist/file_blob'
7
7
  require 'linguist/repository'
8
+ require 'rugged'
8
9
 
9
10
  path = ARGV[0] || Dir.pwd
10
11
 
@@ -18,7 +19,8 @@ ARGV.shift
18
19
  breakdown = true if ARGV[0] == "--breakdown"
19
20
 
20
21
  if File.directory?(path)
21
- repo = Linguist::Repository.from_directory(path)
22
+ rugged = Rugged::Repository.new(path)
23
+ repo = Linguist::Repository.new(rugged, rugged.head.target_id)
22
24
  repo.languages.sort_by { |_, size| size }.reverse.each do |language, size|
23
25
  percentage = ((size / repo.size.to_f) * 100)
24
26
  percentage = sprintf '%.2f' % percentage
@@ -313,15 +313,7 @@ module Linguist
313
313
  #
314
314
  # Returns a Language or nil if none is detected
315
315
  def language
316
- return @language if defined? @language
317
-
318
- if defined?(@data) && @data.is_a?(String)
319
- data = @data
320
- else
321
- data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
322
- end
323
-
324
- @language = Language.detect(name.to_s, data, mode)
316
+ @language ||= Language.detect(self)
325
317
  end
326
318
 
327
319
  # Internal: Get the lexer of the blob.
@@ -63,7 +63,8 @@ module Linguist
63
63
  generated_jni_header? ||
64
64
  composer_lock? ||
65
65
  node_modules? ||
66
- vcr_cassette?
66
+ vcr_cassette? ||
67
+ generated_by_zephir?
67
68
  end
68
69
 
69
70
  # Internal: Is the blob an XCode project file?
@@ -237,6 +238,13 @@ module Linguist
237
238
  !!name.match(/composer.lock/)
238
239
  end
239
240
 
241
+ # Internal: Is the blob a generated by Zephir
242
+ #
243
+ # Returns true or false.
244
+ def generated_by_zephir?
245
+ !!name.match(/.\.zep\.(?:c|h|php)$/)
246
+ end
247
+
240
248
  # Is the blob a VCR Cassette file?
241
249
  #
242
250
  # Returns true or false
@@ -92,18 +92,14 @@ module Linguist
92
92
 
93
93
  # Public: Detects the Language of the blob.
94
94
  #
95
- # name - String filename
96
- # data - String blob data. A block also maybe passed in for lazy
97
- # loading. This behavior is deprecated and you should always
98
- # pass in a String.
99
- # mode - Optional String mode (defaults to nil)
100
- #
101
95
  # Returns Language or nil.
102
- def self.detect(name, data, mode = nil)
96
+ def self.detect(blob)
97
+ name = blob.name.to_s
98
+
103
99
  # A bit of an elegant hack. If the file is executable but extensionless,
104
100
  # append a "magic" extension so it can be classified with other
105
101
  # languages that have shebang scripts.
106
- if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
102
+ if File.extname(name).empty? && blob.mode && (blob.mode.to_i(8) & 05) == 05
107
103
  name += ".script!"
108
104
  end
109
105
 
@@ -114,7 +110,7 @@ module Linguist
114
110
  # extension at all, in the case of extensionless scripts), we need to continue
115
111
  # our detection work
116
112
  if possible_languages.length > 1
117
- data = data.call() if data.respond_to?(:call)
113
+ data = blob.data
118
114
  possible_language_names = possible_languages.map(&:name)
119
115
 
120
116
  # Don't bother with emptiness
@@ -157,6 +157,7 @@ Assembly:
157
157
  - nasm
158
158
  extensions:
159
159
  - .asm
160
+ - .inc
160
161
 
161
162
  Augeas:
162
163
  type: programming
@@ -528,15 +529,6 @@ Dart:
528
529
  extensions:
529
530
  - .dart
530
531
 
531
- DCPU-16 ASM:
532
- type: programming
533
- lexer: dasm16
534
- extensions:
535
- - .dasm16
536
- - .dasm
537
- aliases:
538
- - dasm16
539
-
540
532
  Diff:
541
533
  extensions:
542
534
  - .diff
@@ -940,7 +932,7 @@ Hy:
940
932
 
941
933
  IDL:
942
934
  type: programming
943
- lexer: Text only
935
+ lexer: IDL
944
936
  color: "#e3592c"
945
937
  extensions:
946
938
  - .pro
@@ -959,7 +951,7 @@ Inno Setup:
959
951
 
960
952
  Idris:
961
953
  type: programming
962
- lexer: Text only
954
+ lexer: Idris
963
955
  extensions:
964
956
  - .idr
965
957
  - .lidr
@@ -998,6 +990,13 @@ Ioke:
998
990
  extensions:
999
991
  - .ik
1000
992
 
993
+ Isabelle:
994
+ type: programming
995
+ lexer: Text only
996
+ color: "#fdcd00"
997
+ extensions:
998
+ - .thy
999
+
1001
1000
  J:
1002
1001
  type: programming
1003
1002
  lexer: Text only
@@ -1087,6 +1086,8 @@ JavaScript:
1087
1086
  - .pac
1088
1087
  - .sjs
1089
1088
  - .ssjs
1089
+ - .xsjs
1090
+ - .xsjslib
1090
1091
  filenames:
1091
1092
  - Jakefile
1092
1093
  interpreters:
@@ -1285,6 +1286,8 @@ Mathematica:
1285
1286
  type: programming
1286
1287
  extensions:
1287
1288
  - .mathematica
1289
+ - .m
1290
+ - .nb
1288
1291
  lexer: Text only
1289
1292
 
1290
1293
  Matlab:
@@ -1392,6 +1395,12 @@ Nimrod:
1392
1395
  - .nim
1393
1396
  - .nimrod
1394
1397
 
1398
+ Nix:
1399
+ type: programming
1400
+ lexer: Nix
1401
+ extensions:
1402
+ - .nix
1403
+
1395
1404
  Nu:
1396
1405
  type: programming
1397
1406
  lexer: Scheme
@@ -1601,7 +1610,7 @@ Perl6:
1601
1610
  Pike:
1602
1611
  type: programming
1603
1612
  color: "#066ab2"
1604
- lexer: C
1613
+ lexer: Pike
1605
1614
  extensions:
1606
1615
  - .pike
1607
1616
  - .pmod
@@ -1698,6 +1707,7 @@ Python:
1698
1707
  - .gyp
1699
1708
  - .lmi
1700
1709
  - .pyde
1710
+ - .pyp
1701
1711
  - .pyt
1702
1712
  - .pyw
1703
1713
  - .wsgi
@@ -1723,6 +1733,12 @@ QML:
1723
1733
  extensions:
1724
1734
  - .qml
1725
1735
 
1736
+ QMake:
1737
+ lexer: Text only
1738
+ extensions:
1739
+ - .pro
1740
+ - .pri
1741
+
1726
1742
  R:
1727
1743
  type: programming
1728
1744
  color: "#198ce7"
@@ -1862,12 +1878,15 @@ Ruby:
1862
1878
  interpreters:
1863
1879
  - ruby
1864
1880
  filenames:
1881
+ - .pryrc
1865
1882
  - Appraisals
1866
1883
  - Berksfile
1867
1884
  - Buildfile
1868
1885
  - Gemfile
1869
1886
  - Gemfile.lock
1870
1887
  - Guardfile
1888
+ - Jarfile
1889
+ - Mavenfile
1871
1890
  - Podfile
1872
1891
  - Thorfile
1873
1892
  - Vagrantfile
@@ -2288,6 +2307,7 @@ XML:
2288
2307
  - .launch
2289
2308
  - .mxml
2290
2309
  - .nproj
2310
+ - .nuspec
2291
2311
  - .osm
2292
2312
  - .plist
2293
2313
  - .pluginspec
@@ -2359,6 +2379,17 @@ XSLT:
2359
2379
  - .xslt
2360
2380
  - .xsl
2361
2381
 
2382
+ Xojo:
2383
+ type: programming
2384
+ lexer: VB.net
2385
+ extensions:
2386
+ - .xojo_code
2387
+ - .xojo_menu
2388
+ - .xojo_report
2389
+ - .xojo_script
2390
+ - .xojo_toolbar
2391
+ - .xojo_window
2392
+
2362
2393
  Xtend:
2363
2394
  type: programming
2364
2395
  extensions:
@@ -0,0 +1,37 @@
1
+ require 'linguist/blob_helper'
2
+ require 'rugged'
3
+
4
+ module Linguist
5
+ class LazyBlob
6
+ include BlobHelper
7
+
8
+ MAX_SIZE = 128 * 1024
9
+
10
+ attr_reader :repository
11
+ attr_reader :oid
12
+ attr_reader :name
13
+ attr_reader :mode
14
+
15
+ def initialize(repo, oid, name, mode = nil)
16
+ @repository = repo
17
+ @oid = oid
18
+ @name = name
19
+ @mode = mode
20
+ end
21
+
22
+ def data
23
+ load_blob!
24
+ @data
25
+ end
26
+
27
+ def size
28
+ load_blob!
29
+ @size
30
+ end
31
+
32
+ protected
33
+ def load_blob!
34
+ @data, @size = Rugged::Blob.to_buffer(repository, oid, MAX_SIZE) if @data.nil?
35
+ end
36
+ end
37
+ end
@@ -1,4 +1,5 @@
1
- require 'linguist/file_blob'
1
+ require 'linguist/lazy_blob'
2
+ require 'rugged'
2
3
 
3
4
  module Linguist
4
5
  # A Repository is an abstraction of a Grit::Repo or a basic file
@@ -7,100 +8,143 @@ module Linguist
7
8
  # Its primary purpose is for gathering language statistics across
8
9
  # the entire project.
9
10
  class Repository
10
- # Public: Initialize a new Repository from a File directory
11
+ attr_reader :repository
12
+
13
+ # Public: Create a new Repository based on the stats of
14
+ # an existing one
15
+ def self.incremental(repo, commit_oid, old_commit_oid, old_stats)
16
+ repo = self.new(repo, commit_oid)
17
+ repo.load_existing_stats(old_commit_oid, old_stats)
18
+ repo
19
+ end
20
+
21
+ # Public: Initialize a new Repository to be analyzed for language
22
+ # data
11
23
  #
12
- # base_path - A path String
24
+ # repo - a Rugged::Repository object
25
+ # commit_oid - the sha1 of the commit that will be analyzed;
26
+ # this is usually the master branch
13
27
  #
14
28
  # Returns a Repository
15
- def self.from_directory(base_path)
16
- new Dir["#{base_path}/**/*"].
17
- select { |f| File.file?(f) }.
18
- map { |path| FileBlob.new(path, base_path) }
29
+ def initialize(repo, commit_oid)
30
+ @repository = repo
31
+ @commit_oid = commit_oid
32
+
33
+ raise TypeError, 'commit_oid must be a commit SHA1' unless commit_oid.is_a?(String)
19
34
  end
20
35
 
21
- # Public: Initialize a new Repository
36
+ # Public: Load the results of a previous analysis on this repository
37
+ # to speed up the new scan.
22
38
  #
23
- # enum - Enumerator that responds to `each` and
24
- # yields Blob objects
39
+ # The new analysis will be performed incrementally as to only take
40
+ # into account the file changes since the last time the repository
41
+ # was scanned
25
42
  #
26
- # Returns a Repository
27
- def initialize(enum)
28
- @enum = enum
29
- @computed_stats = false
30
- @language = @size = nil
31
- @sizes = Hash.new { 0 }
32
- @file_breakdown = Hash.new { |h,k| h[k] = Array.new }
43
+ # old_commit_oid - the sha1 of the commit that was previously analyzed
44
+ # old_stats - the result of the previous analysis, obtained by calling
45
+ # Repository#cache on the old repository
46
+ #
47
+ # Returns nothing
48
+ def load_existing_stats(old_commit_oid, old_stats)
49
+ @old_commit_oid = old_commit_oid
50
+ @old_stats = old_stats
51
+ nil
33
52
  end
34
53
 
35
54
  # Public: Returns a breakdown of language stats.
36
55
  #
37
56
  # Examples
38
57
  #
39
- # # => { Language['Ruby'] => 46319,
40
- # Language['JavaScript'] => 258 }
58
+ # # => { 'Ruby' => 46319,
59
+ # 'JavaScript' => 258 }
41
60
  #
42
- # Returns a Hash of Language keys and Integer size values.
61
+ # Returns a Hash of language names and Integer size values.
43
62
  def languages
44
- compute_stats
45
- @sizes
63
+ @sizes ||= begin
64
+ sizes = Hash.new { 0 }
65
+ cache.each do |_, (language, size)|
66
+ sizes[language] += size
67
+ end
68
+ sizes
69
+ end
46
70
  end
47
71
 
48
72
  # Public: Get primary Language of repository.
49
73
  #
50
- # Returns a Language
74
+ # Returns a language name
51
75
  def language
52
- compute_stats
53
- @language
76
+ @language ||= begin
77
+ primary = languages.max_by { |(_, size)| size }
78
+ primary && primary[0]
79
+ end
54
80
  end
55
81
 
56
82
  # Public: Get the total size of the repository.
57
83
  #
58
84
  # Returns a byte size Integer
59
85
  def size
60
- compute_stats
61
- @size
86
+ @size ||= languages.inject(0) { |s,(_,v)| s + v }
62
87
  end
63
88
 
64
89
  # Public: Return the language breakdown of this repository by file
90
+ #
91
+ # Returns a map of language names => [filenames...]
65
92
  def breakdown_by_file
66
- compute_stats
67
- @file_breakdown
93
+ @file_breakdown ||= begin
94
+ breakdown = Hash.new { |h,k| h[k] = Array.new }
95
+ cache.each do |filename, (language, _)|
96
+ breakdown[language] << filename
97
+ end
98
+ breakdown
99
+ end
68
100
  end
69
101
 
70
- # Internal: Compute language breakdown for each blob in the Repository.
102
+ # Public: Return the cached results of the analysis
71
103
  #
72
- # Returns nothing
73
- def compute_stats
74
- return if @computed_stats
104
+ # This is a per-file breakdown that can be passed to other instances
105
+ # of Linguist::Repository to perform incremental scans
106
+ #
107
+ # Returns a map of filename => [language, size]
108
+ def cache
109
+ @cache ||= begin
110
+ if @old_commit_oid == @commit_oid
111
+ @old_stats
112
+ else
113
+ compute_stats(@old_commit_oid, @commit_oid, @old_stats)
114
+ end
115
+ end
116
+ end
75
117
 
76
- @enum.each do |blob|
77
- # Skip files that are likely binary
78
- next if blob.likely_binary?
118
+ protected
119
+ def compute_stats(old_commit_oid, commit_oid, cache = nil)
120
+ file_map = cache ? cache.dup : {}
121
+ old_tree = old_commit_oid && Rugged::Commit.lookup(repository, old_commit_oid).tree
122
+ new_tree = Rugged::Commit.lookup(repository, commit_oid).tree
79
123
 
80
- # Skip vendored or generated blobs
81
- next if blob.vendored? || blob.generated? || blob.language.nil?
124
+ diff = Rugged::Tree.diff(repository, old_tree, new_tree)
82
125
 
83
- # Only include programming languages and acceptable markup languages
84
- if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
126
+ diff.each_delta do |delta|
127
+ old = delta.old_file[:path]
128
+ new = delta.new_file[:path]
85
129
 
86
- # Build up the per-file breakdown stats
87
- @file_breakdown[blob.language.group.name] << blob.name
130
+ file_map.delete(old)
131
+ next if delta.binary
88
132
 
89
- @sizes[blob.language.group] += blob.size
90
- end
91
- end
133
+ if [:added, :modified].include? delta.status
134
+ mode = delta.new_file[:mode].to_s(8)
135
+ blob = Linguist::LazyBlob.new(repository, delta.new_file[:oid], new, mode)
92
136
 
93
- # Compute total size
94
- @size = @sizes.inject(0) { |s,(_,v)| s + v }
137
+ # Skip vendored or generated blobs
138
+ next if blob.vendored? || blob.generated? || blob.language.nil?
95
139
 
96
- # Get primary language
97
- if primary = @sizes.max_by { |(_, size)| size }
98
- @language = primary[0]
140
+ # Only include programming languages and acceptable markup languages
141
+ if blob.language.type == :programming || Language.detectable_markup.include?(blob.language.name)
142
+ file_map[new] = [blob.language.group.name, blob.size]
143
+ end
144
+ end
99
145
  end
100
146
 
101
- @computed_stats = true
102
-
103
- nil
147
+ file_map
104
148
  end
105
149
  end
106
150
  end