github-linguist 2.10.0 → 2.10.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d32dfca063a58b4fdda048d20fbf9ee735a87a47
4
- data.tar.gz: 8603ce6993bb645e7d847f991142436d999c5e96
3
+ metadata.gz: b0ecbabc138b17a77febdabc0c64f378f67bd13c
4
+ data.tar.gz: ae91bedb24a96d1dfcaf4680dcc5530f475deee0
5
5
  SHA512:
6
- metadata.gz: db134aedc386ccc175b578474a84c75391814b235e01da962b6d2483453cfc80dec48670c5a23c65930fb723c92a0ca48df320c24eeead87fedbe5c6a21a448f
7
- data.tar.gz: 89195a956fe06ab069fff7c4bf61c2324cb397964d818fcd7926eb8548b56e58ad73bf566b9cedd88275d10def55e3909e715bd686754b45b83e0aaacbb717b3
6
+ metadata.gz: ea8ea0349099feaee685a38a6800bef2f4d695b9c422394f5d34c3432da19621912a2f296bd48cc43d989758ff8d45613a4185e86195907b6cf133fd24fa74c3
7
+ data.tar.gz: cb0dd718502e691e8e09baf8753b17e6dc042f408b5d297f139167c62ae8ff5c9348c9f2498435b7a233ecd7e792ef346920f68f4e11d4b8c2c7738b1637637f
@@ -190,9 +190,9 @@ module Linguist
190
190
  # Public: Is the blob safe to colorize?
191
191
  #
192
192
  # We use Pygments for syntax highlighting blobs. Pygments
193
- # can be too slow for very large blobs or for certain
193
+ # can be too slow for very large blobs or for certain
194
194
  # corner-case blobs.
195
- #
195
+ #
196
196
  # Return true or false
197
197
  def safe_to_colorize?
198
198
  !large? && text? && !high_ratio_of_long_lines?
@@ -78,18 +78,13 @@ module Linguist
78
78
  def classify(tokens, languages)
79
79
  return [] if tokens.nil?
80
80
  tokens = Tokenizer.tokenize(tokens) if tokens.is_a?(String)
81
-
82
81
  scores = {}
83
- if verbosity >= 2
84
- dump_all_tokens(tokens, languages)
85
- end
82
+
83
+ debug_dump_all_tokens(tokens, languages) if verbosity >= 2
84
+
86
85
  languages.each do |language|
87
- scores[language] = tokens_probability(tokens, language) +
88
- language_probability(language)
89
- if verbosity >= 1
90
- printf "%10s = %10.3f + %7.3f = %10.3f\n",
91
- language, tokens_probability(tokens, language), language_probability(language), scores[language]
92
- end
86
+ debug_dump_probabilities(tokens, language) if verbosity >= 1
87
+ scores[language] = tokens_probability(tokens, language) + language_probability(language)
93
88
  end
94
89
 
95
90
  scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [score[0], score[1]] }
@@ -135,6 +130,11 @@ module Linguist
135
130
  @verbosity ||= (ENV['LINGUIST_DEBUG'] || 0).to_i
136
131
  end
137
132
 
133
+ def debug_dump_probabilities
134
+ printf("%10s = %10.3f + %7.3f = %10.3f\n",
135
+ language, tokens_probability(tokens, language), language_probability(language), scores[language])
136
+ end
137
+
138
138
  # Internal: show a table of probabilities for each <token,language> pair.
139
139
  #
140
140
  # The number in each table entry is the number of "points" that each
@@ -145,22 +145,22 @@ module Linguist
145
145
  # how much more likely (log of probability ratio) that token is to
146
146
  # appear in one language vs. the least-likely language. Dashes
147
147
  # indicate the least-likely language (and zero points) for each token.
148
- def dump_all_tokens(tokens, languages)
148
+ def debug_dump_all_tokens(tokens, languages)
149
149
  maxlen = tokens.map { |tok| tok.size }.max
150
-
150
+
151
151
  printf "%#{maxlen}s", ""
152
152
  puts " #" + languages.map { |lang| sprintf("%10s", lang) }.join
153
-
153
+
154
154
  token_map = Hash.new(0)
155
155
  tokens.each { |tok| token_map[tok] += 1 }
156
-
156
+
157
157
  token_map.sort.each { |tok, count|
158
158
  arr = languages.map { |lang| [lang, token_probability(tok, lang)] }
159
159
  min = arr.map { |a,b| b }.min
160
160
  minlog = Math.log(min)
161
161
  if !arr.inject(true) { |result, n| result && n[1] == arr[0][1] }
162
162
  printf "%#{maxlen}s%5d", tok, count
163
-
163
+
164
164
  puts arr.map { |ent|
165
165
  ent[1] == min ? " -" : sprintf("%10.3f", count * (Math.log(ent[1]) - minlog))
166
166
  }.join
@@ -60,6 +60,7 @@ module Linguist
60
60
  generated_net_designer_file? ||
61
61
  generated_protocol_buffer? ||
62
62
  generated_jni_header? ||
63
+ composer_lock? ||
63
64
  node_modules?
64
65
  end
65
66
 
@@ -204,5 +205,13 @@ module Linguist
204
205
  def node_modules?
205
206
  !!name.match(/node_modules\//)
206
207
  end
208
+
209
+ # the php composer tool generates a lock file to represent a specific dependency state.
210
+ # In general not meant for humans in pull requests.
211
+ #
212
+ # Returns true or false.
213
+ def composer_lock?
214
+ !!name.match(/composer.lock/)
215
+ end
207
216
  end
208
217
  end
@@ -21,17 +21,27 @@ module Linguist
21
21
  @alias_index = {}
22
22
 
23
23
  @extension_index = Hash.new { |h,k| h[k] = [] }
24
+ @interpreter_index = Hash.new { |h,k| h[k] = [] }
24
25
  @filename_index = Hash.new { |h,k| h[k] = [] }
25
26
  @primary_extension_index = {}
26
27
 
27
28
  # Valid Languages types
28
- TYPES = [:data, :markup, :programming]
29
+ TYPES = [:data, :markup, :programming, :prose]
29
30
 
30
31
  # Names of non-programming languages that we will still detect
31
32
  #
32
33
  # Returns an array
33
34
  def self.detectable_markup
34
- ["CSS", "Less", "Sass", "TeX"]
35
+ ["CSS", "Less", "Sass", "Stylus", "TeX"]
36
+ end
37
+
38
+ # Detect languages by a specific type
39
+ #
40
+ # type - A symbol that exists within TYPES
41
+ #
42
+ # Returns an array
43
+ def self.by_type(type)
44
+ all.select { |h| h.type == type }
35
45
  end
36
46
 
37
47
  # Internal: Create a new Language object
@@ -75,6 +85,10 @@ module Linguist
75
85
 
76
86
  @primary_extension_index[language.primary_extension] = language
77
87
 
88
+ language.interpreters.each do |interpreter|
89
+ @interpreter_index[interpreter] << language
90
+ end
91
+
78
92
  language.filenames.each do |filename|
79
93
  @filename_index[filename] << language
80
94
  end
@@ -103,10 +117,13 @@ module Linguist
103
117
 
104
118
  if possible_languages.length > 1
105
119
  data = data.call() if data.respond_to?(:call)
120
+
106
121
  if data.nil? || data == ""
107
122
  nil
108
- elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
109
- Language[result[0]]
123
+ elsif (result = find_by_shebang(data)) && !result.empty?
124
+ result.first
125
+ elsif classified = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
126
+ Language[classified[0]]
110
127
  end
111
128
  else
112
129
  possible_languages.first
@@ -166,6 +183,20 @@ module Linguist
166
183
  langs.compact.uniq
167
184
  end
168
185
 
186
+ # Public: Look up Languages by shebang line.
187
+ #
188
+ # data - Array of tokens or String data to analyze.
189
+ #
190
+ # Examples
191
+ #
192
+ # Language.find_by_shebang("#!/bin/bash\ndate;")
193
+ # # => [#<Language name="Bash">]
194
+ #
195
+ # Returns the matching Language
196
+ def self.find_by_shebang(data)
197
+ @interpreter_index[Linguist.interpreter_from_shebang(data)]
198
+ end
199
+
169
200
  # Public: Look up Language by its name or lexer.
170
201
  #
171
202
  # name - The String name of the Language
@@ -251,6 +282,7 @@ module Linguist
251
282
 
252
283
  # Set extensions or default to [].
253
284
  @extensions = attributes[:extensions] || []
285
+ @interpreters = attributes[:interpreters] || []
254
286
  @filenames = attributes[:filenames] || []
255
287
 
256
288
  unless @primary_extension = attributes[:primary_extension]
@@ -363,6 +395,15 @@ module Linguist
363
395
  # Returns the extension String.
364
396
  attr_reader :primary_extension
365
397
 
398
+ # Public: Get interpreters
399
+ #
400
+ # Examples
401
+ #
402
+ # # => ['awk', 'gawk', 'mawk' ...]
403
+ #
404
+ # Returns the interpreters Array
405
+ attr_reader :interpreters
406
+
366
407
  # Public: Get filenames
367
408
  #
368
409
  # Examples
@@ -456,6 +497,7 @@ module Linguist
456
497
  end
457
498
 
458
499
  extensions = Samples::DATA['extnames']
500
+ interpreters = Samples::DATA['interpreters']
459
501
  filenames = Samples::DATA['filenames']
460
502
  popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
461
503
 
@@ -470,6 +512,7 @@ module Linguist
470
512
 
471
513
  languages.each do |name, options|
472
514
  options['extensions'] ||= []
515
+ options['interpreters'] ||= []
473
516
  options['filenames'] ||= []
474
517
 
475
518
  if extnames = extensions[name]
@@ -480,6 +523,18 @@ module Linguist
480
523
  end
481
524
  end
482
525
 
526
+ if interpreters == nil
527
+ interpreters = {}
528
+ end
529
+
530
+ if interpreter_names = interpreters[name]
531
+ interpreter_names.each do |interpreter|
532
+ if !options['interpreters'].include?(interpreter)
533
+ options['interpreters'] << interpreter
534
+ end
535
+ end
536
+ end
537
+
483
538
  if fns = filenames[name]
484
539
  fns.each do |filename|
485
540
  if !options['filenames'].include?(filename)
@@ -500,6 +555,7 @@ module Linguist
500
555
  :searchable => options.key?('searchable') ? options['searchable'] : true,
501
556
  :search_term => options['search_term'],
502
557
  :extensions => options['extensions'].sort,
558
+ :interpreters => options['interpreters'].sort,
503
559
  :primary_extension => options['primary_extension'],
504
560
  :filenames => options['filenames'],
505
561
  :popular => popular.include?(name)
@@ -10,6 +10,7 @@
10
10
  # ace_mode - A String name of Ace Mode (if available)
11
11
  # wrap - Boolean wrap to enable line wrapping (default: false)
12
12
  # extension - An Array of associated extensions
13
+ # interpreter - An Array of associated interpreters
13
14
  # primary_extension - A String for the main extension associated with
14
15
  # the language. Must be unique. Used when a Language is picked
15
16
  # from a dropdown and we need to automatically choose an
@@ -22,7 +23,7 @@
22
23
  # Any additions or modifications (even trivial) should have corresponding
23
24
  # test change in `test/test_blob.rb`.
24
25
  #
25
- # Please keep this list alphabetized.
26
+ # Please keep this list alphabetized. Capitalization comes before lower case.
26
27
 
27
28
  ABAP:
28
29
  type: programming
@@ -70,6 +71,7 @@ Ada:
70
71
 
71
72
  Agda:
72
73
  type: programming
74
+ color: "#467C91"
73
75
  primary_extension: .agda
74
76
 
75
77
  ApacheConf:
@@ -101,6 +103,16 @@ Arduino:
101
103
  lexer: C++
102
104
  primary_extension: .ino
103
105
 
106
+ AsciiDoc:
107
+ type: prose
108
+ lexer: Text only
109
+ ace_mode: asciidoc
110
+ wrap: true
111
+ primary_extension: .asciidoc
112
+ extensions:
113
+ - .adoc
114
+ - .asc
115
+
104
116
  Assembly:
105
117
  type: programming
106
118
  lexer: NASM
@@ -140,6 +152,11 @@ Awk:
140
152
  - .gawk
141
153
  - .mawk
142
154
  - .nawk
155
+ interpreters:
156
+ - awk
157
+ - gawk
158
+ - mawk
159
+ - nawk
143
160
 
144
161
  Batchfile:
145
162
  type: programming
@@ -181,6 +198,11 @@ Brainfuck:
181
198
  extensions:
182
199
  - .bf
183
200
 
201
+ Brightscript:
202
+ type: programming
203
+ lexer: Text only
204
+ primary_extension: .brs
205
+
184
206
  Bro:
185
207
  type: programming
186
208
  primary_extension: .bro
@@ -333,6 +355,12 @@ Common Lisp:
333
355
  - .lsp
334
356
  - .ny
335
357
  - .podsl
358
+ interpreters:
359
+ - lisp
360
+ - sbcl
361
+ - ccl
362
+ - clisp
363
+ - ecl
336
364
 
337
365
  Coq:
338
366
  type: programming
@@ -346,6 +374,12 @@ Cpp-ObjDump:
346
374
  - .c++objdump
347
375
  - .cxx-objdump
348
376
 
377
+ Creole:
378
+ type: prose
379
+ lexer: Text only
380
+ wrap: true
381
+ primary_extension: .creole
382
+
349
383
  Cucumber:
350
384
  lexer: Gherkin
351
385
  primary_extension: .feature
@@ -379,7 +413,7 @@ D-ObjDump:
379
413
  DM:
380
414
  type: programming
381
415
  color: "#075ff1"
382
- lexer: Text only
416
+ lexer: C++
383
417
  primary_extension: .dm
384
418
  aliases:
385
419
  - byond
@@ -748,6 +782,7 @@ JSON:
748
782
  - .sublime-settings
749
783
  - .sublime-workspace
750
784
  filenames:
785
+ - .jshintrc
751
786
  - composer.lock
752
787
 
753
788
  Jade:
@@ -918,6 +953,8 @@ Makefile:
918
953
  - makefile
919
954
  - Makefile
920
955
  - GNUmakefile
956
+ interpreters:
957
+ - make
921
958
 
922
959
  Mako:
923
960
  primary_extension: .mako
@@ -925,7 +962,7 @@ Mako:
925
962
  - .mao
926
963
 
927
964
  Markdown:
928
- type: markup
965
+ type: prose
929
966
  lexer: Text only
930
967
  ace_mode: markdown
931
968
  wrap: true
@@ -956,6 +993,12 @@ Max:
956
993
  - .mxt
957
994
  - .pat
958
995
 
996
+ MediaWiki:
997
+ type: prose
998
+ lexer: Text only
999
+ wrap: true
1000
+ primary_extension: .mediawiki
1001
+
959
1002
  MiniD: # Legacy
960
1003
  searchable: false
961
1004
  primary_extension: .minid # Dummy extension
@@ -1091,6 +1134,12 @@ OpenEdge ABL:
1091
1134
  - abl
1092
1135
  primary_extension: .p
1093
1136
 
1137
+ Org:
1138
+ type: prose
1139
+ lexer: Text only
1140
+ wrap: true
1141
+ primary_extension: .org
1142
+
1094
1143
  Oxygene:
1095
1144
  type: programming
1096
1145
  lexer: Text only
@@ -1157,6 +1206,8 @@ Perl:
1157
1206
  - .pm6
1158
1207
  - .pod
1159
1208
  - .psgi
1209
+ interpreters:
1210
+ - perl
1160
1211
 
1161
1212
  Pike:
1162
1213
  type: programming
@@ -1166,6 +1217,13 @@ Pike:
1166
1217
  extensions:
1167
1218
  - .pmod
1168
1219
 
1220
+ Pod:
1221
+ type: prose
1222
+ lexer: Text only
1223
+ ace_mode: perl
1224
+ wrap: true
1225
+ primary_extension: .pod
1226
+
1169
1227
  PogoScript:
1170
1228
  type: programming
1171
1229
  color: "#d80074"
@@ -1224,12 +1282,15 @@ Python:
1224
1282
  primary_extension: .py
1225
1283
  extensions:
1226
1284
  - .gyp
1285
+ - .lmi
1227
1286
  - .pyt
1228
1287
  - .pyw
1229
1288
  - .wsgi
1230
1289
  - .xpy
1231
1290
  filenames:
1232
1291
  - wscript
1292
+ interpreters:
1293
+ - python
1233
1294
 
1234
1295
  Python traceback:
1235
1296
  type: data
@@ -1252,6 +1313,15 @@ R:
1252
1313
  - .R
1253
1314
  filenames:
1254
1315
  - .Rprofile
1316
+ interpreters:
1317
+ - Rscript
1318
+
1319
+ RDoc:
1320
+ type: prose
1321
+ lexer: Text only
1322
+ ace_mode: rdoc
1323
+ wrap: true
1324
+ primary_extension: .rdoc
1255
1325
 
1256
1326
  REALbasic:
1257
1327
  type: programming
@@ -1269,6 +1339,15 @@ RHTML:
1269
1339
  group: HTML
1270
1340
  primary_extension: .rhtml
1271
1341
 
1342
+ RMarkdown:
1343
+ type: markup
1344
+ lexer: Text only
1345
+ wrap: true
1346
+ ace_mode: markdown
1347
+ primary_extension: .rmd
1348
+ extensions:
1349
+ - .Rmd
1350
+
1272
1351
  Racket:
1273
1352
  type: programming
1274
1353
  lexer: Racket
@@ -1339,6 +1418,8 @@ Ruby:
1339
1418
  - .ru
1340
1419
  - .thor
1341
1420
  - .watchr
1421
+ interpreters:
1422
+ - ruby
1342
1423
  filenames:
1343
1424
  - Appraisals
1344
1425
  - Berksfile
@@ -1381,6 +1462,8 @@ Scala:
1381
1462
  ace_mode: scala
1382
1463
  color: "#7dd3b0"
1383
1464
  primary_extension: .scala
1465
+ extensions:
1466
+ - .sc
1384
1467
 
1385
1468
  Scaml:
1386
1469
  group: HTML
@@ -1394,6 +1477,11 @@ Scheme:
1394
1477
  extensions:
1395
1478
  - .sls
1396
1479
  - .ss
1480
+ interpreters:
1481
+ - guile
1482
+ - racket
1483
+ - bigloo
1484
+ - chicken
1397
1485
 
1398
1486
  Scilab:
1399
1487
  type: programming
@@ -1418,6 +1506,10 @@ Shell:
1418
1506
  extensions:
1419
1507
  - .bats
1420
1508
  - .tmux
1509
+ interpreters:
1510
+ - bash
1511
+ - sh
1512
+ - zsh
1421
1513
  filenames:
1422
1514
  - Dockerfile
1423
1515
 
@@ -1446,11 +1538,17 @@ Standard ML:
1446
1538
  - sml
1447
1539
  primary_extension: .sml
1448
1540
 
1541
+ Stylus:
1542
+ type: markup
1543
+ group: CSS
1544
+ lexer: Text only
1545
+ primary_extension: .styl
1546
+
1449
1547
  SuperCollider:
1450
1548
  type: programming
1451
1549
  color: "#46390b"
1452
1550
  lexer: Text only
1453
- primary_extension: .sc
1551
+ primary_extension: .scd
1454
1552
 
1455
1553
  TOML:
1456
1554
  type: data
@@ -1477,7 +1575,9 @@ Tcsh:
1477
1575
 
1478
1576
  TeX:
1479
1577
  type: markup
1578
+ color: "#3D6117"
1480
1579
  ace_mode: latex
1580
+ wrap: true
1481
1581
  aliases:
1482
1582
  - latex
1483
1583
  primary_extension: .tex
@@ -1498,7 +1598,7 @@ Tea:
1498
1598
  primary_extension: .tea
1499
1599
 
1500
1600
  Textile:
1501
- type: markup
1601
+ type: prose
1502
1602
  lexer: Text only
1503
1603
  ace_mode: textile
1504
1604
  wrap: true
@@ -1544,6 +1644,14 @@ VHDL:
1544
1644
  lexer: vhdl
1545
1645
  color: "#543978"
1546
1646
  primary_extension: .vhdl
1647
+ extensions:
1648
+ - .vhd
1649
+ - .vhf
1650
+ - .vhi
1651
+ - .vho
1652
+ - .vhs
1653
+ - .vht
1654
+ - .vhw
1547
1655
 
1548
1656
  Vala:
1549
1657
  type: programming
@@ -1733,7 +1841,7 @@ ooc:
1733
1841
  primary_extension: .ooc
1734
1842
 
1735
1843
  reStructuredText:
1736
- type: markup
1844
+ type: prose
1737
1845
  wrap: true
1738
1846
  search_term: rst
1739
1847
  aliases: