github-linguist 2.10.0 → 2.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d32dfca063a58b4fdda048d20fbf9ee735a87a47
4
- data.tar.gz: 8603ce6993bb645e7d847f991142436d999c5e96
3
+ metadata.gz: b0ecbabc138b17a77febdabc0c64f378f67bd13c
4
+ data.tar.gz: ae91bedb24a96d1dfcaf4680dcc5530f475deee0
5
5
  SHA512:
6
- metadata.gz: db134aedc386ccc175b578474a84c75391814b235e01da962b6d2483453cfc80dec48670c5a23c65930fb723c92a0ca48df320c24eeead87fedbe5c6a21a448f
7
- data.tar.gz: 89195a956fe06ab069fff7c4bf61c2324cb397964d818fcd7926eb8548b56e58ad73bf566b9cedd88275d10def55e3909e715bd686754b45b83e0aaacbb717b3
6
+ metadata.gz: ea8ea0349099feaee685a38a6800bef2f4d695b9c422394f5d34c3432da19621912a2f296bd48cc43d989758ff8d45613a4185e86195907b6cf133fd24fa74c3
7
+ data.tar.gz: cb0dd718502e691e8e09baf8753b17e6dc042f408b5d297f139167c62ae8ff5c9348c9f2498435b7a233ecd7e792ef346920f68f4e11d4b8c2c7738b1637637f
@@ -190,9 +190,9 @@ module Linguist
190
190
  # Public: Is the blob safe to colorize?
191
191
  #
192
192
  # We use Pygments for syntax highlighting blobs. Pygments
193
- # can be too slow for very large blobs or for certain
193
+ # can be too slow for very large blobs or for certain
194
194
  # corner-case blobs.
195
- #
195
+ #
196
196
  # Return true or false
197
197
  def safe_to_colorize?
198
198
  !large? && text? && !high_ratio_of_long_lines?
@@ -78,18 +78,13 @@ module Linguist
78
78
  def classify(tokens, languages)
79
79
  return [] if tokens.nil?
80
80
  tokens = Tokenizer.tokenize(tokens) if tokens.is_a?(String)
81
-
82
81
  scores = {}
83
- if verbosity >= 2
84
- dump_all_tokens(tokens, languages)
85
- end
82
+
83
+ debug_dump_all_tokens(tokens, languages) if verbosity >= 2
84
+
86
85
  languages.each do |language|
87
- scores[language] = tokens_probability(tokens, language) +
88
- language_probability(language)
89
- if verbosity >= 1
90
- printf "%10s = %10.3f + %7.3f = %10.3f\n",
91
- language, tokens_probability(tokens, language), language_probability(language), scores[language]
92
- end
86
+ debug_dump_probabilities(tokens, language) if verbosity >= 1
87
+ scores[language] = tokens_probability(tokens, language) + language_probability(language)
93
88
  end
94
89
 
95
90
  scores.sort { |a, b| b[1] <=> a[1] }.map { |score| [score[0], score[1]] }
@@ -135,6 +130,11 @@ module Linguist
135
130
  @verbosity ||= (ENV['LINGUIST_DEBUG'] || 0).to_i
136
131
  end
137
132
 
133
+ def debug_dump_probabilities
134
+ printf("%10s = %10.3f + %7.3f = %10.3f\n",
135
+ language, tokens_probability(tokens, language), language_probability(language), scores[language])
136
+ end
137
+
138
138
  # Internal: show a table of probabilities for each <token,language> pair.
139
139
  #
140
140
  # The number in each table entry is the number of "points" that each
@@ -145,22 +145,22 @@ module Linguist
145
145
  # how much more likely (log of probability ratio) that token is to
146
146
  # appear in one language vs. the least-likely language. Dashes
147
147
  # indicate the least-likely language (and zero points) for each token.
148
- def dump_all_tokens(tokens, languages)
148
+ def debug_dump_all_tokens(tokens, languages)
149
149
  maxlen = tokens.map { |tok| tok.size }.max
150
-
150
+
151
151
  printf "%#{maxlen}s", ""
152
152
  puts " #" + languages.map { |lang| sprintf("%10s", lang) }.join
153
-
153
+
154
154
  token_map = Hash.new(0)
155
155
  tokens.each { |tok| token_map[tok] += 1 }
156
-
156
+
157
157
  token_map.sort.each { |tok, count|
158
158
  arr = languages.map { |lang| [lang, token_probability(tok, lang)] }
159
159
  min = arr.map { |a,b| b }.min
160
160
  minlog = Math.log(min)
161
161
  if !arr.inject(true) { |result, n| result && n[1] == arr[0][1] }
162
162
  printf "%#{maxlen}s%5d", tok, count
163
-
163
+
164
164
  puts arr.map { |ent|
165
165
  ent[1] == min ? " -" : sprintf("%10.3f", count * (Math.log(ent[1]) - minlog))
166
166
  }.join
@@ -60,6 +60,7 @@ module Linguist
60
60
  generated_net_designer_file? ||
61
61
  generated_protocol_buffer? ||
62
62
  generated_jni_header? ||
63
+ composer_lock? ||
63
64
  node_modules?
64
65
  end
65
66
 
@@ -204,5 +205,13 @@ module Linguist
204
205
  def node_modules?
205
206
  !!name.match(/node_modules\//)
206
207
  end
208
+
209
+ # the php composer tool generates a lock file to represent a specific dependency state.
210
+ # In general not meant for humans in pull requests.
211
+ #
212
+ # Returns true or false.
213
+ def composer_lock?
214
+ !!name.match(/composer.lock/)
215
+ end
207
216
  end
208
217
  end
@@ -21,17 +21,27 @@ module Linguist
21
21
  @alias_index = {}
22
22
 
23
23
  @extension_index = Hash.new { |h,k| h[k] = [] }
24
+ @interpreter_index = Hash.new { |h,k| h[k] = [] }
24
25
  @filename_index = Hash.new { |h,k| h[k] = [] }
25
26
  @primary_extension_index = {}
26
27
 
27
28
  # Valid Languages types
28
- TYPES = [:data, :markup, :programming]
29
+ TYPES = [:data, :markup, :programming, :prose]
29
30
 
30
31
  # Names of non-programming languages that we will still detect
31
32
  #
32
33
  # Returns an array
33
34
  def self.detectable_markup
34
- ["CSS", "Less", "Sass", "TeX"]
35
+ ["CSS", "Less", "Sass", "Stylus", "TeX"]
36
+ end
37
+
38
+ # Detect languages by a specific type
39
+ #
40
+ # type - A symbol that exists within TYPES
41
+ #
42
+ # Returns an Array of the entries of `all` whose type equals `type`
43
+ def self.by_type(type)
44
+ all.select { |h| h.type == type }
35
45
  end
36
46
 
37
47
  # Internal: Create a new Language object
@@ -75,6 +85,10 @@ module Linguist
75
85
 
76
86
  @primary_extension_index[language.primary_extension] = language
77
87
 
88
+ language.interpreters.each do |interpreter|
89
+ @interpreter_index[interpreter] << language
90
+ end
91
+
78
92
  language.filenames.each do |filename|
79
93
  @filename_index[filename] << language
80
94
  end
@@ -103,10 +117,13 @@ module Linguist
103
117
 
104
118
  if possible_languages.length > 1
105
119
  data = data.call() if data.respond_to?(:call)
120
+
106
121
  if data.nil? || data == ""
107
122
  nil
108
- elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
109
- Language[result[0]]
123
+ elsif (result = find_by_shebang(data)) && !result.empty?
124
+ result.first
125
+ elsif classified = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
126
+ Language[classified[0]]
110
127
  end
111
128
  else
112
129
  possible_languages.first
@@ -166,6 +183,20 @@ module Linguist
166
183
  langs.compact.uniq
167
184
  end
168
185
 
186
+ # Public: Look up Languages by shebang line.
187
+ #
188
+ # data - Array of tokens or String data to analyze.
189
+ #
190
+ # Examples
191
+ #
192
+ # Language.find_by_shebang("#!/bin/bash\ndate;")
193
+ # # => [#<Language name="Bash">]
194
+ #
195
+ # Returns an Array of matching Languages (empty when no interpreter matches).
196
+ def self.find_by_shebang(data)
197
+ @interpreter_index[Linguist.interpreter_from_shebang(data)]
198
+ end
199
+
169
200
  # Public: Look up Language by its name or lexer.
170
201
  #
171
202
  # name - The String name of the Language
@@ -251,6 +282,7 @@ module Linguist
251
282
 
252
283
  # Set extensions or default to [].
253
284
  @extensions = attributes[:extensions] || []
285
+ @interpreters = attributes[:interpreters] || []
254
286
  @filenames = attributes[:filenames] || []
255
287
 
256
288
  unless @primary_extension = attributes[:primary_extension]
@@ -363,6 +395,15 @@ module Linguist
363
395
  # Returns the extension String.
364
396
  attr_reader :primary_extension
365
397
 
398
+ # Public: Get interpreters
399
+ #
400
+ # Examples
401
+ #
402
+ # # => ['awk', 'gawk', 'mawk' ...]
403
+ #
404
+ # Returns the interpreters Array
405
+ attr_reader :interpreters
406
+
366
407
  # Public: Get filenames
367
408
  #
368
409
  # Examples
@@ -456,6 +497,7 @@ module Linguist
456
497
  end
457
498
 
458
499
  extensions = Samples::DATA['extnames']
500
+ interpreters = Samples::DATA['interpreters']
459
501
  filenames = Samples::DATA['filenames']
460
502
  popular = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
461
503
 
@@ -470,6 +512,7 @@ module Linguist
470
512
 
471
513
  languages.each do |name, options|
472
514
  options['extensions'] ||= []
515
+ options['interpreters'] ||= []
473
516
  options['filenames'] ||= []
474
517
 
475
518
  if extnames = extensions[name]
@@ -480,6 +523,18 @@ module Linguist
480
523
  end
481
524
  end
482
525
 
526
+ if interpreters == nil
527
+ interpreters = {}
528
+ end
529
+
530
+ if interpreter_names = interpreters[name]
531
+ interpreter_names.each do |interpreter|
532
+ if !options['interpreters'].include?(interpreter)
533
+ options['interpreters'] << interpreter
534
+ end
535
+ end
536
+ end
537
+
483
538
  if fns = filenames[name]
484
539
  fns.each do |filename|
485
540
  if !options['filenames'].include?(filename)
@@ -500,6 +555,7 @@ module Linguist
500
555
  :searchable => options.key?('searchable') ? options['searchable'] : true,
501
556
  :search_term => options['search_term'],
502
557
  :extensions => options['extensions'].sort,
558
+ :interpreters => options['interpreters'].sort,
503
559
  :primary_extension => options['primary_extension'],
504
560
  :filenames => options['filenames'],
505
561
  :popular => popular.include?(name)
@@ -10,6 +10,7 @@
10
10
  # ace_mode - A String name of Ace Mode (if available)
11
11
  # wrap - Boolean wrap to enable line wrapping (default: false)
12
12
  # extension - An Array of associated extensions
13
+ # interpreter - An Array of associated interpreters
13
14
  # primary_extension - A String for the main extension associated with
14
15
  # the language. Must be unique. Used when a Language is picked
15
16
  # from a dropdown and we need to automatically choose an
@@ -22,7 +23,7 @@
22
23
  # Any additions or modifications (even trivial) should have corresponding
23
24
  # test change in `test/test_blob.rb`.
24
25
  #
25
- # Please keep this list alphabetized.
26
+ # Please keep this list alphabetized. Capitalization comes before lower case.
26
27
 
27
28
  ABAP:
28
29
  type: programming
@@ -70,6 +71,7 @@ Ada:
70
71
 
71
72
  Agda:
72
73
  type: programming
74
+ color: "#467C91"
73
75
  primary_extension: .agda
74
76
 
75
77
  ApacheConf:
@@ -101,6 +103,16 @@ Arduino:
101
103
  lexer: C++
102
104
  primary_extension: .ino
103
105
 
106
+ AsciiDoc:
107
+ type: prose
108
+ lexer: Text only
109
+ ace_mode: asciidoc
110
+ wrap: true
111
+ primary_extension: .asciidoc
112
+ extensions:
113
+ - .adoc
114
+ - .asc
115
+
104
116
  Assembly:
105
117
  type: programming
106
118
  lexer: NASM
@@ -140,6 +152,11 @@ Awk:
140
152
  - .gawk
141
153
  - .mawk
142
154
  - .nawk
155
+ interpreters:
156
+ - awk
157
+ - gawk
158
+ - mawk
159
+ - nawk
143
160
 
144
161
  Batchfile:
145
162
  type: programming
@@ -181,6 +198,11 @@ Brainfuck:
181
198
  extensions:
182
199
  - .bf
183
200
 
201
+ Brightscript:
202
+ type: programming
203
+ lexer: Text only
204
+ primary_extension: .brs
205
+
184
206
  Bro:
185
207
  type: programming
186
208
  primary_extension: .bro
@@ -333,6 +355,12 @@ Common Lisp:
333
355
  - .lsp
334
356
  - .ny
335
357
  - .podsl
358
+ interpreters:
359
+ - lisp
360
+ - sbcl
361
+ - ccl
362
+ - clisp
363
+ - ecl
336
364
 
337
365
  Coq:
338
366
  type: programming
@@ -346,6 +374,12 @@ Cpp-ObjDump:
346
374
  - .c++objdump
347
375
  - .cxx-objdump
348
376
 
377
+ Creole:
378
+ type: prose
379
+ lexer: Text only
380
+ wrap: true
381
+ primary_extension: .creole
382
+
349
383
  Cucumber:
350
384
  lexer: Gherkin
351
385
  primary_extension: .feature
@@ -379,7 +413,7 @@ D-ObjDump:
379
413
  DM:
380
414
  type: programming
381
415
  color: "#075ff1"
382
- lexer: Text only
416
+ lexer: C++
383
417
  primary_extension: .dm
384
418
  aliases:
385
419
  - byond
@@ -748,6 +782,7 @@ JSON:
748
782
  - .sublime-settings
749
783
  - .sublime-workspace
750
784
  filenames:
785
+ - .jshintrc
751
786
  - composer.lock
752
787
 
753
788
  Jade:
@@ -918,6 +953,8 @@ Makefile:
918
953
  - makefile
919
954
  - Makefile
920
955
  - GNUmakefile
956
+ interpreters:
957
+ - make
921
958
 
922
959
  Mako:
923
960
  primary_extension: .mako
@@ -925,7 +962,7 @@ Mako:
925
962
  - .mao
926
963
 
927
964
  Markdown:
928
- type: markup
965
+ type: prose
929
966
  lexer: Text only
930
967
  ace_mode: markdown
931
968
  wrap: true
@@ -956,6 +993,12 @@ Max:
956
993
  - .mxt
957
994
  - .pat
958
995
 
996
+ MediaWiki:
997
+ type: prose
998
+ lexer: Text only
999
+ wrap: true
1000
+ primary_extension: .mediawiki
1001
+
959
1002
  MiniD: # Legacy
960
1003
  searchable: false
961
1004
  primary_extension: .minid # Dummy extension
@@ -1091,6 +1134,12 @@ OpenEdge ABL:
1091
1134
  - abl
1092
1135
  primary_extension: .p
1093
1136
 
1137
+ Org:
1138
+ type: prose
1139
+ lexer: Text only
1140
+ wrap: true
1141
+ primary_extension: .org
1142
+
1094
1143
  Oxygene:
1095
1144
  type: programming
1096
1145
  lexer: Text only
@@ -1157,6 +1206,8 @@ Perl:
1157
1206
  - .pm6
1158
1207
  - .pod
1159
1208
  - .psgi
1209
+ interpreters:
1210
+ - perl
1160
1211
 
1161
1212
  Pike:
1162
1213
  type: programming
@@ -1166,6 +1217,13 @@ Pike:
1166
1217
  extensions:
1167
1218
  - .pmod
1168
1219
 
1220
+ Pod:
1221
+ type: prose
1222
+ lexer: Text only
1223
+ ace_mode: perl
1224
+ wrap: true
1225
+ primary_extension: .pod
1226
+
1169
1227
  PogoScript:
1170
1228
  type: programming
1171
1229
  color: "#d80074"
@@ -1224,12 +1282,15 @@ Python:
1224
1282
  primary_extension: .py
1225
1283
  extensions:
1226
1284
  - .gyp
1285
+ - .lmi
1227
1286
  - .pyt
1228
1287
  - .pyw
1229
1288
  - .wsgi
1230
1289
  - .xpy
1231
1290
  filenames:
1232
1291
  - wscript
1292
+ interpreters:
1293
+ - python
1233
1294
 
1234
1295
  Python traceback:
1235
1296
  type: data
@@ -1252,6 +1313,15 @@ R:
1252
1313
  - .R
1253
1314
  filenames:
1254
1315
  - .Rprofile
1316
+ interpreters:
1317
+ - Rscript
1318
+
1319
+ RDoc:
1320
+ type: prose
1321
+ lexer: Text only
1322
+ ace_mode: rdoc
1323
+ wrap: true
1324
+ primary_extension: .rdoc
1255
1325
 
1256
1326
  REALbasic:
1257
1327
  type: programming
@@ -1269,6 +1339,15 @@ RHTML:
1269
1339
  group: HTML
1270
1340
  primary_extension: .rhtml
1271
1341
 
1342
+ RMarkdown:
1343
+ type: markup
1344
+ lexer: Text only
1345
+ wrap: true
1346
+ ace_mode: markdown
1347
+ primary_extension: .rmd
1348
+ extensions:
1349
+ - .Rmd
1350
+
1272
1351
  Racket:
1273
1352
  type: programming
1274
1353
  lexer: Racket
@@ -1339,6 +1418,8 @@ Ruby:
1339
1418
  - .ru
1340
1419
  - .thor
1341
1420
  - .watchr
1421
+ interpreters:
1422
+ - ruby
1342
1423
  filenames:
1343
1424
  - Appraisals
1344
1425
  - Berksfile
@@ -1381,6 +1462,8 @@ Scala:
1381
1462
  ace_mode: scala
1382
1463
  color: "#7dd3b0"
1383
1464
  primary_extension: .scala
1465
+ extensions:
1466
+ - .sc
1384
1467
 
1385
1468
  Scaml:
1386
1469
  group: HTML
@@ -1394,6 +1477,11 @@ Scheme:
1394
1477
  extensions:
1395
1478
  - .sls
1396
1479
  - .ss
1480
+ interpreters:
1481
+ - guile
1482
+ - racket
1483
+ - bigloo
1484
+ - chicken
1397
1485
 
1398
1486
  Scilab:
1399
1487
  type: programming
@@ -1418,6 +1506,10 @@ Shell:
1418
1506
  extensions:
1419
1507
  - .bats
1420
1508
  - .tmux
1509
+ interpreters:
1510
+ - bash
1511
+ - sh
1512
+ - zsh
1421
1513
  filenames:
1422
1514
  - Dockerfile
1423
1515
 
@@ -1446,11 +1538,17 @@ Standard ML:
1446
1538
  - sml
1447
1539
  primary_extension: .sml
1448
1540
 
1541
+ Stylus:
1542
+ type: markup
1543
+ group: CSS
1544
+ lexer: Text only
1545
+ primary_extension: .styl
1546
+
1449
1547
  SuperCollider:
1450
1548
  type: programming
1451
1549
  color: "#46390b"
1452
1550
  lexer: Text only
1453
- primary_extension: .sc
1551
+ primary_extension: .scd
1454
1552
 
1455
1553
  TOML:
1456
1554
  type: data
@@ -1477,7 +1575,9 @@ Tcsh:
1477
1575
 
1478
1576
  TeX:
1479
1577
  type: markup
1578
+ color: "#3D6117"
1480
1579
  ace_mode: latex
1580
+ wrap: true
1481
1581
  aliases:
1482
1582
  - latex
1483
1583
  primary_extension: .tex
@@ -1498,7 +1598,7 @@ Tea:
1498
1598
  primary_extension: .tea
1499
1599
 
1500
1600
  Textile:
1501
- type: markup
1601
+ type: prose
1502
1602
  lexer: Text only
1503
1603
  ace_mode: textile
1504
1604
  wrap: true
@@ -1544,6 +1644,14 @@ VHDL:
1544
1644
  lexer: vhdl
1545
1645
  color: "#543978"
1546
1646
  primary_extension: .vhdl
1647
+ extensions:
1648
+ - .vhd
1649
+ - .vhf
1650
+ - .vhi
1651
+ - .vho
1652
+ - .vhs
1653
+ - .vht
1654
+ - .vhw
1547
1655
 
1548
1656
  Vala:
1549
1657
  type: programming
@@ -1733,7 +1841,7 @@ ooc:
1733
1841
  primary_extension: .ooc
1734
1842
 
1735
1843
  reStructuredText:
1736
- type: markup
1844
+ type: prose
1737
1845
  wrap: true
1738
1846
  search_term: rst
1739
1847
  aliases: