github-linguist 2.3.3 → 2.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/linguist/blob_helper.rb +26 -2
- data/lib/linguist/language.rb +3 -1
- data/lib/linguist/languages.yml +8 -0
- data/lib/linguist/samples.json +106 -23
- data/lib/linguist/samples.rb +2 -0
- metadata +3 -3
data/lib/linguist/blob_helper.rb
CHANGED
@@ -204,7 +204,31 @@ module Linguist
|
|
204
204
|
#
|
205
205
|
# Returns an Array of lines
|
206
206
|
def lines
|
207
|
-
@lines ||=
|
207
|
+
@lines ||=
|
208
|
+
if viewable? && data
|
209
|
+
data.split(line_split_character, -1)
|
210
|
+
else
|
211
|
+
[]
|
212
|
+
end
|
213
|
+
end
|
214
|
+
|
215
|
+
# Character used to split lines. This is almost always "\n" except when Mac
|
216
|
+
# Format is detected in which case it's "\r".
|
217
|
+
#
|
218
|
+
# Returns a split pattern string.
|
219
|
+
def line_split_character
|
220
|
+
@line_split_character ||= (mac_format?? "\r" : "\n")
|
221
|
+
end
|
222
|
+
|
223
|
+
# Public: Is the data in ** Mac Format **. This format uses \r (0x0d) characters
|
224
|
+
# for line ends and does not include a \n (0x0a).
|
225
|
+
#
|
226
|
+
# Returns true when mac format is detected.
|
227
|
+
def mac_format?
|
228
|
+
return if !viewable?
|
229
|
+
if pos = data[0, 4096].index("\r")
|
230
|
+
data[pos + 1] != ?\n
|
231
|
+
end
|
208
232
|
end
|
209
233
|
|
210
234
|
# Public: Get number of lines of code
|
@@ -278,7 +302,7 @@ module Linguist
|
|
278
302
|
if defined?(@data) && @data.is_a?(String)
|
279
303
|
data = @data
|
280
304
|
else
|
281
|
-
data = lambda { binary_mime_type? ? "" : self.data }
|
305
|
+
data = lambda { (binary_mime_type? || binary?) ? "" : self.data }
|
282
306
|
end
|
283
307
|
|
284
308
|
@language = Language.detect(name.to_s, data, mode)
|
data/lib/linguist/language.rb
CHANGED
@@ -84,7 +84,9 @@ module Linguist
|
|
84
84
|
|
85
85
|
if possible_languages.length > 1
|
86
86
|
data = data.call() if data.respond_to?(:call)
|
87
|
-
if
|
87
|
+
if data.nil? || data == ""
|
88
|
+
nil
|
89
|
+
elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
|
88
90
|
Language[result[0]]
|
89
91
|
end
|
90
92
|
else
|
data/lib/linguist/languages.yml
CHANGED
data/lib/linguist/samples.json
CHANGED
@@ -17,11 +17,11 @@
|
|
17
17
|
".h"
|
18
18
|
],
|
19
19
|
"C++": [
|
20
|
-
".
|
21
|
-
".hpp",
|
22
|
-
".cu",
|
20
|
+
".cc",
|
23
21
|
".cpp",
|
24
|
-
".
|
22
|
+
".cu",
|
23
|
+
".h",
|
24
|
+
".hpp"
|
25
25
|
],
|
26
26
|
"Ceylon": [
|
27
27
|
".ceylon"
|
@@ -41,6 +41,9 @@
|
|
41
41
|
"Diff": [
|
42
42
|
".patch"
|
43
43
|
],
|
44
|
+
"Ecl": [
|
45
|
+
".ecl"
|
46
|
+
],
|
44
47
|
"Emacs Lisp": [
|
45
48
|
".el"
|
46
49
|
],
|
@@ -48,11 +51,11 @@
|
|
48
51
|
".s"
|
49
52
|
],
|
50
53
|
"Gosu": [
|
54
|
+
".gs",
|
51
55
|
".gsp",
|
52
56
|
".gst",
|
53
57
|
".gsx",
|
54
|
-
".vark"
|
55
|
-
".gs"
|
58
|
+
".vark"
|
56
59
|
],
|
57
60
|
"Groovy": [
|
58
61
|
".gradle",
|
@@ -75,9 +78,9 @@
|
|
75
78
|
".script!"
|
76
79
|
],
|
77
80
|
"JSON": [
|
81
|
+
".json",
|
78
82
|
".maxhelp",
|
79
|
-
".maxpat"
|
80
|
-
".json"
|
83
|
+
".maxpat"
|
81
84
|
],
|
82
85
|
"Julia": [
|
83
86
|
".jl"
|
@@ -130,14 +133,14 @@
|
|
130
133
|
".pir"
|
131
134
|
],
|
132
135
|
"Perl": [
|
133
|
-
".pm",
|
134
136
|
".pl",
|
135
|
-
".
|
136
|
-
".script!"
|
137
|
+
".pm",
|
138
|
+
".script!",
|
139
|
+
".t"
|
137
140
|
],
|
138
141
|
"PHP": [
|
139
|
-
".
|
140
|
-
".
|
142
|
+
".module",
|
143
|
+
".php"
|
141
144
|
],
|
142
145
|
"PowerShell": [
|
143
146
|
".ps1",
|
@@ -154,17 +157,17 @@
|
|
154
157
|
".R"
|
155
158
|
],
|
156
159
|
"Racket": [
|
157
|
-
".
|
158
|
-
".
|
160
|
+
".scrbl",
|
161
|
+
".script!"
|
159
162
|
],
|
160
163
|
"Rebol": [
|
161
164
|
".r"
|
162
165
|
],
|
163
166
|
"Ruby": [
|
164
|
-
".rb",
|
165
|
-
".script!",
|
166
167
|
".rabl",
|
167
|
-
".rake"
|
168
|
+
".rake",
|
169
|
+
".rb",
|
170
|
+
".script!"
|
168
171
|
],
|
169
172
|
"Rust": [
|
170
173
|
".rs"
|
@@ -180,17 +183,17 @@
|
|
180
183
|
".sps"
|
181
184
|
],
|
182
185
|
"Scilab": [
|
183
|
-
".sci",
|
184
186
|
".sce",
|
187
|
+
".sci",
|
185
188
|
".tst"
|
186
189
|
],
|
187
190
|
"SCSS": [
|
188
191
|
".scss"
|
189
192
|
],
|
190
193
|
"Shell": [
|
194
|
+
".bash",
|
191
195
|
".script!",
|
192
196
|
".sh",
|
193
|
-
".bash",
|
194
197
|
".zsh"
|
195
198
|
],
|
196
199
|
"Standard ML": [
|
@@ -257,7 +260,7 @@
|
|
257
260
|
".gemrc"
|
258
261
|
]
|
259
262
|
},
|
260
|
-
"tokens_total":
|
263
|
+
"tokens_total": 271187,
|
261
264
|
"languages_total": 275,
|
262
265
|
"tokens": {
|
263
266
|
"Apex": {
|
@@ -9157,6 +9160,84 @@
|
|
9157
9160
|
"d472341..8ad9ffb": 1,
|
9158
9161
|
"+": 3
|
9159
9162
|
},
|
9163
|
+
"Ecl": {
|
9164
|
+
"#option": 1,
|
9165
|
+
"(": 32,
|
9166
|
+
"true": 1,
|
9167
|
+
")": 32,
|
9168
|
+
";": 23,
|
9169
|
+
"namesRecord": 4,
|
9170
|
+
"RECORD": 1,
|
9171
|
+
"string20": 1,
|
9172
|
+
"surname": 1,
|
9173
|
+
"string10": 2,
|
9174
|
+
"forename": 1,
|
9175
|
+
"integer2": 5,
|
9176
|
+
"age": 2,
|
9177
|
+
"dadAge": 1,
|
9178
|
+
"mumAge": 1,
|
9179
|
+
"END": 1,
|
9180
|
+
"namesRecord2": 3,
|
9181
|
+
"record": 1,
|
9182
|
+
"extra": 1,
|
9183
|
+
"end": 1,
|
9184
|
+
"namesTable": 11,
|
9185
|
+
"dataset": 2,
|
9186
|
+
"FLAT": 2,
|
9187
|
+
"namesTable2": 9,
|
9188
|
+
"aveAgeL": 3,
|
9189
|
+
"l": 1,
|
9190
|
+
"l.dadAge": 1,
|
9191
|
+
"+": 16,
|
9192
|
+
"l.mumAge": 1,
|
9193
|
+
"/2": 2,
|
9194
|
+
"aveAgeR": 4,
|
9195
|
+
"r": 1,
|
9196
|
+
"r.dadAge": 1,
|
9197
|
+
"r.mumAge": 1,
|
9198
|
+
"output": 9,
|
9199
|
+
"join": 11,
|
9200
|
+
"left": 2,
|
9201
|
+
"right": 3,
|
9202
|
+
"//Several": 1,
|
9203
|
+
"simple": 1,
|
9204
|
+
"examples": 1,
|
9205
|
+
"of": 1,
|
9206
|
+
"sliding": 2,
|
9207
|
+
"syntax": 1,
|
9208
|
+
"left.age": 8,
|
9209
|
+
"right.age": 12,
|
9210
|
+
"-": 5,
|
9211
|
+
"and": 10,
|
9212
|
+
"<": 1,
|
9213
|
+
"between": 7,
|
9214
|
+
"//Same": 1,
|
9215
|
+
"but": 1,
|
9216
|
+
"on": 1,
|
9217
|
+
"strings.": 1,
|
9218
|
+
"Also": 1,
|
9219
|
+
"includes": 1,
|
9220
|
+
"to": 1,
|
9221
|
+
"ensure": 1,
|
9222
|
+
"sort": 1,
|
9223
|
+
"is": 1,
|
9224
|
+
"done": 1,
|
9225
|
+
"by": 1,
|
9226
|
+
"non": 1,
|
9227
|
+
"before": 1,
|
9228
|
+
"sliding.": 1,
|
9229
|
+
"left.surname": 2,
|
9230
|
+
"right.surname": 4,
|
9231
|
+
"[": 4,
|
9232
|
+
"]": 4,
|
9233
|
+
"all": 1,
|
9234
|
+
"//This": 1,
|
9235
|
+
"should": 1,
|
9236
|
+
"not": 1,
|
9237
|
+
"generate": 1,
|
9238
|
+
"a": 1,
|
9239
|
+
"self": 1
|
9240
|
+
},
|
9160
9241
|
"Emacs Lisp": {
|
9161
9242
|
"(": 1,
|
9162
9243
|
"print": 1,
|
@@ -27671,6 +27752,7 @@
|
|
27671
27752
|
"Dart": 68,
|
27672
27753
|
"Delphi": 30,
|
27673
27754
|
"Diff": 16,
|
27755
|
+
"Ecl": 281,
|
27674
27756
|
"Emacs Lisp": 3,
|
27675
27757
|
"GAS": 133,
|
27676
27758
|
"Gosu": 413,
|
@@ -27741,6 +27823,7 @@
|
|
27741
27823
|
"Dart": 1,
|
27742
27824
|
"Delphi": 1,
|
27743
27825
|
"Diff": 1,
|
27826
|
+
"Ecl": 1,
|
27744
27827
|
"Emacs Lisp": 1,
|
27745
27828
|
"GAS": 1,
|
27746
27829
|
"Gosu": 5,
|
@@ -27761,7 +27844,7 @@
|
|
27761
27844
|
"Nemerle": 1,
|
27762
27845
|
"Nimrod": 1,
|
27763
27846
|
"Nu": 1,
|
27764
|
-
"Objective-C":
|
27847
|
+
"Objective-C": 19,
|
27765
27848
|
"OCaml": 1,
|
27766
27849
|
"Opa": 2,
|
27767
27850
|
"OpenCL": 1,
|
@@ -27798,5 +27881,5 @@
|
|
27798
27881
|
"XSLT": 1,
|
27799
27882
|
"YAML": 1
|
27800
27883
|
},
|
27801
|
-
"md5": "
|
27884
|
+
"md5": "8591cfa68ab6fe3b3dacbcb885be70d0"
|
27802
27885
|
}
|
data/lib/linguist/samples.rb
CHANGED
@@ -76,12 +76,14 @@ module Linguist
|
|
76
76
|
db['extnames'][language_name] ||= []
|
77
77
|
if !db['extnames'][language_name].include?(sample[:extname])
|
78
78
|
db['extnames'][language_name] << sample[:extname]
|
79
|
+
db['extnames'][language_name].sort!
|
79
80
|
end
|
80
81
|
end
|
81
82
|
|
82
83
|
if sample[:filename]
|
83
84
|
db['filenames'][language_name] ||= []
|
84
85
|
db['filenames'][language_name] << sample[:filename]
|
86
|
+
db['filenames'][language_name].sort!
|
85
87
|
end
|
86
88
|
|
87
89
|
data = File.read(sample[:path])
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.3.
|
4
|
+
version: 2.3.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-09-
|
12
|
+
date: 2012-09-24 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: charlock_holmes
|
@@ -181,7 +181,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
181
181
|
version: '0'
|
182
182
|
requirements: []
|
183
183
|
rubyforge_project:
|
184
|
-
rubygems_version: 1.8.
|
184
|
+
rubygems_version: 1.8.23
|
185
185
|
signing_key:
|
186
186
|
specification_version: 3
|
187
187
|
summary: GitHub Language detection
|