github-linguist 6.4.1 → 7.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/{linguist → github-linguist} +0 -0
- data/grammars/source.abap.json +35 -6
- data/grammars/source.apl.json +4 -8
- data/grammars/source.ballerina.json +82 -3
- data/grammars/source.bdf.json +419 -0
- data/grammars/source.c++.json +25 -3
- data/grammars/source.c.json +6 -3
- data/grammars/source.chapel.json +3 -3
- data/grammars/source.coq.json +2 -2
- data/grammars/source.cs.json +83 -73
- data/grammars/source.csound.json +1 -1
- data/grammars/source.dart.json +1 -1
- data/grammars/source.elixir.json +112 -137
- data/grammars/source.emacs.lisp.json +179 -4
- data/grammars/source.figctrl.json +252 -0
- data/grammars/source.figfont.json +121 -0
- data/grammars/source.fontdir.json +99 -0
- data/grammars/source.fstar.json +439 -0
- data/grammars/source.hx.json +27 -9
- data/grammars/source.j.json +260 -0
- data/grammars/source.julia.json +24 -12
- data/grammars/source.lisp.json +3 -3
- data/grammars/source.mupad.json +1 -1
- data/grammars/source.pony.json +3 -3
- data/grammars/source.powershell.json +2 -2
- data/grammars/source.protobuf.json +87 -5
- data/grammars/source.purescript.json +5 -2
- data/grammars/source.python.json +17 -12
- data/grammars/source.rascal.json +0 -1
- data/grammars/source.reg.json +159 -0
- data/grammars/source.slice.json +2755 -0
- data/grammars/source.solidity.json +2 -2
- data/grammars/source.ts.json +225 -132
- data/grammars/source.tsx.json +235 -142
- data/grammars/source.viml.json +4 -4
- data/grammars/source.xlfd.json +462 -0
- data/grammars/source.yasnippet.json +387 -0
- data/grammars/text.elixir.json +17 -5
- data/grammars/text.html.basic.json +622 -2264
- data/grammars/text.html.elixir.json +10 -1
- data/grammars/text.html.php.blade.json +7 -3
- data/grammars/version +1 -0
- data/lib/linguist.rb +2 -0
- data/lib/linguist/VERSION +1 -1
- data/lib/linguist/generated.rb +14 -1
- data/lib/linguist/heuristics.rb +77 -468
- data/lib/linguist/heuristics.yml +404 -0
- data/lib/linguist/languages.json +1 -1
- data/lib/linguist/languages.yml +86 -6
- data/lib/linguist/samples.json +2624 -483
- data/lib/linguist/strategy/xml.rb +30 -0
- metadata +20 -7
@@ -3367,7 +3367,7 @@
|
|
3367
3367
|
}
|
3368
3368
|
},
|
3369
3369
|
"injections": {
|
3370
|
-
"text.html.php.blade - (meta.embedded | meta.tag | comment.block.blade), L:(text.html.php.blade meta.tag - comment.block.blade), L:(source.js.embedded.html - comment.block.blade)": {
|
3370
|
+
"text.html.php.blade - (meta.embedded | meta.tag | comment.block.blade), L:(text.html.php.blade meta.tag - (comment.block.blade | meta.embedded.block.blade)), L:(source.js.embedded.html - (comment.block.blade | meta.embedded.block.blade))": {
|
3371
3371
|
"patterns": [
|
3372
3372
|
{
|
3373
3373
|
"name": "comment.block.blade",
|
@@ -3501,6 +3501,10 @@
|
|
3501
3501
|
}
|
3502
3502
|
}
|
3503
3503
|
},
|
3504
|
+
{
|
3505
|
+
"name": "comment.blade",
|
3506
|
+
"match": "@(?={{{|{{|{!!|@\\w+(?:::\\w+)?)"
|
3507
|
+
},
|
3504
3508
|
{
|
3505
3509
|
"name": "meta.function.echo.blade",
|
3506
3510
|
"contentName": "source.php",
|
@@ -3677,7 +3681,7 @@
|
|
3677
3681
|
{
|
3678
3682
|
"name": "meta.directive.custom.blade",
|
3679
3683
|
"contentName": "source.php",
|
3680
|
-
"begin": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n(\n @\n \\w+(
|
3684
|
+
"begin": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n(\n @\n \\w+(?:::\\w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n [\\t ]* # Whitespace between name and parentheses\n)\n(\\() # Followed by opening parentheses",
|
3681
3685
|
"end": "\\)",
|
3682
3686
|
"patterns": [
|
3683
3687
|
{
|
@@ -3700,7 +3704,7 @@
|
|
3700
3704
|
},
|
3701
3705
|
{
|
3702
3706
|
"name": "entity.name.function.blade",
|
3703
|
-
"match": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n@\n\\w+(
|
3707
|
+
"match": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n@\n\\w+(?:::\\w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n\\b # Bounded by word boundary"
|
3704
3708
|
},
|
3705
3709
|
{
|
3706
3710
|
"begin": "(^\\s*)(?=\u003c\\?(?![^?]*\\?\u003e))",
|
data/grammars/version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
7.0.0
|
data/lib/linguist.rb
CHANGED
@@ -7,6 +7,7 @@ require 'linguist/repository'
|
|
7
7
|
require 'linguist/samples'
|
8
8
|
require 'linguist/shebang'
|
9
9
|
require 'linguist/version'
|
10
|
+
require 'linguist/strategy/xml'
|
10
11
|
|
11
12
|
class << Linguist
|
12
13
|
# Public: Detects the Language of the blob.
|
@@ -62,6 +63,7 @@ class << Linguist
|
|
62
63
|
Linguist::Strategy::Filename,
|
63
64
|
Linguist::Shebang,
|
64
65
|
Linguist::Strategy::Extension,
|
66
|
+
Linguist::Strategy::XML,
|
65
67
|
Linguist::Heuristics,
|
66
68
|
Linguist::Classifier
|
67
69
|
]
|
data/lib/linguist/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
7.0.0
|
data/lib/linguist/generated.rb
CHANGED
@@ -89,7 +89,8 @@ module Linguist
|
|
89
89
|
generated_yarn_lock? ||
|
90
90
|
generated_grpc_cpp? ||
|
91
91
|
generated_dart? ||
|
92
|
-
generated_perl_ppport_header?
|
92
|
+
generated_perl_ppport_header? ||
|
93
|
+
generated_gamemakerstudio?
|
93
94
|
end
|
94
95
|
|
95
96
|
# Internal: Is the blob an Xcode file?
|
@@ -577,5 +578,17 @@ module Linguist
|
|
577
578
|
def generated_graphql_relay?
|
578
579
|
!!name.match(/__generated__\//)
|
579
580
|
end
|
581
|
+
|
582
|
+
# Internal: Is this a generated Game Maker Studio (2) metadata file?
|
583
|
+
#
|
584
|
+
# All Game Maker Studio 2 generated files will be JSON, .yy or .yyp, and have
|
585
|
+
# a part that looks like "modelName: GMname" on the 3rd line
|
586
|
+
#
|
587
|
+
# Return true or false
|
588
|
+
def generated_gamemakerstudio?
|
589
|
+
return false unless ['.yy', '.yyp'].include? extname
|
590
|
+
return false unless lines.count > 3
|
591
|
+
return lines[2].match(/\"modelName\"\:\s*\"GM/)
|
592
|
+
end
|
580
593
|
end
|
581
594
|
end
|
data/lib/linguist/heuristics.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
1
3
|
module Linguist
|
2
4
|
# A collection of simple heuristics that can be used to better analyze languages.
|
3
5
|
class Heuristics
|
@@ -17,6 +19,7 @@ module Linguist
|
|
17
19
|
# Returns an Array of languages, or empty if none matched or were inconclusive.
|
18
20
|
def self.call(blob, candidates)
|
19
21
|
return [] if blob.symlink?
|
22
|
+
self.load()
|
20
23
|
|
21
24
|
data = blob.data[0...HEURISTICS_CONSIDER_BYTES]
|
22
25
|
|
@@ -29,509 +32,115 @@ module Linguist
|
|
29
32
|
[] # No heuristics matched
|
30
33
|
end
|
31
34
|
|
32
|
-
# Internal:
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
# heuristic - Block which takes data as an argument and returns a Language or nil.
|
37
|
-
#
|
38
|
-
# Examples
|
39
|
-
#
|
40
|
-
# disambiguate ".pm" do |data|
|
41
|
-
# if data.include?("use strict")
|
42
|
-
# Language["Perl"]
|
43
|
-
# elsif /^[^#]+:-/.match(data)
|
44
|
-
# Language["Prolog"]
|
45
|
-
# end
|
46
|
-
# end
|
47
|
-
#
|
48
|
-
def self.disambiguate(*exts_and_langs, &heuristic)
|
49
|
-
@heuristics << new(exts_and_langs, &heuristic)
|
50
|
-
end
|
51
|
-
|
52
|
-
# Internal: Array of defined heuristics
|
53
|
-
@heuristics = []
|
54
|
-
|
55
|
-
# Internal
|
56
|
-
def initialize(exts_and_langs, &heuristic)
|
57
|
-
@exts_and_langs, @candidates = exts_and_langs.partition {|e| e =~ /\A\./}
|
58
|
-
@heuristic = heuristic
|
59
|
-
end
|
60
|
-
|
61
|
-
# Internal: Check if this heuristic matches the candidate filenames or
|
62
|
-
# languages.
|
63
|
-
def matches?(filename, candidates)
|
64
|
-
filename = filename.downcase
|
65
|
-
candidates = candidates.compact.map(&:name)
|
66
|
-
@exts_and_langs.any? { |ext| filename.end_with?(ext) } ||
|
67
|
-
(candidates.any? &&
|
68
|
-
(@candidates - candidates == [] &&
|
69
|
-
candidates - @candidates == []))
|
70
|
-
end
|
71
|
-
|
72
|
-
# Internal: Perform the heuristic
|
73
|
-
def call(data)
|
74
|
-
@heuristic.call(data)
|
75
|
-
end
|
76
|
-
|
77
|
-
# Common heuristics
|
78
|
-
CPlusPlusRegex = Regexp.union(
|
79
|
-
/^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/,
|
80
|
-
/^\s*template\s*</,
|
81
|
-
/^[ \t]*try/,
|
82
|
-
/^[ \t]*catch\s*\(/,
|
83
|
-
/^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+/,
|
84
|
-
/^[ \t]*(private|public|protected):$/,
|
85
|
-
/std::\w+/)
|
86
|
-
ObjectiveCRegex = /^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])/
|
87
|
-
Perl5Regex = /\buse\s+(?:strict\b|v?5\.)/
|
88
|
-
Perl6Regex = /^\s*(?:use\s+v6\b|\bmodule\b|\b(?:my\s+)?class\b)/
|
89
|
-
|
90
|
-
disambiguate ".as" do |data|
|
91
|
-
if /^\s*(package\s+[a-z0-9_\.]+|import\s+[a-zA-Z0-9_\.]+;|class\s+[A-Za-z0-9_]+\s+extends\s+[A-Za-z0-9_]+)/.match(data)
|
92
|
-
Language["ActionScript"]
|
93
|
-
else
|
94
|
-
Language["AngelScript"]
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
disambiguate ".asc" do |data|
|
99
|
-
if /^(----[- ]BEGIN|ssh-(rsa|dss)) /.match(data)
|
100
|
-
Language["Public Key"]
|
101
|
-
elsif /^[=-]+(\s|\n)|{{[A-Za-z]/.match(data)
|
102
|
-
Language["AsciiDoc"]
|
103
|
-
elsif /^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])/.match(data)
|
104
|
-
Language["AGS Script"]
|
105
|
-
end
|
106
|
-
end
|
107
|
-
|
108
|
-
disambiguate ".bb" do |data|
|
109
|
-
if /^\s*; /.match(data) || data.include?("End Function")
|
110
|
-
Language["BlitzBasic"]
|
111
|
-
elsif /^\s*(# |include|require)\b/.match(data)
|
112
|
-
Language["BitBake"]
|
113
|
-
end
|
114
|
-
end
|
115
|
-
|
116
|
-
disambiguate ".builds" do |data|
|
117
|
-
if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
|
118
|
-
Language["XML"]
|
119
|
-
else
|
120
|
-
Language["Text"]
|
121
|
-
end
|
122
|
-
end
|
123
|
-
|
124
|
-
disambiguate ".ch" do |data|
|
125
|
-
if /^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b/i.match(data)
|
126
|
-
Language["xBase"]
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
disambiguate ".cl" do |data|
|
131
|
-
if /^\s*\((defun|in-package|defpackage) /i.match(data)
|
132
|
-
Language["Common Lisp"]
|
133
|
-
elsif /^class/x.match(data)
|
134
|
-
Language["Cool"]
|
135
|
-
elsif /\/\* |\/\/ |^\}/.match(data)
|
136
|
-
Language["OpenCL"]
|
137
|
-
end
|
138
|
-
end
|
139
|
-
|
140
|
-
disambiguate ".cls" do |data|
|
141
|
-
if /\\\w+{/.match(data)
|
142
|
-
Language["TeX"]
|
143
|
-
end
|
144
|
-
end
|
145
|
-
|
146
|
-
disambiguate ".cs" do |data|
|
147
|
-
if /![\w\s]+methodsFor: /.match(data)
|
148
|
-
Language["Smalltalk"]
|
149
|
-
elsif /^\s*namespace\s*[\w\.]+\s*{/.match(data) || /^\s*\/\//.match(data)
|
150
|
-
Language["C#"]
|
151
|
-
end
|
152
|
-
end
|
153
|
-
|
154
|
-
disambiguate ".d" do |data|
|
155
|
-
# see http://dlang.org/spec/grammar
|
156
|
-
# ModuleDeclaration | ImportDeclaration | FuncDeclaration | unittest
|
157
|
-
if /^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}/.match(data)
|
158
|
-
Language["D"]
|
159
|
-
# see http://dtrace.org/guide/chp-prog.html, http://dtrace.org/guide/chp-profile.html, http://dtrace.org/guide/chp-opt.html
|
160
|
-
elsif /^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)/.match(data)
|
161
|
-
Language["DTrace"]
|
162
|
-
# path/target : dependency \
|
163
|
-
# target : \
|
164
|
-
# : dependency
|
165
|
-
# path/file.ext1 : some/path/../file.ext2
|
166
|
-
elsif /([\/\\].*:\s+.*\s\\$|: \\$|^ : |^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)/.match(data)
|
167
|
-
Language["Makefile"]
|
168
|
-
end
|
169
|
-
end
|
170
|
-
|
171
|
-
disambiguate ".ecl" do |data|
|
172
|
-
if /^[^#]+:-/.match(data)
|
173
|
-
Language["ECLiPSe"]
|
174
|
-
elsif data.include?(":=")
|
175
|
-
Language["ECL"]
|
176
|
-
end
|
177
|
-
end
|
178
|
-
|
179
|
-
disambiguate ".es" do |data|
|
180
|
-
if /^\s*(?:%%|main\s*\(.*?\)\s*->)/.match(data)
|
181
|
-
Language["Erlang"]
|
182
|
-
elsif /(?:\/\/|("|')use strict\1|export\s+default\s|\/\*.*?\*\/)/m.match(data)
|
183
|
-
Language["JavaScript"]
|
184
|
-
end
|
185
|
-
end
|
186
|
-
|
187
|
-
fortran_rx = /^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)/i
|
188
|
-
|
189
|
-
disambiguate ".f" do |data|
|
190
|
-
if /^: /.match(data)
|
191
|
-
Language["Forth"]
|
192
|
-
elsif data.include?("flowop")
|
193
|
-
Language["Filebench WML"]
|
194
|
-
elsif fortran_rx.match(data)
|
195
|
-
Language["Fortran"]
|
196
|
-
end
|
197
|
-
end
|
198
|
-
|
199
|
-
disambiguate ".for" do |data|
|
200
|
-
if /^: /.match(data)
|
201
|
-
Language["Forth"]
|
202
|
-
elsif fortran_rx.match(data)
|
203
|
-
Language["Fortran"]
|
204
|
-
end
|
205
|
-
end
|
206
|
-
|
207
|
-
disambiguate ".fr" do |data|
|
208
|
-
if /^(: |also |new-device|previous )/.match(data)
|
209
|
-
Language["Forth"]
|
210
|
-
elsif /^\s*(import|module|package|data|type) /.match(data)
|
211
|
-
Language["Frege"]
|
212
|
-
else
|
213
|
-
Language["Text"]
|
214
|
-
end
|
215
|
-
end
|
216
|
-
|
217
|
-
disambiguate ".fs" do |data|
|
218
|
-
if /^(: |new-device)/.match(data)
|
219
|
-
Language["Forth"]
|
220
|
-
elsif /^\s*(#light|import|let|module|namespace|open|type)/.match(data)
|
221
|
-
Language["F#"]
|
222
|
-
elsif /^\s*(#version|precision|uniform|varying|vec[234])/.match(data)
|
223
|
-
Language["GLSL"]
|
224
|
-
elsif /#include|#pragma\s+(rs|version)|__attribute__/.match(data)
|
225
|
-
Language["Filterscript"]
|
226
|
-
end
|
227
|
-
end
|
228
|
-
|
229
|
-
disambiguate ".gs" do |data|
|
230
|
-
Language["Gosu"] if /^uses java\./.match(data)
|
231
|
-
end
|
232
|
-
|
233
|
-
disambiguate ".h" do |data|
|
234
|
-
if ObjectiveCRegex.match(data)
|
235
|
-
Language["Objective-C"]
|
236
|
-
elsif CPlusPlusRegex.match(data)
|
237
|
-
Language["C++"]
|
238
|
-
end
|
239
|
-
end
|
240
|
-
|
241
|
-
disambiguate ".inc" do |data|
|
242
|
-
if /^<\?(?:php)?/.match(data)
|
243
|
-
Language["PHP"]
|
244
|
-
elsif /^\s*#(declare|local|macro|while)\s/.match(data)
|
245
|
-
Language["POV-Ray SDL"]
|
246
|
-
end
|
247
|
-
end
|
248
|
-
|
249
|
-
disambiguate ".l" do |data|
|
250
|
-
if /\(def(un|macro)\s/.match(data)
|
251
|
-
Language["Common Lisp"]
|
252
|
-
elsif /^(%[%{}]xs|<.*>)/.match(data)
|
253
|
-
Language["Lex"]
|
254
|
-
elsif /^\.[a-z][a-z](\s|$)/i.match(data)
|
255
|
-
Language["Roff"]
|
256
|
-
elsif /^\((de|class|rel|code|data|must)\s/.match(data)
|
257
|
-
Language["PicoLisp"]
|
35
|
+
# Internal: Load heuristics from 'heuristics.yml'.
|
36
|
+
def self.load()
|
37
|
+
if @heuristics.any?
|
38
|
+
return
|
258
39
|
end
|
259
|
-
end
|
260
40
|
|
261
|
-
|
262
|
-
|
263
|
-
Language["LoomScript"]
|
264
|
-
else
|
265
|
-
Language["LiveScript"]
|
266
|
-
end
|
267
|
-
end
|
41
|
+
data = YAML.load_file(File.expand_path("../heuristics.yml", __FILE__))
|
42
|
+
named_patterns = data['named_patterns'].map { |k,v| [k, self.to_regex(v)] }.to_h
|
268
43
|
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
disambiguate ".m" do |data|
|
278
|
-
if ObjectiveCRegex.match(data)
|
279
|
-
Language["Objective-C"]
|
280
|
-
elsif data.include?(":- module")
|
281
|
-
Language["Mercury"]
|
282
|
-
elsif /^: /.match(data)
|
283
|
-
Language["MUF"]
|
284
|
-
elsif /^\s*;/.match(data)
|
285
|
-
Language["M"]
|
286
|
-
elsif /\*\)$/.match(data)
|
287
|
-
Language["Mathematica"]
|
288
|
-
elsif /^\s*%/.match(data)
|
289
|
-
Language["Matlab"]
|
290
|
-
elsif /^\w+\s*:\s*module\s*{/.match(data)
|
291
|
-
Language["Limbo"]
|
292
|
-
end
|
293
|
-
end
|
294
|
-
|
295
|
-
disambiguate ".md" do |data|
|
296
|
-
if /(^[-a-z0-9=#!\*\[|>])|<\//i.match(data) || data.empty?
|
297
|
-
Language["Markdown"]
|
298
|
-
elsif /^(;;|\(define_)/.match(data)
|
299
|
-
Language["GCC Machine Description"]
|
300
|
-
else
|
301
|
-
Language["Markdown"]
|
302
|
-
end
|
303
|
-
end
|
304
|
-
|
305
|
-
disambiguate ".ml" do |data|
|
306
|
-
if /(^\s*module)|let rec |match\s+(\S+\s)+with/.match(data)
|
307
|
-
Language["OCaml"]
|
308
|
-
elsif /=> |case\s+(\S+\s)+of/.match(data)
|
309
|
-
Language["Standard ML"]
|
44
|
+
data['disambiguations'].each do |disambiguation|
|
45
|
+
exts = disambiguation['extensions']
|
46
|
+
rules = disambiguation['rules']
|
47
|
+
rules.map! do |rule|
|
48
|
+
rule['pattern'] = self.parse_rule(named_patterns, rule)
|
49
|
+
rule
|
50
|
+
end
|
51
|
+
@heuristics << new(exts, rules)
|
310
52
|
end
|
311
53
|
end
|
312
54
|
|
313
|
-
|
314
|
-
if
|
315
|
-
|
316
|
-
|
317
|
-
|
55
|
+
def self.parse_rule(named_patterns, rule)
|
56
|
+
if !rule['and'].nil?
|
57
|
+
rules = rule['and'].map { |block| self.parse_rule(named_patterns, block) }
|
58
|
+
return And.new(rules)
|
59
|
+
elsif !rule['pattern'].nil?
|
60
|
+
return self.to_regex(rule['pattern'])
|
61
|
+
elsif !rule['negative_pattern'].nil?
|
62
|
+
pat = self.to_regex(rule['negative_pattern'])
|
63
|
+
return NegativePattern.new(pat)
|
64
|
+
elsif !rule['named_pattern'].nil?
|
65
|
+
return named_patterns[rule['named_pattern']]
|
318
66
|
else
|
319
|
-
|
67
|
+
return AlwaysMatch.new()
|
320
68
|
end
|
321
69
|
end
|
322
70
|
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
71
|
+
# Internal: Converts a string or array of strings to regexp
|
72
|
+
#
|
73
|
+
# str: string or array of strings. If it is an array of strings,
|
74
|
+
# Regexp.union will be used.
|
75
|
+
def self.to_regex(str)
|
76
|
+
if str.kind_of?(Array)
|
77
|
+
Regexp.union(str.map { |s| Regexp.new(s) })
|
328
78
|
else
|
329
|
-
|
330
|
-
end
|
331
|
-
end
|
332
|
-
|
333
|
-
disambiguate ".n" do |data|
|
334
|
-
if /^[.']/.match(data)
|
335
|
-
Language["Roff"]
|
336
|
-
elsif /^(module|namespace|using)\s/.match(data)
|
337
|
-
Language["Nemerle"]
|
79
|
+
Regexp.new(str)
|
338
80
|
end
|
339
81
|
end
|
340
82
|
|
341
|
-
|
342
|
-
|
343
|
-
Language["XML"]
|
344
|
-
elsif data.include?("THE_TITLE")
|
345
|
-
Language["Text"]
|
346
|
-
end
|
347
|
-
end
|
348
|
-
|
349
|
-
disambiguate ".nl" do |data|
|
350
|
-
if /^(b|g)[0-9]+ /.match(data)
|
351
|
-
Language["NL"]
|
352
|
-
else
|
353
|
-
Language["NewLisp"]
|
354
|
-
end
|
355
|
-
end
|
83
|
+
# Internal: Array of defined heuristics
|
84
|
+
@heuristics = []
|
356
85
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
Language["PHP"]
|
362
|
-
end
|
86
|
+
# Internal
|
87
|
+
def initialize(exts_and_langs, rules)
|
88
|
+
@exts_and_langs = exts_and_langs
|
89
|
+
@rules = rules
|
363
90
|
end
|
364
91
|
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
370
|
-
|
371
|
-
Language["Perl 6"]
|
372
|
-
end
|
92
|
+
# Internal: Check if this heuristic matches the candidate filenames or
|
93
|
+
# languages.
|
94
|
+
def matches?(filename, candidates)
|
95
|
+
filename = filename.downcase
|
96
|
+
candidates = candidates.compact.map(&:name)
|
97
|
+
@exts_and_langs.any? { |ext| filename.end_with?(ext) }
|
373
98
|
end
|
374
99
|
|
375
|
-
|
376
|
-
|
377
|
-
|
378
|
-
|
379
|
-
|
380
|
-
|
381
|
-
|
100
|
+
# Internal: Perform the heuristic
|
101
|
+
def call(data)
|
102
|
+
matched = @rules.find do |rule|
|
103
|
+
rule['pattern'].match(data)
|
104
|
+
end
|
105
|
+
if !matched.nil?
|
106
|
+
languages = matched['language']
|
107
|
+
if languages.is_a?(Array)
|
108
|
+
languages.map{ |l| Language[l] }
|
109
|
+
else
|
110
|
+
Language[languages]
|
111
|
+
end
|
382
112
|
end
|
383
113
|
end
|
384
114
|
|
385
|
-
|
386
|
-
if /^[^\[#]+:-/.match(data)
|
387
|
-
Language["Prolog"]
|
388
|
-
elsif data.include?("last_client=")
|
389
|
-
Language["INI"]
|
390
|
-
elsif data.include?("HEADERS") && data.include?("SOURCES")
|
391
|
-
Language["QMake"]
|
392
|
-
elsif /^\s*function[ \w,]+$/.match(data)
|
393
|
-
Language["IDL"]
|
394
|
-
end
|
395
|
-
end
|
115
|
+
end
|
396
116
|
|
397
|
-
|
398
|
-
if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
|
399
|
-
Language["XML"]
|
400
|
-
elsif /\w+\s*=\s*/i.match(data)
|
401
|
-
Language["INI"]
|
402
|
-
end
|
403
|
-
end
|
117
|
+
class And
|
404
118
|
|
405
|
-
|
406
|
-
|
407
|
-
Language["q"]
|
408
|
-
elsif /SELECT\s+[\w*,]+\s+FROM/i.match(data) || /(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE)/i.match(data)
|
409
|
-
Language["HiveQL"]
|
410
|
-
end
|
119
|
+
def initialize(pats)
|
120
|
+
@pats = pats
|
411
121
|
end
|
412
122
|
|
413
|
-
|
414
|
-
|
415
|
-
Language["Rebol"]
|
416
|
-
elsif /<-|^\s*#/.match(data)
|
417
|
-
Language["R"]
|
418
|
-
end
|
123
|
+
def match(input)
|
124
|
+
return !@pats.any? { |pat| !pat.match(input) }
|
419
125
|
end
|
420
126
|
|
421
|
-
|
422
|
-
if /^\.!|^\.end lit(?:eral)?\b/i.match(data)
|
423
|
-
Language["RUNOFF"]
|
424
|
-
elsif /^\.\\" /.match(data)
|
425
|
-
Language["Roff"]
|
426
|
-
end
|
427
|
-
end
|
127
|
+
end
|
428
128
|
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
else
|
433
|
-
Language["Ren'Py"]
|
434
|
-
end
|
129
|
+
class AlwaysMatch
|
130
|
+
def match(input)
|
131
|
+
return true
|
435
132
|
end
|
133
|
+
end
|
436
134
|
|
437
|
-
|
438
|
-
if /^(use |fn |mod |pub |macro_rules|impl|#!?\[)/.match(data)
|
439
|
-
Language["Rust"]
|
440
|
-
elsif /#include|#pragma\s+(rs|version)|__attribute__/.match(data)
|
441
|
-
Language["RenderScript"]
|
442
|
-
end
|
443
|
-
end
|
135
|
+
class NegativePattern
|
444
136
|
|
445
|
-
|
446
|
-
|
447
|
-
Language["SuperCollider"]
|
448
|
-
elsif /^\s*import (scala|java)\./.match(data) || /^\s*val\s+\w+\s*=/.match(data) || /^\s*class\b/.match(data)
|
449
|
-
Language["Scala"]
|
450
|
-
end
|
137
|
+
def initialize(pat)
|
138
|
+
@pat = pat
|
451
139
|
end
|
452
140
|
|
453
|
-
|
454
|
-
|
455
|
-
#Postgres
|
456
|
-
Language["PLpgSQL"]
|
457
|
-
elsif /(alter module)|(language sql)|(begin( NOT)+ atomic)/i.match(data) || /signal SQLSTATE '[0-9]+'/i.match(data)
|
458
|
-
#IBM db2
|
459
|
-
Language["SQLPL"]
|
460
|
-
elsif /\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
|
461
|
-
#Oracle
|
462
|
-
Language["PLSQL"]
|
463
|
-
elsif ! /begin|boolean|package|exception/i.match(data)
|
464
|
-
#Generic SQL
|
465
|
-
Language["SQL"]
|
466
|
-
end
|
467
|
-
end
|
468
|
-
|
469
|
-
disambiguate ".srt" do |data|
|
470
|
-
if /^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$/.match(data)
|
471
|
-
Language["SubRip Text"]
|
472
|
-
end
|
473
|
-
end
|
474
|
-
|
475
|
-
disambiguate ".t" do |data|
|
476
|
-
if Perl5Regex.match(data)
|
477
|
-
Language["Perl"]
|
478
|
-
elsif Perl6Regex.match(data)
|
479
|
-
Language["Perl 6"]
|
480
|
-
elsif /^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+/.match(data)
|
481
|
-
Language["Turing"]
|
482
|
-
end
|
141
|
+
def match(input)
|
142
|
+
return !@pat.match(input)
|
483
143
|
end
|
484
144
|
|
485
|
-
disambiguate ".toc" do |data|
|
486
|
-
if /^## |@no-lib-strip@/.match(data)
|
487
|
-
Language["World of Warcraft Addon Data"]
|
488
|
-
elsif /^\\(contentsline|defcounter|beamer|boolfalse)/.match(data)
|
489
|
-
Language["TeX"]
|
490
|
-
end
|
491
|
-
end
|
492
|
-
|
493
|
-
disambiguate ".ts" do |data|
|
494
|
-
if /<TS\b/.match(data)
|
495
|
-
Language["XML"]
|
496
|
-
else
|
497
|
-
Language["TypeScript"]
|
498
|
-
end
|
499
|
-
end
|
500
|
-
|
501
|
-
disambiguate ".tst" do |data|
|
502
|
-
if (data.include?("gap> "))
|
503
|
-
Language["GAP"]
|
504
|
-
# Heads up - we don't usually write heuristics like this (with no regex match)
|
505
|
-
else
|
506
|
-
Language["Scilab"]
|
507
|
-
end
|
508
|
-
end
|
509
|
-
|
510
|
-
disambiguate ".tsx" do |data|
|
511
|
-
if /^\s*(import.+(from\s+|require\()['"]react|\/\/\/\s*<reference\s)/.match(data)
|
512
|
-
Language["TypeScript"]
|
513
|
-
elsif /^\s*<\?xml\s+version/i.match(data)
|
514
|
-
Language["XML"]
|
515
|
-
end
|
516
|
-
end
|
517
|
-
|
518
|
-
disambiguate ".w" do |data|
|
519
|
-
if (data.include?("&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS"))
|
520
|
-
Language["OpenEdge ABL"]
|
521
|
-
elsif /^@(<|\w+\.)/.match(data)
|
522
|
-
Language["CWeb"]
|
523
|
-
end
|
524
|
-
end
|
525
|
-
|
526
|
-
disambiguate ".x" do |data|
|
527
|
-
if /\b(program|version)\s+\w+\s*{|\bunion\s+\w+\s+switch\s*\(/.match(data)
|
528
|
-
Language["RPC"]
|
529
|
-
elsif /^%(end|ctor|hook|group)\b/.match(data)
|
530
|
-
Language["Logos"]
|
531
|
-
elsif /OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS/.match(data)
|
532
|
-
Language["Linker Script"]
|
533
|
-
end
|
534
|
-
end
|
535
|
-
|
536
145
|
end
|
537
146
|
end
|