github-linguist 6.4.1 → 7.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +5 -5
  2. data/bin/{linguist → github-linguist} +0 -0
  3. data/grammars/source.abap.json +35 -6
  4. data/grammars/source.apl.json +4 -8
  5. data/grammars/source.ballerina.json +82 -3
  6. data/grammars/source.bdf.json +419 -0
  7. data/grammars/source.c++.json +25 -3
  8. data/grammars/source.c.json +6 -3
  9. data/grammars/source.chapel.json +3 -3
  10. data/grammars/source.coq.json +2 -2
  11. data/grammars/source.cs.json +83 -73
  12. data/grammars/source.csound.json +1 -1
  13. data/grammars/source.dart.json +1 -1
  14. data/grammars/source.elixir.json +112 -137
  15. data/grammars/source.emacs.lisp.json +179 -4
  16. data/grammars/source.figctrl.json +252 -0
  17. data/grammars/source.figfont.json +121 -0
  18. data/grammars/source.fontdir.json +99 -0
  19. data/grammars/source.fstar.json +439 -0
  20. data/grammars/source.hx.json +27 -9
  21. data/grammars/source.j.json +260 -0
  22. data/grammars/source.julia.json +24 -12
  23. data/grammars/source.lisp.json +3 -3
  24. data/grammars/source.mupad.json +1 -1
  25. data/grammars/source.pony.json +3 -3
  26. data/grammars/source.powershell.json +2 -2
  27. data/grammars/source.protobuf.json +87 -5
  28. data/grammars/source.purescript.json +5 -2
  29. data/grammars/source.python.json +17 -12
  30. data/grammars/source.rascal.json +0 -1
  31. data/grammars/source.reg.json +159 -0
  32. data/grammars/source.slice.json +2755 -0
  33. data/grammars/source.solidity.json +2 -2
  34. data/grammars/source.ts.json +225 -132
  35. data/grammars/source.tsx.json +235 -142
  36. data/grammars/source.viml.json +4 -4
  37. data/grammars/source.xlfd.json +462 -0
  38. data/grammars/source.yasnippet.json +387 -0
  39. data/grammars/text.elixir.json +17 -5
  40. data/grammars/text.html.basic.json +622 -2264
  41. data/grammars/text.html.elixir.json +10 -1
  42. data/grammars/text.html.php.blade.json +7 -3
  43. data/grammars/version +1 -0
  44. data/lib/linguist.rb +2 -0
  45. data/lib/linguist/VERSION +1 -1
  46. data/lib/linguist/generated.rb +14 -1
  47. data/lib/linguist/heuristics.rb +77 -468
  48. data/lib/linguist/heuristics.yml +404 -0
  49. data/lib/linguist/languages.json +1 -1
  50. data/lib/linguist/languages.yml +86 -6
  51. data/lib/linguist/samples.json +2624 -483
  52. data/lib/linguist/strategy/xml.rb +30 -0
  53. metadata +20 -7
@@ -8,5 +8,14 @@
8
8
  {
9
9
  "include": "text.html.basic"
10
10
  }
11
- ]
11
+ ],
12
+ "injections": {
13
+ "R:text.html.elixir meta.tag meta.attribute string.quoted": {
14
+ "patterns": [
15
+ {
16
+ "include": "text.elixir"
17
+ }
18
+ ]
19
+ }
20
+ }
12
21
  }
@@ -3367,7 +3367,7 @@
3367
3367
  }
3368
3368
  },
3369
3369
  "injections": {
3370
- "text.html.php.blade - (meta.embedded | meta.tag | comment.block.blade), L:(text.html.php.blade meta.tag - comment.block.blade), L:(source.js.embedded.html - comment.block.blade)": {
3370
+ "text.html.php.blade - (meta.embedded | meta.tag | comment.block.blade), L:(text.html.php.blade meta.tag - (comment.block.blade | meta.embedded.block.blade)), L:(source.js.embedded.html - (comment.block.blade | meta.embedded.block.blade))": {
3371
3371
  "patterns": [
3372
3372
  {
3373
3373
  "name": "comment.block.blade",
@@ -3501,6 +3501,10 @@
3501
3501
  }
3502
3502
  }
3503
3503
  },
3504
+ {
3505
+ "name": "comment.blade",
3506
+ "match": "@(?={{{|{{|{!!|@\\w+(?:::\\w+)?)"
3507
+ },
3504
3508
  {
3505
3509
  "name": "meta.function.echo.blade",
3506
3510
  "contentName": "source.php",
@@ -3677,7 +3681,7 @@
3677
3681
  {
3678
3682
  "name": "meta.directive.custom.blade",
3679
3683
  "contentName": "source.php",
3680
- "begin": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n(\n @\n \\w+(?:::w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n [\\t ]* # Whitespace between name and parentheses\n)\n(\\() # Followed by opening parentheses",
3684
+ "begin": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n(\n @\n \\w+(?:::\\w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n [\\t ]* # Whitespace between name and parentheses\n)\n(\\() # Followed by opening parentheses",
3681
3685
  "end": "\\)",
3682
3686
  "patterns": [
3683
3687
  {
@@ -3700,7 +3704,7 @@
3700
3704
  },
3701
3705
  {
3702
3706
  "name": "entity.name.function.blade",
3703
- "match": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n@\n\\w+(?:::w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n\\b # Bounded by word boundary"
3707
+ "match": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n@\n\\w+(?:::\\w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n\\b # Bounded by word boundary"
3704
3708
  },
3705
3709
  {
3706
3710
  "begin": "(^\\s*)(?=\u003c\\?(?![^?]*\\?\u003e))",
data/grammars/version ADDED
@@ -0,0 +1 @@
1
+ 7.0.0
data/lib/linguist.rb CHANGED
@@ -7,6 +7,7 @@ require 'linguist/repository'
7
7
  require 'linguist/samples'
8
8
  require 'linguist/shebang'
9
9
  require 'linguist/version'
10
+ require 'linguist/strategy/xml'
10
11
 
11
12
  class << Linguist
12
13
  # Public: Detects the Language of the blob.
@@ -62,6 +63,7 @@ class << Linguist
62
63
  Linguist::Strategy::Filename,
63
64
  Linguist::Shebang,
64
65
  Linguist::Strategy::Extension,
66
+ Linguist::Strategy::XML,
65
67
  Linguist::Heuristics,
66
68
  Linguist::Classifier
67
69
  ]
data/lib/linguist/VERSION CHANGED
@@ -1 +1 @@
1
- 6.4.1
1
+ 7.0.0
@@ -89,7 +89,8 @@ module Linguist
89
89
  generated_yarn_lock? ||
90
90
  generated_grpc_cpp? ||
91
91
  generated_dart? ||
92
- generated_perl_ppport_header?
92
+ generated_perl_ppport_header? ||
93
+ generated_gamemakerstudio?
93
94
  end
94
95
 
95
96
  # Internal: Is the blob an Xcode file?
@@ -577,5 +578,17 @@ module Linguist
577
578
  def generated_graphql_relay?
578
579
  !!name.match(/__generated__\//)
579
580
  end
581
+
582
# Internal: Is this a generated Game Maker Studio (2) metadata file?
#
# All Game Maker Studio 2 generated files are JSON stored with a .yy or
# .yyp extension, and have a part that looks like `"modelName": "GM..."`
# on the 3rd line.
#
# Only blobs with one of those extensions and more than three lines are
# inspected; everything else is reported as not generated.
#
# Returns true or false.
def generated_gamemakerstudio?
  return false unless ['.yy', '.yyp'].include? extname
  return false unless lines.count > 3
  # `match` yields MatchData or nil; coerce it so this predicate returns a
  # real boolean, as the other generated_* predicates do.
  !!lines[2].match(/\"modelName\"\:\s*\"GM/)
end
580
593
  end
581
594
  end
@@ -1,3 +1,5 @@
1
+ require 'yaml'
2
+
1
3
  module Linguist
2
4
  # A collection of simple heuristics that can be used to better analyze languages.
3
5
  class Heuristics
@@ -17,6 +19,7 @@ module Linguist
17
19
  # Returns an Array of languages, or empty if none matched or were inconclusive.
18
20
  def self.call(blob, candidates)
19
21
  return [] if blob.symlink?
22
+ self.load()
20
23
 
21
24
  data = blob.data[0...HEURISTICS_CONSIDER_BYTES]
22
25
 
@@ -29,509 +32,115 @@ module Linguist
29
32
  [] # No heuristics matched
30
33
  end
31
34
 
32
- # Internal: Define a new heuristic.
33
- #
34
- # exts_and_langs - String names of file extensions and languages to
35
- # disambiguate.
36
- # heuristic - Block which takes data as an argument and returns a Language or nil.
37
- #
38
- # Examples
39
- #
40
- # disambiguate ".pm" do |data|
41
- # if data.include?("use strict")
42
- # Language["Perl"]
43
- # elsif /^[^#]+:-/.match(data)
44
- # Language["Prolog"]
45
- # end
46
- # end
47
- #
48
- def self.disambiguate(*exts_and_langs, &heuristic)
49
- @heuristics << new(exts_and_langs, &heuristic)
50
- end
51
-
52
- # Internal: Array of defined heuristics
53
- @heuristics = []
54
-
55
- # Internal
56
- def initialize(exts_and_langs, &heuristic)
57
- @exts_and_langs, @candidates = exts_and_langs.partition {|e| e =~ /\A\./}
58
- @heuristic = heuristic
59
- end
60
-
61
- # Internal: Check if this heuristic matches the candidate filenames or
62
- # languages.
63
- def matches?(filename, candidates)
64
- filename = filename.downcase
65
- candidates = candidates.compact.map(&:name)
66
- @exts_and_langs.any? { |ext| filename.end_with?(ext) } ||
67
- (candidates.any? &&
68
- (@candidates - candidates == [] &&
69
- candidates - @candidates == []))
70
- end
71
-
72
- # Internal: Perform the heuristic
73
- def call(data)
74
- @heuristic.call(data)
75
- end
76
-
77
- # Common heuristics
78
- CPlusPlusRegex = Regexp.union(
79
- /^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/,
80
- /^\s*template\s*</,
81
- /^[ \t]*try/,
82
- /^[ \t]*catch\s*\(/,
83
- /^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+/,
84
- /^[ \t]*(private|public|protected):$/,
85
- /std::\w+/)
86
- ObjectiveCRegex = /^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])/
87
- Perl5Regex = /\buse\s+(?:strict\b|v?5\.)/
88
- Perl6Regex = /^\s*(?:use\s+v6\b|\bmodule\b|\b(?:my\s+)?class\b)/
89
-
90
- disambiguate ".as" do |data|
91
- if /^\s*(package\s+[a-z0-9_\.]+|import\s+[a-zA-Z0-9_\.]+;|class\s+[A-Za-z0-9_]+\s+extends\s+[A-Za-z0-9_]+)/.match(data)
92
- Language["ActionScript"]
93
- else
94
- Language["AngelScript"]
95
- end
96
- end
97
-
98
- disambiguate ".asc" do |data|
99
- if /^(----[- ]BEGIN|ssh-(rsa|dss)) /.match(data)
100
- Language["Public Key"]
101
- elsif /^[=-]+(\s|\n)|{{[A-Za-z]/.match(data)
102
- Language["AsciiDoc"]
103
- elsif /^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])/.match(data)
104
- Language["AGS Script"]
105
- end
106
- end
107
-
108
- disambiguate ".bb" do |data|
109
- if /^\s*; /.match(data) || data.include?("End Function")
110
- Language["BlitzBasic"]
111
- elsif /^\s*(# |include|require)\b/.match(data)
112
- Language["BitBake"]
113
- end
114
- end
115
-
116
- disambiguate ".builds" do |data|
117
- if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
118
- Language["XML"]
119
- else
120
- Language["Text"]
121
- end
122
- end
123
-
124
- disambiguate ".ch" do |data|
125
- if /^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b/i.match(data)
126
- Language["xBase"]
127
- end
128
- end
129
-
130
- disambiguate ".cl" do |data|
131
- if /^\s*\((defun|in-package|defpackage) /i.match(data)
132
- Language["Common Lisp"]
133
- elsif /^class/x.match(data)
134
- Language["Cool"]
135
- elsif /\/\* |\/\/ |^\}/.match(data)
136
- Language["OpenCL"]
137
- end
138
- end
139
-
140
- disambiguate ".cls" do |data|
141
- if /\\\w+{/.match(data)
142
- Language["TeX"]
143
- end
144
- end
145
-
146
- disambiguate ".cs" do |data|
147
- if /![\w\s]+methodsFor: /.match(data)
148
- Language["Smalltalk"]
149
- elsif /^\s*namespace\s*[\w\.]+\s*{/.match(data) || /^\s*\/\//.match(data)
150
- Language["C#"]
151
- end
152
- end
153
-
154
- disambiguate ".d" do |data|
155
- # see http://dlang.org/spec/grammar
156
- # ModuleDeclaration | ImportDeclaration | FuncDeclaration | unittest
157
- if /^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}/.match(data)
158
- Language["D"]
159
- # see http://dtrace.org/guide/chp-prog.html, http://dtrace.org/guide/chp-profile.html, http://dtrace.org/guide/chp-opt.html
160
- elsif /^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)/.match(data)
161
- Language["DTrace"]
162
- # path/target : dependency \
163
- # target : \
164
- # : dependency
165
- # path/file.ext1 : some/path/../file.ext2
166
- elsif /([\/\\].*:\s+.*\s\\$|: \\$|^ : |^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)/.match(data)
167
- Language["Makefile"]
168
- end
169
- end
170
-
171
- disambiguate ".ecl" do |data|
172
- if /^[^#]+:-/.match(data)
173
- Language["ECLiPSe"]
174
- elsif data.include?(":=")
175
- Language["ECL"]
176
- end
177
- end
178
-
179
- disambiguate ".es" do |data|
180
- if /^\s*(?:%%|main\s*\(.*?\)\s*->)/.match(data)
181
- Language["Erlang"]
182
- elsif /(?:\/\/|("|')use strict\1|export\s+default\s|\/\*.*?\*\/)/m.match(data)
183
- Language["JavaScript"]
184
- end
185
- end
186
-
187
- fortran_rx = /^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)/i
188
-
189
- disambiguate ".f" do |data|
190
- if /^: /.match(data)
191
- Language["Forth"]
192
- elsif data.include?("flowop")
193
- Language["Filebench WML"]
194
- elsif fortran_rx.match(data)
195
- Language["Fortran"]
196
- end
197
- end
198
-
199
- disambiguate ".for" do |data|
200
- if /^: /.match(data)
201
- Language["Forth"]
202
- elsif fortran_rx.match(data)
203
- Language["Fortran"]
204
- end
205
- end
206
-
207
- disambiguate ".fr" do |data|
208
- if /^(: |also |new-device|previous )/.match(data)
209
- Language["Forth"]
210
- elsif /^\s*(import|module|package|data|type) /.match(data)
211
- Language["Frege"]
212
- else
213
- Language["Text"]
214
- end
215
- end
216
-
217
- disambiguate ".fs" do |data|
218
- if /^(: |new-device)/.match(data)
219
- Language["Forth"]
220
- elsif /^\s*(#light|import|let|module|namespace|open|type)/.match(data)
221
- Language["F#"]
222
- elsif /^\s*(#version|precision|uniform|varying|vec[234])/.match(data)
223
- Language["GLSL"]
224
- elsif /#include|#pragma\s+(rs|version)|__attribute__/.match(data)
225
- Language["Filterscript"]
226
- end
227
- end
228
-
229
- disambiguate ".gs" do |data|
230
- Language["Gosu"] if /^uses java\./.match(data)
231
- end
232
-
233
- disambiguate ".h" do |data|
234
- if ObjectiveCRegex.match(data)
235
- Language["Objective-C"]
236
- elsif CPlusPlusRegex.match(data)
237
- Language["C++"]
238
- end
239
- end
240
-
241
- disambiguate ".inc" do |data|
242
- if /^<\?(?:php)?/.match(data)
243
- Language["PHP"]
244
- elsif /^\s*#(declare|local|macro|while)\s/.match(data)
245
- Language["POV-Ray SDL"]
246
- end
247
- end
248
-
249
- disambiguate ".l" do |data|
250
- if /\(def(un|macro)\s/.match(data)
251
- Language["Common Lisp"]
252
- elsif /^(%[%{}]xs|<.*>)/.match(data)
253
- Language["Lex"]
254
- elsif /^\.[a-z][a-z](\s|$)/i.match(data)
255
- Language["Roff"]
256
- elsif /^\((de|class|rel|code|data|must)\s/.match(data)
257
- Language["PicoLisp"]
35
+ # Internal: Load heuristics from 'heuristics.yml'.
36
+ def self.load()
37
+ if @heuristics.any?
38
+ return
258
39
  end
259
- end
260
40
 
261
- disambiguate ".ls" do |data|
262
- if /^\s*package\s*[\w\.\/\*\s]*\s*{/.match(data)
263
- Language["LoomScript"]
264
- else
265
- Language["LiveScript"]
266
- end
267
- end
41
+ data = YAML.load_file(File.expand_path("../heuristics.yml", __FILE__))
42
+ named_patterns = data['named_patterns'].map { |k,v| [k, self.to_regex(v)] }.to_h
268
43
 
269
- disambiguate ".lsp", ".lisp" do |data|
270
- if /^\s*\((defun|in-package|defpackage) /i.match(data)
271
- Language["Common Lisp"]
272
- elsif /^\s*\(define /.match(data)
273
- Language["NewLisp"]
274
- end
275
- end
276
-
277
- disambiguate ".m" do |data|
278
- if ObjectiveCRegex.match(data)
279
- Language["Objective-C"]
280
- elsif data.include?(":- module")
281
- Language["Mercury"]
282
- elsif /^: /.match(data)
283
- Language["MUF"]
284
- elsif /^\s*;/.match(data)
285
- Language["M"]
286
- elsif /\*\)$/.match(data)
287
- Language["Mathematica"]
288
- elsif /^\s*%/.match(data)
289
- Language["Matlab"]
290
- elsif /^\w+\s*:\s*module\s*{/.match(data)
291
- Language["Limbo"]
292
- end
293
- end
294
-
295
- disambiguate ".md" do |data|
296
- if /(^[-a-z0-9=#!\*\[|>])|<\//i.match(data) || data.empty?
297
- Language["Markdown"]
298
- elsif /^(;;|\(define_)/.match(data)
299
- Language["GCC Machine Description"]
300
- else
301
- Language["Markdown"]
302
- end
303
- end
304
-
305
- disambiguate ".ml" do |data|
306
- if /(^\s*module)|let rec |match\s+(\S+\s)+with/.match(data)
307
- Language["OCaml"]
308
- elsif /=> |case\s+(\S+\s)+of/.match(data)
309
- Language["Standard ML"]
44
+ data['disambiguations'].each do |disambiguation|
45
+ exts = disambiguation['extensions']
46
+ rules = disambiguation['rules']
47
+ rules.map! do |rule|
48
+ rule['pattern'] = self.parse_rule(named_patterns, rule)
49
+ rule
50
+ end
51
+ @heuristics << new(exts, rules)
310
52
  end
311
53
  end
312
54
 
313
- disambiguate ".mod" do |data|
314
- if data.include?('<!ENTITY ')
315
- Language["XML"]
316
- elsif /^\s*MODULE [\w\.]+;/i.match(data) || /^\s*END [\w\.]+;/i.match(data)
317
- Language["Modula-2"]
55
+ def self.parse_rule(named_patterns, rule)
56
+ if !rule['and'].nil?
57
+ rules = rule['and'].map { |block| self.parse_rule(named_patterns, block) }
58
+ return And.new(rules)
59
+ elsif !rule['pattern'].nil?
60
+ return self.to_regex(rule['pattern'])
61
+ elsif !rule['negative_pattern'].nil?
62
+ pat = self.to_regex(rule['negative_pattern'])
63
+ return NegativePattern.new(pat)
64
+ elsif !rule['named_pattern'].nil?
65
+ return named_patterns[rule['named_pattern']]
318
66
  else
319
- [Language["Linux Kernel Module"], Language["AMPL"]]
67
+ return AlwaysMatch.new()
320
68
  end
321
69
  end
322
70
 
323
- disambiguate ".ms" do |data|
324
- if /^[.'][a-z][a-z](\s|$)/i.match(data)
325
- Language["Roff"]
326
- elsif /(?<!\S)\.(include|globa?l)\s/.match(data) || /(?<!\/\*)(\A|\n)\s*\.[A-Za-z][_A-Za-z0-9]*:/.match(data.gsub(/"([^\\"]|\\.)*"|'([^\\']|\\.)*'|\\\s*(?:--.*)?\n/, ""))
327
- Language["Unix Assembly"]
71
+ # Internal: Converts a string or array of strings to regexp
72
+ #
73
+ # str: string or array of strings. If it is an array of strings,
74
+ # Regexp.union will be used.
75
+ def self.to_regex(str)
76
+ if str.kind_of?(Array)
77
+ Regexp.union(str.map { |s| Regexp.new(s) })
328
78
  else
329
- Language["MAXScript"]
330
- end
331
- end
332
-
333
- disambiguate ".n" do |data|
334
- if /^[.']/.match(data)
335
- Language["Roff"]
336
- elsif /^(module|namespace|using)\s/.match(data)
337
- Language["Nemerle"]
79
+ Regexp.new(str)
338
80
  end
339
81
  end
340
82
 
341
- disambiguate ".ncl" do |data|
342
- if /^\s*<\?xml\s+version/i.match(data)
343
- Language["XML"]
344
- elsif data.include?("THE_TITLE")
345
- Language["Text"]
346
- end
347
- end
348
-
349
- disambiguate ".nl" do |data|
350
- if /^(b|g)[0-9]+ /.match(data)
351
- Language["NL"]
352
- else
353
- Language["NewLisp"]
354
- end
355
- end
83
+ # Internal: Array of defined heuristics
84
+ @heuristics = []
356
85
 
357
- disambiguate ".php" do |data|
358
- if data.include?("<?hh")
359
- Language["Hack"]
360
- elsif /<\?[^h]/.match(data)
361
- Language["PHP"]
362
- end
86
+ # Internal
87
+ def initialize(exts_and_langs, rules)
88
+ @exts_and_langs = exts_and_langs
89
+ @rules = rules
363
90
  end
364
91
 
365
- disambiguate ".pl" do |data|
366
- if /^[^#]*:-/.match(data)
367
- Language["Prolog"]
368
- elsif Perl5Regex.match(data)
369
- Language["Perl"]
370
- elsif Perl6Regex.match(data)
371
- Language["Perl 6"]
372
- end
92
# Internal: Check if this heuristic applies to the given file.
#
# filename   - String file name; it is downcased before being compared with
#              the heuristic's extensions.
# candidates - Candidate languages. Kept in the signature for callers, but
#              not consulted: the decision depends only on the extension.
#
# Returns true if the downcased filename ends with any of the extensions
# this heuristic was created with, false otherwise.
def matches?(filename, candidates)
  downcased = filename.downcase
  @exts_and_langs.any? { |ext| downcased.end_with?(ext) }
end
374
99
 
375
- disambiguate ".pm" do |data|
376
- if Perl5Regex.match(data)
377
- Language["Perl"]
378
- elsif Perl6Regex.match(data)
379
- Language["Perl 6"]
380
- elsif /^\s*\/\* XPM \*\//.match(data)
381
- Language["XPM"]
100
+ # Internal: Perform the heuristic
101
+ def call(data)
102
+ matched = @rules.find do |rule|
103
+ rule['pattern'].match(data)
104
+ end
105
+ if !matched.nil?
106
+ languages = matched['language']
107
+ if languages.is_a?(Array)
108
+ languages.map{ |l| Language[l] }
109
+ else
110
+ Language[languages]
111
+ end
382
112
  end
383
113
  end
384
114
 
385
- disambiguate ".pro" do |data|
386
- if /^[^\[#]+:-/.match(data)
387
- Language["Prolog"]
388
- elsif data.include?("last_client=")
389
- Language["INI"]
390
- elsif data.include?("HEADERS") && data.include?("SOURCES")
391
- Language["QMake"]
392
- elsif /^\s*function[ \w,]+$/.match(data)
393
- Language["IDL"]
394
- end
395
- end
115
+ end
396
116
 
397
- disambiguate ".props" do |data|
398
- if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
399
- Language["XML"]
400
- elsif /\w+\s*=\s*/i.match(data)
401
- Language["INI"]
402
- end
403
- end
117
+ class And
404
118
 
405
- disambiguate ".q" do |data|
406
- if /[A-Z.][\w.]*:{/i.match(data) || /(^|\n)\\(cd?|d|l|p|ts?) /.match(data)
407
- Language["q"]
408
- elsif /SELECT\s+[\w*,]+\s+FROM/i.match(data) || /(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE)/i.match(data)
409
- Language["HiveQL"]
410
- end
119
+ def initialize(pats)
120
+ @pats = pats
411
121
  end
412
122
 
413
- disambiguate ".r" do |data|
414
- if /\bRebol\b/i.match(data)
415
- Language["Rebol"]
416
- elsif /<-|^\s*#/.match(data)
417
- Language["R"]
418
- end
123
# Internal: Check the input against every wrapped pattern.
#
# input - the data passed to each pattern's `match`.
#
# Returns true when every pattern matches (also true when there are no
# patterns), false when at least one pattern does not match.
def match(input)
  @pats.all? { |pat| pat.match(input) }
end
420
126
 
421
- disambiguate ".rno" do |data|
422
- if /^\.!|^\.end lit(?:eral)?\b/i.match(data)
423
- Language["RUNOFF"]
424
- elsif /^\.\\" /.match(data)
425
- Language["Roff"]
426
- end
427
- end
127
+ end
428
128
 
429
- disambiguate ".rpy" do |data|
430
- if /(^(import|from|class|def)\s)/m.match(data)
431
- Language["Python"]
432
- else
433
- Language["Ren'Py"]
434
- end
129
+ class AlwaysMatch
130
+ def match(input)
131
+ return true
435
132
  end
133
+ end
436
134
 
437
- disambiguate ".rs" do |data|
438
- if /^(use |fn |mod |pub |macro_rules|impl|#!?\[)/.match(data)
439
- Language["Rust"]
440
- elsif /#include|#pragma\s+(rs|version)|__attribute__/.match(data)
441
- Language["RenderScript"]
442
- end
443
- end
135
+ class NegativePattern
444
136
 
445
- disambiguate ".sc" do |data|
446
- if /\^(this|super)\./.match(data) || /^\s*(\+|\*)\s*\w+\s*{/.match(data) || /^\s*~\w+\s*=\./.match(data)
447
- Language["SuperCollider"]
448
- elsif /^\s*import (scala|java)\./.match(data) || /^\s*val\s+\w+\s*=/.match(data) || /^\s*class\b/.match(data)
449
- Language["Scala"]
450
- end
137
+ def initialize(pat)
138
+ @pat = pat
451
139
  end
452
140
 
453
- disambiguate ".sql" do |data|
454
- if /^\\i\b|AS \$\$|LANGUAGE '?plpgsql'?/i.match(data) || /SECURITY (DEFINER|INVOKER)/i.match(data) || /BEGIN( WORK| TRANSACTION)?;/i.match(data)
455
- #Postgres
456
- Language["PLpgSQL"]
457
- elsif /(alter module)|(language sql)|(begin( NOT)+ atomic)/i.match(data) || /signal SQLSTATE '[0-9]+'/i.match(data)
458
- #IBM db2
459
- Language["SQLPL"]
460
- elsif /\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
461
- #Oracle
462
- Language["PLSQL"]
463
- elsif ! /begin|boolean|package|exception/i.match(data)
464
- #Generic SQL
465
- Language["SQL"]
466
- end
467
- end
468
-
469
- disambiguate ".srt" do |data|
470
- if /^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$/.match(data)
471
- Language["SubRip Text"]
472
- end
473
- end
474
-
475
- disambiguate ".t" do |data|
476
- if Perl5Regex.match(data)
477
- Language["Perl"]
478
- elsif Perl6Regex.match(data)
479
- Language["Perl 6"]
480
- elsif /^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+/.match(data)
481
- Language["Turing"]
482
- end
141
+ def match(input)
142
+ return !@pat.match(input)
483
143
  end
484
144
 
485
- disambiguate ".toc" do |data|
486
- if /^## |@no-lib-strip@/.match(data)
487
- Language["World of Warcraft Addon Data"]
488
- elsif /^\\(contentsline|defcounter|beamer|boolfalse)/.match(data)
489
- Language["TeX"]
490
- end
491
- end
492
-
493
- disambiguate ".ts" do |data|
494
- if /<TS\b/.match(data)
495
- Language["XML"]
496
- else
497
- Language["TypeScript"]
498
- end
499
- end
500
-
501
- disambiguate ".tst" do |data|
502
- if (data.include?("gap> "))
503
- Language["GAP"]
504
- # Heads up - we don't usually write heuristics like this (with no regex match)
505
- else
506
- Language["Scilab"]
507
- end
508
- end
509
-
510
- disambiguate ".tsx" do |data|
511
- if /^\s*(import.+(from\s+|require\()['"]react|\/\/\/\s*<reference\s)/.match(data)
512
- Language["TypeScript"]
513
- elsif /^\s*<\?xml\s+version/i.match(data)
514
- Language["XML"]
515
- end
516
- end
517
-
518
- disambiguate ".w" do |data|
519
- if (data.include?("&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS"))
520
- Language["OpenEdge ABL"]
521
- elsif /^@(<|\w+\.)/.match(data)
522
- Language["CWeb"]
523
- end
524
- end
525
-
526
- disambiguate ".x" do |data|
527
- if /\b(program|version)\s+\w+\s*{|\bunion\s+\w+\s+switch\s*\(/.match(data)
528
- Language["RPC"]
529
- elsif /^%(end|ctor|hook|group)\b/.match(data)
530
- Language["Logos"]
531
- elsif /OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS/.match(data)
532
- Language["Linker Script"]
533
- end
534
- end
535
-
536
145
  end
537
146
  end