github-linguist 6.4.1 → 7.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +5 -5
  2. data/bin/{linguist → github-linguist} +0 -0
  3. data/grammars/source.abap.json +35 -6
  4. data/grammars/source.apl.json +4 -8
  5. data/grammars/source.ballerina.json +82 -3
  6. data/grammars/source.bdf.json +419 -0
  7. data/grammars/source.c++.json +25 -3
  8. data/grammars/source.c.json +6 -3
  9. data/grammars/source.chapel.json +3 -3
  10. data/grammars/source.coq.json +2 -2
  11. data/grammars/source.cs.json +83 -73
  12. data/grammars/source.csound.json +1 -1
  13. data/grammars/source.dart.json +1 -1
  14. data/grammars/source.elixir.json +112 -137
  15. data/grammars/source.emacs.lisp.json +179 -4
  16. data/grammars/source.figctrl.json +252 -0
  17. data/grammars/source.figfont.json +121 -0
  18. data/grammars/source.fontdir.json +99 -0
  19. data/grammars/source.fstar.json +439 -0
  20. data/grammars/source.hx.json +27 -9
  21. data/grammars/source.j.json +260 -0
  22. data/grammars/source.julia.json +24 -12
  23. data/grammars/source.lisp.json +3 -3
  24. data/grammars/source.mupad.json +1 -1
  25. data/grammars/source.pony.json +3 -3
  26. data/grammars/source.powershell.json +2 -2
  27. data/grammars/source.protobuf.json +87 -5
  28. data/grammars/source.purescript.json +5 -2
  29. data/grammars/source.python.json +17 -12
  30. data/grammars/source.rascal.json +0 -1
  31. data/grammars/source.reg.json +159 -0
  32. data/grammars/source.slice.json +2755 -0
  33. data/grammars/source.solidity.json +2 -2
  34. data/grammars/source.ts.json +225 -132
  35. data/grammars/source.tsx.json +235 -142
  36. data/grammars/source.viml.json +4 -4
  37. data/grammars/source.xlfd.json +462 -0
  38. data/grammars/source.yasnippet.json +387 -0
  39. data/grammars/text.elixir.json +17 -5
  40. data/grammars/text.html.basic.json +622 -2264
  41. data/grammars/text.html.elixir.json +10 -1
  42. data/grammars/text.html.php.blade.json +7 -3
  43. data/grammars/version +1 -0
  44. data/lib/linguist.rb +2 -0
  45. data/lib/linguist/VERSION +1 -1
  46. data/lib/linguist/generated.rb +14 -1
  47. data/lib/linguist/heuristics.rb +77 -468
  48. data/lib/linguist/heuristics.yml +404 -0
  49. data/lib/linguist/languages.json +1 -1
  50. data/lib/linguist/languages.yml +86 -6
  51. data/lib/linguist/samples.json +2624 -483
  52. data/lib/linguist/strategy/xml.rb +30 -0
  53. metadata +20 -7
@@ -8,5 +8,14 @@
8
8
  {
9
9
  "include": "text.html.basic"
10
10
  }
11
- ]
11
+ ],
12
+ "injections": {
13
+ "R:text.html.elixir meta.tag meta.attribute string.quoted": {
14
+ "patterns": [
15
+ {
16
+ "include": "text.elixir"
17
+ }
18
+ ]
19
+ }
20
+ }
12
21
  }
@@ -3367,7 +3367,7 @@
3367
3367
  }
3368
3368
  },
3369
3369
  "injections": {
3370
- "text.html.php.blade - (meta.embedded | meta.tag | comment.block.blade), L:(text.html.php.blade meta.tag - comment.block.blade), L:(source.js.embedded.html - comment.block.blade)": {
3370
+ "text.html.php.blade - (meta.embedded | meta.tag | comment.block.blade), L:(text.html.php.blade meta.tag - (comment.block.blade | meta.embedded.block.blade)), L:(source.js.embedded.html - (comment.block.blade | meta.embedded.block.blade))": {
3371
3371
  "patterns": [
3372
3372
  {
3373
3373
  "name": "comment.block.blade",
@@ -3501,6 +3501,10 @@
3501
3501
  }
3502
3502
  }
3503
3503
  },
3504
+ {
3505
+ "name": "comment.blade",
3506
+ "match": "@(?={{{|{{|{!!|@\\w+(?:::\\w+)?)"
3507
+ },
3504
3508
  {
3505
3509
  "name": "meta.function.echo.blade",
3506
3510
  "contentName": "source.php",
@@ -3677,7 +3681,7 @@
3677
3681
  {
3678
3682
  "name": "meta.directive.custom.blade",
3679
3683
  "contentName": "source.php",
3680
- "begin": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n(\n @\n \\w+(?:::w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n [\\t ]* # Whitespace between name and parentheses\n)\n(\\() # Followed by opening parentheses",
3684
+ "begin": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n(\n @\n \\w+(?:::\\w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n [\\t ]* # Whitespace between name and parentheses\n)\n(\\() # Followed by opening parentheses",
3681
3685
  "end": "\\)",
3682
3686
  "patterns": [
3683
3687
  {
@@ -3700,7 +3704,7 @@
3700
3704
  },
3701
3705
  {
3702
3706
  "name": "entity.name.function.blade",
3703
- "match": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n@\n\\w+(?:::w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n\\b # Bounded by word boundary"
3707
+ "match": "(?x)\n(?\u003c![A-Za-z0-9_@]) # Prepended @ or literal character escapes the sequence\n@\n\\w+(?:::\\w+)? # Any number/letter sequence, can also be postfixed by ::someOtherString\n\\b # Bounded by word boundary"
3704
3708
  },
3705
3709
  {
3706
3710
  "begin": "(^\\s*)(?=\u003c\\?(?![^?]*\\?\u003e))",
data/grammars/version ADDED
@@ -0,0 +1 @@
1
+ 7.0.0
data/lib/linguist.rb CHANGED
@@ -7,6 +7,7 @@ require 'linguist/repository'
7
7
  require 'linguist/samples'
8
8
  require 'linguist/shebang'
9
9
  require 'linguist/version'
10
+ require 'linguist/strategy/xml'
10
11
 
11
12
  class << Linguist
12
13
  # Public: Detects the Language of the blob.
@@ -62,6 +63,7 @@ class << Linguist
62
63
  Linguist::Strategy::Filename,
63
64
  Linguist::Shebang,
64
65
  Linguist::Strategy::Extension,
66
+ Linguist::Strategy::XML,
65
67
  Linguist::Heuristics,
66
68
  Linguist::Classifier
67
69
  ]
data/lib/linguist/VERSION CHANGED
@@ -1 +1 @@
1
- 6.4.1
1
+ 7.0.0
@@ -89,7 +89,8 @@ module Linguist
89
89
  generated_yarn_lock? ||
90
90
  generated_grpc_cpp? ||
91
91
  generated_dart? ||
92
- generated_perl_ppport_header?
92
+ generated_perl_ppport_header? ||
93
+ generated_gamemakerstudio?
93
94
  end
94
95
 
95
96
  # Internal: Is the blob an Xcode file?
@@ -577,5 +578,17 @@ module Linguist
577
578
  def generated_graphql_relay?
578
579
  !!name.match(/__generated__\//)
579
580
  end
581
+
582
+ # Internal: Is this a generated Game Maker Studio (2) metadata file?
583
+ #
584
+ # All Game Maker Studio 2 generated files will be JSON, .yy or .yyp, and have
585
+ # a part that looks like "modelName: GMname" on the 3rd line
586
+ #
587
+ # Return true or false
588
+ def generated_gamemakerstudio?
589
+ return false unless ['.yy', '.yyp'].include? extname
590
+ return false unless lines.count > 3
591
+ return lines[2].match(/\"modelName\"\:\s*\"GM/)
592
+ end
580
593
  end
581
594
  end
@@ -1,3 +1,5 @@
1
+ require 'yaml'
2
+
1
3
  module Linguist
2
4
  # A collection of simple heuristics that can be used to better analyze languages.
3
5
  class Heuristics
@@ -17,6 +19,7 @@ module Linguist
17
19
  # Returns an Array of languages, or empty if none matched or were inconclusive.
18
20
  def self.call(blob, candidates)
19
21
  return [] if blob.symlink?
22
+ self.load()
20
23
 
21
24
  data = blob.data[0...HEURISTICS_CONSIDER_BYTES]
22
25
 
@@ -29,509 +32,115 @@ module Linguist
29
32
  [] # No heuristics matched
30
33
  end
31
34
 
32
- # Internal: Define a new heuristic.
33
- #
34
- # exts_and_langs - String names of file extensions and languages to
35
- # disambiguate.
36
- # heuristic - Block which takes data as an argument and returns a Language or nil.
37
- #
38
- # Examples
39
- #
40
- # disambiguate ".pm" do |data|
41
- # if data.include?("use strict")
42
- # Language["Perl"]
43
- # elsif /^[^#]+:-/.match(data)
44
- # Language["Prolog"]
45
- # end
46
- # end
47
- #
48
- def self.disambiguate(*exts_and_langs, &heuristic)
49
- @heuristics << new(exts_and_langs, &heuristic)
50
- end
51
-
52
- # Internal: Array of defined heuristics
53
- @heuristics = []
54
-
55
- # Internal
56
- def initialize(exts_and_langs, &heuristic)
57
- @exts_and_langs, @candidates = exts_and_langs.partition {|e| e =~ /\A\./}
58
- @heuristic = heuristic
59
- end
60
-
61
- # Internal: Check if this heuristic matches the candidate filenames or
62
- # languages.
63
- def matches?(filename, candidates)
64
- filename = filename.downcase
65
- candidates = candidates.compact.map(&:name)
66
- @exts_and_langs.any? { |ext| filename.end_with?(ext) } ||
67
- (candidates.any? &&
68
- (@candidates - candidates == [] &&
69
- candidates - @candidates == []))
70
- end
71
-
72
- # Internal: Perform the heuristic
73
- def call(data)
74
- @heuristic.call(data)
75
- end
76
-
77
- # Common heuristics
78
- CPlusPlusRegex = Regexp.union(
79
- /^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>/,
80
- /^\s*template\s*</,
81
- /^[ \t]*try/,
82
- /^[ \t]*catch\s*\(/,
83
- /^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+/,
84
- /^[ \t]*(private|public|protected):$/,
85
- /std::\w+/)
86
- ObjectiveCRegex = /^\s*(@(interface|class|protocol|property|end|synchronised|selector|implementation)\b|#import\s+.+\.h[">])/
87
- Perl5Regex = /\buse\s+(?:strict\b|v?5\.)/
88
- Perl6Regex = /^\s*(?:use\s+v6\b|\bmodule\b|\b(?:my\s+)?class\b)/
89
-
90
- disambiguate ".as" do |data|
91
- if /^\s*(package\s+[a-z0-9_\.]+|import\s+[a-zA-Z0-9_\.]+;|class\s+[A-Za-z0-9_]+\s+extends\s+[A-Za-z0-9_]+)/.match(data)
92
- Language["ActionScript"]
93
- else
94
- Language["AngelScript"]
95
- end
96
- end
97
-
98
- disambiguate ".asc" do |data|
99
- if /^(----[- ]BEGIN|ssh-(rsa|dss)) /.match(data)
100
- Language["Public Key"]
101
- elsif /^[=-]+(\s|\n)|{{[A-Za-z]/.match(data)
102
- Language["AsciiDoc"]
103
- elsif /^(\/\/.+|((import|export)\s+)?(function|int|float|char)\s+((room|repeatedly|on|game)_)?([A-Za-z]+[A-Za-z_0-9]+)\s*[;\(])/.match(data)
104
- Language["AGS Script"]
105
- end
106
- end
107
-
108
- disambiguate ".bb" do |data|
109
- if /^\s*; /.match(data) || data.include?("End Function")
110
- Language["BlitzBasic"]
111
- elsif /^\s*(# |include|require)\b/.match(data)
112
- Language["BitBake"]
113
- end
114
- end
115
-
116
- disambiguate ".builds" do |data|
117
- if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
118
- Language["XML"]
119
- else
120
- Language["Text"]
121
- end
122
- end
123
-
124
- disambiguate ".ch" do |data|
125
- if /^\s*#\s*(if|ifdef|ifndef|define|command|xcommand|translate|xtranslate|include|pragma|undef)\b/i.match(data)
126
- Language["xBase"]
127
- end
128
- end
129
-
130
- disambiguate ".cl" do |data|
131
- if /^\s*\((defun|in-package|defpackage) /i.match(data)
132
- Language["Common Lisp"]
133
- elsif /^class/x.match(data)
134
- Language["Cool"]
135
- elsif /\/\* |\/\/ |^\}/.match(data)
136
- Language["OpenCL"]
137
- end
138
- end
139
-
140
- disambiguate ".cls" do |data|
141
- if /\\\w+{/.match(data)
142
- Language["TeX"]
143
- end
144
- end
145
-
146
- disambiguate ".cs" do |data|
147
- if /![\w\s]+methodsFor: /.match(data)
148
- Language["Smalltalk"]
149
- elsif /^\s*namespace\s*[\w\.]+\s*{/.match(data) || /^\s*\/\//.match(data)
150
- Language["C#"]
151
- end
152
- end
153
-
154
- disambiguate ".d" do |data|
155
- # see http://dlang.org/spec/grammar
156
- # ModuleDeclaration | ImportDeclaration | FuncDeclaration | unittest
157
- if /^module\s+[\w.]*\s*;|import\s+[\w\s,.:]*;|\w+\s+\w+\s*\(.*\)(?:\(.*\))?\s*{[^}]*}|unittest\s*(?:\(.*\))?\s*{[^}]*}/.match(data)
158
- Language["D"]
159
- # see http://dtrace.org/guide/chp-prog.html, http://dtrace.org/guide/chp-profile.html, http://dtrace.org/guide/chp-opt.html
160
- elsif /^(\w+:\w*:\w*:\w*|BEGIN|END|provider\s+|(tick|profile)-\w+\s+{[^}]*}|#pragma\s+D\s+(option|attributes|depends_on)\s|#pragma\s+ident\s)/.match(data)
161
- Language["DTrace"]
162
- # path/target : dependency \
163
- # target : \
164
- # : dependency
165
- # path/file.ext1 : some/path/../file.ext2
166
- elsif /([\/\\].*:\s+.*\s\\$|: \\$|^ : |^[\w\s\/\\.]+\w+\.\w+\s*:\s+[\w\s\/\\.]+\w+\.\w+)/.match(data)
167
- Language["Makefile"]
168
- end
169
- end
170
-
171
- disambiguate ".ecl" do |data|
172
- if /^[^#]+:-/.match(data)
173
- Language["ECLiPSe"]
174
- elsif data.include?(":=")
175
- Language["ECL"]
176
- end
177
- end
178
-
179
- disambiguate ".es" do |data|
180
- if /^\s*(?:%%|main\s*\(.*?\)\s*->)/.match(data)
181
- Language["Erlang"]
182
- elsif /(?:\/\/|("|')use strict\1|export\s+default\s|\/\*.*?\*\/)/m.match(data)
183
- Language["JavaScript"]
184
- end
185
- end
186
-
187
- fortran_rx = /^([c*][^abd-z]| (subroutine|program|end|data)\s|\s*!)/i
188
-
189
- disambiguate ".f" do |data|
190
- if /^: /.match(data)
191
- Language["Forth"]
192
- elsif data.include?("flowop")
193
- Language["Filebench WML"]
194
- elsif fortran_rx.match(data)
195
- Language["Fortran"]
196
- end
197
- end
198
-
199
- disambiguate ".for" do |data|
200
- if /^: /.match(data)
201
- Language["Forth"]
202
- elsif fortran_rx.match(data)
203
- Language["Fortran"]
204
- end
205
- end
206
-
207
- disambiguate ".fr" do |data|
208
- if /^(: |also |new-device|previous )/.match(data)
209
- Language["Forth"]
210
- elsif /^\s*(import|module|package|data|type) /.match(data)
211
- Language["Frege"]
212
- else
213
- Language["Text"]
214
- end
215
- end
216
-
217
- disambiguate ".fs" do |data|
218
- if /^(: |new-device)/.match(data)
219
- Language["Forth"]
220
- elsif /^\s*(#light|import|let|module|namespace|open|type)/.match(data)
221
- Language["F#"]
222
- elsif /^\s*(#version|precision|uniform|varying|vec[234])/.match(data)
223
- Language["GLSL"]
224
- elsif /#include|#pragma\s+(rs|version)|__attribute__/.match(data)
225
- Language["Filterscript"]
226
- end
227
- end
228
-
229
- disambiguate ".gs" do |data|
230
- Language["Gosu"] if /^uses java\./.match(data)
231
- end
232
-
233
- disambiguate ".h" do |data|
234
- if ObjectiveCRegex.match(data)
235
- Language["Objective-C"]
236
- elsif CPlusPlusRegex.match(data)
237
- Language["C++"]
238
- end
239
- end
240
-
241
- disambiguate ".inc" do |data|
242
- if /^<\?(?:php)?/.match(data)
243
- Language["PHP"]
244
- elsif /^\s*#(declare|local|macro|while)\s/.match(data)
245
- Language["POV-Ray SDL"]
246
- end
247
- end
248
-
249
- disambiguate ".l" do |data|
250
- if /\(def(un|macro)\s/.match(data)
251
- Language["Common Lisp"]
252
- elsif /^(%[%{}]xs|<.*>)/.match(data)
253
- Language["Lex"]
254
- elsif /^\.[a-z][a-z](\s|$)/i.match(data)
255
- Language["Roff"]
256
- elsif /^\((de|class|rel|code|data|must)\s/.match(data)
257
- Language["PicoLisp"]
35
+ # Internal: Load heuristics from 'heuristics.yml'.
36
+ def self.load()
37
+ if @heuristics.any?
38
+ return
258
39
  end
259
- end
260
40
 
261
- disambiguate ".ls" do |data|
262
- if /^\s*package\s*[\w\.\/\*\s]*\s*{/.match(data)
263
- Language["LoomScript"]
264
- else
265
- Language["LiveScript"]
266
- end
267
- end
41
+ data = YAML.load_file(File.expand_path("../heuristics.yml", __FILE__))
42
+ named_patterns = data['named_patterns'].map { |k,v| [k, self.to_regex(v)] }.to_h
268
43
 
269
- disambiguate ".lsp", ".lisp" do |data|
270
- if /^\s*\((defun|in-package|defpackage) /i.match(data)
271
- Language["Common Lisp"]
272
- elsif /^\s*\(define /.match(data)
273
- Language["NewLisp"]
274
- end
275
- end
276
-
277
- disambiguate ".m" do |data|
278
- if ObjectiveCRegex.match(data)
279
- Language["Objective-C"]
280
- elsif data.include?(":- module")
281
- Language["Mercury"]
282
- elsif /^: /.match(data)
283
- Language["MUF"]
284
- elsif /^\s*;/.match(data)
285
- Language["M"]
286
- elsif /\*\)$/.match(data)
287
- Language["Mathematica"]
288
- elsif /^\s*%/.match(data)
289
- Language["Matlab"]
290
- elsif /^\w+\s*:\s*module\s*{/.match(data)
291
- Language["Limbo"]
292
- end
293
- end
294
-
295
- disambiguate ".md" do |data|
296
- if /(^[-a-z0-9=#!\*\[|>])|<\//i.match(data) || data.empty?
297
- Language["Markdown"]
298
- elsif /^(;;|\(define_)/.match(data)
299
- Language["GCC Machine Description"]
300
- else
301
- Language["Markdown"]
302
- end
303
- end
304
-
305
- disambiguate ".ml" do |data|
306
- if /(^\s*module)|let rec |match\s+(\S+\s)+with/.match(data)
307
- Language["OCaml"]
308
- elsif /=> |case\s+(\S+\s)+of/.match(data)
309
- Language["Standard ML"]
44
+ data['disambiguations'].each do |disambiguation|
45
+ exts = disambiguation['extensions']
46
+ rules = disambiguation['rules']
47
+ rules.map! do |rule|
48
+ rule['pattern'] = self.parse_rule(named_patterns, rule)
49
+ rule
50
+ end
51
+ @heuristics << new(exts, rules)
310
52
  end
311
53
  end
312
54
 
313
- disambiguate ".mod" do |data|
314
- if data.include?('<!ENTITY ')
315
- Language["XML"]
316
- elsif /^\s*MODULE [\w\.]+;/i.match(data) || /^\s*END [\w\.]+;/i.match(data)
317
- Language["Modula-2"]
55
+ def self.parse_rule(named_patterns, rule)
56
+ if !rule['and'].nil?
57
+ rules = rule['and'].map { |block| self.parse_rule(named_patterns, block) }
58
+ return And.new(rules)
59
+ elsif !rule['pattern'].nil?
60
+ return self.to_regex(rule['pattern'])
61
+ elsif !rule['negative_pattern'].nil?
62
+ pat = self.to_regex(rule['negative_pattern'])
63
+ return NegativePattern.new(pat)
64
+ elsif !rule['named_pattern'].nil?
65
+ return named_patterns[rule['named_pattern']]
318
66
  else
319
- [Language["Linux Kernel Module"], Language["AMPL"]]
67
+ return AlwaysMatch.new()
320
68
  end
321
69
  end
322
70
 
323
- disambiguate ".ms" do |data|
324
- if /^[.'][a-z][a-z](\s|$)/i.match(data)
325
- Language["Roff"]
326
- elsif /(?<!\S)\.(include|globa?l)\s/.match(data) || /(?<!\/\*)(\A|\n)\s*\.[A-Za-z][_A-Za-z0-9]*:/.match(data.gsub(/"([^\\"]|\\.)*"|'([^\\']|\\.)*'|\\\s*(?:--.*)?\n/, ""))
327
- Language["Unix Assembly"]
71
+ # Internal: Converts a string or array of strings to regexp
72
+ #
73
+ # str: string or array of strings. If it is an array of strings,
74
+ # Regexp.union will be used.
75
+ def self.to_regex(str)
76
+ if str.kind_of?(Array)
77
+ Regexp.union(str.map { |s| Regexp.new(s) })
328
78
  else
329
- Language["MAXScript"]
330
- end
331
- end
332
-
333
- disambiguate ".n" do |data|
334
- if /^[.']/.match(data)
335
- Language["Roff"]
336
- elsif /^(module|namespace|using)\s/.match(data)
337
- Language["Nemerle"]
79
+ Regexp.new(str)
338
80
  end
339
81
  end
340
82
 
341
- disambiguate ".ncl" do |data|
342
- if /^\s*<\?xml\s+version/i.match(data)
343
- Language["XML"]
344
- elsif data.include?("THE_TITLE")
345
- Language["Text"]
346
- end
347
- end
348
-
349
- disambiguate ".nl" do |data|
350
- if /^(b|g)[0-9]+ /.match(data)
351
- Language["NL"]
352
- else
353
- Language["NewLisp"]
354
- end
355
- end
83
+ # Internal: Array of defined heuristics
84
+ @heuristics = []
356
85
 
357
- disambiguate ".php" do |data|
358
- if data.include?("<?hh")
359
- Language["Hack"]
360
- elsif /<\?[^h]/.match(data)
361
- Language["PHP"]
362
- end
86
+ # Internal
87
+ def initialize(exts_and_langs, rules)
88
+ @exts_and_langs = exts_and_langs
89
+ @rules = rules
363
90
  end
364
91
 
365
- disambiguate ".pl" do |data|
366
- if /^[^#]*:-/.match(data)
367
- Language["Prolog"]
368
- elsif Perl5Regex.match(data)
369
- Language["Perl"]
370
- elsif Perl6Regex.match(data)
371
- Language["Perl 6"]
372
- end
92
+ # Internal: Check if this heuristic matches the candidate filenames or
93
+ # languages.
94
+ def matches?(filename, candidates)
95
+ filename = filename.downcase
96
+ candidates = candidates.compact.map(&:name)
97
+ @exts_and_langs.any? { |ext| filename.end_with?(ext) }
373
98
  end
374
99
 
375
- disambiguate ".pm" do |data|
376
- if Perl5Regex.match(data)
377
- Language["Perl"]
378
- elsif Perl6Regex.match(data)
379
- Language["Perl 6"]
380
- elsif /^\s*\/\* XPM \*\//.match(data)
381
- Language["XPM"]
100
+ # Internal: Perform the heuristic
101
+ def call(data)
102
+ matched = @rules.find do |rule|
103
+ rule['pattern'].match(data)
104
+ end
105
+ if !matched.nil?
106
+ languages = matched['language']
107
+ if languages.is_a?(Array)
108
+ languages.map{ |l| Language[l] }
109
+ else
110
+ Language[languages]
111
+ end
382
112
  end
383
113
  end
384
114
 
385
- disambiguate ".pro" do |data|
386
- if /^[^\[#]+:-/.match(data)
387
- Language["Prolog"]
388
- elsif data.include?("last_client=")
389
- Language["INI"]
390
- elsif data.include?("HEADERS") && data.include?("SOURCES")
391
- Language["QMake"]
392
- elsif /^\s*function[ \w,]+$/.match(data)
393
- Language["IDL"]
394
- end
395
- end
115
+ end
396
116
 
397
- disambiguate ".props" do |data|
398
- if /^(\s*)(<Project|<Import|<Property|<?xml|xmlns)/i.match(data)
399
- Language["XML"]
400
- elsif /\w+\s*=\s*/i.match(data)
401
- Language["INI"]
402
- end
403
- end
117
+ class And
404
118
 
405
- disambiguate ".q" do |data|
406
- if /[A-Z.][\w.]*:{/i.match(data) || /(^|\n)\\(cd?|d|l|p|ts?) /.match(data)
407
- Language["q"]
408
- elsif /SELECT\s+[\w*,]+\s+FROM/i.match(data) || /(CREATE|ALTER|DROP)\s(DATABASE|SCHEMA|TABLE)/i.match(data)
409
- Language["HiveQL"]
410
- end
119
+ def initialize(pats)
120
+ @pats = pats
411
121
  end
412
122
 
413
- disambiguate ".r" do |data|
414
- if /\bRebol\b/i.match(data)
415
- Language["Rebol"]
416
- elsif /<-|^\s*#/.match(data)
417
- Language["R"]
418
- end
123
+ def match(input)
124
+ return !@pats.any? { |pat| !pat.match(input) }
419
125
  end
420
126
 
421
- disambiguate ".rno" do |data|
422
- if /^\.!|^\.end lit(?:eral)?\b/i.match(data)
423
- Language["RUNOFF"]
424
- elsif /^\.\\" /.match(data)
425
- Language["Roff"]
426
- end
427
- end
127
+ end
428
128
 
429
- disambiguate ".rpy" do |data|
430
- if /(^(import|from|class|def)\s)/m.match(data)
431
- Language["Python"]
432
- else
433
- Language["Ren'Py"]
434
- end
129
+ class AlwaysMatch
130
+ def match(input)
131
+ return true
435
132
  end
133
+ end
436
134
 
437
- disambiguate ".rs" do |data|
438
- if /^(use |fn |mod |pub |macro_rules|impl|#!?\[)/.match(data)
439
- Language["Rust"]
440
- elsif /#include|#pragma\s+(rs|version)|__attribute__/.match(data)
441
- Language["RenderScript"]
442
- end
443
- end
135
+ class NegativePattern
444
136
 
445
- disambiguate ".sc" do |data|
446
- if /\^(this|super)\./.match(data) || /^\s*(\+|\*)\s*\w+\s*{/.match(data) || /^\s*~\w+\s*=\./.match(data)
447
- Language["SuperCollider"]
448
- elsif /^\s*import (scala|java)\./.match(data) || /^\s*val\s+\w+\s*=/.match(data) || /^\s*class\b/.match(data)
449
- Language["Scala"]
450
- end
137
+ def initialize(pat)
138
+ @pat = pat
451
139
  end
452
140
 
453
- disambiguate ".sql" do |data|
454
- if /^\\i\b|AS \$\$|LANGUAGE '?plpgsql'?/i.match(data) || /SECURITY (DEFINER|INVOKER)/i.match(data) || /BEGIN( WORK| TRANSACTION)?;/i.match(data)
455
- #Postgres
456
- Language["PLpgSQL"]
457
- elsif /(alter module)|(language sql)|(begin( NOT)+ atomic)/i.match(data) || /signal SQLSTATE '[0-9]+'/i.match(data)
458
- #IBM db2
459
- Language["SQLPL"]
460
- elsif /\$\$PLSQL_|XMLTYPE|sysdate|systimestamp|\.nextval|connect by|AUTHID (DEFINER|CURRENT_USER)/i.match(data) || /constructor\W+function/i.match(data)
461
- #Oracle
462
- Language["PLSQL"]
463
- elsif ! /begin|boolean|package|exception/i.match(data)
464
- #Generic SQL
465
- Language["SQL"]
466
- end
467
- end
468
-
469
- disambiguate ".srt" do |data|
470
- if /^(\d{2}:\d{2}:\d{2},\d{3})\s*(-->)\s*(\d{2}:\d{2}:\d{2},\d{3})$/.match(data)
471
- Language["SubRip Text"]
472
- end
473
- end
474
-
475
- disambiguate ".t" do |data|
476
- if Perl5Regex.match(data)
477
- Language["Perl"]
478
- elsif Perl6Regex.match(data)
479
- Language["Perl 6"]
480
- elsif /^\s*%[ \t]+|^\s*var\s+\w+(\s*:\s*\w+)?\s*:=\s*\w+/.match(data)
481
- Language["Turing"]
482
- end
141
+ def match(input)
142
+ return !@pat.match(input)
483
143
  end
484
144
 
485
- disambiguate ".toc" do |data|
486
- if /^## |@no-lib-strip@/.match(data)
487
- Language["World of Warcraft Addon Data"]
488
- elsif /^\\(contentsline|defcounter|beamer|boolfalse)/.match(data)
489
- Language["TeX"]
490
- end
491
- end
492
-
493
- disambiguate ".ts" do |data|
494
- if /<TS\b/.match(data)
495
- Language["XML"]
496
- else
497
- Language["TypeScript"]
498
- end
499
- end
500
-
501
- disambiguate ".tst" do |data|
502
- if (data.include?("gap> "))
503
- Language["GAP"]
504
- # Heads up - we don't usually write heuristics like this (with no regex match)
505
- else
506
- Language["Scilab"]
507
- end
508
- end
509
-
510
- disambiguate ".tsx" do |data|
511
- if /^\s*(import.+(from\s+|require\()['"]react|\/\/\/\s*<reference\s)/.match(data)
512
- Language["TypeScript"]
513
- elsif /^\s*<\?xml\s+version/i.match(data)
514
- Language["XML"]
515
- end
516
- end
517
-
518
- disambiguate ".w" do |data|
519
- if (data.include?("&ANALYZE-SUSPEND _UIB-CODE-BLOCK _CUSTOM _DEFINITIONS"))
520
- Language["OpenEdge ABL"]
521
- elsif /^@(<|\w+\.)/.match(data)
522
- Language["CWeb"]
523
- end
524
- end
525
-
526
- disambiguate ".x" do |data|
527
- if /\b(program|version)\s+\w+\s*{|\bunion\s+\w+\s+switch\s*\(/.match(data)
528
- Language["RPC"]
529
- elsif /^%(end|ctor|hook|group)\b/.match(data)
530
- Language["Logos"]
531
- elsif /OUTPUT_ARCH\(|OUTPUT_FORMAT\(|SECTIONS/.match(data)
532
- Language["Linker Script"]
533
- end
534
- end
535
-
536
145
  end
537
146
  end