gitlab-linguist 2.9.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
+ require 'linguist/blob_helper'
2
+
3
+ module Linguist
4
+ # A FileBlob is a wrapper around a File object to make it quack
5
+ # like a Grit::Blob. It provides the basic interface: `name`,
6
+ # `data`, and `size`.
7
class FileBlob
  include BlobHelper

  # Public: Initialize a new FileBlob from a path
  #
  # path      - A path String that exists on the file system.
  # base_path - Optional base directory used to relativize the path. It is
  #             only stripped when `path` actually starts with
  #             "<base_path>/"; otherwise `name` is left equal to `path`.
  #
  # Returns a FileBlob.
  def initialize(path, base_path = nil)
    @path = path
    @name = path

    if base_path
      prefix = "#{base_path}/"

      # String#sub with a String pattern replaces the first occurrence
      # anywhere in the receiver. Guard it with a prefix check so a
      # matching segment in the middle of an unrelated path is never cut
      # out of the name.
      @name = path.sub(prefix, '') if path.to_s.start_with?(prefix)
    end
  end

  # Public: Filename
  #
  # Examples
  #
  #   FileBlob.new("/path/to/linguist/lib/linguist.rb").name
  #   # => "/path/to/linguist/lib/linguist.rb"
  #
  #   FileBlob.new("/path/to/linguist/lib/linguist.rb",
  #                "/path/to/linguist").name
  #   # => "lib/linguist.rb"
  #
  # Returns a String
  attr_reader :name

  # Public: Read file permissions
  #
  # Returns the octal String form of File.stat's mode, like '100644'
  def mode
    File.stat(@path).mode.to_s(8)
  end

  # Public: Read file contents.
  #
  # The file is read from disk on every call; nothing is cached here.
  #
  # Returns a String.
  def data
    File.read(@path)
  end

  # Public: Get byte size
  #
  # Returns an Integer.
  def size
    File.size(@path)
  end
end
56
+ end
@@ -0,0 +1,185 @@
1
+ module Linguist
2
class Generated
  # Public: Is the blob a generated file?
  #
  # name - String filename
  # data - String blob data. A block may also be passed in for lazy
  #        loading. This behavior is deprecated and you should always
  #        pass in a String.
  #
  # Returns true or false
  def self.generated?(name, data)
    new(name, data).generated?
  end

  # Internal: Initialize Generated instance
  #
  # name - String filename
  # data - String blob data (or a callable returning it, see #data)
  def initialize(name, data)
    @name = name
    @extname = File.extname(name)
    @_data = data
  end

  attr_reader :name, :extname

  # Lazy load blob data if a callable was passed in.
  #
  # The raw constructor argument is kept in @_data and only invoked the
  # first time the data is actually needed.
  #
  # Returns String data.
  def data
    @data ||= @_data.respond_to?(:call) ? @_data.call() : @_data
  end

  # Public: Get each line of data
  #
  # Trailing empty lines are preserved (split limit of -1), so data that
  # ends in a newline yields a final "" element.
  #
  # Returns an Array of lines
  def lines
    # TODO: data should be required to be a String, no nils
    @lines ||= data ? data.split("\n", -1) : []
  end

  # Internal: Is the blob a generated file?
  #
  # Generated source code is suppressed in diffs and is ignored by
  # language statistics.
  #
  # Please add additional test coverage to
  # `test/test_blob.rb#test_generated` if you make any changes.
  #
  # Returns true or false
  def generated?
    name == 'Gemfile.lock' ||
      minified_files? ||
      compiled_coffeescript? ||
      xcode_project_file? ||
      generated_parser? ||
      generated_net_docfile? ||
      generated_net_designer_file? ||
      generated_protocol_buffer?
  end

  # Internal: Is the blob an XCode project file?
  #
  # Generated if the file extension is an XCode project
  # file extension.
  #
  # Returns true or false.
  def xcode_project_file?
    ['.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'].include?(extname)
  end

  # Internal: Is the blob a minified file?
  #
  # Consider a file minified if it is longer than 200 characters and
  # fewer than 5% of its characters are whitespace (anything sorting at
  # or below ' ', which also covers tabs and newlines).
  # Currently, only JS and CSS files are detected by this method.
  #
  # Returns true or false.
  def minified_files?
    return false unless ['.js', '.css'].include?(extname)
    return false unless data && data.length > 200

    whitespace = data.each_char.count { |c| c <= ' ' }
    (whitespace / data.length.to_f) < 0.05
  end

  # Internal: Is the blob of JS generated by CoffeeScript?
  #
  # CoffeeScript is meant to output JS that would be difficult to
  # tell if it was generated or not. Look for a number of patterns
  # output by the CS compiler.
  #
  # Returns true or false
  def compiled_coffeescript?
    return false unless extname == '.js'

    # CoffeeScript generated by > 1.2 include a comment on the first line
    return true if lines[0] =~ /^\/\/ Generated by /

    module_closure =
      lines[0] == '(function() {' &&    # First line is module closure opening
      lines[-2] == '}).call(this);' &&  # Second to last line closes module closure
      lines[-1] == ''                   # Last line is blank
    return false unless module_closure

    score = 0

    lines.each do |line|
      next unless line =~ /var /

      # Underscored temp vars are likely to be Coffee
      score += 1 * line.scan(/(_fn|_i|_len|_ref|_results)/).size

      # bind and extend functions are very Coffee specific
      score += 3 * line.scan(/(__bind|__extends|__hasProp|__indexOf|__slice)/).size
    end

    # Require a score of 3. This is fairly arbitrary. Consider
    # tweaking later.
    score >= 3
  end

  # Internal: Is this a generated documentation file for a .NET assembly?
  #
  # .NET developers often check in the XML Intellisense file along with an
  # assembly - however, these don't have a special extension, so we have to
  # dig into the contents to determine if it's a docfile. Luckily, these files
  # are extremely structured, so recognizing them is easy.
  #
  # Returns true or false
  def generated_net_docfile?
    return false unless extname.downcase == ".xml"
    return false unless lines.count > 3

    # .NET Docfiles always open with <doc> and their first tag is an
    # <assembly> tag
    lines[1].include?("<doc>") &&
      lines[2].include?("<assembly>") &&
      lines[-2].include?("</doc>")
  end

  # Internal: Is this a codegen file for a .NET project?
  #
  # Visual Studio often uses code generation to generate partial classes, and
  # these files can be quite unwieldy. Let's hide them.
  #
  # Returns true or false
  def generated_net_designer_file?
    # `=~` yields a match index or nil; coerce so the documented boolean
    # contract holds for this method and for #generated?.
    !!(name.downcase =~ /\.designer\.cs$/)
  end

  # Internal: Is the blob of JS a parser generated by PEG.js?
  #
  # PEG.js-generated parsers are not meant to be consumed by humans.
  #
  # Returns true or false
  def generated_parser?
    return false unless extname == '.js'

    # PEG.js-generated parsers include a comment near the top of the file
    # that marks them as such. Only the first five lines are inspected.
    header = lines[0..4].join('')
    !!(header =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG\.js/)
  end

  # Internal: Is the blob a C++, Java or Python source file generated by the
  # Protocol Buffer compiler?
  #
  # Returns true or false.
  def generated_protocol_buffer?
    return false unless ['.py', '.java', '.h', '.cc', '.cpp'].include?(extname)
    return false unless lines.count > 1

    lines[0].include?("Generated by the protocol buffer compiler.  DO NOT EDIT!")
  end
end
185
+ end
@@ -0,0 +1,495 @@
1
+ require 'escape_utils'
2
+ require 'pygments'
3
+ require 'yaml'
4
+
5
+ require 'linguist/classifier'
6
+ require 'linguist/samples'
7
+
8
+ module Linguist
9
+ # Language names that are recognizable by GitHub. Defined languages
10
+ # can be highlighted, searched and listed under the Top Languages page.
11
+ #
12
+ # Languages are defined in `lib/linguist/languages.yml`.
13
class Language
  @languages          = []
  @index              = {}
  @name_index         = {}
  @alias_index        = {}

  @extension_index          = Hash.new { |h,k| h[k] = [] }
  @filename_index           = Hash.new { |h,k| h[k] = [] }
  @primary_extension_index  = {}

  # Valid Languages types
  TYPES = [:data, :markup, :programming]

  # Names of non-programming languages that we will still detect
  #
  # Returns an array
  def self.detectable_markup
    ["AsciiDoc", "CSS", "Creole", "Less", "Markdown", "MediaWiki", "Org", "RDoc", "Sass", "Textile", "reStructuredText"]
  end

  # Internal: Create a new Language object and register it in the
  # name, alias, extension, primary extension and filename indexes.
  #
  # attributes - A hash of attributes
  #
  # Returns a Language object
  # Raises ArgumentError on a duplicate name, alias or primary extension,
  # or when an extension does not start with a '.'.
  def self.create(attributes = {})
    language = new(attributes)

    @languages << language

    # All Language names should be unique. Raise if there is a duplicate.
    if @name_index.key?(language.name)
      raise ArgumentError, "Duplicate language name: #{language.name}"
    end

    # Language name index
    @index[language.name] = @name_index[language.name] = language

    language.aliases.each do |name|
      # All Language aliases should be unique. Raise if there is a duplicate.
      if @alias_index.key?(name)
        raise ArgumentError, "Duplicate alias: #{name}"
      end

      @index[name] = @alias_index[name] = language
    end

    language.extensions.each do |extension|
      if extension !~ /^\./
        raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
      end

      @extension_index[extension] << language
    end

    if @primary_extension_index.key?(language.primary_extension)
      raise ArgumentError, "Duplicate primary extension: #{language.primary_extension}"
    end

    @primary_extension_index[language.primary_extension] = language

    language.filenames.each do |filename|
      @filename_index[filename] << language
    end

    language
  end

  # Public: Detects the Language of the blob.
  #
  # name - String filename
  # data - String blob data. A block may also be passed in for lazy
  #        loading. This behavior is deprecated and you should always
  #        pass in a String.
  # mode - Optional String mode (defaults to nil)
  #
  # Returns Language or nil.
  def self.detect(name, data, mode = nil)
    # A bit of an elegant hack. If the file is extensionless and its octal
    # mode has both bits of 05 set, append a "magic" extension so it can be
    # classified with other languages that have shebang scripts.
    if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
      name += ".script!"
    end

    possible_languages = find_by_filename(name)

    if possible_languages.length > 1
      # Several candidates share this filename/extension: disambiguate
      # with the classifier, which needs the actual contents.
      data = data.call() if data.respond_to?(:call)
      if data.nil? || data == ""
        nil
      elsif result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
        Language[result[0]]
      end
    else
      possible_languages.first
    end
  end

  # Public: Get all Languages
  #
  # Returns an Array of Languages
  def self.all
    @languages
  end

  # Public: Look up Language by its proper name.
  #
  # name - The String name of the Language
  #
  # Examples
  #
  #   Language.find_by_name('Ruby')
  #   # => #<Language name="Ruby">
  #
  # Returns the Language or nil if none was found.
  def self.find_by_name(name)
    @name_index[name]
  end

  # Public: Look up Language by one of its aliases.
  #
  # name - A String alias of the Language
  #
  # Examples
  #
  #   Language.find_by_alias('cpp')
  #   # => #<Language name="C++">
  #
  # Returns the Language or nil if none was found.
  def self.find_by_alias(name)
    @alias_index[name]
  end

  # Public: Look up Languages by filename.
  #
  # filename - The path String.
  #
  # Examples
  #
  #   Language.find_by_filename('foo.rb')
  #   # => [#<Language name="Ruby">]
  #
  # Returns all matching Languages or [] if none were found. The
  # language owning the primary extension, if any, comes first.
  def self.find_by_filename(filename)
    basename, extname = File.basename(filename), File.extname(filename)
    langs = [@primary_extension_index[extname]] +
            @filename_index[basename] +
            @extension_index[extname]
    langs.compact.uniq
  end

  # Public: Look up Language by its name or alias.
  #
  # name - The String name or alias of the Language
  #
  # Examples
  #
  #   Language['Ruby']
  #   # => #<Language name="Ruby">
  #
  #   Language['ruby']
  #   # => #<Language name="Ruby">
  #
  # Returns the Language or nil if none was found.
  def self.[](name)
    @index[name]
  end

  # Public: A List of popular languages
  #
  # Popular languages are sorted to the top of language chooser
  # dropdowns.
  #
  # This list is configured in "popular.yml".
  #
  # The result is memoized on first call.
  #
  # Returns an Array of Languages.
  def self.popular
    @popular ||= all.select(&:popular?).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of non-popular languages
  #
  # Unpopular languages appear below popular ones in language
  # chooser dropdowns.
  #
  # This list is created from all the languages not listed in "popular.yml".
  #
  # The result is memoized on first call.
  #
  # Returns an Array of Languages.
  def self.unpopular
    @unpopular ||= all.select(&:unpopular?).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of languages with assigned colors.
  #
  # Returns an Array of Languages.
  def self.colors
    @colors ||= all.select(&:color).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of languages compatible with Ace.
  #
  # Returns an Array of Languages.
  def self.ace_modes
    @ace_modes ||= all.select(&:ace_mode).sort_by { |lang| lang.name.downcase }
  end

  # Internal: Initialize a new Language
  #
  # attributes - A hash of attributes
  #
  # Raises ArgumentError when the name, lexer or primary extension is
  # missing, or when the type is not one of TYPES.
  def initialize(attributes = {})
    # @name is required
    @name = attributes[:name] || raise(ArgumentError, "missing name")

    # Set type
    @type = attributes[:type] ? attributes[:type].to_sym : nil
    if @type && !TYPES.include?(@type)
      raise ArgumentError, "invalid type: #{@type}"
    end

    @color = attributes[:color]

    # Set aliases
    @aliases = [default_alias_name] + (attributes[:aliases] || [])

    # Lookup Lexer object
    @lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) ||
      raise(ArgumentError, "#{@name} is missing lexer")

    @ace_mode = attributes[:ace_mode]
    @wrap     = attributes[:wrap] || false

    # Set legacy search term
    @search_term = attributes[:search_term] || default_alias_name

    # Set extensions or default to [].
    @extensions = attributes[:extensions] || []
    @filenames  = attributes[:filenames]  || []

    @primary_extension = attributes[:primary_extension]
    unless @primary_extension
      raise ArgumentError, "#{@name} is missing primary extension"
    end

    # Prepend primary extension unless its already included
    unless @extensions.include?(@primary_extension)
      @extensions = [@primary_extension] + @extensions
    end

    # Set popular, and searchable flags
    @popular    = attributes.key?(:popular)    ? attributes[:popular]    : false
    @searchable = attributes.key?(:searchable) ? attributes[:searchable] : true

    # If group name is set, save the name so we can lazy load it later
    if attributes[:group_name]
      @group = nil
      @group_name = attributes[:group_name]

    # Otherwise we can set it to self now
    else
      @group = self
    end
  end

  # Public: Get proper name
  #
  # Examples
  #
  #   # => "Ruby"
  #   # => "Python"
  #   # => "Perl"
  #
  # Returns the name String
  attr_reader :name

  # Public: Get type.
  #
  # Returns a type Symbol or nil.
  attr_reader :type

  # Public: Get color.
  #
  # Returns a hex color String.
  attr_reader :color

  # Public: Get aliases
  #
  # Examples
  #
  #   Language['C++'].aliases
  #   # => ["cpp"]
  #
  # Returns an Array of String names
  attr_reader :aliases

  # Deprecated: Get code search term
  #
  # Examples
  #
  #   # => "ruby"
  #   # => "python"
  #   # => "perl"
  #
  # Returns the name String
  attr_reader :search_term

  # Public: Get Lexer
  #
  # Returns the Lexer
  attr_reader :lexer

  # Public: Get Ace mode
  #
  # Examples
  #
  #  # => "text"
  #  # => "javascript"
  #  # => "c_cpp"
  #
  # Returns a String name or nil
  attr_reader :ace_mode

  # Public: Should language lines be wrapped
  #
  # Returns true or false
  attr_reader :wrap

  # Public: Get extensions
  #
  # Examples
  #
  #   # => ['.rb', '.rake', ...]
  #
  # Returns the extensions Array
  attr_reader :extensions

  # Deprecated: Get primary extension
  #
  # Defaults to the first extension but can be overridden
  # in the languages.yml.
  #
  # The primary extension can not be nil. Tests should verify this.
  #
  # This attribute is only used by app/helpers/gists_helper.rb for
  # creating the language dropdown. It really should be using `name`
  # instead. Would like to drop primary extension.
  #
  # Returns the extension String.
  attr_reader :primary_extension

  # Public: Get filenames
  #
  # Examples
  #
  #   # => ['Rakefile', ...]
  #
  # Returns the filenames Array
  attr_reader :filenames

  # Public: Get URL escaped name.
  #
  # Examples
  #
  #   "C%23"
  #   "C%2B%2B"
  #   "Common%20Lisp"
  #
  # Returns the escaped String.
  def escaped_name
    EscapeUtils.escape_url(name).gsub('+', '%20')
  end

  # Internal: Get default alias name
  #
  # The name is downcased and every whitespace character becomes '-'.
  #
  # Returns the alias name String
  def default_alias_name
    name.downcase.gsub(/\s/, '-')
  end

  # Public: Get Language group
  #
  # Resolved lazily by name when a group name was given at construction.
  #
  # Returns a Language
  def group
    @group ||= Language.find_by_name(@group_name)
  end

  # Public: Is it popular?
  #
  # Returns true or false
  def popular?
    @popular
  end

  # Public: Is it not popular?
  #
  # Returns true or false
  def unpopular?
    !popular?
  end

  # Public: Is it searchable?
  #
  # Unsearchable languages won't be indexed by solr and won't show
  # up in the code search dropdown.
  #
  # Returns true or false
  def searchable?
    @searchable
  end

  # Public: Highlight syntax of text
  #
  # text    - String of code to be highlighted
  # options - A Hash of options (defaults to {})
  #
  # Returns html String
  def colorize(text, options = {})
    # Forward the caller's options unchanged; writing `options = {}` at
    # the call site would reassign the local and silently discard them.
    lexer.highlight(text, options)
  end

  # Public: Return name as String representation
  def to_s
    name
  end

  # Languages compare by object identity.
  def ==(other)
    eql?(other)
  end

  def eql?(other)
    equal?(other)
  end

  # Hash on the name so the value is stable for a given Language.
  def hash
    name.hash
  end

  def inspect
    "#<#{self.class} name=#{name}>"
  end
end
453
+
454
# Extensions and filenames observed in the samples, keyed by language name,
# plus the list of popular language names.
sample_extnames  = Samples::DATA['extnames']
sample_filenames = Samples::DATA['filenames']
popular_names    = YAML.load_file(File.expand_path("../popular.yml", __FILE__))

# Register every language declared in languages.yml, topping up its
# declared extensions and filenames with the ones found in the samples.
YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
  options['extensions'] ||= []
  options['filenames']  ||= []

  (sample_extnames[name] || []).each do |extname|
    options['extensions'] << extname unless options['extensions'].include?(extname)
  end

  (sample_filenames[name] || []).each do |filename|
    options['filenames'] << filename unless options['filenames'].include?(filename)
  end

  Language.create(
    :name              => name,
    :color             => options['color'],
    :type              => options['type'],
    :aliases           => options['aliases'],
    :lexer             => options['lexer'],
    :ace_mode          => options['ace_mode'],
    :wrap              => options['wrap'],
    :group_name        => options['group'],
    # Searchable unless the YAML entry says otherwise.
    :searchable        => options.fetch('searchable', true),
    :search_term       => options['search_term'],
    :extensions        => options['extensions'].sort,
    :primary_extension => options['primary_extension'],
    :filenames         => options['filenames'],
    :popular           => popular_names.include?(name)
  )
end
495
+ end