gitlab-linguist 2.9.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,56 @@
1
+ require 'linguist/blob_helper'
2
+
3
module Linguist
  # A FileBlob is a wrapper around a File object to make it quack
  # like a Grit::Blob. It provides the basic interface: `name`,
  # `data`, and `size`.
  class FileBlob
    include BlobHelper

    # Public: Initialize a new FileBlob from a path
    #
    # path      - A path String that exists on the file system.
    # base_path - Optional base directory String used to relativize the
    #             name. It is only removed when it is a leading prefix of
    #             `path`; a trailing "/" on base_path is tolerated.
    #
    # Returns a FileBlob.
    def initialize(path, base_path = nil)
      @path = path
      @name = path

      if base_path
        # Strip the base only when it really is the leading directory of
        # the path. A plain `sub` would also cut a matching segment out of
        # the middle of an unrelated path.
        prefix = "#{base_path.to_s.chomp('/')}/"
        @name = path[prefix.length..-1] if path.start_with?(prefix)
      end
    end

    # Public: Filename
    #
    # Examples
    #
    #   FileBlob.new("/path/to/linguist/lib/linguist.rb").name
    #   # => "/path/to/linguist/lib/linguist.rb"
    #
    #   FileBlob.new("/path/to/linguist/lib/linguist.rb",
    #                "/path/to/linguist").name
    #   # => "lib/linguist.rb"
    #
    # Returns a String
    attr_reader :name

    # Public: Read file permissions
    #
    # Stats the file on every call; the result is not cached.
    #
    # Returns a String like '100644'
    def mode
      File.stat(@path).mode.to_s(8)
    end

    # Public: Read file contents.
    #
    # Reads the whole file from disk on every call.
    #
    # Returns a String.
    def data
      File.read(@path)
    end

    # Public: Get byte size
    #
    # Returns an Integer.
    def size
      File.size(@path)
    end
  end
end
@@ -0,0 +1,185 @@
1
module Linguist
  # Heuristics that decide whether a blob looks like machine-generated
  # output, based on its filename and (when needed) its contents.
  class Generated
    # Extensions treated as Xcode project artifacts.
    XCODE_EXTENSIONS = ['.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'].freeze

    # Extensions checked by the minification heuristic.
    MINIFIABLE_EXTENSIONS = ['.js', '.css'].freeze

    # Extensions checked for the Protocol Buffer compiler banner.
    PROTOBUF_EXTENSIONS = ['.py', '.java', '.h', '.cc', '.cpp'].freeze

    # Banner looked for on the first line of protobuf output. Both the
    # single-space and double-space spellings are accepted.
    # NOTE(review): protoc is believed to emit two spaces after the
    # period - confirm against real protoc output.
    PROTOBUF_MARKERS = [
      "Generated by the protocol buffer compiler. DO NOT EDIT!",
      "Generated by the protocol buffer compiler.  DO NOT EDIT!"
    ].freeze

    # Public: Is the blob a generated file?
    #
    # name - String filename
    # data - String blob data. A block may also be passed in for lazy
    #        loading. This behavior is deprecated and you should always
    #        pass in a String.
    #
    # Returns true or false
    def self.generated?(name, data)
      new(name, data).generated?
    end

    # Internal: Initialize Generated instance
    #
    # name - String filename
    # data - String blob data (or a callable returning it)
    def initialize(name, data)
      @name = name
      @extname = File.extname(name)
      @_data = data
    end

    attr_reader :name, :extname

    # Lazy load blob data if a callable was passed in.
    #
    # The callable is only invoked the first time the data is needed, so
    # checks that look at the filename alone never trigger a load.
    #
    # Returns String data.
    def data
      @data ||= @_data.respond_to?(:call) ? @_data.call() : @_data
    end

    # Public: Get each line of data
    #
    # Trailing empty fields are kept (split limit -1), so data ending in a
    # newline yields a final "" element.
    #
    # Returns an Array of lines ([] when data is nil)
    def lines
      # TODO: data should be required to be a String, no nils
      @lines ||= data ? data.split("\n", -1) : []
    end

    # Internal: Is the blob a generated file?
    #
    # Generated source code is suppressed in diffs and is ignored by
    # language statistics.
    #
    # Please add additional test coverage to
    # `test/test_blob.rb#test_generated` if you make any changes.
    #
    # Returns true or false
    def generated?
      name == 'Gemfile.lock' ||
        minified_files? ||
        compiled_coffeescript? ||
        xcode_project_file? ||
        generated_parser? ||
        generated_net_docfile? ||
        generated_net_designer_file? ||
        generated_protocol_buffer?
    end

    # Internal: Is the blob an XCode project file?
    #
    # Generated if the file extension is an XCode project
    # file extension.
    #
    # Returns true or false.
    def xcode_project_file?
      XCODE_EXTENSIONS.include?(extname)
    end

    # Internal: Is the blob a minified file?
    #
    # Consider a file minified if it is longer than 200 characters and
    # fewer than 5% of its characters are whitespace/control characters
    # (anything sorting at or below a space).
    # Currently, only JS and CSS files are detected by this method.
    #
    # Returns true or false.
    def minified_files?
      return false unless MINIFIABLE_EXTENSIONS.include?(extname)
      return false unless data && data.length > 200

      blanks = data.each_char.count { |c| c <= ' ' }
      (blanks / data.length.to_f) < 0.05
    end

    # Internal: Is the blob of JS generated by CoffeeScript?
    #
    # CoffeeScript is meant to output JS that would be difficult to
    # tell if it was generated or not. Look for a number of patterns
    # output by the CS compiler.
    #
    # Returns true or false
    def compiled_coffeescript?
      return false unless extname == '.js'

      # CoffeeScript generated by > 1.2 include a comment on the first line
      return true if lines[0].to_s =~ /^\/\/ Generated by /

      wrapped = lines[0] == '(function() {' && # First line is module closure opening
        lines[-2] == '}).call(this);' &&       # Second to last line closes module closure
        lines[-1] == ''                        # Last line is blank
      return false unless wrapped

      score = 0
      lines.each do |line|
        next unless line =~ /var /

        # Underscored temp vars are likely to be Coffee
        score += 1 * line.scan(/(_fn|_i|_len|_ref|_results)/).size

        # bind and extend functions are very Coffee specific
        score += 3 * line.scan(/(__bind|__extends|__hasProp|__indexOf|__slice)/).size
      end

      # Require a score of 3. This is fairly arbitrary. Consider
      # tweaking later.
      score >= 3
    end

    # Internal: Is this a generated documentation file for a .NET assembly?
    #
    # .NET developers often check in the XML Intellisense file along with an
    # assembly - however, these don't have a special extension, so we have to
    # dig into the contents to determine if it's a docfile. Luckily, these files
    # are extremely structured, so recognizing them is easy.
    #
    # Returns true or false
    def generated_net_docfile?
      return false unless extname.downcase == ".xml"
      return false unless lines.count > 3

      # .NET Docfiles always open with <doc> and their first tag is an
      # <assembly> tag
      lines[1].include?("<doc>") &&
        lines[2].include?("<assembly>") &&
        lines[-2].include?("</doc>")
    end

    # Internal: Is this a codegen file for a .NET project?
    #
    # Visual Studio often uses code generation to generate partial classes, and
    # these files can be quite unwieldy. Let's hide them.
    #
    # Returns true or false
    def generated_net_designer_file?
      # `=~` yields a match index or nil; normalise to a real boolean so
      # callers (and `generated?`) get the documented return type.
      !!(name.downcase =~ /\.designer\.cs\z/)
    end

    # Internal: Is the blob of JS a parser generated by PEG.js?
    #
    # PEG.js-generated parsers are not meant to be consumed by humans.
    #
    # Returns true or false
    def generated_parser?
      return false unless extname == '.js'

      # PEG.js-generated parsers include a comment near the top of the file
      # that marks them as such. Only the first five lines are inspected.
      header = lines[0..4].join('')
      !!(header =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG\.js/)
    end

    # Internal: Is the blob a C++, Java or Python source file generated by the
    # Protocol Buffer compiler?
    #
    # Returns true or false.
    def generated_protocol_buffer?
      return false unless PROTOBUF_EXTENSIONS.include?(extname)
      return false unless lines.count > 1

      first_line = lines[0]
      PROTOBUF_MARKERS.any? { |marker| first_line.include?(marker) }
    end
  end
end
@@ -0,0 +1,495 @@
1
+ require 'escape_utils'
2
+ require 'pygments'
3
+ require 'yaml'
4
+
5
+ require 'linguist/classifier'
6
+ require 'linguist/samples'
7
+
8
+ module Linguist
9
+ # Language names that are recognizable by GitHub. Defined languages
10
+ # can be highlighted, searched and listed under the Top Languages page.
11
+ #
12
+ # Languages are defined in `lib/linguist/languages.yml`.
13
class Language
  # Registry state lives in class-level instance variables and is filled in
  # by `Language.create`.
  @languages          = []
  @index              = {}
  @name_index         = {}
  @alias_index        = {}

  # Lookups on these two hashes create (and store) an empty Array for
  # unknown keys, so callers can always concatenate the result.
  @extension_index          = Hash.new { |h,k| h[k] = [] }
  @filename_index           = Hash.new { |h,k| h[k] = [] }
  @primary_extension_index  = {}

  # Valid Languages types
  TYPES = [:data, :markup, :programming]

  # Names of non-programming languages that we will still detect
  #
  # Returns an array
  def self.detectable_markup
    ["AsciiDoc", "CSS", "Creole", "Less", "Markdown", "MediaWiki", "Org", "RDoc", "Sass", "Textile", "reStructuredText"]
  end

  # Internal: Create a new Language object and register it in every index.
  #
  # attributes - A hash of attributes
  #
  # Raises ArgumentError on a duplicate name, alias or primary extension,
  # or when an extension does not start with '.'.
  #
  # Returns a Language object
  def self.create(attributes = {})
    language = new(attributes)

    @languages << language

    # All Language names should be unique. Raise if there is a duplicate.
    if @name_index.key?(language.name)
      raise ArgumentError, "Duplicate language name: #{language.name}"
    end

    # Language name index
    @index[language.name] = @name_index[language.name] = language

    language.aliases.each do |name|
      # All Language aliases should be unique. Raise if there is a duplicate.
      if @alias_index.key?(name)
        raise ArgumentError, "Duplicate alias: #{name}"
      end

      @index[name] = @alias_index[name] = language
    end

    language.extensions.each do |extension|
      if extension !~ /^\./
        raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
      end

      @extension_index[extension] << language
    end

    if @primary_extension_index.key?(language.primary_extension)
      raise ArgumentError, "Duplicate primary extension: #{language.primary_extension}"
    end

    @primary_extension_index[language.primary_extension] = language

    language.filenames.each do |filename|
      @filename_index[filename] << language
    end

    language
  end

  # Public: Detects the Language of the blob.
  #
  # name - String filename
  # data - String blob data. A block may also be passed in for lazy
  #        loading. This behavior is deprecated and you should always
  #        pass in a String.
  # mode - Optional String mode (defaults to nil)
  #
  # When the filename matches more than one Language the data is handed to
  # the Classifier; otherwise the data is never inspected.
  #
  # Returns Language or nil.
  def self.detect(name, data, mode = nil)
    # A bit of an elegant hack. If the file is executable but extensionless,
    # append a "magic" extension so it can be classified with other
    # languages that have shebang scripts.
    if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
      name += ".script!"
    end

    possible_languages = find_by_filename(name)
    return possible_languages.first unless possible_languages.length > 1

    data = data.call() if data.respond_to?(:call)
    return nil if data.nil? || data == ""

    result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
    result ? Language[result[0]] : nil
  end

  # Public: Get all Languages
  #
  # Returns an Array of Languages
  def self.all
    @languages
  end

  # Public: Look up Language by its proper name.
  #
  # name - The String name of the Language
  #
  # Examples
  #
  #   Language.find_by_name('Ruby')
  #   # => #<Language name="Ruby">
  #
  # Returns the Language or nil if none was found.
  def self.find_by_name(name)
    @name_index[name]
  end

  # Public: Look up Language by one of its aliases.
  #
  # name - A String alias of the Language
  #
  # Examples
  #
  #   Language.find_by_alias('cpp')
  #   # => #<Language name="C++">
  #
  # Returns the Language or nil if none was found.
  def self.find_by_alias(name)
    @alias_index[name]
  end

  # Public: Look up Languages by filename.
  #
  # filename - The path String.
  #
  # Examples
  #
  #   Language.find_by_filename('foo.rb')
  #   # => [#<Language name="Ruby">]
  #
  # The Language whose primary extension matches is listed first, then
  # exact filename matches, then other extension matches.
  #
  # Returns all matching Languages or [] if none were found.
  def self.find_by_filename(filename)
    basename, extname = File.basename(filename), File.extname(filename)
    langs = [@primary_extension_index[extname]] +
            @filename_index[basename] +
            @extension_index[extname]
    langs.compact.uniq
  end

  # Public: Look up Language by its name or alias.
  #
  # name - The String name or alias of the Language
  #
  # Examples
  #
  #   Language['Ruby']
  #   # => #<Language name="Ruby">
  #
  #   Language['ruby']
  #   # => #<Language name="Ruby">
  #
  # Returns the Language or nil if none was found.
  def self.[](name)
    @index[name]
  end

  # Public: A List of popular languages
  #
  # Popular languages are sorted to the top of language chooser
  # dropdowns.
  #
  # This list is configured in "popular.yml".
  #
  # The result is memoized on first call.
  #
  # Returns an Array of Languages.
  def self.popular
    @popular ||= all.select(&:popular?).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of non-popular languages
  #
  # Unpopular languages appear below popular ones in language
  # chooser dropdowns.
  #
  # This list is created from all the languages not listed in "popular.yml".
  #
  # The result is memoized on first call.
  #
  # Returns an Array of Languages.
  def self.unpopular
    @unpopular ||= all.select(&:unpopular?).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of languages with assigned colors.
  #
  # Returns an Array of Languages.
  def self.colors
    @colors ||= all.select(&:color).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of languages compatible with Ace.
  #
  # Returns an Array of Languages.
  def self.ace_modes
    @ace_modes ||= all.select(&:ace_mode).sort_by { |lang| lang.name.downcase }
  end

  # Internal: Initialize a new Language
  #
  # attributes - A hash of attributes
  #
  # Raises ArgumentError when the name, lexer or primary extension is
  # missing, or when the type is not one of TYPES.
  def initialize(attributes = {})
    # @name is required
    @name = attributes[:name] || raise(ArgumentError, "missing name")

    # Set type
    @type = attributes[:type] ? attributes[:type].to_sym : nil
    if @type && !TYPES.include?(@type)
      raise ArgumentError, "invalid type: #{@type}"
    end

    @color = attributes[:color]

    # Set aliases
    @aliases = [default_alias_name] + (attributes[:aliases] || [])

    # Lookup Lexer object
    @lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) ||
      raise(ArgumentError, "#{@name} is missing lexer")

    @ace_mode = attributes[:ace_mode]
    @wrap     = attributes[:wrap] || false

    # Set legacy search term
    @search_term = attributes[:search_term] || default_alias_name

    # Set extensions or default to [].
    @extensions = attributes[:extensions] || []
    @filenames  = attributes[:filenames]  || []

    unless @primary_extension = attributes[:primary_extension]
      raise ArgumentError, "#{@name} is missing primary extension"
    end

    # Prepend primary extension unless its already included
    if primary_extension && !extensions.include?(primary_extension)
      @extensions = [primary_extension] + extensions
    end

    # Set popular, and searchable flags
    @popular    = attributes.key?(:popular)    ? attributes[:popular]    : false
    @searchable = attributes.key?(:searchable) ? attributes[:searchable] : true

    # If group name is set, save the name so we can lazy load it later
    if attributes[:group_name]
      @group = nil
      @group_name = attributes[:group_name]

    # Otherwise we can set it to self now
    else
      @group = self
    end
  end

  # Public: Get proper name
  #
  # Examples
  #
  #   # => "Ruby"
  #   # => "Python"
  #   # => "Perl"
  #
  # Returns the name String
  attr_reader :name

  # Public: Get type.
  #
  # Returns a type Symbol or nil.
  attr_reader :type

  # Public: Get color.
  #
  # Returns a hex color String.
  attr_reader :color

  # Public: Get aliases
  #
  # Examples
  #
  #   Language['C++'].aliases
  #   # => ["cpp"]
  #
  # Returns an Array of String names
  attr_reader :aliases

  # Deprecated: Get code search term
  #
  # Examples
  #
  #   # => "ruby"
  #   # => "python"
  #   # => "perl"
  #
  # Returns the name String
  attr_reader :search_term

  # Public: Get Lexer
  #
  # Returns the Lexer
  attr_reader :lexer

  # Public: Get Ace mode
  #
  # Examples
  #
  #  # => "text"
  #  # => "javascript"
  #  # => "c_cpp"
  #
  # Returns a String name or nil
  attr_reader :ace_mode

  # Public: Should language lines be wrapped
  #
  # Returns true or false
  attr_reader :wrap

  # Public: Get extensions
  #
  # Examples
  #
  #   # => ['.rb', '.rake', ...]
  #
  # Returns the extensions Array
  attr_reader :extensions

  # Deprecated: Get primary extension
  #
  # Defaults to the first extension but can be overridden
  # in the languages.yml.
  #
  # The primary extension can not be nil. Tests should verify this.
  #
  # This attribute is only used by app/helpers/gists_helper.rb for
  # creating the language dropdown. It really should be using `name`
  # instead. Would like to drop primary extension.
  #
  # Returns the extension String.
  attr_reader :primary_extension

  # Public: Get filenames
  #
  # Examples
  #
  #   # => ['Rakefile', ...]
  #
  # Returns the filenames Array
  attr_reader :filenames

  # Public: Get URL escaped name.
  #
  # Examples
  #
  #   "C%23"
  #   "C%2B%2B"
  #   "Common%20Lisp"
  #
  # Returns the escaped String.
  def escaped_name
    EscapeUtils.escape_url(name).gsub('+', '%20')
  end

  # Internal: Get default alias name
  #
  # The name is downcased and every whitespace character becomes '-'.
  #
  # Returns the alias name String
  def default_alias_name
    name.downcase.gsub(/\s/, '-')
  end

  # Public: Get Language group
  #
  # Resolved lazily by name on first access when a group name was given.
  #
  # Returns a Language
  def group
    @group ||= Language.find_by_name(@group_name)
  end

  # Public: Is it popular?
  #
  # Returns true or false
  def popular?
    @popular
  end

  # Public: Is it not popular?
  #
  # Returns true or false
  def unpopular?
    !popular?
  end

  # Public: Is it searchable?
  #
  # Unsearchable languages won't be indexed by solr and won't show
  # up in the code search dropdown.
  #
  # Returns true or false
  def searchable?
    @searchable
  end

  # Public: Highlight syntax of text
  #
  # text    - String of code to be highlighted
  # options - A Hash of options (defaults to {})
  #
  # The options Hash is forwarded to the lexer unchanged.
  #
  # Returns html String
  def colorize(text, options = {})
    lexer.highlight(text, options)
  end

  # Public: Return name as String representation
  def to_s
    name
  end

  # Languages compare by object identity.
  def ==(other)
    eql?(other)
  end

  def eql?(other)
    equal?(other)
  end

  # Hash on the (unique) name, so Languages work as Hash keys and with
  # Array#uniq alongside the identity-based eql?.
  def hash
    name.hash
  end

  def inspect
    "#<#{self.class} name=#{name}>"
  end
end
453
+
454
# Register every language defined in languages.yml, after merging in the
# extensions and filenames recorded in the samples database.
sample_extnames  = Samples::DATA['extnames']
sample_filenames = Samples::DATA['filenames']
popular_names    = YAML.load_file(File.expand_path("../popular.yml", __FILE__))
definitions      = YAML.load_file(File.expand_path("../languages.yml", __FILE__))

definitions.each do |name, options|
  options['extensions'] ||= []
  options['filenames']  ||= []

  # Append sample-derived extensions that the YAML entry does not list yet.
  (sample_extnames[name] || []).each do |extname|
    options['extensions'] << extname unless options['extensions'].include?(extname)
  end

  # Same for sample-derived filenames.
  (sample_filenames[name] || []).each do |filename|
    options['filenames'] << filename unless options['filenames'].include?(filename)
  end

  # Languages are searchable unless the YAML entry says otherwise.
  searchable = options.key?('searchable') ? options['searchable'] : true

  Language.create(
    :name              => name,
    :color             => options['color'],
    :type              => options['type'],
    :aliases           => options['aliases'],
    :lexer             => options['lexer'],
    :ace_mode          => options['ace_mode'],
    :wrap              => options['wrap'],
    :group_name        => options['group'],
    :searchable        => searchable,
    :search_term       => options['search_term'],
    :extensions        => options['extensions'].sort,
    :primary_extension => options['primary_extension'],
    :filenames         => options['filenames'],
    :popular           => popular_names.include?(name)
  )
end
495
+ end