geothird-linguist 2.6.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,56 @@
1
+ require 'linguist/blob_helper'
2
+
3
module Linguist
  # A FileBlob is a wrapper around a File object to make it quack
  # like a Grit::Blob. It provides the basic interface: `name`,
  # `data`, and `size`.
  class FileBlob
    include BlobHelper

    # Public: Initialize a new FileBlob from a path
    #
    # path      - A path String that exists on the file system.
    # base_path - Optional base directory String. When `path` begins with
    #             "#{base_path}/", that prefix is removed to form `name`.
    #
    # Returns a FileBlob.
    def initialize(path, base_path = nil)
      @path = path

      # Only strip the base when it really is a leading prefix. A plain
      # `sub` would also remove the first match found in the middle of
      # the path and yield a bogus name.
      prefix = base_path ? "#{base_path}/" : nil
      @name =
        if prefix && path.start_with?(prefix)
          path[prefix.length..-1]
        else
          path
        end
    end

    # Public: Filename
    #
    # Examples
    #
    #   FileBlob.new("/path/to/linguist/lib/linguist.rb").name
    #   # => "/path/to/linguist/lib/linguist.rb"
    #
    #   FileBlob.new("/path/to/linguist/lib/linguist.rb",
    #                "/path/to/linguist").name
    #   # => "lib/linguist.rb"
    #
    # Returns a String
    attr_reader :name

    # Public: Read file permissions
    #
    # Returns the stat mode as an octal String like '100644'
    def mode
      File.stat(@path).mode.to_s(8)
    end

    # Public: Read file contents.
    #
    # The file is read from disk on every call; nothing is cached.
    #
    # Returns a String.
    def data
      File.read(@path)
    end

    # Public: Get byte size
    #
    # Returns an Integer.
    def size
      File.size(@path)
    end
  end
end
@@ -0,0 +1,175 @@
1
module Linguist
  # Heuristics for deciding whether a blob is machine-generated source
  # (lock files, minified or compiled JavaScript, Xcode project files,
  # .NET doc files, PEG.js parsers, Cython output).
  class Generated
    # Public: Is the blob a generated file?
    #
    # name - String filename
    # data - String blob data. A block also may be passed in for lazy
    #        loading. This behavior is deprecated and you should always
    #        pass in a String.
    #
    # Return true or false
    def self.generated?(name, data)
      new(name, data).generated?
    end

    # Internal: Initialize Generated instance
    #
    # name - String filename
    # data - String blob data (or a callable returning it, see `data`)
    def initialize(name, data)
      @name = name
      @extname = File.extname(name)
      @_data = data
    end

    attr_reader :name, :extname

    # Lazy load blob data if a callable was passed in.
    #
    # The callable is invoked on first access and the result memoized
    # (a nil/false result is not memoized and is recomputed next time).
    #
    # Returns String data.
    def data
      @data ||= @_data.respond_to?(:call) ? @_data.call() : @_data
    end

    # Public: Get each line of data
    #
    # Splits on "\n" keeping trailing empty fields, so data ending in a
    # newline yields a final '' element. Note that an empty String
    # splits to an empty Array.
    #
    # Returns an Array of lines ([] when data is nil).
    def lines
      # TODO: data should be required to be a String, no nils
      @lines ||= data ? data.split("\n", -1) : []
    end

    # Internal: Is the blob a generated file?
    #
    # Generated source code is suppressed in diffs and is ignored by
    # language statistics.
    #
    # Please add additional test coverage to
    # `test/test_blob.rb#test_generated` if you make any changes.
    #
    # Return true or false
    def generated?
      name == 'Gemfile.lock' ||
        minified_javascript? ||
        compiled_coffeescript? ||
        xcode_project_file? ||
        generated_net_docfile? ||
        generated_parser? ||
        compiled_cython_file?
    end

    # Internal: Is the blob an XCode project file?
    #
    # Generated if the file extension is an XCode project
    # file extension.
    #
    # Returns true or false.
    def xcode_project_file?
      ['.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'].include?(extname)
    end

    # Internal: Is the blob minified JS?
    #
    # Consider JS minified if the average line length (integer
    # division of total characters by line count) is greater than 100.
    #
    # Returns true or false.
    def minified_javascript?
      return false unless extname == '.js'
      return false if lines.empty?

      total_length = lines.inject(0) { |sum, line| sum + line.length }
      (total_length / lines.length) > 100
    end

    # Internal: Is the blob of JS generated by CoffeeScript?
    #
    # CoffeeScript is meant to output JS that would be difficult to
    # tell if it was generated or not. Look for a number of patterns
    # output by the CS compiler.
    #
    # Return true or false
    def compiled_coffeescript?
      return false unless extname == '.js'

      # CoffeeScript generated by > 1.2 include a comment on the first line
      return true if lines[0] =~ /^\/\/ Generated by /

      closure_wrapped =
        lines[0] == '(function() {' &&   # First line is module closure opening
        lines[-2] == '}).call(this);' && # Second to last line closes module closure
        lines[-1] == ''                  # Last line is blank
      return false unless closure_wrapped

      score = 0

      lines.each do |line|
        next unless line =~ /var /

        # Underscored temp vars are likely to be Coffee
        score += 1 * line.scan(/(_fn|_i|_len|_ref|_results)/).length

        # bind and extend functions are very Coffee specific
        score += 3 * line.scan(/(__bind|__extends|__hasProp|__indexOf|__slice)/).length
      end

      # Require a score of 3. This is fairly arbitrary. Consider
      # tweaking later.
      score >= 3
    end

    # Internal: Is this a generated documentation file for a .NET assembly?
    #
    # .NET developers often check in the XML Intellisense file along with an
    # assembly - however, these don't have a special extension, so we have to
    # dig into the contents to determine if it's a docfile. Luckily, these files
    # are extremely structured, so recognizing them is easy.
    #
    # Returns true or false
    def generated_net_docfile?
      return false unless extname.downcase == ".xml"
      return false unless lines.count > 3

      # .NET Docfiles always open with <doc> and their first tag is an
      # <assembly> tag
      lines[1].include?("<doc>") &&
        lines[2].include?("<assembly>") &&
        lines[-2].include?("</doc>")
    end

    # Internal: Is the blob of JS a parser generated by PEG.js?
    #
    # PEG.js-generated parsers are not meant to be consumed by humans.
    #
    # Return true or false
    def generated_parser?
      return false unless extname == '.js'

      # PEG.js-generated parsers include a comment near the top of the file
      # that marks them as such. Only the first five lines are inspected.
      header = lines[0..4].join('')
      !!(header =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js/)
    end

    # Internal: Is this a compiled C/C++ file from Cython?
    #
    # Cython-compiled C/C++ files typically contain:
    # /* Generated by Cython x.x.x on ... */
    # on the first line.
    #
    # Return true or false
    def compiled_cython_file?
      return false unless ['.c', '.cpp'].include? extname

      # An empty or nil blob has no first line to inspect; without this
      # guard `lines[0]` is nil and `include?` raises NoMethodError.
      return false if lines.empty?

      lines[0].include?("Generated by Cython")
    end
  end
end
@@ -0,0 +1,481 @@
1
+ require 'escape_utils'
2
+ require 'pygments'
3
+ require 'yaml'
4
+
5
+ require 'linguist/classifier'
6
+ require 'linguist/samples'
7
+
8
module Linguist
  # Language names that are recognizable by GitHub. Defined languages
  # can be highlighted, searched and listed under the Top Languages page.
  #
  # Languages are defined in `lib/linguist/languages.yml`.
  class Language
    @languages       = []
    @index           = {}
    @name_index      = {}
    @alias_index     = {}
    @extension_index = Hash.new { |h,k| h[k] = [] }
    @filename_index  = Hash.new { |h,k| h[k] = [] }

    # Valid Languages types
    TYPES = [:data, :markup, :programming]

    # Internal: Create a new Language object and register it in the
    # class-level name, alias, extension and filename indexes.
    #
    # All validation happens before any index is touched, so a rejected
    # language leaves the registry unchanged.
    #
    # attributes - A hash of attributes
    #
    # Returns a Language object
    # Raises ArgumentError on a duplicate name, a duplicate alias, or an
    # extension that does not start with '.'.
    def self.create(attributes = {})
      language = new(attributes)

      # All Language names should be unique. Raise if there is a duplicate.
      if @name_index.key?(language.name)
        raise ArgumentError, "Duplicate language name: #{language.name}"
      end

      # All Language aliases should be unique, both against languages
      # registered earlier and within this language's own alias list.
      seen_aliases = {}
      language.aliases.each do |name|
        if @alias_index.key?(name) || seen_aliases.key?(name)
          raise ArgumentError, "Duplicate alias: #{name}"
        end
        seen_aliases[name] = true
      end

      language.extensions.each do |extension|
        if extension !~ /^\./
          raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
        end
      end

      # Everything checked out; commit to the registry.
      @languages << language

      # Language name index
      @index[language.name] = @name_index[language.name] = language

      language.aliases.each do |name|
        @index[name] = @alias_index[name] = language
      end

      language.extensions.each do |extension|
        @extension_index[extension] << language
      end

      language.filenames.each do |filename|
        @filename_index[filename] << language
      end

      language
    end

    # Public: Detects the Language of the blob.
    #
    # name - String filename
    # data - String blob data. A block also may be passed in for lazy
    #        loading. This behavior is deprecated and you should always
    #        pass in a String.
    # mode - Optional String mode (defaults to nil)
    #
    # Returns Language or nil.
    def self.detect(name, data, mode = nil)
      # A bit of an elegant hack. If the file is executable but extensionless,
      # append a "magic" extension so it can be classified with other
      # languages that have shebang scripts.
      if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
        name += ".script!"
      end

      possible_languages = find_by_filename(name)

      # Zero or one candidate: nothing to disambiguate.
      return possible_languages.first unless possible_languages.length > 1

      # Several candidates: the content is needed to pick one.
      data = data.call() if data.respond_to?(:call)
      return nil if data.nil? || data == ""

      if result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
        Language[result[0]]
      end
    end

    # Public: Get all Languages
    #
    # Returns an Array of Languages
    def self.all
      @languages
    end

    # Public: Look up Language by its proper name.
    #
    # name - The String name of the Language
    #
    # Examples
    #
    #   Language.find_by_name('Ruby')
    #   # => #<Language name="Ruby">
    #
    # Returns the Language or nil if none was found.
    def self.find_by_name(name)
      @name_index[name]
    end

    # Public: Look up Language by one of its aliases.
    #
    # name - A String alias of the Language
    #
    # Examples
    #
    #   Language.find_by_alias('cpp')
    #   # => #<Language name="C++">
    #
    # Returns the Language or nil if none was found.
    def self.find_by_alias(name)
      @alias_index[name]
    end

    # Public: Look up Languages by filename.
    #
    # filename - The path String.
    #
    # Examples
    #
    #   Language.find_by_filename('foo.rb')
    #   # => [#<Language name="Ruby">]
    #
    # Returns all matching Languages or [] if none were found.
    def self.find_by_filename(filename)
      basename, extname = File.basename(filename), File.extname(filename)

      # `fetch` bypasses the default procs of the index hashes, so a
      # lookup miss does not store a fresh empty Array under the key.
      @filename_index.fetch(basename, []) + @extension_index.fetch(extname, [])
    end

    # Public: Look up Language by its name or one of its aliases.
    #
    # name - The String name or alias of the Language
    #
    # Examples
    #
    #   Language['Ruby']
    #   # => #<Language name="Ruby">
    #
    #   Language['ruby']
    #   # => #<Language name="Ruby">
    #
    # Returns the Language or nil if none was found.
    def self.[](name)
      @index[name]
    end

    # Public: A List of popular languages
    #
    # Popular languages are sorted to the top of language chooser
    # dropdowns.
    #
    # This list is configured in "popular.yml".
    #
    # Returns an Array of Languages sorted by downcased name.
    def self.popular
      @popular ||= all.select(&:popular?).sort_by { |lang| lang.name.downcase }
    end

    # Public: A List of non-popular languages
    #
    # Unpopular languages appear below popular ones in language
    # chooser dropdowns.
    #
    # This list is created from all the languages not listed in "popular.yml".
    #
    # Returns an Array of Languages sorted by downcased name.
    def self.unpopular
      @unpopular ||= all.select(&:unpopular?).sort_by { |lang| lang.name.downcase }
    end

    # Public: A List of languages with assigned colors.
    #
    # Returns an Array of Languages.
    def self.colors
      @colors ||= all.select(&:color).sort_by { |lang| lang.name.downcase }
    end

    # Public: A List of languages compatible with Ace.
    #
    # Returns an Array of Languages.
    def self.ace_modes
      @ace_modes ||= all.select(&:ace_mode).sort_by { |lang| lang.name.downcase }
    end

    # Internal: Initialize a new Language
    #
    # attributes - A hash of attributes
    #
    # Raises ArgumentError when the name, lexer or primary extension is
    # missing, or when the type is not one of TYPES.
    def initialize(attributes = {})
      # @name is required
      @name = attributes[:name] || raise(ArgumentError, "missing name")

      # Set type
      @type = attributes[:type] ? attributes[:type].to_sym : nil
      if @type && !TYPES.include?(@type)
        raise ArgumentError, "invalid type: #{@type}"
      end

      @color = attributes[:color]

      # Set aliases; the default alias always comes first
      @aliases = [default_alias_name] + (attributes[:aliases] || [])

      # Lookup Lexer object
      @lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) ||
        raise(ArgumentError, "#{@name} is missing lexer")

      @ace_mode = attributes[:ace_mode]
      @wrap     = attributes[:wrap] || false

      # Set legacy search term
      @search_term = attributes[:search_term] || default_alias_name

      # Set extensions or default to [].
      @extensions = attributes[:extensions] || []
      @filenames  = attributes[:filenames] || []

      @primary_extension = attributes[:primary_extension]
      unless @primary_extension
        raise ArgumentError, "#{@name} is missing primary extension"
      end

      # Prepend primary extension unless it is already included
      unless @extensions.include?(@primary_extension)
        @extensions = [@primary_extension] + @extensions
      end

      # Set popular, and searchable flags
      @popular    = attributes.key?(:popular) ? attributes[:popular] : false
      @searchable = attributes.key?(:searchable) ? attributes[:searchable] : true

      # If group name is set, save the name so we can lazy load it later
      if attributes[:group_name]
        @group = nil
        @group_name = attributes[:group_name]

      # Otherwise we can set it to self now
      else
        @group = self
      end
    end

    # Public: Get proper name
    #
    # Examples
    #
    #   # => "Ruby"
    #   # => "Python"
    #   # => "Perl"
    #
    # Returns the name String
    attr_reader :name

    # Public: Get type.
    #
    # Returns a type Symbol or nil.
    attr_reader :type

    # Public: Get color.
    #
    # Returns a hex color String.
    attr_reader :color

    # Public: Get aliases
    #
    # Examples
    #
    #   Language['C++'].aliases
    #   # => ["cpp"]
    #
    # Returns an Array of String names
    attr_reader :aliases

    # Deprecated: Get code search term
    #
    # Examples
    #
    #   # => "ruby"
    #   # => "python"
    #   # => "perl"
    #
    # Returns the name String
    attr_reader :search_term

    # Public: Get Lexer
    #
    # Returns the Lexer
    attr_reader :lexer

    # Public: Get Ace mode
    #
    # Examples
    #
    #  # => "text"
    #  # => "javascript"
    #  # => "c_cpp"
    #
    # Returns a String name or nil
    attr_reader :ace_mode

    # Public: Should language lines be wrapped
    #
    # Returns true or false
    attr_reader :wrap

    # Public: Get extensions
    #
    # Examples
    #
    #   # => ['.rb', '.rake', ...]
    #
    # Returns the extensions Array
    attr_reader :extensions

    # Deprecated: Get primary extension
    #
    # Defaults to the first extension but can be overridden
    # in the languages.yml.
    #
    # The primary extension can not be nil. Tests should verify this.
    #
    # This attribute is only used by app/helpers/gists_helper.rb for
    # creating the language dropdown. It really should be using `name`
    # instead. Would like to drop primary extension.
    #
    # Returns the extension String.
    attr_reader :primary_extension

    # Public: Get filenames
    #
    # Examples
    #
    #   # => ['Rakefile', ...]
    #
    # Returns the filenames Array
    attr_reader :filenames

    # Public: Get URL escaped name.
    #
    # Examples
    #
    #   "C%23"
    #   "C%2B%2B"
    #   "Common%20Lisp"
    #
    # Returns the escaped String.
    def escaped_name
      EscapeUtils.escape_url(name).gsub('+', '%20')
    end

    # Internal: Get default alias name
    #
    # The name is downcased and every whitespace character is replaced
    # with '-'.
    #
    # Returns the alias name String
    def default_alias_name
      name.downcase.gsub(/\s/, '-')
    end

    # Public: Get Language group
    #
    # Resolved lazily by name on first access when a group name was
    # given; otherwise the language is its own group.
    #
    # Returns a Language
    def group
      @group ||= Language.find_by_name(@group_name)
    end

    # Public: Is it popular?
    #
    # Returns true or false
    def popular?
      @popular
    end

    # Public: Is it not popular?
    #
    # Returns true or false
    def unpopular?
      !popular?
    end

    # Public: Is it searchable?
    #
    # Unsearchable languages won't be indexed by solr and won't show
    # up in the code search dropdown.
    #
    # Returns true or false
    def searchable?
      @searchable
    end

    # Public: Highlight syntax of text
    #
    # text    - String of code to be highlighted
    # options - A Hash of options (defaults to {})
    #
    # Returns html String
    def colorize(text, options = {})
      # Forward the caller's options. Writing `options = {}` at the call
      # site would reassign the local and throw them away.
      lexer.highlight(text, options)
    end

    # Public: Return name as String representation
    def to_s
      name
    end

    # Public: Languages are compared by object identity.
    def ==(other)
      eql?(other)
    end

    # Public: Languages are compared by object identity.
    def eql?(other)
      equal?(other)
    end

    # Public: Hash code derived from the language name.
    def hash
      name.hash
    end

    # Public: Short developer-facing representation.
    def inspect
      "#<#{self.class} name=#{name}>"
    end
  end

  # Load-time registration: merge the extensions and filenames listed in
  # the samples data into each languages.yml entry, then create the
  # Language.
  extensions = Samples::DATA['extnames']
  filenames  = Samples::DATA['filenames']
  popular    = YAML.load_file(File.expand_path("../popular.yml", __FILE__))

  YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
    options['extensions'] ||= []
    options['filenames'] ||= []

    if extnames = extensions[name]
      extnames.each do |extname|
        if options['extensions'].include?(extname)
          warn "#{name} #{extname.inspect} is already defined in samples/. Remove from languages.yml."
        else
          options['extensions'] << extname
        end
      end
    end

    if fns = filenames[name]
      fns.each do |filename|
        if options['filenames'].include?(filename)
          warn "#{name} #{filename.inspect} is already defined in samples/. Remove from languages.yml."
        else
          options['filenames'] << filename
        end
      end
    end

    Language.create(
      :name              => name,
      :color             => options['color'],
      :type              => options['type'],
      :aliases           => options['aliases'],
      :lexer             => options['lexer'],
      :ace_mode          => options['ace_mode'],
      :wrap              => options['wrap'],
      :group_name        => options['group'],
      :searchable        => options.key?('searchable') ? options['searchable'] : true,
      :search_term       => options['search_term'],
      :extensions        => options['extensions'].sort,
      :primary_extension => options['primary_extension'],
      :filenames         => options['filenames'],
      :popular           => popular.include?(name)
    )
  end
end