geothird-linguist 2.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
+ require 'linguist/blob_helper'
2
+
module Linguist
  # A FileBlob is a wrapper around a File object to make it quack
  # like a Grit::Blob. It provides the basic interface: `name`,
  # `data`, and `size`.
  class FileBlob
    include BlobHelper

    # Public: Initialize a new FileBlob from a path
    #
    # path      - A path String that exists on the file system.
    # base_path - Optional base directory String. When `path` begins with
    #             "#{base_path}/", that leading prefix is removed to form
    #             the blob's `name`.
    #
    # Returns a FileBlob.
    def initialize(path, base_path = nil)
      @path = path

      # Only strip the base when it really is a leading prefix. A plain
      # substring substitution would also remove a matching segment from
      # the middle of an unrelated path.
      prefix = base_path ? "#{base_path}/" : nil
      @name = (prefix && path.start_with?(prefix)) ? path[prefix.length..-1] : path
    end

    # Public: Filename
    #
    # Examples
    #
    #   FileBlob.new("/path/to/linguist/lib/linguist.rb").name
    #   # =>  "/path/to/linguist/lib/linguist.rb"
    #
    #   FileBlob.new("/path/to/linguist/lib/linguist.rb",
    #                "/path/to/linguist").name
    #   # =>  "lib/linguist.rb"
    #
    # Returns a String
    attr_reader :name

    # Public: Read file permissions
    #
    # Returns the stat mode of the file as an octal String like '100644'
    def mode
      File.stat(@path).mode.to_s(8)
    end

    # Public: Read file contents.
    #
    # Returns a String.
    def data
      File.read(@path)
    end

    # Public: Get byte size
    #
    # Returns an Integer.
    def size
      File.size(@path)
    end
  end
end
@@ -0,0 +1,175 @@
module Linguist
  # Heuristics for deciding whether a blob is machine-generated source.
  class Generated
    # Public: Is the blob a generated file?
    #
    # name - String filename
    # data - String blob data. A block also may be passed in for lazy
    #        loading. This behavior is deprecated and you should always
    #        pass in a String.
    #
    # Return true or false
    def self.generated?(name, data)
      new(name, data).generated?
    end

    # Internal: Initialize Generated instance
    #
    # name - String filename
    # data - String blob data (or a callable returning it, see above)
    def initialize(name, data)
      @name = name
      @extname = File.extname(name)
      @_data = data
    end

    attr_reader :name, :extname

    # Lazy load blob data if a callable was passed in.
    #
    # Returns String data (or nil when nil was supplied).
    def data
      @data ||= @_data.respond_to?(:call) ? @_data.call() : @_data
    end

    # Public: Get each line of data
    #
    # Returns an Array of lines. Empty or nil data yields an empty Array.
    def lines
      # TODO: data should be required to be a String, no nils
      @lines ||= data ? data.split("\n", -1) : []
    end

    # Internal: Is the blob a generated file?
    #
    # Generated source code is suppressed in diffs and is ignored by
    # language statistics.
    #
    # Please add additional test coverage to
    # `test/test_blob.rb#test_generated` if you make any changes.
    #
    # Return true or false
    def generated?
      name == 'Gemfile.lock' ||
        minified_javascript? ||
        compiled_coffeescript? ||
        xcode_project_file? ||
        generated_net_docfile? ||
        generated_parser? ||
        compiled_cython_file?
    end

    # Internal: Is the blob an XCode project file?
    #
    # Generated if the file extension is an XCode project
    # file extension.
    #
    # Returns true or false.
    def xcode_project_file?
      ['.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'].include?(extname)
    end

    # Internal: Is the blob minified JS?
    #
    # Consider JS minified if the average line length (integer division
    # of total characters by line count) is greater than 100.
    #
    # Returns true or false.
    def minified_javascript?
      return false unless extname == '.js'
      return false if lines.empty?

      total_length = lines.inject(0) { |sum, line| sum + line.length }
      (total_length / lines.length) > 100
    end

    # Internal: Is the blob of JS generated by CoffeeScript?
    #
    # CoffeeScript is meant to output JS that would be difficult to
    # tell if it was generated or not. Look for a number of patterns
    # output by the CS compiler.
    #
    # Return true or false
    def compiled_coffeescript?
      return false unless extname == '.js'

      # CoffeeScript generated by > 1.2 include a comment on the first line
      return true if lines[0].to_s =~ /\A\/\/ Generated by /

      wrapped_in_closure =
        lines[0] == '(function() {' &&    # First line is module closure opening
        lines[-2] == '}).call(this);' &&  # Second to last line closes module closure
        lines[-1] == ''                   # Last line is blank
      return false unless wrapped_in_closure

      score = 0

      lines.each do |line|
        next unless line =~ /var /

        # Underscored temp vars are likely to be Coffee
        score += 1 * line.scan(/(_fn|_i|_len|_ref|_results)/).size

        # bind and extend functions are very Coffee specific
        score += 3 * line.scan(/(__bind|__extends|__hasProp|__indexOf|__slice)/).size
      end

      # Require a score of 3. This is fairly arbitrary. Consider
      # tweaking later.
      score >= 3
    end

    # Internal: Is this a generated documentation file for a .NET assembly?
    #
    # .NET developers often check in the XML Intellisense file along with an
    # assembly - however, these don't have a special extension, so we have to
    # dig into the contents to determine if it's a docfile. Luckily, these files
    # are extremely structured, so recognizing them is easy.
    #
    # Returns true or false
    def generated_net_docfile?
      return false unless extname.downcase == ".xml"
      return false unless lines.length > 3

      # .NET Docfiles always open with <doc> and their first tag is an
      # <assembly> tag
      lines[1].include?("<doc>") &&
        lines[2].include?("<assembly>") &&
        lines[-2].include?("</doc>")
    end

    # Internal: Is the blob of JS a parser generated by PEG.js?
    #
    # PEG.js-generated parsers are not meant to be consumed by humans.
    #
    # Return true or false
    def generated_parser?
      return false unless extname == '.js'

      # PEG.js-generated parsers include a comment near the top of the file
      # that marks them as such. Only the first five lines are inspected.
      header = lines[0..4].join('')
      !!(header =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG\.js/)
    end

    # Internal: Is this a compiled C/C++ file from Cython?
    #
    # Cython-compiled C/C++ files typically contain:
    # /* Generated by Cython x.x.x on ... */
    # on the first line.
    #
    # Return true or false
    def compiled_cython_file?
      return false unless ['.c', '.cpp'].include?(extname)

      # Empty or nil data produces no lines at all; such a file is not generated.
      first_line = lines[0]
      return false if first_line.nil?

      first_line.include?("Generated by Cython")
    end
  end
end
@@ -0,0 +1,481 @@
1
+ require 'escape_utils'
2
+ require 'pygments'
3
+ require 'yaml'
4
+
5
+ require 'linguist/classifier'
6
+ require 'linguist/samples'
7
+
8
+ module Linguist
# Language names that are recognizable by GitHub. Defined languages
# can be highlighted, searched and listed under the Top Languages page.
#
# Languages are defined in `lib/linguist/languages.yml`.
class Language
  @languages       = []
  @index           = {}
  @name_index      = {}
  @alias_index     = {}
  @extension_index = Hash.new { |h,k| h[k] = [] }
  @filename_index  = Hash.new { |h,k| h[k] = [] }

  # Valid Languages types
  TYPES = [:data, :markup, :programming]

  # Internal: Create a new Language object and register it in the
  # name, alias, extension and filename indexes.
  #
  # attributes - A hash of attributes
  #
  # Raises ArgumentError on a duplicate name, a duplicate alias, or an
  # extension that does not start with '.'. Nothing is registered when
  # an error is raised.
  #
  # Returns a Language object
  def self.create(attributes = {})
    language = new(attributes)

    # Validate everything before touching any registry so that a rejected
    # language never leaves the indexes partially updated.

    # All Language names should be unique. Raise if there is a duplicate.
    if @name_index.key?(language.name)
      raise ArgumentError, "Duplicate language name: #{language.name}"
    end

    # All Language aliases should be unique. Raise if there is a duplicate,
    # including a repeat within this language's own alias list.
    seen_aliases = {}
    language.aliases.each do |name|
      if @alias_index.key?(name) || seen_aliases.key?(name)
        raise ArgumentError, "Duplicate alias: #{name}"
      end
      seen_aliases[name] = true
    end

    language.extensions.each do |extension|
      unless extension.start_with?('.')
        raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
      end
    end

    @languages << language

    # Language name index
    @index[language.name] = @name_index[language.name] = language

    language.aliases.each do |name|
      @index[name] = @alias_index[name] = language
    end

    language.extensions.each do |extension|
      @extension_index[extension] << language
    end

    language.filenames.each do |filename|
      @filename_index[filename] << language
    end

    language
  end

  # Public: Detects the Language of the blob.
  #
  # name - String filename
  # data - String blob data. A block also may be passed in for lazy
  #        loading. This behavior is deprecated and you should always
  #        pass in a String.
  # mode - Optional String mode (defaults to nil)
  #
  # Returns Language or nil.
  def self.detect(name, data, mode = nil)
    # A bit of an elegant hack. If the file is executable but extensionless,
    # append a "magic" extension so it can be classified with other
    # languages that have shebang scripts.
    # NOTE(review): the mask 05 requires both the "other" read and execute
    # bits to be set - confirm this is the intended notion of executable.
    if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05
      name += ".script!"
    end

    possible_languages = find_by_filename(name)

    # Zero or one candidate: nothing to disambiguate.
    return possible_languages.first unless possible_languages.length > 1

    data = data.call() if data.respond_to?(:call)
    return nil if data.nil? || data == ""

    result = Classifier.classify(Samples::DATA, data, possible_languages.map(&:name)).first
    result ? Language[result[0]] : nil
  end

  # Public: Get all Languages
  #
  # Returns an Array of Languages
  def self.all
    @languages
  end

  # Public: Look up Language by its proper name.
  #
  # name - The String name of the Language
  #
  # Examples
  #
  #   Language.find_by_name('Ruby')
  #   # => #<Language name="Ruby">
  #
  # Returns the Language or nil if none was found.
  def self.find_by_name(name)
    @name_index[name]
  end

  # Public: Look up Language by one of its aliases.
  #
  # name - A String alias of the Language
  #
  # Examples
  #
  #   Language.find_by_alias('cpp')
  #   # => #<Language name="C++">
  #
  # Returns the Language or nil if none was found.
  def self.find_by_alias(name)
    @alias_index[name]
  end

  # Public: Look up Languages by filename.
  #
  # filename - The path String.
  #
  # Examples
  #
  #   Language.find_by_filename('foo.rb')
  #   # => [#<Language name="Ruby">]
  #
  # Returns all matching Languages or [] if none were found.
  def self.find_by_filename(filename)
    basename, extname = File.basename(filename), File.extname(filename)

    # Use fetch rather than [] so a miss does not trigger the indexes'
    # default block, which would store a new empty Array for every
    # unknown filename or extension ever queried.
    @filename_index.fetch(basename, []) + @extension_index.fetch(extname, [])
  end

  # Public: Look up Language by its name or alias.
  #
  # name - The String name of the Language
  #
  # Examples
  #
  #   Language['Ruby']
  #   # => #<Language name="Ruby">
  #
  #   Language['ruby']
  #   # => #<Language name="Ruby">
  #
  # Returns the Language or nil if none was found.
  def self.[](name)
    @index[name]
  end

  # Public: A List of popular languages
  #
  # Popular languages are sorted to the top of language chooser
  # dropdowns.
  #
  # This list is configured in "popular.yml".
  #
  # Returns an Array of Languages sorted by lowercased name. The result
  # is memoized on first call.
  def self.popular
    @popular ||= all.select(&:popular?).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of non-popular languages
  #
  # Unpopular languages appear below popular ones in language
  # chooser dropdowns.
  #
  # This list is created from all the languages not listed in "popular.yml".
  #
  # Returns an Array of Languages sorted by lowercased name. The result
  # is memoized on first call.
  def self.unpopular
    @unpopular ||= all.select(&:unpopular?).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of languages with assigned colors.
  #
  # Returns an Array of Languages.
  def self.colors
    @colors ||= all.select(&:color).sort_by { |lang| lang.name.downcase }
  end

  # Public: A List of languages compatible with Ace.
  #
  # Returns an Array of Languages.
  def self.ace_modes
    @ace_modes ||= all.select(&:ace_mode).sort_by { |lang| lang.name.downcase }
  end

  # Internal: Initialize a new Language
  #
  # attributes - A hash of attributes. :name and :primary_extension are
  #              required; a Pygments lexer must exist for :lexer (or for
  #              the name when :lexer is absent).
  #
  # Raises ArgumentError when the name, lexer or primary extension is
  # missing, or when :type is not one of TYPES.
  def initialize(attributes = {})
    # @name is required
    @name = attributes[:name] || raise(ArgumentError, "missing name")

    # Set type
    @type = attributes[:type] ? attributes[:type].to_sym : nil
    if @type && !TYPES.include?(@type)
      raise ArgumentError, "invalid type: #{@type}"
    end

    @color = attributes[:color]

    # Set aliases; the default alias always comes first
    @aliases = [default_alias_name] + (attributes[:aliases] || [])

    # Lookup Lexer object
    @lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || name) ||
      raise(ArgumentError, "#{@name} is missing lexer")

    @ace_mode = attributes[:ace_mode]
    @wrap     = attributes[:wrap] || false

    # Set legacy search term
    @search_term = attributes[:search_term] || default_alias_name

    # Set extensions or default to [].
    @extensions = attributes[:extensions] || []
    @filenames  = attributes[:filenames]  || []

    @primary_extension = attributes[:primary_extension]
    unless @primary_extension
      raise ArgumentError, "#{@name} is missing primary extension"
    end

    # Prepend primary extension unless it's already included. A new Array
    # is built so the caller's Array is not mutated.
    unless @extensions.include?(@primary_extension)
      @extensions = [@primary_extension] + @extensions
    end

    # Set popular, and searchable flags
    @popular    = attributes.key?(:popular)    ? attributes[:popular]    : false
    @searchable = attributes.key?(:searchable) ? attributes[:searchable] : true

    # If group name is set, save the name so we can lazy load it later
    if attributes[:group_name]
      @group = nil
      @group_name = attributes[:group_name]

    # Otherwise we can set it to self now
    else
      @group = self
    end
  end

  # Public: Get proper name
  #
  # Examples
  #
  #   # => "Ruby"
  #   # => "Python"
  #   # => "Perl"
  #
  # Returns the name String
  attr_reader :name

  # Public: Get type.
  #
  # Returns a type Symbol or nil.
  attr_reader :type

  # Public: Get color.
  #
  # Returns the :color attribute as given (e.g. a hex color String), or nil.
  attr_reader :color

  # Public: Get aliases
  #
  # Examples
  #
  #   Language['C++'].aliases
  #   # => ["cpp"]
  #
  # Returns an Array of String names
  attr_reader :aliases

  # Deprecated: Get code search term
  #
  # Examples
  #
  #   # => "ruby"
  #   # => "python"
  #   # => "perl"
  #
  # Returns the name String
  attr_reader :search_term

  # Public: Get Lexer
  #
  # Returns the Lexer
  attr_reader :lexer

  # Public: Get Ace mode
  #
  # Examples
  #
  #  # => "text"
  #  # => "javascript"
  #  # => "c_cpp"
  #
  # Returns a String name or nil
  attr_reader :ace_mode

  # Public: Should language lines be wrapped
  #
  # Returns true or false
  attr_reader :wrap

  # Public: Get extensions
  #
  # Examples
  #
  #   # => ['.rb', '.rake', ...]
  #
  # Returns the extensions Array
  attr_reader :extensions

  # Deprecated: Get primary extension
  #
  # The primary extension is required: the constructor raises
  # ArgumentError when it is missing, so it is never nil here.
  #
  # This attribute is only used by app/helpers/gists_helper.rb for
  # creating the language dropdown. It really should be using `name`
  # instead. Would like to drop primary extension.
  #
  # Returns the extension String.
  attr_reader :primary_extension

  # Public: Get filenames
  #
  # Examples
  #
  #   # => ['Rakefile', ...]
  #
  # Returns the filenames Array
  attr_reader :filenames

  # Public: Get URL escaped name.
  #
  # Examples
  #
  #   "C%23"
  #   "C%2B%2B"
  #   "Common%20Lisp"
  #
  # Returns the escaped String.
  def escaped_name
    EscapeUtils.escape_url(name).gsub('+', '%20')
  end

  # Internal: Get default alias name
  #
  # The name is downcased and every whitespace character becomes '-'.
  #
  # Returns the alias name String
  def default_alias_name
    name.downcase.gsub(/\s/, '-')
  end

  # Public: Get Language group
  #
  # When a group name was given it is resolved lazily by proper name;
  # otherwise the language is its own group.
  #
  # Returns a Language (nil if the named group is not registered)
  def group
    @group ||= Language.find_by_name(@group_name)
  end

  # Public: Is it popular?
  #
  # Returns true or false
  def popular?
    @popular
  end

  # Public: Is it not popular?
  #
  # Returns true or false
  def unpopular?
    !popular?
  end

  # Public: Is it searchable?
  #
  # Unsearchable languages won't be indexed by solr and won't show
  # up in the code search dropdown.
  #
  # Returns true or false
  def searchable?
    @searchable
  end

  # Public: Highlight syntax of text
  #
  # text    - String of code to be highlighted
  # options - A Hash of options forwarded to the lexer (defaults to {})
  #
  # Returns html String
  def colorize(text, options = {})
    # Forward the caller's options; an inline `options = {}` here would
    # reassign the local and silently discard them.
    lexer.highlight(text, options)
  end

  # Public: Return name as String representation
  def to_s
    name
  end

  # Languages compare by object identity.
  def ==(other)
    eql?(other)
  end

  def eql?(other)
    equal?(other)
  end

  def hash
    name.hash
  end

  def inspect
    "#<#{self.class} name=#{name}>"
  end
end
435
+
# Load every language declared in languages.yml, folding in the extensions
# and filenames recorded in the samples database, and register each one.
sample_extensions = Samples::DATA['extnames']
sample_filenames  = Samples::DATA['filenames']
popular_names     = YAML.load_file(File.expand_path("../popular.yml", __FILE__))

# Append each sampled entry to the declared list, warning when
# languages.yml already declares an entry that the samples provide.
merge_sampled = lambda do |language_name, declared, sampled|
  (sampled || []).each do |entry|
    if declared.include?(entry)
      warn "#{language_name} #{entry.inspect} is already defined in samples/. Remove from languages.yml."
    else
      declared << entry
    end
  end
end

YAML.load_file(File.expand_path("../languages.yml", __FILE__)).each do |name, options|
  options['extensions'] ||= []
  options['filenames']  ||= []

  merge_sampled.call(name, options['extensions'], sample_extensions[name])
  merge_sampled.call(name, options['filenames'],  sample_filenames[name])

  # Languages are searchable unless languages.yml says otherwise.
  searchable = options.key?('searchable') ? options['searchable'] : true

  Language.create(
    :name              => name,
    :color             => options['color'],
    :type              => options['type'],
    :aliases           => options['aliases'],
    :lexer             => options['lexer'],
    :ace_mode          => options['ace_mode'],
    :wrap              => options['wrap'],
    :group_name        => options['group'],
    :searchable        => searchable,
    :search_term       => options['search_term'],
    :extensions        => options['extensions'].sort,
    :primary_extension => options['primary_extension'],
    :filenames         => options['filenames'],
    :popular           => popular_names.include?(name)
  )
end
481
+ end