tongue 0.2.10.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/tongue +46 -0
- data/lib/linguist.rb +6 -0
- data/lib/linguist/blob_helper.rb +333 -0
- data/lib/linguist/classifier.rb +171 -0
- data/lib/linguist/file_blob.rb +58 -0
- data/lib/linguist/generated.rb +241 -0
- data/lib/linguist/heuristics.rb +38 -0
- data/lib/linguist/language.rb +578 -0
- data/lib/linguist/languages.yml +1901 -0
- data/lib/linguist/md5.rb +38 -0
- data/lib/linguist/popular.yml +29 -0
- data/lib/linguist/repository.rb +95 -0
- data/lib/linguist/samples.json +47115 -0
- data/lib/linguist/samples.rb +149 -0
- data/lib/linguist/tokenizer.rb +198 -0
- data/lib/linguist/vendor.yml +167 -0
- metadata +143 -0
@@ -0,0 +1,58 @@
|
|
1
|
+
require 'linguist/blob_helper'
|
2
|
+
|
3
|
+
module Linguist
|
4
|
+
# A FileBlob is a wrapper around a File object to make it quack
|
5
|
+
# like a Grit::Blob. It provides the basic interface: `name`,
|
6
|
+
# `data`, and `size`.
|
7
|
+
class FileBlob
|
8
|
+
include BlobHelper
|
9
|
+
|
10
|
+
# Public: Initialize a new FileBlob from a path.
#
# path      - A path String that exists on the file system.
# base_path - Optional base directory String. When `path` starts with
#             "base_path/", that prefix is removed to form the blob name.
#
# Returns a FileBlob.
def initialize(path, base_path = nil)
  @path = path
  @name = path

  if base_path
    # Accept a base path with or without a trailing separator, and strip it
    # only when it really is a prefix: String#sub would also remove the
    # first match found in the middle of an unrelated path.
    prefix = "#{base_path.chomp('/')}/"
    @name = path[prefix.length..-1] if path.start_with?(prefix)
  end
end
|
20
|
+
|
21
|
+
# Public: Filename
|
22
|
+
#
|
23
|
+
# Examples
|
24
|
+
#
|
25
|
+
# FileBlob.new("/path/to/linguist/lib/linguist.rb").name
|
26
|
+
# # => "/path/to/linguist/lib/linguist.rb"
|
27
|
+
#
|
28
|
+
# FileBlob.new("/path/to/linguist/lib/linguist.rb",
|
29
|
+
# "/path/to/linguist").name
|
30
|
+
# # => "lib/linguist.rb"
|
31
|
+
#
|
32
|
+
# Returns a String
|
33
|
+
attr_reader :name
|
34
|
+
|
35
|
+
# Public: Read the file's permission bits from the file system.
#
# Returns the mode as an octal String like '100644'.
def mode
  stat = File.stat(@path)
  stat.mode.to_s(8)
end
|
41
|
+
|
42
|
+
# Public: Read the leading part of the file contents.
#
# The file is read, transcoded to UTF-8 (invalid and undefined sequences
# are replaced) and cut to its first bytes (byte range 0..3072).
#
# Returns a UTF-8 String, or nil when the sample is not valid UTF-8.
def data
  text = File.read(@path)
  text.encode!('UTF-8', :invalid => :replace, :undefined => :replace)
  sample = text.byteslice(0..3072)

  # The cut is byte based, so it can land inside a multi-byte character and
  # leave a dangling partial sequence. A UTF-8 character is at most 4 bytes,
  # so dropping up to 3 trailing bytes is enough to repair a split
  # character without hiding genuinely broken content further up.
  3.times do
    break if sample.valid_encoding?
    sample = sample.byteslice(0, sample.bytesize - 1)
  end

  sample.valid_encoding? ? sample : nil
end
|
50
|
+
|
51
|
+
# Public: Get the size of the file on disk.
#
# Returns the byte count as an Integer.
def size
  File.stat(@path).size
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,241 @@
|
|
1
|
+
module Linguist
|
2
|
+
class Generated
|
3
|
+
# Public: Is the blob a generated file?
#
# name - String filename
# data - String blob data. A block may also be passed in for lazy
#        loading. This behavior is deprecated and you should always
#        pass in a String.
#
# Returns the result of Generated#generated? for the given name and data.
def self.generated?(name, data)
  detector = new(name, data)
  detector.generated?
end
|
14
|
+
|
15
|
+
# Internal: Initialize a Generated instance.
#
# name - String filename; its extension is cached as `extname`.
# data - String blob data (or a callable, see #data).
def initialize(name, data)
  @_data   = data
  @name    = name
  @extname = File.extname(@name)
end
|
24
|
+
|
25
|
+
attr_reader :name, :extname
|
26
|
+
|
27
|
+
# Internal: Blob data, resolved lazily.
#
# When the object given to the constructor responds to `call` it is
# invoked and its result is used; otherwise the object itself is the
# data. A truthy result is memoized.
#
# Returns String data.
def data
  return @data if @data

  @data = if @_data.respond_to?(:call)
            @_data.call()
          else
            @_data
          end
end
|
35
|
+
|
36
|
+
# Public: Get each line of data.
#
# The data is split on "\n" keeping trailing empty fields, so data that
# ends with a newline yields a final empty String. Nil data yields [].
#
# Returns an Array of line Strings.
def lines
  return @lines if @lines

  # TODO: data should be required to be a String, no nils
  blob = data
  @lines = blob ? blob.split("\n", -1) : []
end
|
43
|
+
|
44
|
+
# Internal: Is the blob a generated file?
#
# Generated source code is suppressed in diffs and is ignored by
# language statistics.
#
# Please add additional test coverage to
# `test/test_blob.rb#test_generated` if you make any changes.
#
# Returns true or false.
def generated?
  # Some of the predicates below yield a match index or nil rather than a
  # boolean; coerce so the documented true/false contract always holds.
  # The checks are still evaluated lazily, in order.
  !!(name == 'Gemfile.lock' ||
     minified_files? ||
     compiled_coffeescript? ||
     xcode_project_file? ||
     generated_parser? ||
     generated_net_docfile? ||
     generated_net_designer_file? ||
     generated_postscript? ||
     generated_protocol_buffer? ||
     generated_jni_header? ||
     composer_lock? ||
     node_modules?)
end
|
67
|
+
|
68
|
+
# Internal: Is the blob an Xcode project file?
#
# Decided purely by the file extension.
#
# Returns true or false.
def xcode_project_file?
  case extname
  when '.xib', '.nib', '.storyboard', '.pbxproj', '.xcworkspacedata', '.xcuserstate'
    true
  else
    false
  end
end
|
77
|
+
|
78
|
+
# Internal: Is the blob a minified file?
#
# Consider a file minified if the average line length (integer division
# of total characters by line count) is greater than 110 characters.
#
# Currently, only JS and CSS files are detected by this method.
#
# Returns true or false.
def minified_files?
  # Always answer with a boolean, also for other file types.
  return false unless ['.js', '.css'].include?(extname)
  return false if lines.empty?

  total_length = lines.inject(0) { |sum, line| sum + line.length }
  (total_length / lines.length) > 110
end
|
94
|
+
|
95
|
+
# Internal: Is the blob of JS generated by CoffeeScript?
#
# CoffeeScript is meant to output JS that would be difficult to
# tell if it was generated or not. Look for a number of patterns
# output by the CS compiler.
#
# Returns true or false.
def compiled_coffeescript?
  return false if extname != '.js'

  # CoffeeScript generated by > 1.2 include a comment on the first line
  return true if lines[0] =~ /^\/\/ Generated by /

  # The output must be wrapped in the module closure: opening on the first
  # line, closing on the second to last line, then a blank last line.
  wrapped = lines[0] == '(function() {' &&
            lines[-2] == '}).call(this);' &&
            lines[-1] == ''
  return false unless wrapped

  score = lines.inject(0) do |total, line|
    next total unless line =~ /var /

    # Underscored temp vars are likely to be Coffee (1 point each);
    # bind and extend helpers are very Coffee specific (3 points each).
    temp_vars = line.scan(/(_fn|_i|_len|_ref|_results)/).size
    helpers   = line.scan(/(__bind|__extends|__hasProp|__indexOf|__slice)/).size
    total + temp_vars + 3 * helpers
  end

  # Require a score of 3. This is fairly arbitrary. Consider
  # tweaking later.
  score >= 3
end
|
133
|
+
|
134
|
+
# Internal: Is this a generated documentation file for a .NET assembly?
#
# .NET developers often check in the XML Intellisense file along with an
# assembly - however, these don't have a special extension, so we have to
# dig into the contents to determine if it's a docfile. Luckily, these files
# are extremely structured, so recognizing them is easy.
#
# Returns true or false.
def generated_net_docfile?
  return false if extname.downcase != ".xml"
  return false if lines.count <= 3

  # .NET Docfiles always open with <doc> and their first tag is an
  # <assembly> tag; the second to last line closes the document.
  opening, first_tag, closing = lines[1], lines[2], lines[-2]
  opening.include?("<doc>") &&
    first_tag.include?("<assembly>") &&
    closing.include?("</doc>")
end
|
152
|
+
|
153
|
+
# Internal: Is this a codegen file for a .NET project?
#
# Visual Studio often uses code generation to generate partial classes, and
# these files can be quite unwieldy. Let's hide them.
#
# The check is a case-insensitive test for a ".designer.cs" name suffix.
#
# Returns true or false.
def generated_net_designer_file?
  # end_with? yields a real boolean (the regex match returned an index or
  # nil) and anchors at the true end of the name.
  name.downcase.end_with?('.designer.cs')
end
|
162
|
+
|
163
|
+
# Internal: Is the blob of JS a parser generated by PEG.js?
#
# PEG.js-generated parsers are not meant to be consumed by humans.
#
# Returns true or false.
def generated_parser?
  return false if extname != '.js'

  # PEG.js-generated parsers include a comment near the top of the file
  # that marks them as such; only the first five lines are inspected.
  header = lines[0..4].join('')
  header =~ /^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js/ ? true : false
end
|
179
|
+
|
180
|
+
# Internal: Is the blob of PostScript generated?
#
# PostScript files are often generated by other programs. If they tell us so,
# we can detect them.
#
# Returns true or false.
def generated_postscript?
  return false unless ['.ps', '.eps'].include?(extname)

  # We analyze the "%%Creator:" comment, which contains the author/generator
  # of the file. If there is one, it should be in one of the first few lines.
  creator = lines[0..9].find { |line| line =~ /^%%Creator: / }
  return false if creator.nil?

  # Most generators write their version number, while human authors' or companies'
  # names don't contain numbers. So look if the line contains digits. Also
  # look for some special cases without version numbers.
  # The digit test is coerced so a boolean (not a match index) is returned.
  !!(creator =~ /[0-9]/) ||
    creator.include?("mpage") ||
    creator.include?("draw") ||
    creator.include?("ImageMagick")
end
|
202
|
+
|
203
|
+
# Internal: Is the blob a C++, Java or Python source file generated by the
# Protocol Buffer compiler?
#
# Only the first line is inspected, and the file must have more than one
# line.
#
# Returns true or false.
def generated_protocol_buffer?
  return false unless ['.py', '.java', '.h', '.cc', '.cpp'].include?(extname)
  return false unless lines.count > 1

  # Accept any run of spaces before "DO NOT EDIT!": protoc separates the
  # two sentences with two spaces, which a single-space literal misses.
  !!(lines[0] =~ /Generated by the protocol buffer compiler\. +DO NOT EDIT!/)
end
|
213
|
+
|
214
|
+
# Internal: Is the blob a C/C++ header generated by the Java JNI tool javah?
#
# Requires a ".h" file with more than two lines whose first line carries
# the machine-generated banner and whose second line includes jni.h.
#
# Returns true or false.
def generated_jni_header?
  return false if extname != '.h' || lines.count <= 2

  banner, include_line = lines[0], lines[1]
  banner.include?("/* DO NOT EDIT THIS FILE - it is machine generated */") &&
    include_line.include?("#include <jni.h>")
end
|
224
|
+
|
225
|
+
# Internal: Does the blob live under a node_modules/ directory?
#
# node_modules/ can contain large amounts of files, in general not meant
# for humans in pull requests.
#
# Returns true or false.
def node_modules?
  name.match(/node_modules\//) ? true : false
end
|
232
|
+
|
233
|
+
# Internal: Is the blob a PHP Composer lock file?
#
# The PHP composer tool generates a lock file to represent a specific
# dependency state. In general not meant for humans in pull requests.
#
# Returns true or false.
def composer_lock?
  # The dot is escaped so it matches a literal "." only; unescaped it
  # accepted any character (e.g. "composerXlock").
  !!name.match(/composer\.lock/)
end
|
240
|
+
end
|
241
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Linguist
|
2
|
+
# A collection of simple heuristics that can be used to better analyze languages.
|
3
|
+
class Heuristics
|
4
|
+
ACTIVE = false
|
5
|
+
|
6
|
+
# Public: Given an array of String language names, apply heuristics
# against the given data and return the matching languages.
#
# data      - Array of tokens or String data to analyze.
# languages - Array of language name Strings to restrict to.
#
# Returns the result of disambiguate_h when heuristics are active and
# every candidate is "Objective-C" or "C++"; nil otherwise.
def self.find_by_heuristics(data, languages)
  return unless active?
  return unless languages.all? { |l| ["Objective-C", "C++"].include?(l) }

  disambiguate_h(data, languages)
end
|
21
|
+
|
22
|
+
# .h extensions are ambiguous between C, C++, and Objective-C.
# We want to shortcut look for Objective-C _and_ now C++ too!
#
# data      - Array of tokens or String data; tested with `include?`.
# languages - Candidate language names (not consulted by this method).
#
# Returns an array of Languages or []
# TODO rename this method as we're not strictly disambiguating between .h files here.
def self.disambiguate_h(data, languages)
  markers = [
    ["Objective-C", "@interface"],
    ["C++", "#include <cstdint>"]
  ]

  hits = markers.select { |_, marker| data.include?(marker) }
  hits.map { |language_name, _| Language[language_name] }
end
|
33
|
+
|
34
|
+
# Internal: Are heuristics switched on?
#
# Returns true or false, according to the ACTIVE constant.
def self.active?
  ACTIVE ? true : false
end
|
37
|
+
end
|
38
|
+
end
|
@@ -0,0 +1,578 @@
|
|
1
|
+
require 'escape_utils'
|
2
|
+
require 'pygments'
|
3
|
+
require 'yaml'
|
4
|
+
begin
|
5
|
+
require 'json'
|
6
|
+
rescue LoadError
|
7
|
+
end
|
8
|
+
|
9
|
+
require 'linguist/classifier'
|
10
|
+
require 'linguist/heuristics'
|
11
|
+
require 'linguist/samples'
|
12
|
+
|
13
|
+
module Linguist
|
14
|
+
# Language names that are recognizable by GitHub. Defined languages
|
15
|
+
# can be highlighted, searched and listed under the Top Languages page.
|
16
|
+
#
|
17
|
+
# Languages are defined in `lib/linguist/languages.yml`.
|
18
|
+
class Language
|
19
|
+
@languages = []
|
20
|
+
@index = {}
|
21
|
+
@name_index = {}
|
22
|
+
@alias_index = {}
|
23
|
+
|
24
|
+
@extension_index = Hash.new { |h,k| h[k] = [] }
|
25
|
+
@interpreter_index = Hash.new { |h,k| h[k] = [] }
|
26
|
+
@filename_index = Hash.new { |h,k| h[k] = [] }
|
27
|
+
@primary_extension_index = {}
|
28
|
+
|
29
|
+
# Valid Languages types
|
30
|
+
TYPES = [:data, :markup, :programming, :prose]
|
31
|
+
|
32
|
+
# Names of non-programming languages that we will still detect.
#
# Returns an Array of language name Strings.
def self.detectable_markup
  %w[CSS Less Sass SCSS Stylus TeX]
end
|
38
|
+
|
39
|
+
# Select the registered languages of a specific type.
#
# type - A Symbol that exists within TYPES
#
# Returns an Array of Languages whose `type` equals the given type.
def self.by_type(type)
  all.find_all { |language| language.type == type }
end
|
47
|
+
|
48
|
+
# Internal: Create a new Language object and register it in the lookup
# indexes (name, alias, extension, primary extension, interpreter and
# filename).
#
# attributes - A hash of attributes
#
# Returns a Language object.
# Raises ArgumentError on a duplicate name, alias or primary extension, or
# on an extension that does not start with a '.'.
def self.create(attributes = {})
  language = new(attributes)

  # Validate everything before touching any index, so that a rejected
  # definition never leaves the registry half-updated (present in the
  # language list and some indexes but missing from others).

  # All Language names should be unique. Raise if there is a duplicate.
  if @name_index.key?(language.name)
    raise ArgumentError, "Duplicate language name: #{language.name}"
  end

  # All Language aliases should be unique, both against already registered
  # languages and within this language's own alias list.
  seen_aliases = {}
  language.aliases.each do |name|
    if @alias_index.key?(name) || seen_aliases.key?(name)
      raise ArgumentError, "Duplicate alias: #{name}"
    end

    seen_aliases[name] = true
  end

  language.extensions.each do |extension|
    if extension !~ /^\./
      raise ArgumentError, "Extension is missing a '.': #{extension.inspect}"
    end
  end

  if @primary_extension_index.key?(language.primary_extension)
    raise ArgumentError, "Duplicate primary extension: #{language.primary_extension}"
  end

  # Registration: nothing below can fail validation any more.
  @languages << language

  # Language name index
  @index[language.name] = @name_index[language.name] = language

  language.aliases.each do |name|
    @index[name] = @alias_index[name] = language
  end

  language.extensions.each do |extension|
    @extension_index[extension] << language
  end

  @primary_extension_index[language.primary_extension] = language

  language.interpreters.each do |interpreter|
    @interpreter_index[interpreter] << language
  end

  language.filenames.each do |filename|
    @filename_index[filename] << language
  end

  language
end
|
99
|
+
|
100
|
+
# Public: Detects the Language of the blob.
#
# name - String filename
# data - String blob data. A block may also be passed in for lazy
#        loading. This behavior is deprecated and you should always
#        pass in a String.
# mode - Optional String mode (defaults to nil)
#
# Returns Language or nil.
def self.detect(name, data, mode = nil)
  # A bit of an elegant hack. If the file is executable but extensionless,
  # append a "magic" extension so it can be classified with other
  # languages that have shebang scripts.
  name += ".script!" if File.extname(name).empty? && mode && (mode.to_i(8) & 05) == 05

  # First try to find languages that match based on filename.
  candidates = find_by_filename(name)

  # Simplest and most common case: at most one match based on the filename,
  # so return it (nil when nothing matched).
  return candidates.first unless candidates.length > 1

  # More than one possible language with that extension (or no extension at
  # all, in the case of extensionless scripts): look at the content.
  data = data.call() if data.respond_to?(:call)
  candidate_names = candidates.map(&:name)

  # Don't bother with emptiness
  return nil if data.nil? || data == ""

  # Check if there's a shebang line and use that as authoritative
  by_shebang = find_by_shebang(data)
  return by_shebang.first if by_shebang && !by_shebang.empty?

  # No shebang. Still more work to do. Try to find it with our heuristics.
  by_heuristics = Heuristics.find_by_heuristics(data, candidate_names)
  return by_heuristics.first if by_heuristics && !by_heuristics.empty?

  # Lastly, fall back to the probabilistic classifier. The first element of
  # the best result is the language name; map it to its Language object.
  best = Classifier.classify(Samples::DATA, data, candidate_names).first
  best ? Language[best[0]] : nil
end
|
146
|
+
|
147
|
+
# Public: Get every registered Language, in registration order.
#
# Returns the Array of Languages held by the registry (not a copy).
def self.all
  @languages
end
|
153
|
+
|
154
|
+
# Public: Look up a Language by its proper (exact) name.
#
# name - The String name of the Language
#
# Examples
#
#   Language.find_by_name('Ruby')
#   # => #<Language name="Ruby">
#
# Returns the Language or nil if none was found.
def self.find_by_name(name)
  @name_index[name]
end
|
167
|
+
|
168
|
+
# Public: Look up a Language by one of its aliases.
#
# name - A String alias of the Language
#
# Examples
#
#   Language.find_by_alias('cpp')
#   # => #<Language name="C++">
#
# Returns the Language or nil if none was found.
def self.find_by_alias(name)
  @alias_index[name]
end
|
181
|
+
|
182
|
+
# Public: Look up Languages by filename.
#
# filename - The path String.
#
# Examples
#
#   Language.find_by_filename('foo.rb')
#   # => [#<Language name="Ruby">]
#
# Returns all matching Languages or [] if none were found. The language
# registered for the primary extension comes first, then matches on the
# exact basename, then matches on the extension.
def self.find_by_filename(filename)
  basename = File.basename(filename)
  extname  = File.extname(filename)

  # The filename and extension indexes build an empty Array on every miss
  # (default proc), so plain `[]` lookups would store one entry per unknown
  # name and grow the indexes without bound. `fetch` bypasses the default
  # proc and leaves the indexes untouched.
  langs = [@primary_extension_index[extname]] +
          @filename_index.fetch(basename, []) +
          @extension_index.fetch(extname, [])
  langs.compact.uniq
end
|
199
|
+
|
200
|
+
# Public: Look up Languages by shebang line.
#
# data - Array of tokens or String data to analyze.
#
# Examples
#
#   Language.find_by_shebang("#!/bin/bash\ndate;")
#   # => [#<Language name="Bash">]
#
# Returns an Array of the Languages registered for the interpreter that
# Linguist.interpreter_from_shebang extracts, or [] when there is none.
def self.find_by_shebang(data)
  interpreter = Linguist.interpreter_from_shebang(data)

  # `fetch` bypasses the index's default proc so that a miss (including a
  # nil interpreter) does not insert a new empty entry into the index.
  @interpreter_index.fetch(interpreter, [])
end
|
213
|
+
|
214
|
+
# Public: Look up a Language in the combined index, which is keyed by
# both proper names and aliases.
#
# name - The String name or alias of the Language
#
# Examples
#
#   Language['Ruby']
#   # => #<Language name="Ruby">
#
#   Language['ruby']
#   # => #<Language name="Ruby">
#
# Returns the Language or nil if none was found.
def self.[](name)
  @index[name]
end
|
230
|
+
|
231
|
+
# Public: A List of popular languages
#
# Popular languages are sorted to the top of language chooser
# dropdowns.
#
# This list is configured in "popular.yml".
#
# Returns an Array of Languages sorted by case-insensitive name. The
# result is memoized.
def self.popular
  @popular ||= begin
    chosen = all.select { |lang| lang.popular? }
    chosen.sort_by { |lang| lang.name.downcase }
  end
end
|
242
|
+
|
243
|
+
# Public: A List of non-popular languages
#
# Unpopular languages appear below popular ones in language
# chooser dropdowns.
#
# This list is created from all the languages not listed in "popular.yml".
#
# Returns an Array of Languages sorted by case-insensitive name. The
# result is memoized.
def self.unpopular
  @unpopular ||= begin
    chosen = all.select { |lang| lang.unpopular? }
    chosen.sort_by { |lang| lang.name.downcase }
  end
end
|
254
|
+
|
255
|
+
# Public: A List of languages with assigned colors.
#
# Returns an Array of Languages that have a color, sorted by
# case-insensitive name. The result is memoized.
def self.colors
  @colors ||= begin
    colored = all.select { |lang| lang.color }
    colored.sort_by { |lang| lang.name.downcase }
  end
end
|
261
|
+
|
262
|
+
# Public: A List of languages compatible with Ace.
#
# Returns an Array of Languages that have an ace_mode, sorted by
# case-insensitive name. The result is memoized.
def self.ace_modes
  @ace_modes ||= begin
    supported = all.select { |lang| lang.ace_mode }
    supported.sort_by { |lang| lang.name.downcase }
  end
end
|
268
|
+
|
269
|
+
# Internal: Initialize a new Language
#
# attributes - A hash of attributes. :name and :primary_extension are
#              required, and a lexer must be resolvable from :lexer or
#              the name.
#
# Raises ArgumentError when the name, lexer or primary extension is
# missing, or when :type is not one of TYPES.
def initialize(attributes = {})
  # @name is required
  @name = attributes[:name]
  raise(ArgumentError, "missing name") unless @name

  # Set type
  raw_type = attributes[:type]
  @type = raw_type ? raw_type.to_sym : nil
  unless @type.nil? || TYPES.include?(@type)
    raise ArgumentError, "invalid type: #{@type}"
  end

  @color = attributes[:color]

  # Set aliases; the default alias always comes first
  @aliases = [default_alias_name] + (attributes[:aliases] || [])

  # Lookup Lexer object
  @lexer = Pygments::Lexer.find_by_name(attributes[:lexer] || @name)
  raise(ArgumentError, "#{@name} is missing lexer") unless @lexer

  @ace_mode = attributes[:ace_mode]
  @wrap     = attributes[:wrap] || false

  # Set legacy search term
  @search_term = attributes[:search_term] || default_alias_name

  # Set extensions, interpreters and filenames or default to [].
  @extensions   = attributes[:extensions] || []
  @interpreters = attributes[:interpreters] || []
  @filenames    = attributes[:filenames] || []

  @primary_extension = attributes[:primary_extension]
  unless @primary_extension
    raise ArgumentError, "#{@name} is missing primary extension"
  end

  # Prepend primary extension unless it's already included
  unless @extensions.include?(@primary_extension)
    @extensions = [@primary_extension] + @extensions
  end

  # Set popular, and searchable flags
  @popular    = attributes.fetch(:popular, false)
  @searchable = attributes.fetch(:searchable, true)

  # If group name is set, save the name so we can lazy load it later
  if attributes[:group_name]
    @group = nil
    @group_name = attributes[:group_name]

  # Otherwise we can set it to self now
  else
    @group = self
  end
end
|
325
|
+
|
326
|
+
# Public: Get proper name
|
327
|
+
#
|
328
|
+
# Examples
|
329
|
+
#
|
330
|
+
# # => "Ruby"
|
331
|
+
# # => "Python"
|
332
|
+
# # => "Perl"
|
333
|
+
#
|
334
|
+
# Returns the name String
|
335
|
+
attr_reader :name
|
336
|
+
|
337
|
+
# Public: Get type.
|
338
|
+
#
|
339
|
+
# Returns a type Symbol or nil.
|
340
|
+
attr_reader :type
|
341
|
+
|
342
|
+
# Public: Get color.
|
343
|
+
#
|
344
|
+
# Returns a hex color String.
|
345
|
+
attr_reader :color
|
346
|
+
|
347
|
+
# Public: Get aliases
|
348
|
+
#
|
349
|
+
# Examples
|
350
|
+
#
|
351
|
+
# Language['C++'].aliases
|
352
|
+
# # => ["cpp"]
|
353
|
+
#
|
354
|
+
# Returns an Array of String names
|
355
|
+
attr_reader :aliases
|
356
|
+
|
357
|
+
# Deprecated: Get code search term
|
358
|
+
#
|
359
|
+
# Examples
|
360
|
+
#
|
361
|
+
# # => "ruby"
|
362
|
+
# # => "python"
|
363
|
+
# # => "perl"
|
364
|
+
#
|
365
|
+
# Returns the name String
|
366
|
+
attr_reader :search_term
|
367
|
+
|
368
|
+
# Public: Get Lexer
|
369
|
+
#
|
370
|
+
# Returns the Lexer
|
371
|
+
attr_reader :lexer
|
372
|
+
|
373
|
+
# Public: Get Ace mode
|
374
|
+
#
|
375
|
+
# Examples
|
376
|
+
#
|
377
|
+
# # => "text"
|
378
|
+
# # => "javascript"
|
379
|
+
# # => "c_cpp"
|
380
|
+
#
|
381
|
+
# Returns a String name or nil
|
382
|
+
attr_reader :ace_mode
|
383
|
+
|
384
|
+
# Public: Should language lines be wrapped
|
385
|
+
#
|
386
|
+
# Returns true or false
|
387
|
+
attr_reader :wrap
|
388
|
+
|
389
|
+
# Public: Get extensions
|
390
|
+
#
|
391
|
+
# Examples
|
392
|
+
#
|
393
|
+
# # => ['.rb', '.rake', ...]
|
394
|
+
#
|
395
|
+
# Returns the extensions Array
|
396
|
+
attr_reader :extensions
|
397
|
+
|
398
|
+
# Deprecated: Get primary extension
|
399
|
+
#
|
400
|
+
# Defaults to the first extension but can be overridden
|
401
|
+
# in the languages.yml.
|
402
|
+
#
|
403
|
+
# The primary extension can not be nil. Tests should verify this.
|
404
|
+
#
|
405
|
+
# This attribute is only used by app/helpers/gists_helper.rb for
|
406
|
+
# creating the language dropdown. It really should be using `name`
|
407
|
+
# instead. Would like to drop primary extension.
|
408
|
+
#
|
409
|
+
# Returns the extension String.
|
410
|
+
attr_reader :primary_extension
|
411
|
+
|
412
|
+
# Public: Get interpreters
|
413
|
+
#
|
414
|
+
# Examples
|
415
|
+
#
|
416
|
+
# # => ['awk', 'gawk', 'mawk' ...]
|
417
|
+
#
|
418
|
+
# Returns the interpreters Array
|
419
|
+
attr_reader :interpreters
|
420
|
+
|
421
|
+
# Public: Get filenames
|
422
|
+
#
|
423
|
+
# Examples
|
424
|
+
#
|
425
|
+
# # => ['Rakefile', ...]
|
426
|
+
#
|
427
|
+
# Returns the filenames Array
|
428
|
+
attr_reader :filenames
|
429
|
+
|
430
|
+
# Public: Get URL escaped name.
#
# The name is escaped with EscapeUtils.escape_url and every '+' in the
# result is then rewritten as '%20'.
#
# Examples
#
#   "C%23"
#   "C%2B%2B"
#   "Common%20Lisp"
#
# Returns the escaped String.
def escaped_name
  escaped = EscapeUtils.escape_url(name)
  escaped.gsub('+', '%20')
end
|
442
|
+
|
443
|
+
# Internal: Get default alias name: the name downcased, with every
# whitespace character replaced by a dash.
#
# Returns the alias name String
def default_alias_name
  lowered = name.downcase
  lowered.gsub(/\s/, '-')
end
|
449
|
+
|
450
|
+
# Public: Get Language group
#
# When no group has been resolved yet, it is looked up by the stored
# group name via Language.find_by_name and remembered.
#
# Returns a Language
def group
  return @group if @group

  @group = Language.find_by_name(@group_name)
end
|
456
|
+
|
457
|
+
# Public: Is it popular?
#
# Returns the popular flag stored at construction (true or false).
def popular?
  @popular
end
|
463
|
+
|
464
|
+
# Public: Is it not popular?
#
# Returns true or false, the negation of #popular?.
def unpopular?
  popular? ? false : true
end
|
470
|
+
|
471
|
+
# Public: Is it searchable?
#
# Unsearchable languages won't be indexed by solr and won't show
# up in the code search dropdown.
#
# Returns the searchable flag stored at construction (true or false).
def searchable?
  @searchable
end
|
480
|
+
|
481
|
+
# Public: Highlight syntax of text
#
# text    - String of code to be highlighted
# options - A Hash of options passed on to the lexer (defaults to {})
#
# Returns html String
def colorize(text, options = {})
  # Pass the caller's options through. Writing `options = {}` at the call
  # site re-assigned the local and silently discarded them.
  lexer.highlight(text, options)
end
|
490
|
+
|
491
|
+
# Public: Return name as String representation
|
492
|
+
def to_s
|
493
|
+
name
|
494
|
+
end
|
495
|
+
|
496
|
+
def ==(other)
|
497
|
+
eql?(other)
|
498
|
+
end
|
499
|
+
|
500
|
+
# Public: Identity comparison; true only when `other` is this very object.
#
# other - The object to compare with.
#
# Returns true or false.
def eql?(other)
  equal?(other)
end
|
503
|
+
|
504
|
+
# Public: Hash code derived from the language name.
#
# Returns the Integer hash of the name String.
def hash
  name.hash
end
|
507
|
+
|
508
|
+
def inspect
|
509
|
+
"#<#{self.class} name=#{name}>"
|
510
|
+
end
|
511
|
+
end
|
512
|
+
|
513
|
+
# Registry bootstrap: load the language definitions, merge in the
# extensions, interpreters and filenames found in the samples data, and
# register every language through Language.create.

# Any section missing from the samples data is treated as empty. The guard
# is applied once, up front, and to all three sections alike.
extensions   = Samples::DATA['extnames'] || {}
interpreters = Samples::DATA['interpreters'] || {}
filenames    = Samples::DATA['filenames'] || {}
popular      = YAML.load_file(File.expand_path("../popular.yml", __FILE__))

languages_yml  = File.expand_path("../languages.yml", __FILE__)
languages_json = File.expand_path("../languages.json", __FILE__)

# Prefer the JSON file when it exists and JSON could be loaded; otherwise
# fall back to the YAML definitions.
if File.exist?(languages_json) && defined?(JSON)
  languages = JSON.load(File.read(languages_json))
else
  languages = YAML.load_file(languages_yml)
end

languages.each do |name, options|
  options['extensions']   ||= []
  options['interpreters'] ||= []
  options['filenames']    ||= []

  # Append sample-derived values that the definition does not list yet.
  (extensions[name] || []).each do |extname|
    options['extensions'] << extname unless options['extensions'].include?(extname)
  end

  (interpreters[name] || []).each do |interpreter|
    options['interpreters'] << interpreter unless options['interpreters'].include?(interpreter)
  end

  (filenames[name] || []).each do |filename|
    options['filenames'] << filename unless options['filenames'].include?(filename)
  end

  Language.create(
    :name              => name,
    :color             => options['color'],
    :type              => options['type'],
    :aliases           => options['aliases'],
    :lexer             => options['lexer'],
    :ace_mode          => options['ace_mode'],
    :wrap              => options['wrap'],
    :group_name        => options['group'],
    :searchable        => options.key?('searchable') ? options['searchable'] : true,
    :search_term       => options['search_term'],
    :extensions        => options['extensions'].sort,
    :interpreters      => options['interpreters'].sort,
    :primary_extension => options['primary_extension'],
    :filenames         => options['filenames'],
    :popular           => popular.include?(name)
  )
end
|
578
|
+
end
|