github-linguist 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/linguist +43 -0
- data/lib/linguist.rb +5 -0
- data/lib/linguist/blob_helper.rb +713 -0
- data/lib/linguist/file_blob.rb +56 -0
- data/lib/linguist/language.rb +474 -0
- data/lib/linguist/languages.yml +1379 -0
- data/lib/linguist/mime.rb +91 -0
- data/lib/linguist/mimes.yml +62 -0
- data/lib/linguist/pathname.rb +92 -0
- data/lib/linguist/popular.yml +29 -0
- data/lib/linguist/repository.rb +95 -0
- data/lib/linguist/vendor.yml +96 -0
- metadata +152 -0
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'mime/types'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Reopen the mime-types gem's MIME::Type so each instance can carry an
# `override` flag (readable and writable).
class MIME::Type
  attr_reader :override
  attr_writer :override
end
|
7
|
+
|
8
|
+
# Register additional mime type extensions
#
# Every line of mimes.yml that matches the pattern below is registered.
# The line format is the same as the mime-types data file:
#   https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
File.read(File.expand_path("../mimes.yml", __FILE__)).each_line do |entry|
  # Pattern borrowed from the mime-types lib; lines that do not match
  # are skipped.
  parsed = %r{^
    #{MIME::Type::MEDIA_TYPE_RE}
    (?:\s@([^\s]+))?
    (?:\s:(#{MIME::Type::ENCODING_RE}))?
  }x.match(entry)
  next unless parsed

  type_name      = "#{parsed[1]}/#{parsed[2]}"
  extension_list = parsed[3]
  encoding       = parsed[4]

  # Reuse the already registered type if there is one, otherwise build a
  # fresh instance.
  mime_type = MIME::Types[type_name].first || MIME::Type.new(type_name)

  # Append every listed extension to the type.
  extension_list.split(/,/).each { |ext| mime_type.extensions << ext } if extension_list

  mime_type.encoding = encoding if encoding

  # Mark the type as coming from this file.
  mime_type.override = true

  # Kind of hacky, but the type has to be reindexed after it was changed.
  MIME::Types.add_type_variant(mime_type)
  MIME::Types.index_extensions(mime_type)
end
|
46
|
+
|
47
|
+
module Linguist
  module Mime
    # Internal: Resolve an extension to the name of its mime type.
    #
    # ext - The extension String, with or without the leading ".".
    #
    # Examples
    #
    #   Mime.mime_for('.html')
    #   # => 'text/html'
    #
    #   Mime.mime_for('txt')
    #   # => 'text/plain'
    #
    # Returns the mime type String, or 'text/plain' when the lookup finds
    # nothing.
    def self.mime_for(ext)
      found = lookup_mime_type_for(ext)
      return 'text/plain' unless found

      found.to_s
    end

    # Internal: Find the best matching mime type for an extension or a
    # mime type name.
    #
    # ext_or_mime_type - A file extension ".txt" or mime type "text/plain".
    #                    nil is treated like an empty String.
    #
    # Returns the chosen MIME::Type, or whatever `first` yields on the
    # candidate list when nothing else matched (nil for an empty Array).
    def self.lookup_mime_type_for(ext_or_mime_type)
      query = ext_or_mime_type || ''

      # Anything shaped like "word/word" is looked up by type name,
      # everything else is looked up as an extension.
      candidates =
        if query =~ /\w+\/\w+/
          ::MIME::Types[query]
        else
          ::MIME::Types.type_for(query)
        end

      # A type flagged as a custom override wins.
      overridden = candidates.detect { |type| type.override }
      return overridden if overridden

      # Next, prefer text mime types over binary ones.
      textual = candidates.detect { |type| type.ascii? }
      return textual if textual

      # Otherwise settle for the first guess.
      candidates.first
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Additional types to add to MIME::Types
|
2
|
+
#
|
3
|
+
# MIME types are used to set the Content-Type of raw binary blobs. All text
|
4
|
+
# blobs are served as text/plain regardless of their type to ensure they
|
5
|
+
# open in the browser rather than downloading.
|
6
|
+
#
|
7
|
+
# The encoding helps determine whether a file should be treated as plain
|
8
|
+
# text or binary. By default, a mime type's encoding is base64 (binary).
|
9
|
+
# These types will show a "View Raw" link. To force a type to render as
|
10
|
+
# plain text, set it to 8bit for UTF-8. text/* types will be treated as
|
11
|
+
# text by default.
|
12
|
+
#
|
13
|
+
# <type> @<extensions> :<encoding>
|
14
|
+
#
|
15
|
+
# type - mediatype/subtype
|
16
|
+
#   extensions - comma separated extension list
|
17
|
+
# encoding - base64 (binary), 7bit (ASCII), 8bit (UTF-8), or
|
18
|
+
# quoted-printable (Printable ASCII).
|
19
|
+
#
|
20
|
+
# Follows same format as mime-types data file
|
21
|
+
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
|
22
|
+
#
|
23
|
+
# Any additions or modifications (even trivial) should have corresponding
|
24
|
+
# test change in `test/test_mime.rb`.
|
25
|
+
|
26
|
+
# TODO: Lookup actual types
|
27
|
+
application/octet-stream @a,blend,gem,graffle,ipa,lib,mcz,nib,o,ogv,otf,pfx,pigx,plgx,psd,sib,spl,sqlite3,swc,ucode,xpi
|
28
|
+
|
29
|
+
# Please keep this list alphabetized
|
30
|
+
application/java-archive @ear,war
|
31
|
+
application/netcdf :8bit
|
32
|
+
application/ogg @ogg
|
33
|
+
application/postscript :base64
|
34
|
+
application/vnd.adobe.air-application-installer-package+zip @air
|
35
|
+
application/vnd.mozilla.xul+xml :8bit
|
36
|
+
application/vnd.oasis.opendocument.presentation @odp
|
37
|
+
application/vnd.oasis.opendocument.spreadsheet @ods
|
38
|
+
application/vnd.oasis.opendocument.text @odt
|
39
|
+
application/vnd.openofficeorg.extension @oxt
|
40
|
+
application/vnd.openxmlformats-officedocument.presentationml.presentation @pptx
|
41
|
+
application/x-chrome-extension @crx
|
42
|
+
application/x-iwork-keynote-sffkey @key
|
43
|
+
application/x-iwork-numbers-sffnumbers @numbers
|
44
|
+
application/x-iwork-pages-sffpages @pages
|
45
|
+
application/x-ms-xbap @xbap :8bit
|
46
|
+
application/x-parrot-bytecode @pbc
|
47
|
+
application/x-shockwave-flash @swf
|
48
|
+
application/x-silverlight-app @xap
|
49
|
+
application/x-supercollider @sc :8bit
|
50
|
+
application/x-troff-ms :8bit
|
51
|
+
application/x-wais-source :8bit
|
52
|
+
application/xaml+xml @xaml :8bit
|
53
|
+
application/xslt+xml @xslt :8bit
|
54
|
+
image/x-icns @icns
|
55
|
+
text/cache-manifest @manifest
|
56
|
+
text/plain @cu,cxx
|
57
|
+
text/x-logtalk @lgt
|
58
|
+
text/x-nemerle @n
|
59
|
+
text/x-nimrod @nim
|
60
|
+
text/x-ocaml @ml,mli,mll,mly,sig,sml
|
61
|
+
text/x-rust @rs,rc
|
62
|
+
text/x-scheme @rkt,scm,sls,sps,ss
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'linguist/language'
|
2
|
+
require 'linguist/mime'
|
3
|
+
require 'pygments'
|
4
|
+
|
5
|
+
module Linguist
  # Similar to ::Pathname, Linguist::Pathname wraps a path string and
  # provides helpful query methods. It's useful when you only have a
  # filename but not a blob and need to figure out the language of the file.
  class Pathname
    # Public: Initialize a Pathname
    #
    # path - A filename String. The file may or may not actually exist.
    #
    # Returns a Pathname.
    def initialize(path)
      @path = path
    end

    # Public: Get the basename of the path
    #
    # Examples
    #
    #   Pathname.new('sub/dir/file.rb').basename
    #   # => 'file.rb'
    #
    # Returns a String.
    def basename
      File.basename(@path)
    end

    # Public: Get the extname of the path
    #
    # This is File.extname, so a name that consists only of a leading dot
    # and letters (a dotfile) has no extension.
    #
    # Examples
    #
    #   Pathname.new('file.rb').extname
    #   # => '.rb'
    #
    #   Pathname.new('.rb').extname
    #   # => ''
    #
    # Returns a String.
    def extname
      File.extname(@path)
    end

    # Public: Get the language of the path
    #
    # The lookup is delegated to Language.find_by_filename and the result
    # is memoized.
    #
    # Examples
    #
    #   Pathname.new('file.rb').language
    #   # => Language['Ruby']
    #
    # Returns a Language or nil if none was found.
    def language
      @language ||= Language.find_by_filename(@path)
    end

    # Internal: Get the lexer of the path
    #
    # Falls back to the Pygments 'Text only' lexer when no language was
    # detected.
    #
    # Returns a Lexer.
    def lexer
      language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
    end

    # Public: Get the mime type
    #
    # The mime type is looked up from the extname only and memoized.
    #
    # Examples
    #
    #   Pathname.new('index.html').mime_type
    #   # => 'text/html'
    #
    # Returns a mime type String.
    def mime_type
      @mime_type ||= Mime.mime_for(extname)
    end

    # Public: Return self as String
    #
    # Returns a String copy of the wrapped path, so callers cannot mutate
    # the path held by this object.
    def to_s
      @path.dup
    end

    # Public: Compare with another object.
    #
    # other - Any Object.
    #
    # Returns true if other is a Pathname (or subclass instance) wrapping
    # an equal path String, false otherwise.
    def eql?(other)
      other.is_a?(self.class) && @path == other.to_s
    end
    alias_method :==, :eql?

    # Public: Hash code consistent with eql?.
    #
    # Two Pathnames that are eql? wrap equal path Strings, so deriving the
    # hash from the path keeps Hash keys, Set members and Array#uniq
    # working for equal Pathnames.
    #
    # Returns an Integer.
    def hash
      @path.hash
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Popular languages appear at the top of language dropdowns
|
2
|
+
#
|
3
|
+
# This file should only be edited by GitHub staff
|
4
|
+
|
5
|
+
- ActionScript
|
6
|
+
- Bash
|
7
|
+
- C
|
8
|
+
- C#
|
9
|
+
- C++
|
10
|
+
- CSS
|
11
|
+
- Common Lisp
|
12
|
+
- Diff
|
13
|
+
- Emacs Lisp
|
14
|
+
- Erlang
|
15
|
+
- HTML
|
16
|
+
- Haskell
|
17
|
+
- Java
|
18
|
+
- JavaScript
|
19
|
+
- Lua
|
20
|
+
- Objective-C
|
21
|
+
- PHP
|
22
|
+
- Perl
|
23
|
+
- Python
|
24
|
+
- Ruby
|
25
|
+
- SQL
|
26
|
+
- Scala
|
27
|
+
- Scheme
|
28
|
+
- TeX
|
29
|
+
- XML
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'linguist/file_blob'
|
2
|
+
|
3
|
+
module Linguist
  # A Repository is an abstraction of a Grit::Repo or a basic file
  # system tree. It wraps an enumerable of Blobish objects.
  #
  # Its main job is gathering language statistics across the whole
  # project.
  class Repository
    # Public: Build a Repository from the files below a directory.
    #
    # base_path - A path String
    #
    # Returns a Repository
    def self.from_directory(base_path)
      files = Dir["#{base_path}/**/*"].select { |f| File.file?(f) }
      new(files.map { |path| FileBlob.new(path, base_path) })
    end

    # Public: Initialize a new Repository
    #
    # enum - Enumerator that responds to `each` and
    #        yields Blob objects
    #
    # Returns a Repository
    def initialize(enum)
      @enum           = enum
      @computed_stats = false
      @language       = nil
      @size           = nil
      # Languages that were never counted read as 0.
      @sizes          = Hash.new { 0 }
    end

    # Public: Returns a breakdown of language stats.
    #
    # Examples
    #
    #   # => { Language['Ruby'] => 46319,
    #          Language['JavaScript'] => 258 }
    #
    # Returns a Hash of Language keys and Integer size values.
    def languages
      compute_stats
      @sizes
    end

    # Public: Get primary Language of repository.
    #
    # Returns the Language with the largest counted size, or nil when no
    # blob was counted.
    def language
      compute_stats
      @language
    end

    # Public: Get the total size of the counted blobs.
    #
    # Returns a byte size Integer
    def size
      compute_stats
      @size
    end

    # Internal: Compute language breakdown for each blob in the Repository.
    #
    # The work is done once; later calls return immediately.
    #
    # Returns nothing
    def compute_stats
      return if @computed_stats

      @enum.each do |blob|
        # Leave out binary blobs, vendored or generated blobs, and blobs
        # without a detected language.
        ignored = blob.binary_mime_type? ||
                  blob.vendored? || blob.generated? || blob.language.nil?
        next if ignored

        # Only programming languages are counted
        next unless blob.language.type == :programming

        @sizes[blob.language.group] += blob.size
      end

      # Total of all counted sizes
      @size = @sizes.values.inject(0) { |sum, bytes| sum + bytes }

      # The primary language is the one with the largest counted size
      largest = @sizes.max_by { |pair| pair[1] }
      @language = largest.first if largest

      @computed_stats = true

      nil
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Vendored files and directories are excluded from language
|
2
|
+
# statistics.
|
3
|
+
#
|
4
|
+
# Lines in this file are Regexps that are matched against the file
|
5
|
+
# pathname.
|
6
|
+
#
|
7
|
+
# Please add additional test coverage to
|
8
|
+
# `test/test_blob.rb#test_vendored` if you make any changes.
|
9
|
+
|
10
|
+
## Vendor Conventions ##
|
11
|
+
|
12
|
+
# Caches
|
13
|
+
- cache/
|
14
|
+
|
15
|
+
# C deps
|
16
|
+
# https://github.com/joyent/node
|
17
|
+
- ^deps/
|
18
|
+
- ^tools/
|
19
|
+
|
20
|
+
# Node dependencies
|
21
|
+
- node_modules/
|
22
|
+
|
23
|
+
# Vendored dependencies
|
24
|
+
- vendor/
|
25
|
+
|
26
|
+
|
27
|
+
## Commonly Bundled JavaScript frameworks ##
|
28
|
+
|
29
|
+
# jQuery
|
30
|
+
- (^|/)jquery([^.]*)(\.min)?\.js$
|
31
|
+
- (^|/)jquery\-\d\.\d(\.\d)?(\.min)?\.js$
|
32
|
+
|
33
|
+
# Prototype
|
34
|
+
- (^|/)prototype(.*)\.js$
|
35
|
+
- (^|/)effects\.js$
|
36
|
+
- (^|/)controls\.js$
|
37
|
+
- (^|/)dragdrop\.js$
|
38
|
+
|
39
|
+
# MooTools
|
40
|
+
- (^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$
|
41
|
+
|
42
|
+
# Dojo
|
43
|
+
- (^|/)dojo\.js$
|
44
|
+
|
45
|
+
# MochiKit
|
46
|
+
- (^|/)MochiKit\.js$
|
47
|
+
|
48
|
+
# YUI
|
49
|
+
- (^|/)yahoo-([^.]*)\.js$
|
50
|
+
- (^|/)yui([^.]*)\.js$
|
51
|
+
|
52
|
+
# LESS css
|
53
|
+
- (^|/)less([^.]*)(\.min)?\.js$
|
54
|
+
- (^|/)less\-\d+\.\d+\.\d+(\.min)?\.js$
|
55
|
+
|
56
|
+
# WYS editors
|
57
|
+
- (^|/)ckeditor\.js$
|
58
|
+
- (^|/)tiny_mce([^.]*)\.js$
|
59
|
+
- (^|/)tiny_mce/(langs|plugins|themes|utils)
|
60
|
+
|
61
|
+
# MathJax
|
62
|
+
- (^|/)MathJax/
|
63
|
+
|
64
|
+
## Python ##
|
65
|
+
|
66
|
+
# Fabric
|
67
|
+
- ^fabfile\.py$
|
68
|
+
|
69
|
+
# WAF
|
70
|
+
- ^waf$
|
71
|
+
|
72
|
+
|
73
|
+
## Obj-C ##
|
74
|
+
|
75
|
+
# Sparkle
|
76
|
+
- (^|/)Sparkle/
|
77
|
+
|
78
|
+
## .NET ##
|
79
|
+
|
80
|
+
# Visual Studio IntelliSense
|
81
|
+
- -vsdoc\.js$
|
82
|
+
|
83
|
+
# jQuery validation plugin (MS bundles this with asp.net mvc)
|
84
|
+
- (^|/)jquery([^.]*)\.validate(\.min)?\.js$
|
85
|
+
|
86
|
+
# Microsoft Ajax
|
87
|
+
- (^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$
|
88
|
+
|
89
|
+
# NuGet
|
90
|
+
- ^[Pp]ackages/
|
91
|
+
|
92
|
+
# ExtJS
|
93
|
+
- (^|/)extjs/
|
94
|
+
|
95
|
+
# Samples folders
|
96
|
+
- ^[Ss]amples/
|