github-linguist 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/linguist +43 -0
- data/lib/linguist.rb +5 -0
- data/lib/linguist/blob_helper.rb +713 -0
- data/lib/linguist/file_blob.rb +56 -0
- data/lib/linguist/language.rb +474 -0
- data/lib/linguist/languages.yml +1379 -0
- data/lib/linguist/mime.rb +91 -0
- data/lib/linguist/mimes.yml +62 -0
- data/lib/linguist/pathname.rb +92 -0
- data/lib/linguist/popular.yml +29 -0
- data/lib/linguist/repository.rb +95 -0
- data/lib/linguist/vendor.yml +96 -0
- metadata +152 -0
@@ -0,0 +1,91 @@
|
|
1
|
+
require 'mime/types'
|
2
|
+
require 'yaml'
|
3
|
+
|
4
|
+
# Reopen MIME::Type to add an `override` flag. The registration code in this
# file sets the flag on every type it touches, and
# Linguist::Mime.lookup_mime_type_for prefers flagged types over other guesses.
class MIME::Type
  attr_reader :override
  attr_writer :override
end
|
7
|
+
|
8
|
+
# Register the additional mime types and extensions listed in mimes.yml.
#
# The data file follows the same line format as the mime-types data file:
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
File.read(File.expand_path("../mimes.yml", __FILE__)).each_line do |entry|
  # Pattern was borrowed from the mime-types lib. Lines that do not match
  # it are skipped.
  parsed = %r{^
    #{MIME::Type::MEDIA_TYPE_RE}
    (?:\s@([^\s]+))?
    (?:\s:(#{MIME::Type::ENCODING_RE}))?
  }x.match(entry)
  next unless parsed

  mediatype  = parsed[1]
  subtype    = parsed[2]
  extensions = parsed[3]
  encoding   = parsed[4]

  # Reuse the already registered type when there is one, otherwise build a
  # fresh instance for this content type.
  mime_type = MIME::Types["#{mediatype}/#{subtype}"].first
  mime_type ||= MIME::Type.new("#{mediatype}/#{subtype}")

  # Append every comma separated extension from the entry.
  extensions.split(/,/).each { |extension| mime_type.extensions << extension } if extensions

  # Only replace the encoding when the entry specifies one.
  mime_type.encoding = encoding if encoding

  # Flag the type so lookups can prefer it over other guesses.
  mime_type.override = true

  # Kind of hacky, but the mime type has to be reindexed after it was changed.
  MIME::Types.add_type_variant(mime_type)
  MIME::Types.index_extensions(mime_type)
end
|
46
|
+
|
47
|
+
module Linguist
  module Mime
    # Internal: Look up mime type for extension.
    #
    # ext - The extension String. May include leading "."
    #
    # Examples
    #
    #   Mime.mime_for('.html')
    #   # => 'text/html'
    #
    #   Mime.mime_for('txt')
    #   # => 'text/plain'
    #
    # Returns the mime type as a String. When no type is found the result
    # is 'text/plain'.
    def self.mime_for(ext)
      found = lookup_mime_type_for(ext)
      return 'text/plain' unless found

      found.to_s
    end

    # Internal: Lookup mime type for extension or mime type
    #
    # ext_or_mime_type - A file extension ".txt" or mime type "text/plain".
    #                    nil is treated like an empty String.
    #
    # Candidates are ranked: a type flagged with `override` wins, then the
    # first type answering true to `ascii?`, then simply the first candidate.
    #
    # Returns a MIME::Type, or nil when there are no candidates.
    def self.lookup_mime_type_for(ext_or_mime_type)
      key = ext_or_mime_type || ''

      # Anything shaped like "word/word" is looked up as a mime type name,
      # everything else as a file extension.
      candidates =
        if key =~ /\w+\/\w+/
          ::MIME::Types[key]
        else
          ::MIME::Types.type_for(key)
        end

      # Use custom override first
      overridden = candidates.detect { |candidate| candidate.override }
      return overridden if overridden

      # Prefer text mime types over binary
      textual = candidates.detect { |candidate| candidate.ascii? }
      return textual if textual

      # Otherwise use the first guess
      candidates.first
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Additional types to add to MIME::Types
|
2
|
+
#
|
3
|
+
# MIME types are used to set the Content-Type of raw binary blobs. All text
|
4
|
+
# blobs are served as text/plain regardless of their type to ensure they
|
5
|
+
# open in the browser rather than downloading.
|
6
|
+
#
|
7
|
+
# The encoding helps determine whether a file should be treated as plain
|
8
|
+
# text or binary. By default, a mime type's encoding is base64 (binary).
|
9
|
+
# These types will show a "View Raw" link. To force a type to render as
|
10
|
+
# plain text, set it to 8bit for UTF-8. text/* types will be treated as
|
11
|
+
# text by default.
|
12
|
+
#
|
13
|
+
# <type> @<extensions> :<encoding>
|
14
|
+
#
|
15
|
+
# type - mediatype/subtype
|
16
|
+
# extensions - comma separated extension list
|
17
|
+
# encoding - base64 (binary), 7bit (ASCII), 8bit (UTF-8), or
|
18
|
+
# quoted-printable (Printable ASCII).
|
19
|
+
#
|
20
|
+
# Follows same format as mime-types data file
|
21
|
+
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
|
22
|
+
#
|
23
|
+
# Any additions or modifications (even trivial) should have corresponding
|
24
|
+
# test change in `test/test_mime.rb`.
|
25
|
+
|
26
|
+
# TODO: Lookup actual types
|
27
|
+
application/octet-stream @a,blend,gem,graffle,ipa,lib,mcz,nib,o,ogv,otf,pfx,pigx,plgx,psd,sib,spl,sqlite3,swc,ucode,xpi
|
28
|
+
|
29
|
+
# Please keep this list alphabetized
|
30
|
+
application/java-archive @ear,war
|
31
|
+
application/netcdf :8bit
|
32
|
+
application/ogg @ogg
|
33
|
+
application/postscript :base64
|
34
|
+
application/vnd.adobe.air-application-installer-package+zip @air
|
35
|
+
application/vnd.mozilla.xul+xml :8bit
|
36
|
+
application/vnd.oasis.opendocument.presentation @odp
|
37
|
+
application/vnd.oasis.opendocument.spreadsheet @ods
|
38
|
+
application/vnd.oasis.opendocument.text @odt
|
39
|
+
application/vnd.openofficeorg.extension @oxt
|
40
|
+
application/vnd.openxmlformats-officedocument.presentationml.presentation @pptx
|
41
|
+
application/x-chrome-extension @crx
|
42
|
+
application/x-iwork-keynote-sffkey @key
|
43
|
+
application/x-iwork-numbers-sffnumbers @numbers
|
44
|
+
application/x-iwork-pages-sffpages @pages
|
45
|
+
application/x-ms-xbap @xbap :8bit
|
46
|
+
application/x-parrot-bytecode @pbc
|
47
|
+
application/x-shockwave-flash @swf
|
48
|
+
application/x-silverlight-app @xap
|
49
|
+
application/x-supercollider @sc :8bit
|
50
|
+
application/x-troff-ms :8bit
|
51
|
+
application/x-wais-source :8bit
|
52
|
+
application/xaml+xml @xaml :8bit
|
53
|
+
application/xslt+xml @xslt :8bit
|
54
|
+
image/x-icns @icns
|
55
|
+
text/cache-manifest @manifest
|
56
|
+
text/plain @cu,cxx
|
57
|
+
text/x-logtalk @lgt
|
58
|
+
text/x-nemerle @n
|
59
|
+
text/x-nimrod @nim
|
60
|
+
text/x-ocaml @ml,mli,mll,mly,sig,sml
|
61
|
+
text/x-rust @rs,rc
|
62
|
+
text/x-scheme @rkt,scm,sls,sps,ss
|
@@ -0,0 +1,92 @@
|
|
1
|
+
require 'linguist/language'
|
2
|
+
require 'linguist/mime'
|
3
|
+
require 'pygments'
|
4
|
+
|
5
|
+
module Linguist
  # Similar to ::Pathname, Linguist::Pathname wraps a path string and
  # provides helpful query methods. It's useful when you only have a
  # filename but not a blob and need to figure out the language of the file.
  class Pathname
    # Public: Initialize a Pathname
    #
    # path - A filename String. The file may or may not actually exist.
    #
    # Returns a Pathname.
    def initialize(path)
      @path = path
    end

    # Public: Get the basename of the path
    #
    # Examples
    #
    #   Pathname.new('sub/dir/file.rb').basename
    #   # => 'file.rb'
    #
    # Returns a String.
    def basename
      File.basename(@path)
    end

    # Public: Get the extname of the path
    #
    # Delegates to File.extname, so a leading "." on a dotfile is not
    # treated as the start of an extension.
    #
    # Examples
    #
    #   Pathname.new('file.rb').extname
    #   # => '.rb'
    #
    #   Pathname.new('.rb').extname
    #   # => ''
    #
    # Returns a String.
    def extname
      File.extname(@path)
    end

    # Public: Get the language of the path
    #
    # Detection is delegated to Language.find_by_filename, which receives
    # the wrapped path string. A found Language is memoized; a nil result
    # is not cached, so the lookup runs again on the next call.
    #
    # Examples
    #
    #   Pathname.new('file.rb').language
    #   # => Language['Ruby']
    #
    # Returns a Language or nil if none was found.
    def language
      @language ||= Language.find_by_filename(@path)
    end

    # Internal: Get the lexer of the path
    #
    # Falls back to the Pygments 'Text only' lexer when no language was
    # detected.
    #
    # Returns a Lexer.
    def lexer
      language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
    end

    # Public: Get the mime type
    #
    # Looked up from the path's extname via Mime.mime_for and memoized.
    #
    # Examples
    #
    #   Pathname.new('index.html').mime_type
    #   # => 'text/html'
    #
    # Returns a mime type String.
    def mime_type
      @mime_type ||= Mime.mime_for(extname)
    end

    # Public: Return self as String
    #
    # Returns a copy of the wrapped path String, so callers cannot mutate
    # the Pathname through the result.
    def to_s
      @path.dup
    end

    # Public: Compare with another object by path.
    #
    # other - Any Object.
    #
    # Returns true when other is a Pathname (or subclass instance) wrapping
    # an equal path string, false otherwise.
    def eql?(other)
      other.is_a?(self.class) && @path == other.to_s
    end
    alias_method :==, :eql?

    # Public: Hash code derived from the wrapped path.
    #
    # Kept consistent with eql? so that equal Pathnames land in the same
    # Hash bucket and behave as one key (Hash lookup, Array#uniq, ...).
    #
    # Returns an Integer.
    def hash
      @path.hash
    end
  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# Popular languages appear at the top of language dropdowns
|
2
|
+
#
|
3
|
+
# This file should only be edited by GitHub staff
|
4
|
+
|
5
|
+
- ActionScript
|
6
|
+
- Bash
|
7
|
+
- C
|
8
|
+
- C#
|
9
|
+
- C++
|
10
|
+
- CSS
|
11
|
+
- Common Lisp
|
12
|
+
- Diff
|
13
|
+
- Emacs Lisp
|
14
|
+
- Erlang
|
15
|
+
- HTML
|
16
|
+
- Haskell
|
17
|
+
- Java
|
18
|
+
- JavaScript
|
19
|
+
- Lua
|
20
|
+
- Objective-C
|
21
|
+
- PHP
|
22
|
+
- Perl
|
23
|
+
- Python
|
24
|
+
- Ruby
|
25
|
+
- SQL
|
26
|
+
- Scala
|
27
|
+
- Scheme
|
28
|
+
- TeX
|
29
|
+
- XML
|
@@ -0,0 +1,95 @@
|
|
1
|
+
require 'linguist/file_blob'
|
2
|
+
|
3
|
+
module Linguist
  # A Repository is an abstraction of a Grit::Repo or a basic file
  # system tree. It holds a list of paths pointing to Blobish objects.
  #
  # Its primary purpose is for gathering language statistics across
  # the entire project.
  class Repository
    # Public: Initialize a new Repository from a File directory
    #
    # base_path - A path String
    #
    # Every regular file matched by "<base_path>/**/*" is wrapped in a
    # FileBlob.
    #
    # Returns a Repository
    def self.from_directory(base_path)
      files = Dir["#{base_path}/**/*"].select { |entry| File.file?(entry) }
      blobs = files.map { |file| FileBlob.new(file, base_path) }
      new(blobs)
    end

    # Public: Initialize a new Repository
    #
    # enum - Enumerator that responds to `each` and
    #        yields Blob objects
    #
    # Returns a Repository
    def initialize(enum)
      @enum           = enum
      @computed_stats = false
      @language       = nil
      @size           = nil
      # Unknown keys read as 0 without being stored in the Hash.
      @sizes          = Hash.new { 0 }
    end

    # Public: Returns a breakdown of language stats.
    #
    # Examples
    #
    #   # => { Language['Ruby'] => 46319,
    #          Language['JavaScript'] => 258 }
    #
    # Returns a Hash of Language keys and Integer size values.
    def languages
      compute_stats
      @sizes
    end

    # Public: Get primary Language of repository.
    #
    # Returns the Language with the largest accumulated size, or nil when
    # no blob was counted.
    def language
      compute_stats
      @language
    end

    # Public: Get the total size of the repository.
    #
    # Only the blobs counted in the language breakdown contribute.
    #
    # Returns a byte size Integer
    def size
      compute_stats
      @size
    end

    # Internal: Compute language breakdown for each blob in the Repository.
    #
    # The work is done once; later calls return immediately.
    #
    # Returns nothing
    def compute_stats
      return if @computed_stats

      @enum.each do |blob|
        # Binary mime types never count
        next if blob.binary_mime_type?

        # Neither do vendored or generated blobs, or blobs without a language
        next if blob.vendored? || blob.generated? || blob.language.nil?

        # Only programming languages are included
        next unless blob.language.type == :programming

        # Sizes are accumulated under the language's group
        @sizes[blob.language.group] += blob.size
      end

      # Total of all counted sizes
      @size = @sizes.values.inject(0) { |total, bytes| total + bytes }

      # The primary language is the entry with the biggest size
      largest = @sizes.max_by { |_group, bytes| bytes }
      @language = largest.first if largest

      @computed_stats = true

      nil
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
# Vendored files and directories are excluded from language
|
2
|
+
# statistics.
|
3
|
+
#
|
4
|
+
# Lines in this file are Regexps that are matched against the file
|
5
|
+
# pathname.
|
6
|
+
#
|
7
|
+
# Please add additional test coverage to
|
8
|
+
# `test/test_blob.rb#test_vendored` if you make any changes.
|
9
|
+
|
10
|
+
## Vendor Conventions ##
|
11
|
+
|
12
|
+
# Caches
|
13
|
+
- cache/
|
14
|
+
|
15
|
+
# C deps
|
16
|
+
# https://github.com/joyent/node
|
17
|
+
- ^deps/
|
18
|
+
- ^tools/
|
19
|
+
|
20
|
+
# Node dependencies
|
21
|
+
- node_modules/
|
22
|
+
|
23
|
+
# Vendored dependencies
|
24
|
+
- vendor/
|
25
|
+
|
26
|
+
|
27
|
+
## Commonly Bundled JavaScript frameworks ##
|
28
|
+
|
29
|
+
# jQuery
|
30
|
+
- (^|/)jquery([^.]*)(\.min)?\.js$
|
31
|
+
- (^|/)jquery\-\d\.\d(\.\d)?(\.min)?\.js$
|
32
|
+
|
33
|
+
# Prototype
|
34
|
+
- (^|/)prototype(.*)\.js$
|
35
|
+
- (^|/)effects\.js$
|
36
|
+
- (^|/)controls\.js$
|
37
|
+
- (^|/)dragdrop\.js$
|
38
|
+
|
39
|
+
# MooTools
|
40
|
+
- (^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$
|
41
|
+
|
42
|
+
# Dojo
|
43
|
+
- (^|/)dojo\.js$
|
44
|
+
|
45
|
+
# MochiKit
|
46
|
+
- (^|/)MochiKit\.js$
|
47
|
+
|
48
|
+
# YUI
|
49
|
+
- (^|/)yahoo-([^.]*)\.js$
|
50
|
+
- (^|/)yui([^.]*)\.js$
|
51
|
+
|
52
|
+
# LESS css
|
53
|
+
- (^|/)less([^.]*)(\.min)?\.js$
|
54
|
+
- (^|/)less\-\d+\.\d+\.\d+(\.min)?\.js$
|
55
|
+
|
56
|
+
# WYS editors
|
57
|
+
- (^|/)ckeditor\.js$
|
58
|
+
- (^|/)tiny_mce([^.]*)\.js$
|
59
|
+
- (^|/)tiny_mce/(langs|plugins|themes|utils)
|
60
|
+
|
61
|
+
# MathJax
|
62
|
+
- (^|/)MathJax/
|
63
|
+
|
64
|
+
## Python ##
|
65
|
+
|
66
|
+
# Fabric
|
67
|
+
- ^fabfile\.py$
|
68
|
+
|
69
|
+
# WAF
|
70
|
+
- ^waf$
|
71
|
+
|
72
|
+
|
73
|
+
## Obj-C ##
|
74
|
+
|
75
|
+
# Sparkle
|
76
|
+
- (^|/)Sparkle/
|
77
|
+
|
78
|
+
## .NET ##
|
79
|
+
|
80
|
+
# Visual Studio IntelliSense
|
81
|
+
- -vsdoc\.js$
|
82
|
+
|
83
|
+
# jQuery validation plugin (MS bundles this with asp.net mvc)
|
84
|
+
- (^|/)jquery([^.]*)\.validate(\.min)?\.js$
|
85
|
+
|
86
|
+
# Microsoft Ajax
|
87
|
+
- (^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$
|
88
|
+
|
89
|
+
# NuGet
|
90
|
+
- ^[Pp]ackages/
|
91
|
+
|
92
|
+
# ExtJS
|
93
|
+
- (^|/)extjs/
|
94
|
+
|
95
|
+
# Samples folders
|
96
|
+
- ^[Ss]amples/
|