github-linguist 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,91 @@
1
+ require 'mime/types'
2
+ require 'yaml'
3
+
4
# Reopen MIME::Type to add an `override` flag. The registration code in
# this file sets it to true on every type it touches, and
# Linguist::Mime.lookup_mime_type_for prefers flagged types.
MIME::Type.class_eval do
  attr_accessor :override
end
7
+
8
# Register additional mime types and extensions listed in mimes.yml.
#
# Each entry follows the same format as the mime-types data file
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
#
# The pattern was cargo culted from the mime-types lib. Its captures are
# consumed in order as: media type, subtype, comma separated extension
# list, encoding. The last two are optional.
mime_entry_pattern = %r{^
  #{MIME::Type::MEDIA_TYPE_RE}
  (?:\s@([^\s]+))?
  (?:\s:(#{MIME::Type::ENCODING_RE}))?
}x

File.foreach(File.expand_path("../mimes.yml", __FILE__)) do |entry|
  parsed = mime_entry_pattern.match(entry)

  # Lines that do not look like a type entry are ignored.
  next if parsed.nil?

  mediatype, subtype, extensions, encoding = parsed.captures
  type_name = "#{mediatype}/#{subtype}"

  # Reuse the registered mime type when there is one, otherwise create a
  # new instance.
  mime_type = MIME::Types[type_name].first || MIME::Type.new(type_name)

  extensions.split(/,/).each { |extension| mime_type.extensions << extension } if extensions
  mime_type.encoding = encoding if encoding

  # Flag the type so lookups can tell it was customized here.
  mime_type.override = true

  # Kind of hacky, but we need to reindex the mime type after making changes
  MIME::Types.add_type_variant(mime_type)
  MIME::Types.index_extensions(mime_type)
end
46
+
47
module Linguist
  module Mime
    # Internal: Resolve the mime type name for a file extension.
    #
    # ext - The extension String. May include leading "."
    #
    # Examples
    #
    #   Mime.mime_for('.html')
    #   # => 'text/html'
    #
    #   Mime.mime_for('txt')
    #   # => 'text/plain'
    #
    # Returns the mime type String, or 'text/plain' when the lookup finds
    # nothing.
    def self.mime_for(ext)
      found = lookup_mime_type_for(ext)
      return 'text/plain' unless found

      found.to_s
    end

    # Internal: Find the best MIME::Type for an extension or mime type.
    #
    # ext_or_mime_type - A file extension ".txt" or mime type "text/plain".
    #                    nil is treated as an empty String.
    #
    # Candidates are ranked: a type flagged with `override` wins, then the
    # first type reporting `ascii?`, then simply the first candidate.
    #
    # Returns a MIME::Type, or whatever `first` yields when there are no
    # candidates (mime_for treats a falsy result as "no match").
    def self.lookup_mime_type_for(ext_or_mime_type)
      query = ext_or_mime_type || ''

      # Anything shaped like "word/word" is looked up as a mime type name;
      # everything else is handed to the extension lookup.
      candidates =
        if query =~ /\w+\/\w+/
          ::MIME::Types[query]
        else
          ::MIME::Types.type_for(query)
        end

      overridden = candidates.detect { |type| type.override }
      return overridden if overridden

      textual = candidates.detect { |type| type.ascii? }
      return textual if textual

      candidates.first
    end
  end
end
@@ -0,0 +1,62 @@
1
+ # Additional types to add to MIME::Types
2
+ #
3
+ # MIME types are used to set the Content-Type of raw binary blobs. All text
4
+ # blobs are served as text/plain regardless of their type to ensure they
5
+ # open in the browser rather than downloading.
6
+ #
7
+ # The encoding helps determine whether a file should be treated as plain
8
+ # text or binary. By default, a mime type's encoding is base64 (binary).
9
+ # These types will show a "View Raw" link. To force a type to render as
10
+ # plain text, set it to 8bit for UTF-8. text/* types will be treated as
11
+ # text by default.
12
+ #
13
+ # <type> @<extensions> :<encoding>
14
+ #
15
+ # type - mediatype/subtype
16
+ # extensions - comma separated extension list
17
+ # encoding - base64 (binary), 7bit (ASCII), 8bit (UTF-8), or
18
+ # quoted-printable (Printable ASCII).
19
+ #
20
+ # Follows same format as mime-types data file
21
+ # https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
22
+ #
23
+ # Any additions or modifications (even trivial) should have corresponding
24
+ # test change in `test/test_mime.rb`.
25
+
26
+ # TODO: Lookup actual types
27
+ application/octet-stream @a,blend,gem,graffle,ipa,lib,mcz,nib,o,ogv,otf,pfx,pigx,plgx,psd,sib,spl,sqlite3,swc,ucode,xpi
28
+
29
+ # Please keep this list alphabetized
30
+ application/java-archive @ear,war
31
+ application/netcdf :8bit
32
+ application/ogg @ogg
33
+ application/postscript :base64
34
+ application/vnd.adobe.air-application-installer-package+zip @air
35
+ application/vnd.mozilla.xul+xml :8bit
36
+ application/vnd.oasis.opendocument.presentation @odp
37
+ application/vnd.oasis.opendocument.spreadsheet @ods
38
+ application/vnd.oasis.opendocument.text @odt
39
+ application/vnd.openofficeorg.extension @oxt
40
+ application/vnd.openxmlformats-officedocument.presentationml.presentation @pptx
41
+ application/x-chrome-extension @crx
42
+ application/x-iwork-keynote-sffkey @key
43
+ application/x-iwork-numbers-sffnumbers @numbers
44
+ application/x-iwork-pages-sffpages @pages
45
+ application/x-ms-xbap @xbap :8bit
46
+ application/x-parrot-bytecode @pbc
47
+ application/x-shockwave-flash @swf
48
+ application/x-silverlight-app @xap
49
+ application/x-supercollider @sc :8bit
50
+ application/x-troff-ms :8bit
51
+ application/x-wais-source :8bit
52
+ application/xaml+xml @xaml :8bit
53
+ application/xslt+xml @xslt :8bit
54
+ image/x-icns @icns
55
+ text/cache-manifest @manifest
56
+ text/plain @cu,cxx
57
+ text/x-logtalk @lgt
58
+ text/x-nemerle @n
59
+ text/x-nimrod @nim
60
+ text/x-ocaml @ml,mli,mll,mly,sig,sml
61
+ text/x-rust @rs,rc
62
+ text/x-scheme @rkt,scm,sls,sps,ss
@@ -0,0 +1,92 @@
1
+ require 'linguist/language'
2
+ require 'linguist/mime'
3
+ require 'pygments'
4
+
5
module Linguist
  # Similar to ::Pathname, Linguist::Pathname wraps a path string and
  # provides helpful query methods. It's useful when you only have a
  # filename but not a blob and need to figure out the language of the file.
  class Pathname
    # Public: Initialize a Pathname
    #
    # path - A filename String. The file may or may not actually exist.
    #
    # Returns a Pathname.
    def initialize(path)
      @path = path
    end

    # Public: Get the basename of the path
    #
    # Examples
    #
    #   Pathname.new('sub/dir/file.rb').basename
    #   # => 'file.rb'
    #
    # Returns a String.
    def basename
      File.basename(@path)
    end

    # Public: Get the extname of the path
    #
    # This is File.extname, so a name that consists only of a leading dot
    # and letters (a dotfile) has no extension.
    #
    # Examples
    #
    #   Pathname.new('file.rb').extname
    #   # => '.rb'
    #
    #   Pathname.new('.rb').extname
    #   # => ''
    #
    # Returns a String.
    def extname
      File.extname(@path)
    end

    # Public: Get the language of the path
    #
    # Only the path is available here, so detection is delegated to
    # Language.find_by_filename. A found language is memoized.
    #
    # Examples
    #
    #   Pathname.new('file.rb').language
    #   # => Language['Ruby']
    #
    # Returns a Language or nil if none was found.
    def language
      @language ||= Language.find_by_filename(@path)
    end

    # Internal: Get the lexer of the path
    #
    # Uses the detected language's lexer, falling back to the Pygments
    # lexer named 'Text only' when no language was found.
    #
    # Returns a Lexer.
    def lexer
      language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
    end

    # Public: Get the mime type
    #
    # Examples
    #
    #   Pathname.new('index.html').mime_type
    #   # => 'text/html'
    #
    # Returns a mime type String.
    def mime_type
      @mime_type ||= Mime.mime_for(extname)
    end

    # Public: Return self as String
    #
    # A copy is returned so callers cannot mutate the wrapped path.
    #
    # Returns a String
    def to_s
      @path.dup
    end

    # Public: Compare with another object.
    #
    # other - Any Object.
    #
    # Returns true when other is a Pathname wrapping an equal path String.
    def eql?(other)
      other.is_a?(self.class) && @path == other.to_s
    end
    alias_method :==, :eql?

    # Public: Hash code consistent with eql?, so that equal Pathnames are
    # interchangeable as Hash keys and collapse under Array#uniq.
    #
    # Returns an Integer.
    def hash
      @path.hash
    end
  end
end
@@ -0,0 +1,29 @@
1
+ # Popular languages appear at the top of language dropdowns
2
+ #
3
+ # This file should only be edited by GitHub staff
4
+
5
+ - ActionScript
6
+ - Bash
7
+ - C
8
+ - C#
9
+ - C++
10
+ - CSS
11
+ - Common Lisp
12
+ - Diff
13
+ - Emacs Lisp
14
+ - Erlang
15
+ - HTML
16
+ - Haskell
17
+ - Java
18
+ - JavaScript
19
+ - Lua
20
+ - Objective-C
21
+ - PHP
22
+ - Perl
23
+ - Python
24
+ - Ruby
25
+ - SQL
26
+ - Scala
27
+ - Scheme
28
+ - TeX
29
+ - XML
@@ -0,0 +1,95 @@
1
+ require 'linguist/file_blob'
2
+
3
module Linguist
  # A Repository is an abstraction of a Grit::Repo or a basic file
  # system tree. It holds a list of paths pointing to Blobish objects.
  #
  # Its primary purpose is for gathering language statistics across
  # the entire project.
  class Repository
    # Public: Initialize a new Repository from a File directory
    #
    # base_path - A path String
    #
    # Every regular file matched by "#{base_path}/**/*" is wrapped in a
    # FileBlob.
    #
    # Returns a Repository
    def self.from_directory(base_path)
      new Dir["#{base_path}/**/*"].
        select { |f| File.file?(f) }.
        map { |path| FileBlob.new(path, base_path) }
    end

    # Public: Initialize a new Repository
    #
    # enum - Enumerator that responds to `each` and
    #        yields Blob objects
    #
    # Returns a Repository
    def initialize(enum)
      @enum = enum
      @computed_stats = false
      @language = @size = nil
      @sizes = Hash.new(0)
    end

    # Public: Returns a breakdown of language stats.
    #
    # Examples
    #
    #   # => { Language['Ruby'] => 46319,
    #          Language['JavaScript'] => 258 }
    #
    # Returns a Hash of Language keys and Integer size values.
    def languages
      compute_stats
      @sizes
    end

    # Public: Get primary Language of repository.
    #
    # Returns the Language with the largest counted size, or nil when no
    # blob was counted.
    def language
      compute_stats
      @language
    end

    # Public: Get the total size of the repository.
    #
    # Only blobs that were counted towards a language contribute; see
    # compute_stats for what is skipped.
    #
    # Returns a byte size Integer
    def size
      compute_stats
      @size
    end

    # Internal: Compute language breakdown for each blob in the Repository.
    #
    # The work is done once; later calls return immediately. If the
    # enumeration raises, nothing is recorded and the next call starts over
    # from a clean slate.
    #
    # Returns nothing
    def compute_stats
      return if @computed_stats

      # Tally into a local hash so a failure part-way through the
      # enumeration cannot leave partial counts behind to be added again on
      # a retry.
      sizes = Hash.new(0)

      @enum.each do |blob|
        # Skip binary file extensions
        next if blob.binary_mime_type?

        # Skip vendored or generated blobs
        next if blob.vendored? || blob.generated?

        # Skip blobs with no detected language
        language = blob.language
        next if language.nil?

        # Only include programming languages
        sizes[language.group] += blob.size if language.type == :programming
      end

      # Compute total size
      total = sizes.values.inject(0) { |sum, bytes| sum + bytes }

      # Get primary language
      primary = sizes.max_by { |(_, bytes)| bytes }

      @sizes = sizes
      @size = total
      @language = primary[0] if primary
      @computed_stats = true

      nil
    end
  end
end
@@ -0,0 +1,96 @@
1
+ # Vendored files and directories are excluded from language
2
+ # statistics.
3
+ #
4
+ # Lines in this file are Regexps that are matched against the file
5
+ # pathname.
6
+ #
7
+ # Please add additional test coverage to
8
+ # `test/test_blob.rb#test_vendored` if you make any changes.
9
+
10
+ ## Vendor Conventions ##
11
+
12
+ # Caches
13
+ - cache/
14
+
15
+ # C deps
16
+ # https://github.com/joyent/node
17
+ - ^deps/
18
+ - ^tools/
19
+
20
+ # Node depedencies
21
+ - node_modules/
22
+
23
+ # Vendored depedencies
24
+ - vendor/
25
+
26
+
27
+ ## Commonly Bundled JavaScript frameworks ##
28
+
29
+ # jQuery
30
+ - (^|/)jquery([^.]*)(\.min)?\.js$
31
+ - (^|/)jquery\-\d\.\d(\.\d)?(\.min)?\.js$
32
+
33
+ # Prototype
34
+ - (^|/)prototype(.*)\.js$
35
+ - (^|/)effects\.js$
36
+ - (^|/)controls\.js$
37
+ - (^|/)dragdrop\.js$
38
+
39
+ # MooTools
40
+ - (^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$
41
+
42
+ # Dojo
43
+ - (^|/)dojo\.js$
44
+
45
+ # MochiKit
46
+ - (^|/)MochiKit\.js$
47
+
48
+ # YUI
49
+ - (^|/)yahoo-([^.]*)\.js$
50
+ - (^|/)yui([^.]*)\.js$
51
+
52
+ # LESS css
53
+ - (^|/)less([^.]*)(\.min)?\.js$
54
+ - (^|/)less\-\d+\.\d+\.\d+(\.min)?\.js$
55
+
56
+ # WYSIWYG editors
57
+ - (^|/)ckeditor\.js$
58
+ - (^|/)tiny_mce([^.]*)\.js$
59
+ - (^|/)tiny_mce/(langs|plugins|themes|utils)
60
+
61
+ # MathJax
62
+ - (^|/)MathJax/
63
+
64
+ ## Python ##
65
+
66
+ # Fabric
67
+ - ^fabfile\.py$
68
+
69
+ # WAF
70
+ - ^waf$
71
+
72
+
73
+ ## Obj-C ##
74
+
75
+ # Sparkle
76
+ - (^|/)Sparkle/
77
+
78
+ ## .NET ##
79
+
80
+ # Visual Studio IntelliSense
81
+ - -vsdoc\.js$
82
+
83
+ # jQuery validation plugin (MS bundles this with asp.net mvc)
84
+ - (^|/)jquery([^.]*)\.validate(\.min)?\.js$
85
+
86
+ # Microsoft Ajax
87
+ - (^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$
88
+
89
+ # NuGet
90
+ - ^[Pp]ackages/
91
+
92
+ # ExtJS
93
+ - (^|/)extjs/
94
+
95
+ # Samples folders
96
+ - ^[Ss]amples/