github-linguist 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,91 @@
1
+ require 'mime/types'
2
+ require 'yaml'
3
+
4
+ class MIME::Type
5
+ attr_accessor :override
6
+ end
7
+
8
+ # Register additional mime type extensions
9
+ #
10
+ # Follows same format as mime-types data file
11
+ # https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
12
+ File.read(File.expand_path("../mimes.yml", __FILE__)).lines.each do |line|
13
+ # Regexp was cargo culted from mime-types lib
14
+ next unless line =~ %r{^
15
+ #{MIME::Type::MEDIA_TYPE_RE}
16
+ (?:\s@([^\s]+))?
17
+ (?:\s:(#{MIME::Type::ENCODING_RE}))?
18
+ }x
19
+
20
+ mediatype = $1
21
+ subtype = $2
22
+ extensions = $3
23
+ encoding = $4
24
+
25
+ # Lookup existing mime type
26
+ mime_type = MIME::Types["#{mediatype}/#{subtype}"].first ||
27
+ # Or create a new instance
28
+ MIME::Type.new("#{mediatype}/#{subtype}")
29
+
30
+ if extensions
31
+ extensions.split(/,/).each do |extension|
32
+ mime_type.extensions << extension
33
+ end
34
+ end
35
+
36
+ if encoding
37
+ mime_type.encoding = encoding
38
+ end
39
+
40
+ mime_type.override = true
41
+
42
+ # Kind of hacky, but we need to reindex the mime type after making changes
43
+ MIME::Types.add_type_variant(mime_type)
44
+ MIME::Types.index_extensions(mime_type)
45
+ end
46
+
47
+ module Linguist
48
+ module Mime
49
+ # Internal: Look up mime type for extension.
50
+ #
51
+ # ext - The extension String. May include leading "."
52
+ #
53
+ # Examples
54
+ #
55
+ # Mime.mime_for('.html')
56
+ # # => 'text/html'
57
+ #
58
+ # Mime.mime_for('txt')
59
+ # # => 'text/plain'
60
+ #
61
+ # Returns the mime type String, falling back to 'text/plain' when no type is found.
62
+ def self.mime_for(ext)
63
+ mime_type = lookup_mime_type_for(ext)
64
+ mime_type ? mime_type.to_s : 'text/plain'
65
+ end
66
+
67
+ # Internal: Lookup mime type for extension or mime type
68
+ #
69
+ # ext_or_mime_type - A file extension ".txt" or mime type "text/plain".
70
+ #
71
+ # Returns a MIME::Type, or nil if no matching type is found.
72
+ def self.lookup_mime_type_for(ext_or_mime_type)
73
+ ext_or_mime_type ||= ''
74
+
75
+ if ext_or_mime_type =~ /\w+\/\w+/
76
+ guesses = ::MIME::Types[ext_or_mime_type]
77
+ else
78
+ guesses = ::MIME::Types.type_for(ext_or_mime_type)
79
+ end
80
+
81
+ # Use custom override first
82
+ guesses.detect { |type| type.override } ||
83
+
84
+ # Prefer text mime types over binary
85
+ guesses.detect { |type| type.ascii? } ||
86
+
87
+ # Otherwise use the first guess
88
+ guesses.first
89
+ end
90
+ end
91
+ end
@@ -0,0 +1,62 @@
1
+ # Additional types to add to MIME::Types
2
+ #
3
+ # MIME types are used to set the Content-Type of raw binary blobs. All text
4
+ # blobs are served as text/plain regardless of their type to ensure they
5
+ # open in the browser rather than downloading.
6
+ #
7
+ # The encoding helps determine whether a file should be treated as plain
8
+ # text or binary. By default, a mime type's encoding is base64 (binary).
9
+ # These types will show a "View Raw" link. To force a type to render as
10
+ # plain text, set it to 8bit for UTF-8. text/* types will be treated as
11
+ # text by default.
12
+ #
13
+ # <type> @<extensions> :<encoding>
14
+ #
15
+ # type - mediatype/subtype
16
+ # extensions - comma separated extension list
17
+ # encoding - base64 (binary), 7bit (ASCII), 8bit (UTF-8), or
18
+ # quoted-printable (Printable ASCII).
19
+ #
20
+ # Follows same format as mime-types data file
21
+ # https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
22
+ #
23
+ # Any additions or modifications (even trivial) should have corresponding
24
+ # test change in `test/test_mime.rb`.
25
+
26
+ # TODO: Lookup actual types
27
+ application/octet-stream @a,blend,gem,graffle,ipa,lib,mcz,nib,o,ogv,otf,pfx,pigx,plgx,psd,sib,spl,sqlite3,swc,ucode,xpi
28
+
29
+ # Please keep this list alphabetized
30
+ application/java-archive @ear,war
31
+ application/netcdf :8bit
32
+ application/ogg @ogg
33
+ application/postscript :base64
34
+ application/vnd.adobe.air-application-installer-package+zip @air
35
+ application/vnd.mozilla.xul+xml :8bit
36
+ application/vnd.oasis.opendocument.presentation @odp
37
+ application/vnd.oasis.opendocument.spreadsheet @ods
38
+ application/vnd.oasis.opendocument.text @odt
39
+ application/vnd.openofficeorg.extension @oxt
40
+ application/vnd.openxmlformats-officedocument.presentationml.presentation @pptx
41
+ application/x-chrome-extension @crx
42
+ application/x-iwork-keynote-sffkey @key
43
+ application/x-iwork-numbers-sffnumbers @numbers
44
+ application/x-iwork-pages-sffpages @pages
45
+ application/x-ms-xbap @xbap :8bit
46
+ application/x-parrot-bytecode @pbc
47
+ application/x-shockwave-flash @swf
48
+ application/x-silverlight-app @xap
49
+ application/x-supercollider @sc :8bit
50
+ application/x-troff-ms :8bit
51
+ application/x-wais-source :8bit
52
+ application/xaml+xml @xaml :8bit
53
+ application/xslt+xml @xslt :8bit
54
+ image/x-icns @icns
55
+ text/cache-manifest @manifest
56
+ text/plain @cu,cxx
57
+ text/x-logtalk @lgt
58
+ text/x-nemerle @n
59
+ text/x-nimrod @nim
60
+ text/x-ocaml @ml,mli,mll,mly,sig,sml
61
+ text/x-rust @rs,rc
62
+ text/x-scheme @rkt,scm,sls,sps,ss
@@ -0,0 +1,92 @@
1
+ require 'linguist/language'
2
+ require 'linguist/mime'
3
+ require 'pygments'
4
+
5
+ module Linguist
6
+ # Similar to ::Pathname, Linguist::Pathname wraps a path string and
7
+ # provides helpful query methods. It's useful when you only have a
8
+ # filename but not a blob and need to figure out the language of the file.
9
+ class Pathname
10
+ # Public: Initialize a Pathname
11
+ #
12
+ # path - A filename String. The file may or may not actually exist.
13
+ #
14
+ # Returns a Pathname.
15
+ def initialize(path)
16
+ @path = path
17
+ end
18
+
19
+ # Public: Get the basename of the path
20
+ #
21
+ # Examples
22
+ #
23
+ # Pathname.new('sub/dir/file.rb').basename
24
+ # # => 'file.rb'
25
+ #
26
+ # Returns a String.
27
+ def basename
28
+ File.basename(@path)
29
+ end
30
+
31
+ # Public: Get the extname of the path
32
+ #
33
+ # Examples
34
+ #
35
+ # Pathname.new('.rb').extname
36
+ # # => '.rb'
37
+ #
38
+ # Pathname.new('file.rb').extname
39
+ # # => '.rb'
40
+ #
41
+ # Returns a String.
42
+ def extname
43
+ File.extname(@path)
44
+ end
45
+
46
+ # Public: Get the language of the path
47
+ #
48
+ # The path extension name is the only heuristic used to detect the
49
+ # language name.
50
+ #
51
+ # Examples
52
+ #
53
+ # Pathname.new('file.rb').language
54
+ # # => Language['Ruby']
55
+ #
56
+ # Returns a Language or nil if none was found.
57
+ def language
58
+ @language ||= Language.find_by_filename(@path)
59
+ end
60
+
61
+ # Internal: Get the lexer of the path
62
+ #
63
+ # Returns a Lexer.
64
+ def lexer
65
+ language ? language.lexer : Pygments::Lexer.find_by_name('Text only')
66
+ end
67
+
68
+ # Public: Get the mime type
69
+ #
70
+ # Examples
71
+ #
72
+ # Pathname.new('index.html').mime_type
73
+ # # => 'text/html'
74
+ #
75
+ # Returns a mime type String.
76
+ def mime_type
77
+ @mime_type ||= Mime.mime_for(extname)
78
+ end
79
+
80
+ # Public: Return self as String
81
+ #
82
+ # Returns a String
83
+ def to_s
84
+ @path.dup
85
+ end
86
+
87
+ def eql?(other)
88
+ other.is_a?(self.class) && @path == other.to_s
89
+ end
90
+ alias_method :==, :eql?
91
+ end
92
+ end
@@ -0,0 +1,29 @@
1
+ # Popular languages appear at the top of language dropdowns
2
+ #
3
+ # This file should only be edited by GitHub staff
4
+
5
+ - ActionScript
6
+ - Bash
7
+ - C
8
+ - C#
9
+ - C++
10
+ - CSS
11
+ - Common Lisp
12
+ - Diff
13
+ - Emacs Lisp
14
+ - Erlang
15
+ - HTML
16
+ - Haskell
17
+ - Java
18
+ - JavaScript
19
+ - Lua
20
+ - Objective-C
21
+ - PHP
22
+ - Perl
23
+ - Python
24
+ - Ruby
25
+ - SQL
26
+ - Scala
27
+ - Scheme
28
+ - TeX
29
+ - XML
@@ -0,0 +1,95 @@
1
+ require 'linguist/file_blob'
2
+
3
+ module Linguist
4
+ # A Repository is an abstraction of a Grit::Repo or a basic file
5
+ # system tree. It holds a list of paths pointing to Blobish objects.
6
+ #
7
+ # Its primary purpose is for gathering language statistics across
8
+ # the entire project.
9
+ class Repository
10
+ # Public: Initialize a new Repository from a File directory
11
+ #
12
+ # base_path - A path String
13
+ #
14
+ # Returns a Repository
15
+ def self.from_directory(base_path)
16
+ new Dir["#{base_path}/**/*"].
17
+ select { |f| File.file?(f) }.
18
+ map { |path| FileBlob.new(path, base_path) }
19
+ end
20
+
21
+ # Public: Initialize a new Repository
22
+ #
23
+ # enum - Enumerator that responds to `each` and
24
+ # yields Blob objects
25
+ #
26
+ # Returns a Repository
27
+ def initialize(enum)
28
+ @enum = enum
29
+ @computed_stats = false
30
+ @language = @size = nil
31
+ @sizes = Hash.new { 0 }
32
+ end
33
+
34
+ # Public: Returns a breakdown of language stats.
35
+ #
36
+ # Examples
37
+ #
38
+ # # => { Language['Ruby'] => 46319,
39
+ # Language['JavaScript'] => 258 }
40
+ #
41
+ # Returns a Hash of Language keys and Integer size values.
42
+ def languages
43
+ compute_stats
44
+ @sizes
45
+ end
46
+
47
+ # Public: Get primary Language of repository.
48
+ #
49
+ # Returns a Language
50
+ def language
51
+ compute_stats
52
+ @language
53
+ end
54
+
55
+ # Public: Get the total size of the repository.
56
+ #
57
+ # Returns a byte size Integer
58
+ def size
59
+ compute_stats
60
+ @size
61
+ end
62
+
63
+ # Internal: Compute language breakdown for each blob in the Repository.
64
+ #
65
+ # Returns nothing
66
+ def compute_stats
67
+ return if @computed_stats
68
+
69
+ @enum.each do |blob|
70
+ # Skip binary file extensions
71
+ next if blob.binary_mime_type?
72
+
73
+ # Skip vendored or generated blobs, and blobs with no detected language
74
+ next if blob.vendored? || blob.generated? || blob.language.nil?
75
+
76
+ # Only include programming languages
77
+ if blob.language.type == :programming
78
+ @sizes[blob.language.group] += blob.size
79
+ end
80
+ end
81
+
82
+ # Compute total size
83
+ @size = @sizes.inject(0) { |s,(k,v)| s + v }
84
+
85
+ # Get primary language
86
+ if primary = @sizes.max_by { |(_, size)| size }
87
+ @language = primary[0]
88
+ end
89
+
90
+ @computed_stats = true
91
+
92
+ nil
93
+ end
94
+ end
95
+ end
@@ -0,0 +1,96 @@
1
+ # Vendored files and directories are excluded from language
2
+ # statistics.
3
+ #
4
+ # Lines in this file are Regexps that are matched against the file
5
+ # pathname.
6
+ #
7
+ # Please add additional test coverage to
8
+ # `test/test_blob.rb#test_vendored` if you make any changes.
9
+
10
+ ## Vendor Conventions ##
11
+
12
+ # Caches
13
+ - cache/
14
+
15
+ # C deps
16
+ # https://github.com/joyent/node
17
+ - ^deps/
18
+ - ^tools/
19
+
20
+ # Node dependencies
21
+ - node_modules/
22
+
23
+ # Vendored dependencies
24
+ - vendor/
25
+
26
+
27
+ ## Commonly Bundled JavaScript frameworks ##
28
+
29
+ # jQuery
30
+ - (^|/)jquery([^.]*)(\.min)?\.js$
31
+ - (^|/)jquery\-\d\.\d(\.\d)?(\.min)?\.js$
32
+
33
+ # Prototype
34
+ - (^|/)prototype(.*)\.js$
35
+ - (^|/)effects\.js$
36
+ - (^|/)controls\.js$
37
+ - (^|/)dragdrop\.js$
38
+
39
+ # MooTools
40
+ - (^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$
41
+
42
+ # Dojo
43
+ - (^|/)dojo\.js$
44
+
45
+ # MochiKit
46
+ - (^|/)MochiKit\.js$
47
+
48
+ # YUI
49
+ - (^|/)yahoo-([^.]*)\.js$
50
+ - (^|/)yui([^.]*)\.js$
51
+
52
+ # LESS css
53
+ - (^|/)less([^.]*)(\.min)?\.js$
54
+ - (^|/)less\-\d+\.\d+\.\d+(\.min)?\.js$
55
+
56
+ # WYSIWYG editors
57
+ - (^|/)ckeditor\.js$
58
+ - (^|/)tiny_mce([^.]*)\.js$
59
+ - (^|/)tiny_mce/(langs|plugins|themes|utils)
60
+
61
+ # MathJax
62
+ - (^|/)MathJax/
63
+
64
+ ## Python ##
65
+
66
+ # Fabric
67
+ - ^fabfile\.py$
68
+
69
+ # WAF
70
+ - ^waf$
71
+
72
+
73
+ ## Obj-C ##
74
+
75
+ # Sparkle
76
+ - (^|/)Sparkle/
77
+
78
+ ## .NET ##
79
+
80
+ # Visual Studio IntelliSense
81
+ - -vsdoc\.js$
82
+
83
+ # jQuery validation plugin (MS bundles this with asp.net mvc)
84
+ - (^|/)jquery([^.]*)\.validate(\.min)?\.js$
85
+
86
+ # Microsoft Ajax
87
+ - (^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$
88
+
89
+ # NuGet
90
+ - ^[Pp]ackages/
91
+
92
+ # ExtJS
93
+ - (^|/)extjs/
94
+
95
+ # Samples folders
96
+ - ^[Ss]amples/