github-linguist 2.2.1 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/linguist +0 -1
- data/lib/linguist.rb +0 -1
- data/lib/linguist/blob_helper.rb +33 -15
- data/lib/linguist/samples.json +5678 -4086
- data/lib/linguist/tokenizer.rb +8 -5
- metadata +4 -6
- data/lib/linguist/mime.rb +0 -91
- data/lib/linguist/mimes.yml +0 -62
data/lib/linguist/tokenizer.rb
CHANGED
@@ -16,12 +16,15 @@ module Linguist
|
|
16
16
|
new.extract_tokens(data)
|
17
17
|
end
|
18
18
|
|
19
|
+
# Start state on token, ignore anything till the next newline
|
19
20
|
SINGLE_LINE_COMMENTS = [
|
20
21
|
'//', # C
|
21
22
|
'#', # Ruby
|
22
23
|
'%', # Tex
|
23
24
|
]
|
24
25
|
|
26
|
+
# Start state on opening token, ignore anything until the closing
|
27
|
+
# token is reached.
|
25
28
|
MULTI_LINE_COMMENTS = [
|
26
29
|
['/*', '*/'], # C
|
27
30
|
['<!--', '-->'], # XML
|
@@ -30,7 +33,7 @@ module Linguist
|
|
30
33
|
]
|
31
34
|
|
32
35
|
START_SINGLE_LINE_COMMENT = Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
|
33
|
-
"
|
36
|
+
"\s*#{Regexp.escape(c)} "
|
34
37
|
}.join("|"))
|
35
38
|
|
36
39
|
START_MULTI_LINE_COMMENT = Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
|
@@ -58,16 +61,16 @@ module Linguist
|
|
58
61
|
end
|
59
62
|
|
60
63
|
# Single line comment
|
61
|
-
elsif token = s.scan(START_SINGLE_LINE_COMMENT)
|
62
|
-
tokens << token.strip
|
64
|
+
elsif s.beginning_of_line? && token = s.scan(START_SINGLE_LINE_COMMENT)
|
65
|
+
# tokens << token.strip
|
63
66
|
s.skip_until(/\n|\Z/)
|
64
67
|
|
65
68
|
# Multiline comments
|
66
69
|
elsif token = s.scan(START_MULTI_LINE_COMMENT)
|
67
|
-
tokens << token
|
70
|
+
# tokens << token
|
68
71
|
close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
|
69
72
|
s.skip_until(Regexp.compile(Regexp.escape(close_token)))
|
70
|
-
tokens << close_token
|
73
|
+
# tokens << close_token
|
71
74
|
|
72
75
|
# Skip single or double quoted strings
|
73
76
|
elsif s.scan(/"/)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.1
|
4
|
+
version: 2.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: charlock_holmes
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - ~>
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: '1.
|
53
|
+
version: '1.19'
|
54
54
|
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,7 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '1.
|
61
|
+
version: '1.19'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: pygments.rb
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,8 +137,6 @@ files:
|
|
137
137
|
- lib/linguist/language.rb
|
138
138
|
- lib/linguist/languages.yml
|
139
139
|
- lib/linguist/md5.rb
|
140
|
-
- lib/linguist/mime.rb
|
141
|
-
- lib/linguist/mimes.yml
|
142
140
|
- lib/linguist/popular.yml
|
143
141
|
- lib/linguist/repository.rb
|
144
142
|
- lib/linguist/samples.json
|
data/lib/linguist/mime.rb
DELETED
@@ -1,91 +0,0 @@
|
|
1
|
-
require 'mime/types'
|
2
|
-
require 'yaml'
|
3
|
-
|
4
|
-
class MIME::Type
|
5
|
-
attr_accessor :override
|
6
|
-
end
|
7
|
-
|
8
|
-
# Register additional mime type extensions
|
9
|
-
#
|
10
|
-
# Follows same format as mime-types data file
|
11
|
-
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
|
12
|
-
File.read(File.expand_path("../mimes.yml", __FILE__)).lines.each do |line|
|
13
|
-
# Regexp was cargo culted from mime-types lib
|
14
|
-
next unless line =~ %r{^
|
15
|
-
#{MIME::Type::MEDIA_TYPE_RE}
|
16
|
-
(?:\s@([^\s]+))?
|
17
|
-
(?:\s:(#{MIME::Type::ENCODING_RE}))?
|
18
|
-
}x
|
19
|
-
|
20
|
-
mediatype = $1
|
21
|
-
subtype = $2
|
22
|
-
extensions = $3
|
23
|
-
encoding = $4
|
24
|
-
|
25
|
-
# Lookup existing mime type
|
26
|
-
mime_type = MIME::Types["#{mediatype}/#{subtype}"].first ||
|
27
|
-
# Or create a new instance
|
28
|
-
MIME::Type.new("#{mediatype}/#{subtype}")
|
29
|
-
|
30
|
-
if extensions
|
31
|
-
extensions.split(/,/).each do |extension|
|
32
|
-
mime_type.extensions << extension
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
if encoding
|
37
|
-
mime_type.encoding = encoding
|
38
|
-
end
|
39
|
-
|
40
|
-
mime_type.override = true
|
41
|
-
|
42
|
-
# Kind of hacky, but we need to reindex the mime type after making changes
|
43
|
-
MIME::Types.add_type_variant(mime_type)
|
44
|
-
MIME::Types.index_extensions(mime_type)
|
45
|
-
end
|
46
|
-
|
47
|
-
module Linguist
|
48
|
-
module Mime
|
49
|
-
# Internal: Look up mime type for extension.
|
50
|
-
#
|
51
|
-
# ext - The extension String. May include leading "."
|
52
|
-
#
|
53
|
-
# Examples
|
54
|
-
#
|
55
|
-
# Mime.mime_for('.html')
|
56
|
-
# # => 'text/html'
|
57
|
-
#
|
58
|
-
# Mime.mime_for('txt')
|
59
|
-
# # => 'text/plain'
|
60
|
-
#
|
61
|
-
# Return mime type String otherwise falls back to 'text/plain'.
|
62
|
-
def self.mime_for(ext)
|
63
|
-
mime_type = lookup_mime_type_for(ext)
|
64
|
-
mime_type ? mime_type.to_s : 'text/plain'
|
65
|
-
end
|
66
|
-
|
67
|
-
# Internal: Lookup mime type for extension or mime type
|
68
|
-
#
|
69
|
-
# ext_or_mime_type - A file extension ".txt" or mime type "text/plain".
|
70
|
-
#
|
71
|
-
# Returns a MIME::Type
|
72
|
-
def self.lookup_mime_type_for(ext_or_mime_type)
|
73
|
-
ext_or_mime_type ||= ''
|
74
|
-
|
75
|
-
if ext_or_mime_type =~ /\w+\/\w+/
|
76
|
-
guesses = ::MIME::Types[ext_or_mime_type]
|
77
|
-
else
|
78
|
-
guesses = ::MIME::Types.type_for(ext_or_mime_type)
|
79
|
-
end
|
80
|
-
|
81
|
-
# Use custom override first
|
82
|
-
guesses.detect { |type| type.override } ||
|
83
|
-
|
84
|
-
# Prefer text mime types over binary
|
85
|
-
guesses.detect { |type| type.ascii? } ||
|
86
|
-
|
87
|
-
# Otherwise use the first guess
|
88
|
-
guesses.first
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
data/lib/linguist/mimes.yml
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
# Additional types to add to MIME::Types
|
2
|
-
#
|
3
|
-
# MIME types are used to set the Content-Type of raw binary blobs. All text
|
4
|
-
# blobs are served as text/plain regardless of their type to ensure they
|
5
|
-
# open in the browser rather than downloading.
|
6
|
-
#
|
7
|
-
# The encoding helps determine whether a file should be treated as plain
|
8
|
-
# text or binary. By default, a mime type's encoding is base64 (binary).
|
9
|
-
# These types will show a "View Raw" link. To force a type to render as
|
10
|
-
# plain text, set it to 8bit for UTF-8. text/* types will be treated as
|
11
|
-
# text by default.
|
12
|
-
#
|
13
|
-
# <type> @<extensions> :<encoding>
|
14
|
-
#
|
15
|
-
# type - mediatype/subtype
|
16
|
-
# extensions - comma seperated extension list
|
17
|
-
# encoding - base64 (binary), 7bit (ASCII), 8bit (UTF-8), or
|
18
|
-
# quoted-printable (Printable ASCII).
|
19
|
-
#
|
20
|
-
# Follows same format as mime-types data file
|
21
|
-
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
|
22
|
-
#
|
23
|
-
# Any additions or modifications (even trivial) should have corresponding
|
24
|
-
# test change in `test/test_mime.rb`.
|
25
|
-
|
26
|
-
# TODO: Lookup actual types
|
27
|
-
application/octet-stream @a,blend,gem,graffle,ipa,lib,mcz,nib,o,ogv,otf,pfx,pigx,plgx,psd,sib,spl,sqlite3,swc,ucode,xpi
|
28
|
-
|
29
|
-
# Please keep this list alphabetized
|
30
|
-
application/java-archive @ear,war
|
31
|
-
application/netcdf :8bit
|
32
|
-
application/ogg @ogg
|
33
|
-
application/postscript :base64
|
34
|
-
application/vnd.adobe.air-application-installer-package+zip @air
|
35
|
-
application/vnd.mozilla.xul+xml :8bit
|
36
|
-
application/vnd.oasis.opendocument.presentation @odp
|
37
|
-
application/vnd.oasis.opendocument.spreadsheet @ods
|
38
|
-
application/vnd.oasis.opendocument.text @odt
|
39
|
-
application/vnd.openofficeorg.extension @oxt
|
40
|
-
application/vnd.openxmlformats-officedocument.presentationml.presentation @pptx
|
41
|
-
application/x-chrome-extension @crx
|
42
|
-
application/x-iwork-keynote-sffkey @key
|
43
|
-
application/x-iwork-numbers-sffnumbers @numbers
|
44
|
-
application/x-iwork-pages-sffpages @pages
|
45
|
-
application/x-ms-xbap @xbap :8bit
|
46
|
-
application/x-parrot-bytecode @pbc
|
47
|
-
application/x-shockwave-flash @swf
|
48
|
-
application/x-silverlight-app @xap
|
49
|
-
application/x-supercollider @sc :8bit
|
50
|
-
application/x-troff-ms :8bit
|
51
|
-
application/x-wais-source :8bit
|
52
|
-
application/xaml+xml @xaml :8bit
|
53
|
-
application/xslt+xml @xslt :8bit
|
54
|
-
image/x-icns @icns
|
55
|
-
text/cache-manifest @manifest
|
56
|
-
text/plain @cu,cxx
|
57
|
-
text/x-logtalk @lgt
|
58
|
-
text/x-nemerle @n
|
59
|
-
text/x-nimrod @nim
|
60
|
-
text/x-ocaml @ml,mli,mll,mly,sig,sml
|
61
|
-
text/x-rust @rs,rc
|
62
|
-
text/x-scheme @rkt,scm,sls,sps,ss
|