github-linguist 2.2.1 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/linguist +0 -1
- data/lib/linguist.rb +0 -1
- data/lib/linguist/blob_helper.rb +33 -15
- data/lib/linguist/samples.json +5678 -4086
- data/lib/linguist/tokenizer.rb +8 -5
- metadata +4 -6
- data/lib/linguist/mime.rb +0 -91
- data/lib/linguist/mimes.yml +0 -62
data/lib/linguist/tokenizer.rb
CHANGED
@@ -16,12 +16,15 @@ module Linguist
|
|
16
16
|
new.extract_tokens(data)
|
17
17
|
end
|
18
18
|
|
19
|
+
# Start state on token, ignore anything till the next newline
|
19
20
|
SINGLE_LINE_COMMENTS = [
|
20
21
|
'//', # C
|
21
22
|
'#', # Ruby
|
22
23
|
'%', # Tex
|
23
24
|
]
|
24
25
|
|
26
|
+
# Start state on opening token, ignore anything until the closing
|
27
|
+
# token is reached.
|
25
28
|
MULTI_LINE_COMMENTS = [
|
26
29
|
['/*', '*/'], # C
|
27
30
|
['<!--', '-->'], # XML
|
@@ -30,7 +33,7 @@ module Linguist
|
|
30
33
|
]
|
31
34
|
|
32
35
|
START_SINGLE_LINE_COMMENT = Regexp.compile(SINGLE_LINE_COMMENTS.map { |c|
|
33
|
-
"^\s*#{Regexp.escape(c)} "
|
36
|
+
"\s*#{Regexp.escape(c)} "
|
34
37
|
}.join("|"))
|
35
38
|
|
36
39
|
START_MULTI_LINE_COMMENT = Regexp.compile(MULTI_LINE_COMMENTS.map { |c|
|
@@ -58,16 +61,16 @@ module Linguist
|
|
58
61
|
end
|
59
62
|
|
60
63
|
# Single line comment
|
61
|
-
elsif token = s.scan(START_SINGLE_LINE_COMMENT)
|
62
|
-
tokens << token.strip
|
64
|
+
elsif s.beginning_of_line? && token = s.scan(START_SINGLE_LINE_COMMENT)
|
65
|
+
# tokens << token.strip
|
63
66
|
s.skip_until(/\n|\Z/)
|
64
67
|
|
65
68
|
# Multiline comments
|
66
69
|
elsif token = s.scan(START_MULTI_LINE_COMMENT)
|
67
|
-
tokens << token
|
70
|
+
# tokens << token
|
68
71
|
close_token = MULTI_LINE_COMMENTS.assoc(token)[1]
|
69
72
|
s.skip_until(Regexp.compile(Regexp.escape(close_token)))
|
70
|
-
tokens << close_token
|
73
|
+
# tokens << close_token
|
71
74
|
|
72
75
|
# Skip single or double quoted strings
|
73
76
|
elsif s.scan(/"/)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: github-linguist
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.1
|
4
|
+
version: 2.3.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-08-
|
12
|
+
date: 2012-08-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: charlock_holmes
|
@@ -50,7 +50,7 @@ dependencies:
|
|
50
50
|
requirements:
|
51
51
|
- - ~>
|
52
52
|
- !ruby/object:Gem::Version
|
53
|
-
version: '1.
|
53
|
+
version: '1.19'
|
54
54
|
type: :runtime
|
55
55
|
prerelease: false
|
56
56
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -58,7 +58,7 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - ~>
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '1.
|
61
|
+
version: '1.19'
|
62
62
|
- !ruby/object:Gem::Dependency
|
63
63
|
name: pygments.rb
|
64
64
|
requirement: !ruby/object:Gem::Requirement
|
@@ -137,8 +137,6 @@ files:
|
|
137
137
|
- lib/linguist/language.rb
|
138
138
|
- lib/linguist/languages.yml
|
139
139
|
- lib/linguist/md5.rb
|
140
|
-
- lib/linguist/mime.rb
|
141
|
-
- lib/linguist/mimes.yml
|
142
140
|
- lib/linguist/popular.yml
|
143
141
|
- lib/linguist/repository.rb
|
144
142
|
- lib/linguist/samples.json
|
data/lib/linguist/mime.rb
DELETED
@@ -1,91 +0,0 @@
|
|
1
|
-
require 'mime/types'
|
2
|
-
require 'yaml'
|
3
|
-
|
4
|
-
class MIME::Type
|
5
|
-
attr_accessor :override
|
6
|
-
end
|
7
|
-
|
8
|
-
# Register additional mime type extensions
|
9
|
-
#
|
10
|
-
# Follows same format as mime-types data file
|
11
|
-
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
|
12
|
-
File.read(File.expand_path("../mimes.yml", __FILE__)).lines.each do |line|
|
13
|
-
# Regexp was cargo culted from mime-types lib
|
14
|
-
next unless line =~ %r{^
|
15
|
-
#{MIME::Type::MEDIA_TYPE_RE}
|
16
|
-
(?:\s@([^\s]+))?
|
17
|
-
(?:\s:(#{MIME::Type::ENCODING_RE}))?
|
18
|
-
}x
|
19
|
-
|
20
|
-
mediatype = $1
|
21
|
-
subtype = $2
|
22
|
-
extensions = $3
|
23
|
-
encoding = $4
|
24
|
-
|
25
|
-
# Lookup existing mime type
|
26
|
-
mime_type = MIME::Types["#{mediatype}/#{subtype}"].first ||
|
27
|
-
# Or create a new instance
|
28
|
-
MIME::Type.new("#{mediatype}/#{subtype}")
|
29
|
-
|
30
|
-
if extensions
|
31
|
-
extensions.split(/,/).each do |extension|
|
32
|
-
mime_type.extensions << extension
|
33
|
-
end
|
34
|
-
end
|
35
|
-
|
36
|
-
if encoding
|
37
|
-
mime_type.encoding = encoding
|
38
|
-
end
|
39
|
-
|
40
|
-
mime_type.override = true
|
41
|
-
|
42
|
-
# Kind of hacky, but we need to reindex the mime type after making changes
|
43
|
-
MIME::Types.add_type_variant(mime_type)
|
44
|
-
MIME::Types.index_extensions(mime_type)
|
45
|
-
end
|
46
|
-
|
47
|
-
module Linguist
|
48
|
-
module Mime
|
49
|
-
# Internal: Look up mime type for extension.
|
50
|
-
#
|
51
|
-
# ext - The extension String. May include leading "."
|
52
|
-
#
|
53
|
-
# Examples
|
54
|
-
#
|
55
|
-
# Mime.mime_for('.html')
|
56
|
-
# # => 'text/html'
|
57
|
-
#
|
58
|
-
# Mime.mime_for('txt')
|
59
|
-
# # => 'text/plain'
|
60
|
-
#
|
61
|
-
# Return mime type String otherwise falls back to 'text/plain'.
|
62
|
-
def self.mime_for(ext)
|
63
|
-
mime_type = lookup_mime_type_for(ext)
|
64
|
-
mime_type ? mime_type.to_s : 'text/plain'
|
65
|
-
end
|
66
|
-
|
67
|
-
# Internal: Lookup mime type for extension or mime type
|
68
|
-
#
|
69
|
-
# ext_or_mime_type - A file extension ".txt" or mime type "text/plain".
|
70
|
-
#
|
71
|
-
# Returns a MIME::Type
|
72
|
-
def self.lookup_mime_type_for(ext_or_mime_type)
|
73
|
-
ext_or_mime_type ||= ''
|
74
|
-
|
75
|
-
if ext_or_mime_type =~ /\w+\/\w+/
|
76
|
-
guesses = ::MIME::Types[ext_or_mime_type]
|
77
|
-
else
|
78
|
-
guesses = ::MIME::Types.type_for(ext_or_mime_type)
|
79
|
-
end
|
80
|
-
|
81
|
-
# Use custom override first
|
82
|
-
guesses.detect { |type| type.override } ||
|
83
|
-
|
84
|
-
# Prefer text mime types over binary
|
85
|
-
guesses.detect { |type| type.ascii? } ||
|
86
|
-
|
87
|
-
# Otherwise use the first guess
|
88
|
-
guesses.first
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
data/lib/linguist/mimes.yml
DELETED
@@ -1,62 +0,0 @@
|
|
1
|
-
# Additional types to add to MIME::Types
|
2
|
-
#
|
3
|
-
# MIME types are used to set the Content-Type of raw binary blobs. All text
|
4
|
-
# blobs are served as text/plain regardless of their type to ensure they
|
5
|
-
# open in the browser rather than downloading.
|
6
|
-
#
|
7
|
-
# The encoding helps determine whether a file should be treated as plain
|
8
|
-
# text or binary. By default, a mime type's encoding is base64 (binary).
|
9
|
-
# These types will show a "View Raw" link. To force a type to render as
|
10
|
-
# plain text, set it to 8bit for UTF-8. text/* types will be treated as
|
11
|
-
# text by default.
|
12
|
-
#
|
13
|
-
# <type> @<extensions> :<encoding>
|
14
|
-
#
|
15
|
-
# type - mediatype/subtype
|
16
|
-
# extensions - comma seperated extension list
|
17
|
-
# encoding - base64 (binary), 7bit (ASCII), 8bit (UTF-8), or
|
18
|
-
# quoted-printable (Printable ASCII).
|
19
|
-
#
|
20
|
-
# Follows same format as mime-types data file
|
21
|
-
# https://github.com/halostatue/mime-types/blob/master/lib/mime/types.rb.data
|
22
|
-
#
|
23
|
-
# Any additions or modifications (even trivial) should have corresponding
|
24
|
-
# test change in `test/test_mime.rb`.
|
25
|
-
|
26
|
-
# TODO: Lookup actual types
|
27
|
-
application/octet-stream @a,blend,gem,graffle,ipa,lib,mcz,nib,o,ogv,otf,pfx,pigx,plgx,psd,sib,spl,sqlite3,swc,ucode,xpi
|
28
|
-
|
29
|
-
# Please keep this list alphabetized
|
30
|
-
application/java-archive @ear,war
|
31
|
-
application/netcdf :8bit
|
32
|
-
application/ogg @ogg
|
33
|
-
application/postscript :base64
|
34
|
-
application/vnd.adobe.air-application-installer-package+zip @air
|
35
|
-
application/vnd.mozilla.xul+xml :8bit
|
36
|
-
application/vnd.oasis.opendocument.presentation @odp
|
37
|
-
application/vnd.oasis.opendocument.spreadsheet @ods
|
38
|
-
application/vnd.oasis.opendocument.text @odt
|
39
|
-
application/vnd.openofficeorg.extension @oxt
|
40
|
-
application/vnd.openxmlformats-officedocument.presentationml.presentation @pptx
|
41
|
-
application/x-chrome-extension @crx
|
42
|
-
application/x-iwork-keynote-sffkey @key
|
43
|
-
application/x-iwork-numbers-sffnumbers @numbers
|
44
|
-
application/x-iwork-pages-sffpages @pages
|
45
|
-
application/x-ms-xbap @xbap :8bit
|
46
|
-
application/x-parrot-bytecode @pbc
|
47
|
-
application/x-shockwave-flash @swf
|
48
|
-
application/x-silverlight-app @xap
|
49
|
-
application/x-supercollider @sc :8bit
|
50
|
-
application/x-troff-ms :8bit
|
51
|
-
application/x-wais-source :8bit
|
52
|
-
application/xaml+xml @xaml :8bit
|
53
|
-
application/xslt+xml @xslt :8bit
|
54
|
-
image/x-icns @icns
|
55
|
-
text/cache-manifest @manifest
|
56
|
-
text/plain @cu,cxx
|
57
|
-
text/x-logtalk @lgt
|
58
|
-
text/x-nemerle @n
|
59
|
-
text/x-nimrod @nim
|
60
|
-
text/x-ocaml @ml,mli,mll,mly,sig,sml
|
61
|
-
text/x-rust @rs,rc
|
62
|
-
text/x-scheme @rkt,scm,sls,sps,ss
|