rouge 2.2.1 → 3.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -4
- data/lib/rouge.rb +9 -7
- data/lib/rouge/cli.rb +36 -2
- data/lib/rouge/guessers/disambiguation.rb +88 -0
- data/lib/rouge/guessers/glob_mapping.rb +3 -6
- data/lib/rouge/guessers/modeline.rb +4 -3
- data/lib/rouge/guessers/source.rb +6 -16
- data/lib/rouge/guessers/util.rb +32 -0
- data/lib/rouge/lexer.rb +20 -11
- data/lib/rouge/lexers/apiblueprint.rb +0 -4
- data/lib/rouge/lexers/awk.rb +2 -2
- data/lib/rouge/lexers/biml.rb +2 -2
- data/lib/rouge/lexers/c.rb +0 -5
- data/lib/rouge/lexers/coffeescript.rb +2 -2
- data/lib/rouge/lexers/coq.rb +0 -4
- data/lib/rouge/lexers/diff.rb +4 -4
- data/lib/rouge/lexers/digdag.rb +0 -4
- data/lib/rouge/lexers/erb.rb +0 -4
- data/lib/rouge/lexers/erlang.rb +0 -4
- data/lib/rouge/lexers/factor.rb +2 -2
- data/lib/rouge/lexers/gherkin.rb +2 -2
- data/lib/rouge/lexers/go.rb +0 -4
- data/lib/rouge/lexers/groovy.rb +2 -2
- data/lib/rouge/lexers/haml.rb +0 -4
- data/lib/rouge/lexers/haskell.rb +4 -4
- data/lib/rouge/lexers/html.rb +3 -3
- data/lib/rouge/lexers/idlang.rb +0 -6
- data/lib/rouge/lexers/ini.rb +0 -4
- data/lib/rouge/lexers/io.rb +2 -2
- data/lib/rouge/lexers/javascript.rb +1 -1
- data/lib/rouge/lexers/julia.rb +2 -2
- data/lib/rouge/lexers/kotlin.rb +9 -14
- data/lib/rouge/lexers/lasso.rb +3 -6
- data/lib/rouge/lexers/llvm.rb +0 -4
- data/lib/rouge/lexers/lua.rb +2 -2
- data/lib/rouge/lexers/make.rb +0 -4
- data/lib/rouge/lexers/matlab.rb +0 -4
- data/lib/rouge/lexers/moonscript.rb +2 -2
- data/lib/rouge/lexers/mosel.rb +3 -3
- data/lib/rouge/lexers/nasm.rb +0 -5
- data/lib/rouge/lexers/objective_c.rb +0 -14
- data/lib/rouge/lexers/perl.rb +2 -3
- data/lib/rouge/lexers/php.rb +2 -4
- data/lib/rouge/lexers/plist.rb +0 -4
- data/lib/rouge/lexers/praat.rb +2 -2
- data/lib/rouge/lexers/prolog.rb +0 -5
- data/lib/rouge/lexers/properties.rb +0 -4
- data/lib/rouge/lexers/puppet.rb +3 -3
- data/lib/rouge/lexers/python.rb +2 -2
- data/lib/rouge/lexers/q.rb +0 -4
- data/lib/rouge/lexers/r.rb +2 -2
- data/lib/rouge/lexers/racket.rb +5 -4
- data/lib/rouge/lexers/ruby.rb +2 -2
- data/lib/rouge/lexers/rust.rb +2 -2
- data/lib/rouge/lexers/sed.rb +2 -2
- data/lib/rouge/lexers/shell.rb +2 -2
- data/lib/rouge/lexers/smarty.rb +0 -11
- data/lib/rouge/lexers/sml.rb +0 -4
- data/lib/rouge/lexers/tap.rb +0 -4
- data/lib/rouge/lexers/tcl.rb +4 -4
- data/lib/rouge/lexers/tex.rb +2 -2
- data/lib/rouge/lexers/toml.rb +0 -4
- data/lib/rouge/lexers/tulip.rb +2 -3
- data/lib/rouge/lexers/turtle.rb +5 -14
- data/lib/rouge/lexers/vue.rb +0 -4
- data/lib/rouge/lexers/wollok.rb +0 -4
- data/lib/rouge/lexers/xml.rb +4 -6
- data/lib/rouge/lexers/yaml.rb +2 -2
- data/lib/rouge/version.rb +1 -1
- data/rouge.gemspec +1 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 105d563933ea57ff19cca03ab1fd98fc3b01ea3a
|
4
|
+
data.tar.gz: 72e6098b1419ff753ddb8bf748d6822d5fc5c1a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6346ab18ab19b7758e3f37eec75ddeac72a11edacd4416783dbfaccb33e3aadda9841373db11436a9fe4fbdd457a8bb2f4575f79b2c546e596b9d9cc829612e3
|
7
|
+
data.tar.gz: 9df1ed34e20dfbb2733ebef31d80792547913afdff931c556e73ff731875968233f12e1c62e91cfbaa84d4653b684aeec5e9a4778ff16fb3c5279585b974a846
|
data/Gemfile
CHANGED
@@ -3,12 +3,12 @@ source 'http://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
gem 'bundler', '~> 1.15'
|
6
|
-
gem 'rake'
|
6
|
+
gem 'rake'
|
7
7
|
|
8
|
-
gem 'minitest', '
|
9
|
-
gem '
|
8
|
+
gem 'minitest', '>= 5.0'
|
9
|
+
gem 'minitest-power_assert'
|
10
10
|
|
11
|
-
gem 'rubocop', '~> 0.49.1'
|
11
|
+
gem 'rubocop', '~> 0.49.1'
|
12
12
|
|
13
13
|
# don't try to install redcarpet under jruby
|
14
14
|
gem 'redcarpet', :platforms => :ruby
|
data/lib/rouge.rb
CHANGED
@@ -41,13 +41,6 @@ load load_dir.join('rouge/util.rb')
|
|
41
41
|
load load_dir.join('rouge/text_analyzer.rb')
|
42
42
|
load load_dir.join('rouge/token.rb')
|
43
43
|
|
44
|
-
load load_dir.join('rouge/guesser.rb')
|
45
|
-
load load_dir.join('rouge/guessers/glob_mapping.rb')
|
46
|
-
load load_dir.join('rouge/guessers/modeline.rb')
|
47
|
-
load load_dir.join('rouge/guessers/filename.rb')
|
48
|
-
load load_dir.join('rouge/guessers/mimetype.rb')
|
49
|
-
load load_dir.join('rouge/guessers/source.rb')
|
50
|
-
|
51
44
|
load load_dir.join('rouge/lexer.rb')
|
52
45
|
load load_dir.join('rouge/regex_lexer.rb')
|
53
46
|
load load_dir.join('rouge/template_lexer.rb')
|
@@ -57,6 +50,15 @@ Dir.glob(lexers_dir.join('*.rb')).each do |f|
|
|
57
50
|
Rouge::Lexers.load_lexer(Pathname.new(f).relative_path_from(lexers_dir).to_s)
|
58
51
|
end
|
59
52
|
|
53
|
+
load load_dir.join('rouge/guesser.rb')
|
54
|
+
load load_dir.join('rouge/guessers/util.rb')
|
55
|
+
load load_dir.join('rouge/guessers/glob_mapping.rb')
|
56
|
+
load load_dir.join('rouge/guessers/modeline.rb')
|
57
|
+
load load_dir.join('rouge/guessers/filename.rb')
|
58
|
+
load load_dir.join('rouge/guessers/mimetype.rb')
|
59
|
+
load load_dir.join('rouge/guessers/source.rb')
|
60
|
+
load load_dir.join('rouge/guessers/disambiguation.rb')
|
61
|
+
|
60
62
|
load load_dir.join('rouge/formatter.rb')
|
61
63
|
load load_dir.join('rouge/formatters/html.rb')
|
62
64
|
load load_dir.join('rouge/formatters/html_table.rb')
|
data/lib/rouge/cli.rb
CHANGED
@@ -13,9 +13,9 @@ module Rouge
|
|
13
13
|
def file
|
14
14
|
case input
|
15
15
|
when '-'
|
16
|
-
IO.new($stdin.fileno, '
|
16
|
+
IO.new($stdin.fileno, 'rt:bom|utf-8')
|
17
17
|
when String
|
18
|
-
File.new(input, '
|
18
|
+
File.new(input, 'rt:bom|utf-8')
|
19
19
|
when ->(i){ i.respond_to? :read }
|
20
20
|
input
|
21
21
|
end
|
@@ -44,6 +44,7 @@ module Rouge
|
|
44
44
|
yield %| help #{Help.desc}|
|
45
45
|
yield %| style #{Style.desc}|
|
46
46
|
yield %| list #{List.desc}|
|
47
|
+
yield %| guess #{Guess.desc}|
|
47
48
|
yield %| version #{Version.desc}|
|
48
49
|
yield %||
|
49
50
|
yield %|See `rougify help <command>` for more info.|
|
@@ -97,6 +98,8 @@ module Rouge
|
|
97
98
|
Style
|
98
99
|
when 'list'
|
99
100
|
List
|
101
|
+
when 'guess'
|
102
|
+
Guess
|
100
103
|
end
|
101
104
|
end
|
102
105
|
|
@@ -375,6 +378,37 @@ module Rouge
|
|
375
378
|
end
|
376
379
|
end
|
377
380
|
|
381
|
+
class Guess < CLI
|
382
|
+
def self.desc
|
383
|
+
"guess the languages of file"
|
384
|
+
end
|
385
|
+
|
386
|
+
def self.parse(args)
|
387
|
+
new(input_file: args.shift)
|
388
|
+
end
|
389
|
+
|
390
|
+
attr_reader :input_file, :input_source
|
391
|
+
|
392
|
+
def initialize(opts)
|
393
|
+
@input_file = opts[:input_file] || '-'
|
394
|
+
@input_source = FileReader.new(@input_file).read
|
395
|
+
end
|
396
|
+
|
397
|
+
def lexers
|
398
|
+
Lexer.guesses(
|
399
|
+
filename: input_file,
|
400
|
+
source: input_source,
|
401
|
+
)
|
402
|
+
end
|
403
|
+
|
404
|
+
def run
|
405
|
+
lexers.each do |l|
|
406
|
+
puts "{ tag: #{l.tag.inspect}, title: #{l.title.inspect}, desc: #{l.desc.inspect} }"
|
407
|
+
end
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
411
|
+
|
378
412
|
private_class_method
|
379
413
|
def self.normalize_syntax(argv)
|
380
414
|
out = []
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Rouge
|
2
|
+
module Guessers
|
3
|
+
class Disambiguation < Guesser
|
4
|
+
include Util
|
5
|
+
include Lexers
|
6
|
+
|
7
|
+
def initialize(filename, source)
|
8
|
+
@filename = File.basename(filename)
|
9
|
+
@source = source
|
10
|
+
end
|
11
|
+
|
12
|
+
def filter(lexers)
|
13
|
+
return lexers if lexers.size == 1
|
14
|
+
return lexers if lexers.size == Lexer.all.size
|
15
|
+
|
16
|
+
@analyzer = TextAnalyzer.new(get_source(@source))
|
17
|
+
|
18
|
+
self.class.disambiguators.each do |disambiguator|
|
19
|
+
next unless disambiguator.match?(@filename)
|
20
|
+
|
21
|
+
filtered = disambiguator.decide!(self)
|
22
|
+
return filtered if filtered
|
23
|
+
end
|
24
|
+
|
25
|
+
return lexers
|
26
|
+
end
|
27
|
+
|
28
|
+
def contains?(text)
|
29
|
+
return @analyzer.include?(text)
|
30
|
+
end
|
31
|
+
|
32
|
+
def matches?(re)
|
33
|
+
return !!(@analyzer =~ re)
|
34
|
+
end
|
35
|
+
|
36
|
+
@disambiguators = []
|
37
|
+
def self.disambiguate(*patterns, &decider)
|
38
|
+
@disambiguators << Disambiguator.new(patterns, &decider)
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.disambiguators
|
42
|
+
@disambiguators
|
43
|
+
end
|
44
|
+
|
45
|
+
class Disambiguator
|
46
|
+
include Util
|
47
|
+
|
48
|
+
def initialize(patterns, &decider)
|
49
|
+
@patterns = patterns
|
50
|
+
@decider = decider
|
51
|
+
end
|
52
|
+
|
53
|
+
def decide!(guesser)
|
54
|
+
out = guesser.instance_eval(&@decider)
|
55
|
+
case out
|
56
|
+
when Array then out
|
57
|
+
when nil then nil
|
58
|
+
else [out]
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def match?(filename)
|
63
|
+
@patterns.any? { |p| test_glob(p, filename) }
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
disambiguate '*.pl' do
|
68
|
+
next Perl if contains?('my $')
|
69
|
+
next Prolog if contains?(':-')
|
70
|
+
next Prolog if matches?(/\A\w+(\(\w+\,\s*\w+\))*\./)
|
71
|
+
end
|
72
|
+
|
73
|
+
disambiguate '*.h' do
|
74
|
+
next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
|
75
|
+
next ObjectiveC if contains?('@"')
|
76
|
+
|
77
|
+
C
|
78
|
+
end
|
79
|
+
|
80
|
+
disambiguate '*.m' do
|
81
|
+
next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
|
82
|
+
next ObjectiveC if contains?('@"')
|
83
|
+
|
84
|
+
next Matlab if matches?(/^\s*?%/)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -3,6 +3,8 @@ module Rouge
|
|
3
3
|
# This class allows for custom behavior
|
4
4
|
# with glob -> lexer name mappings
|
5
5
|
class GlobMapping < Guesser
|
6
|
+
include Util
|
7
|
+
|
6
8
|
def self.by_pairs(mapping, filename)
|
7
9
|
glob_map = {}
|
8
10
|
mapping.each do |(glob, lexer_name)|
|
@@ -29,18 +31,13 @@ module Rouge
|
|
29
31
|
|
30
32
|
collect_best(lexers) do |lexer|
|
31
33
|
score = (@glob_map[lexer.name] || []).map do |pattern|
|
32
|
-
if
|
34
|
+
if test_glob(pattern, basename)
|
33
35
|
# specificity is better the fewer wildcards there are
|
34
36
|
-pattern.scan(/[*?\[]/).size
|
35
37
|
end
|
36
38
|
end.compact.min
|
37
39
|
end
|
38
40
|
end
|
39
|
-
|
40
|
-
private
|
41
|
-
def test_pattern(pattern, path)
|
42
|
-
File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
|
43
|
-
end
|
44
41
|
end
|
45
42
|
end
|
46
43
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Guessers
|
3
3
|
class Modeline < Guesser
|
4
|
+
include Util
|
5
|
+
|
4
6
|
# [jneen] regexen stolen from linguist
|
5
7
|
EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
|
6
8
|
|
@@ -25,10 +27,9 @@ module Rouge
|
|
25
27
|
# don't bother reading the stream if we've already decided
|
26
28
|
return lexers if lexers.size == 1
|
27
29
|
|
28
|
-
source_text = @source
|
29
|
-
source_text = source_text.read if source_text.respond_to? :read
|
30
|
+
source_text = get_source(@source)
|
30
31
|
|
31
|
-
lines = source_text.split(/\
|
32
|
+
lines = source_text.split(/\n/)
|
32
33
|
|
33
34
|
search_space = (lines.first(@lines) + lines.last(@lines)).join("\n")
|
34
35
|
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Guessers
|
3
3
|
class Source < Guesser
|
4
|
+
include Util
|
5
|
+
|
4
6
|
attr_reader :source
|
5
7
|
def initialize(source)
|
6
8
|
@source = source
|
@@ -11,27 +13,15 @@ module Rouge
|
|
11
13
|
# we've already filtered to 1
|
12
14
|
return lexers if lexers.size == 1
|
13
15
|
|
14
|
-
|
15
|
-
# values from analyze_text. But if we've filtered down already, we can trust
|
16
|
-
# the analysis more.
|
17
|
-
threshold = lexers.size < 10 ? 0 : 0.5
|
18
|
-
|
19
|
-
source_text = case @source
|
20
|
-
when String
|
21
|
-
@source
|
22
|
-
when ->(s){ s.respond_to? :read }
|
23
|
-
@source.read
|
24
|
-
else
|
25
|
-
raise 'invalid source'
|
26
|
-
end
|
16
|
+
source_text = get_source(@source)
|
27
17
|
|
28
18
|
Lexer.assert_utf8!(source_text)
|
29
19
|
|
30
20
|
source_text = TextAnalyzer.new(source_text)
|
31
21
|
|
32
|
-
collect_best(lexers
|
33
|
-
next unless lexer.methods(false).include? :
|
34
|
-
lexer.
|
22
|
+
collect_best(lexers) do |lexer|
|
23
|
+
next unless lexer.methods(false).include? :detect?
|
24
|
+
lexer.detect?(source_text) ? 1 : nil
|
35
25
|
end
|
36
26
|
end
|
37
27
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Rouge
|
2
|
+
module Guessers
|
3
|
+
module Util
|
4
|
+
module SourceNormalizer
|
5
|
+
UTF8_BOM = "\xEF\xBB\xBF"
|
6
|
+
UTF8_BOM_RE = /\A#{UTF8_BOM}/
|
7
|
+
|
8
|
+
# @param [String,nil] source
|
9
|
+
# @return [String,nil]
|
10
|
+
def self.normalize(source)
|
11
|
+
source.sub(UTF8_BOM_RE, '').gsub(/\r\n/, "\n")
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_glob(pattern, path)
|
16
|
+
File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
|
17
|
+
end
|
18
|
+
|
19
|
+
# @param [String,IO] source
|
20
|
+
# @return [String]
|
21
|
+
def get_source(source)
|
22
|
+
if source.respond_to?(:to_str)
|
23
|
+
SourceNormalizer.normalize(source.to_str)
|
24
|
+
elsif source.respond_to?(:read)
|
25
|
+
SourceNormalizer.normalize(source.read)
|
26
|
+
else
|
27
|
+
raise ArgumentError, "Invalid source: #{source.inspect}"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/rouge/lexer.rb
CHANGED
@@ -22,7 +22,9 @@ module Rouge
|
|
22
22
|
new(opts).lex(stream, &b)
|
23
23
|
end
|
24
24
|
|
25
|
-
# Given a string, return the correct lexer class.
|
25
|
+
# Given a name in string, return the correct lexer class.
|
26
|
+
# @param [String] name
|
27
|
+
# @return [Class<Rouge::Lexer>,nil]
|
26
28
|
def find(name)
|
27
29
|
registry[name.to_s]
|
28
30
|
end
|
@@ -42,6 +44,7 @@ module Rouge
|
|
42
44
|
# markdown lexer for highlighting internal code blocks.
|
43
45
|
#
|
44
46
|
def find_fancy(str, code=nil, additional_options={})
|
47
|
+
|
45
48
|
if str && !str.include?('?') && str != 'guess'
|
46
49
|
lexer_class = find(str)
|
47
50
|
return lexer_class && lexer_class.new(additional_options)
|
@@ -109,7 +112,7 @@ module Rouge
|
|
109
112
|
def demo(arg=:absent)
|
110
113
|
return @demo = arg unless arg == :absent
|
111
114
|
|
112
|
-
@demo = File.read(demo_file,
|
115
|
+
@demo = File.read(demo_file, mode: 'rt:bom|utf-8')
|
113
116
|
end
|
114
117
|
|
115
118
|
# @return a list of all lexers.
|
@@ -133,6 +136,7 @@ module Rouge
|
|
133
136
|
guessers << Guessers::Filename.new(filename) if filename
|
134
137
|
guessers << Guessers::Modeline.new(source) if source
|
135
138
|
guessers << Guessers::Source.new(source) if source
|
139
|
+
guessers << Guessers::Disambiguation.new(filename, source) if source && filename
|
136
140
|
|
137
141
|
Guesser.guess(guessers, Lexer.all)
|
138
142
|
end
|
@@ -147,16 +151,23 @@ module Rouge
|
|
147
151
|
# The source itself, which, if guessing by mimetype or filename
|
148
152
|
# fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
|
149
153
|
# other hints.
|
154
|
+
# @param [Proc] fallback called if multiple lexers are detected.
|
155
|
+
# If omitted, Guesser::Ambiguous is raised.
|
150
156
|
#
|
151
|
-
# @see Lexer.
|
157
|
+
# @see Lexer.detect?
|
152
158
|
# @see Lexer.guesses
|
153
|
-
|
159
|
+
# @return [Class<Rouge::Lexer>]
|
160
|
+
def guess(info={}, &fallback)
|
154
161
|
lexers = guesses(info)
|
155
162
|
|
156
163
|
return Lexers::PlainText if lexers.empty?
|
157
164
|
return lexers[0] if lexers.size == 1
|
158
165
|
|
159
|
-
|
166
|
+
if fallback
|
167
|
+
fallback.call(lexers)
|
168
|
+
else
|
169
|
+
raise Guesser::Ambiguous.new(lexers)
|
170
|
+
end
|
160
171
|
end
|
161
172
|
|
162
173
|
def guess_by_mimetype(mt)
|
@@ -425,16 +436,14 @@ module Rouge
|
|
425
436
|
|
426
437
|
# @abstract
|
427
438
|
#
|
428
|
-
# Return
|
429
|
-
#
|
430
|
-
# implementation returns 0. Values under 0.5 will only be used
|
431
|
-
# to disambiguate filename or mimetype matches.
|
439
|
+
# Return true if there is an in-text indication (such as a shebang
|
440
|
+
# or DOCTYPE declaration) that this lexer should be used.
|
432
441
|
#
|
433
442
|
# @param [TextAnalyzer] text
|
434
443
|
# the text to be analyzed, with a couple of handy methods on it,
|
435
444
|
# like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
|
436
|
-
def self.
|
437
|
-
|
445
|
+
def self.detect?(text)
|
446
|
+
false
|
438
447
|
end
|
439
448
|
end
|
440
449
|
|
data/lib/rouge/lexers/awk.rb
CHANGED