rouge 2.2.1 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -4
- data/lib/rouge.rb +9 -7
- data/lib/rouge/cli.rb +36 -2
- data/lib/rouge/guessers/disambiguation.rb +88 -0
- data/lib/rouge/guessers/glob_mapping.rb +3 -6
- data/lib/rouge/guessers/modeline.rb +4 -3
- data/lib/rouge/guessers/source.rb +6 -16
- data/lib/rouge/guessers/util.rb +32 -0
- data/lib/rouge/lexer.rb +20 -11
- data/lib/rouge/lexers/apiblueprint.rb +0 -4
- data/lib/rouge/lexers/awk.rb +2 -2
- data/lib/rouge/lexers/biml.rb +2 -2
- data/lib/rouge/lexers/c.rb +0 -5
- data/lib/rouge/lexers/coffeescript.rb +2 -2
- data/lib/rouge/lexers/coq.rb +0 -4
- data/lib/rouge/lexers/diff.rb +4 -4
- data/lib/rouge/lexers/digdag.rb +0 -4
- data/lib/rouge/lexers/erb.rb +0 -4
- data/lib/rouge/lexers/erlang.rb +0 -4
- data/lib/rouge/lexers/factor.rb +2 -2
- data/lib/rouge/lexers/gherkin.rb +2 -2
- data/lib/rouge/lexers/go.rb +0 -4
- data/lib/rouge/lexers/groovy.rb +2 -2
- data/lib/rouge/lexers/haml.rb +0 -4
- data/lib/rouge/lexers/haskell.rb +4 -4
- data/lib/rouge/lexers/html.rb +3 -3
- data/lib/rouge/lexers/idlang.rb +0 -6
- data/lib/rouge/lexers/ini.rb +0 -4
- data/lib/rouge/lexers/io.rb +2 -2
- data/lib/rouge/lexers/javascript.rb +1 -1
- data/lib/rouge/lexers/julia.rb +2 -2
- data/lib/rouge/lexers/kotlin.rb +9 -14
- data/lib/rouge/lexers/lasso.rb +3 -6
- data/lib/rouge/lexers/llvm.rb +0 -4
- data/lib/rouge/lexers/lua.rb +2 -2
- data/lib/rouge/lexers/make.rb +0 -4
- data/lib/rouge/lexers/matlab.rb +0 -4
- data/lib/rouge/lexers/moonscript.rb +2 -2
- data/lib/rouge/lexers/mosel.rb +3 -3
- data/lib/rouge/lexers/nasm.rb +0 -5
- data/lib/rouge/lexers/objective_c.rb +0 -14
- data/lib/rouge/lexers/perl.rb +2 -3
- data/lib/rouge/lexers/php.rb +2 -4
- data/lib/rouge/lexers/plist.rb +0 -4
- data/lib/rouge/lexers/praat.rb +2 -2
- data/lib/rouge/lexers/prolog.rb +0 -5
- data/lib/rouge/lexers/properties.rb +0 -4
- data/lib/rouge/lexers/puppet.rb +3 -3
- data/lib/rouge/lexers/python.rb +2 -2
- data/lib/rouge/lexers/q.rb +0 -4
- data/lib/rouge/lexers/r.rb +2 -2
- data/lib/rouge/lexers/racket.rb +5 -4
- data/lib/rouge/lexers/ruby.rb +2 -2
- data/lib/rouge/lexers/rust.rb +2 -2
- data/lib/rouge/lexers/sed.rb +2 -2
- data/lib/rouge/lexers/shell.rb +2 -2
- data/lib/rouge/lexers/smarty.rb +0 -11
- data/lib/rouge/lexers/sml.rb +0 -4
- data/lib/rouge/lexers/tap.rb +0 -4
- data/lib/rouge/lexers/tcl.rb +4 -4
- data/lib/rouge/lexers/tex.rb +2 -2
- data/lib/rouge/lexers/toml.rb +0 -4
- data/lib/rouge/lexers/tulip.rb +2 -3
- data/lib/rouge/lexers/turtle.rb +5 -14
- data/lib/rouge/lexers/vue.rb +0 -4
- data/lib/rouge/lexers/wollok.rb +0 -4
- data/lib/rouge/lexers/xml.rb +4 -6
- data/lib/rouge/lexers/yaml.rb +2 -2
- data/lib/rouge/version.rb +1 -1
- data/rouge.gemspec +1 -0
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 105d563933ea57ff19cca03ab1fd98fc3b01ea3a
|
4
|
+
data.tar.gz: 72e6098b1419ff753ddb8bf748d6822d5fc5c1a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6346ab18ab19b7758e3f37eec75ddeac72a11edacd4416783dbfaccb33e3aadda9841373db11436a9fe4fbdd457a8bb2f4575f79b2c546e596b9d9cc829612e3
|
7
|
+
data.tar.gz: 9df1ed34e20dfbb2733ebef31d80792547913afdff931c556e73ff731875968233f12e1c62e91cfbaa84d4653b684aeec5e9a4778ff16fb3c5279585b974a846
|
data/Gemfile
CHANGED
@@ -3,12 +3,12 @@ source 'http://rubygems.org'
|
|
3
3
|
gemspec
|
4
4
|
|
5
5
|
gem 'bundler', '~> 1.15'
|
6
|
-
gem 'rake'
|
6
|
+
gem 'rake'
|
7
7
|
|
8
|
-
gem 'minitest', '
|
9
|
-
gem '
|
8
|
+
gem 'minitest', '>= 5.0'
|
9
|
+
gem 'minitest-power_assert'
|
10
10
|
|
11
|
-
gem 'rubocop', '~> 0.49.1'
|
11
|
+
gem 'rubocop', '~> 0.49.1'
|
12
12
|
|
13
13
|
# don't try to install redcarpet under jruby
|
14
14
|
gem 'redcarpet', :platforms => :ruby
|
data/lib/rouge.rb
CHANGED
@@ -41,13 +41,6 @@ load load_dir.join('rouge/util.rb')
|
|
41
41
|
load load_dir.join('rouge/text_analyzer.rb')
|
42
42
|
load load_dir.join('rouge/token.rb')
|
43
43
|
|
44
|
-
load load_dir.join('rouge/guesser.rb')
|
45
|
-
load load_dir.join('rouge/guessers/glob_mapping.rb')
|
46
|
-
load load_dir.join('rouge/guessers/modeline.rb')
|
47
|
-
load load_dir.join('rouge/guessers/filename.rb')
|
48
|
-
load load_dir.join('rouge/guessers/mimetype.rb')
|
49
|
-
load load_dir.join('rouge/guessers/source.rb')
|
50
|
-
|
51
44
|
load load_dir.join('rouge/lexer.rb')
|
52
45
|
load load_dir.join('rouge/regex_lexer.rb')
|
53
46
|
load load_dir.join('rouge/template_lexer.rb')
|
@@ -57,6 +50,15 @@ Dir.glob(lexers_dir.join('*.rb')).each do |f|
|
|
57
50
|
Rouge::Lexers.load_lexer(Pathname.new(f).relative_path_from(lexers_dir).to_s)
|
58
51
|
end
|
59
52
|
|
53
|
+
load load_dir.join('rouge/guesser.rb')
|
54
|
+
load load_dir.join('rouge/guessers/util.rb')
|
55
|
+
load load_dir.join('rouge/guessers/glob_mapping.rb')
|
56
|
+
load load_dir.join('rouge/guessers/modeline.rb')
|
57
|
+
load load_dir.join('rouge/guessers/filename.rb')
|
58
|
+
load load_dir.join('rouge/guessers/mimetype.rb')
|
59
|
+
load load_dir.join('rouge/guessers/source.rb')
|
60
|
+
load load_dir.join('rouge/guessers/disambiguation.rb')
|
61
|
+
|
60
62
|
load load_dir.join('rouge/formatter.rb')
|
61
63
|
load load_dir.join('rouge/formatters/html.rb')
|
62
64
|
load load_dir.join('rouge/formatters/html_table.rb')
|
data/lib/rouge/cli.rb
CHANGED
@@ -13,9 +13,9 @@ module Rouge
|
|
13
13
|
def file
|
14
14
|
case input
|
15
15
|
when '-'
|
16
|
-
IO.new($stdin.fileno, '
|
16
|
+
IO.new($stdin.fileno, 'rt:bom|utf-8')
|
17
17
|
when String
|
18
|
-
File.new(input, '
|
18
|
+
File.new(input, 'rt:bom|utf-8')
|
19
19
|
when ->(i){ i.respond_to? :read }
|
20
20
|
input
|
21
21
|
end
|
@@ -44,6 +44,7 @@ module Rouge
|
|
44
44
|
yield %| help #{Help.desc}|
|
45
45
|
yield %| style #{Style.desc}|
|
46
46
|
yield %| list #{List.desc}|
|
47
|
+
yield %| guess #{Guess.desc}|
|
47
48
|
yield %| version #{Version.desc}|
|
48
49
|
yield %||
|
49
50
|
yield %|See `rougify help <command>` for more info.|
|
@@ -97,6 +98,8 @@ module Rouge
|
|
97
98
|
Style
|
98
99
|
when 'list'
|
99
100
|
List
|
101
|
+
when 'guess'
|
102
|
+
Guess
|
100
103
|
end
|
101
104
|
end
|
102
105
|
|
@@ -375,6 +378,37 @@ module Rouge
|
|
375
378
|
end
|
376
379
|
end
|
377
380
|
|
381
|
+
class Guess < CLI
|
382
|
+
def self.desc
|
383
|
+
"guess the languages of file"
|
384
|
+
end
|
385
|
+
|
386
|
+
def self.parse(args)
|
387
|
+
new(input_file: args.shift)
|
388
|
+
end
|
389
|
+
|
390
|
+
attr_reader :input_file, :input_source
|
391
|
+
|
392
|
+
def initialize(opts)
|
393
|
+
@input_file = opts[:input_file] || '-'
|
394
|
+
@input_source = FileReader.new(@input_file).read
|
395
|
+
end
|
396
|
+
|
397
|
+
def lexers
|
398
|
+
Lexer.guesses(
|
399
|
+
filename: input_file,
|
400
|
+
source: input_source,
|
401
|
+
)
|
402
|
+
end
|
403
|
+
|
404
|
+
def run
|
405
|
+
lexers.each do |l|
|
406
|
+
puts "{ tag: #{l.tag.inspect}, title: #{l.title.inspect}, desc: #{l.desc.inspect} }"
|
407
|
+
end
|
408
|
+
end
|
409
|
+
end
|
410
|
+
|
411
|
+
|
378
412
|
private_class_method
|
379
413
|
def self.normalize_syntax(argv)
|
380
414
|
out = []
|
@@ -0,0 +1,88 @@
|
|
1
|
+
module Rouge
|
2
|
+
module Guessers
|
3
|
+
class Disambiguation < Guesser
|
4
|
+
include Util
|
5
|
+
include Lexers
|
6
|
+
|
7
|
+
def initialize(filename, source)
|
8
|
+
@filename = File.basename(filename)
|
9
|
+
@source = source
|
10
|
+
end
|
11
|
+
|
12
|
+
def filter(lexers)
|
13
|
+
return lexers if lexers.size == 1
|
14
|
+
return lexers if lexers.size == Lexer.all.size
|
15
|
+
|
16
|
+
@analyzer = TextAnalyzer.new(get_source(@source))
|
17
|
+
|
18
|
+
self.class.disambiguators.each do |disambiguator|
|
19
|
+
next unless disambiguator.match?(@filename)
|
20
|
+
|
21
|
+
filtered = disambiguator.decide!(self)
|
22
|
+
return filtered if filtered
|
23
|
+
end
|
24
|
+
|
25
|
+
return lexers
|
26
|
+
end
|
27
|
+
|
28
|
+
def contains?(text)
|
29
|
+
return @analyzer.include?(text)
|
30
|
+
end
|
31
|
+
|
32
|
+
def matches?(re)
|
33
|
+
return !!(@analyzer =~ re)
|
34
|
+
end
|
35
|
+
|
36
|
+
@disambiguators = []
|
37
|
+
def self.disambiguate(*patterns, &decider)
|
38
|
+
@disambiguators << Disambiguator.new(patterns, &decider)
|
39
|
+
end
|
40
|
+
|
41
|
+
def self.disambiguators
|
42
|
+
@disambiguators
|
43
|
+
end
|
44
|
+
|
45
|
+
class Disambiguator
|
46
|
+
include Util
|
47
|
+
|
48
|
+
def initialize(patterns, &decider)
|
49
|
+
@patterns = patterns
|
50
|
+
@decider = decider
|
51
|
+
end
|
52
|
+
|
53
|
+
def decide!(guesser)
|
54
|
+
out = guesser.instance_eval(&@decider)
|
55
|
+
case out
|
56
|
+
when Array then out
|
57
|
+
when nil then nil
|
58
|
+
else [out]
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
def match?(filename)
|
63
|
+
@patterns.any? { |p| test_glob(p, filename) }
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
disambiguate '*.pl' do
|
68
|
+
next Perl if contains?('my $')
|
69
|
+
next Prolog if contains?(':-')
|
70
|
+
next Prolog if matches?(/\A\w+(\(\w+\,\s*\w+\))*\./)
|
71
|
+
end
|
72
|
+
|
73
|
+
disambiguate '*.h' do
|
74
|
+
next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
|
75
|
+
next ObjectiveC if contains?('@"')
|
76
|
+
|
77
|
+
C
|
78
|
+
end
|
79
|
+
|
80
|
+
disambiguate '*.m' do
|
81
|
+
next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
|
82
|
+
next ObjectiveC if contains?('@"')
|
83
|
+
|
84
|
+
next Matlab if matches?(/^\s*?%/)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
end
|
@@ -3,6 +3,8 @@ module Rouge
|
|
3
3
|
# This class allows for custom behavior
|
4
4
|
# with glob -> lexer name mappings
|
5
5
|
class GlobMapping < Guesser
|
6
|
+
include Util
|
7
|
+
|
6
8
|
def self.by_pairs(mapping, filename)
|
7
9
|
glob_map = {}
|
8
10
|
mapping.each do |(glob, lexer_name)|
|
@@ -29,18 +31,13 @@ module Rouge
|
|
29
31
|
|
30
32
|
collect_best(lexers) do |lexer|
|
31
33
|
score = (@glob_map[lexer.name] || []).map do |pattern|
|
32
|
-
if
|
34
|
+
if test_glob(pattern, basename)
|
33
35
|
# specificity is better the fewer wildcards there are
|
34
36
|
-pattern.scan(/[*?\[]/).size
|
35
37
|
end
|
36
38
|
end.compact.min
|
37
39
|
end
|
38
40
|
end
|
39
|
-
|
40
|
-
private
|
41
|
-
def test_pattern(pattern, path)
|
42
|
-
File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
|
43
|
-
end
|
44
41
|
end
|
45
42
|
end
|
46
43
|
end
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Guessers
|
3
3
|
class Modeline < Guesser
|
4
|
+
include Util
|
5
|
+
|
4
6
|
# [jneen] regexen stolen from linguist
|
5
7
|
EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
|
6
8
|
|
@@ -25,10 +27,9 @@ module Rouge
|
|
25
27
|
# don't bother reading the stream if we've already decided
|
26
28
|
return lexers if lexers.size == 1
|
27
29
|
|
28
|
-
source_text = @source
|
29
|
-
source_text = source_text.read if source_text.respond_to? :read
|
30
|
+
source_text = get_source(@source)
|
30
31
|
|
31
|
-
lines = source_text.split(/\
|
32
|
+
lines = source_text.split(/\n/)
|
32
33
|
|
33
34
|
search_space = (lines.first(@lines) + lines.last(@lines)).join("\n")
|
34
35
|
|
@@ -1,6 +1,8 @@
|
|
1
1
|
module Rouge
|
2
2
|
module Guessers
|
3
3
|
class Source < Guesser
|
4
|
+
include Util
|
5
|
+
|
4
6
|
attr_reader :source
|
5
7
|
def initialize(source)
|
6
8
|
@source = source
|
@@ -11,27 +13,15 @@ module Rouge
|
|
11
13
|
# we've already filtered to 1
|
12
14
|
return lexers if lexers.size == 1
|
13
15
|
|
14
|
-
|
15
|
-
# values from analyze_text. But if we've filtered down already, we can trust
|
16
|
-
# the analysis more.
|
17
|
-
threshold = lexers.size < 10 ? 0 : 0.5
|
18
|
-
|
19
|
-
source_text = case @source
|
20
|
-
when String
|
21
|
-
@source
|
22
|
-
when ->(s){ s.respond_to? :read }
|
23
|
-
@source.read
|
24
|
-
else
|
25
|
-
raise 'invalid source'
|
26
|
-
end
|
16
|
+
source_text = get_source(@source)
|
27
17
|
|
28
18
|
Lexer.assert_utf8!(source_text)
|
29
19
|
|
30
20
|
source_text = TextAnalyzer.new(source_text)
|
31
21
|
|
32
|
-
collect_best(lexers
|
33
|
-
next unless lexer.methods(false).include? :
|
34
|
-
lexer.
|
22
|
+
collect_best(lexers) do |lexer|
|
23
|
+
next unless lexer.methods(false).include? :detect?
|
24
|
+
lexer.detect?(source_text) ? 1 : nil
|
35
25
|
end
|
36
26
|
end
|
37
27
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Rouge
|
2
|
+
module Guessers
|
3
|
+
module Util
|
4
|
+
module SourceNormalizer
|
5
|
+
UTF8_BOM = "\xEF\xBB\xBF"
|
6
|
+
UTF8_BOM_RE = /\A#{UTF8_BOM}/
|
7
|
+
|
8
|
+
# @param [String,nil] source
|
9
|
+
# @return [String,nil]
|
10
|
+
def self.normalize(source)
|
11
|
+
source.sub(UTF8_BOM_RE, '').gsub(/\r\n/, "\n")
|
12
|
+
end
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_glob(pattern, path)
|
16
|
+
File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
|
17
|
+
end
|
18
|
+
|
19
|
+
# @param [String,IO] source
|
20
|
+
# @return [String]
|
21
|
+
def get_source(source)
|
22
|
+
if source.respond_to?(:to_str)
|
23
|
+
SourceNormalizer.normalize(source.to_str)
|
24
|
+
elsif source.respond_to?(:read)
|
25
|
+
SourceNormalizer.normalize(source.read)
|
26
|
+
else
|
27
|
+
raise ArgumentError, "Invalid source: #{source.inspect}"
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/rouge/lexer.rb
CHANGED
@@ -22,7 +22,9 @@ module Rouge
|
|
22
22
|
new(opts).lex(stream, &b)
|
23
23
|
end
|
24
24
|
|
25
|
-
# Given a string, return the correct lexer class.
|
25
|
+
# Given a name in string, return the correct lexer class.
|
26
|
+
# @param [String] name
|
27
|
+
# @return [Class<Rouge::Lexer>,nil]
|
26
28
|
def find(name)
|
27
29
|
registry[name.to_s]
|
28
30
|
end
|
@@ -42,6 +44,7 @@ module Rouge
|
|
42
44
|
# markdown lexer for highlighting internal code blocks.
|
43
45
|
#
|
44
46
|
def find_fancy(str, code=nil, additional_options={})
|
47
|
+
|
45
48
|
if str && !str.include?('?') && str != 'guess'
|
46
49
|
lexer_class = find(str)
|
47
50
|
return lexer_class && lexer_class.new(additional_options)
|
@@ -109,7 +112,7 @@ module Rouge
|
|
109
112
|
def demo(arg=:absent)
|
110
113
|
return @demo = arg unless arg == :absent
|
111
114
|
|
112
|
-
@demo = File.read(demo_file,
|
115
|
+
@demo = File.read(demo_file, mode: 'rt:bom|utf-8')
|
113
116
|
end
|
114
117
|
|
115
118
|
# @return a list of all lexers.
|
@@ -133,6 +136,7 @@ module Rouge
|
|
133
136
|
guessers << Guessers::Filename.new(filename) if filename
|
134
137
|
guessers << Guessers::Modeline.new(source) if source
|
135
138
|
guessers << Guessers::Source.new(source) if source
|
139
|
+
guessers << Guessers::Disambiguation.new(filename, source) if source && filename
|
136
140
|
|
137
141
|
Guesser.guess(guessers, Lexer.all)
|
138
142
|
end
|
@@ -147,16 +151,23 @@ module Rouge
|
|
147
151
|
# The source itself, which, if guessing by mimetype or filename
|
148
152
|
# fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
|
149
153
|
# other hints.
|
154
|
+
# @param [Proc] fallback called if multiple lexers are detected.
|
155
|
+
# If omitted, Guesser::Ambiguous is raised.
|
150
156
|
#
|
151
|
-
# @see Lexer.
|
157
|
+
# @see Lexer.detect?
|
152
158
|
# @see Lexer.guesses
|
153
|
-
|
159
|
+
# @return [Class<Rouge::Lexer>]
|
160
|
+
def guess(info={}, &fallback)
|
154
161
|
lexers = guesses(info)
|
155
162
|
|
156
163
|
return Lexers::PlainText if lexers.empty?
|
157
164
|
return lexers[0] if lexers.size == 1
|
158
165
|
|
159
|
-
|
166
|
+
if fallback
|
167
|
+
fallback.call(lexers)
|
168
|
+
else
|
169
|
+
raise Guesser::Ambiguous.new(lexers)
|
170
|
+
end
|
160
171
|
end
|
161
172
|
|
162
173
|
def guess_by_mimetype(mt)
|
@@ -425,16 +436,14 @@ module Rouge
|
|
425
436
|
|
426
437
|
# @abstract
|
427
438
|
#
|
428
|
-
# Return
|
429
|
-
#
|
430
|
-
# implementation returns 0. Values under 0.5 will only be used
|
431
|
-
# to disambiguate filename or mimetype matches.
|
439
|
+
# Return true if there is an in-text indication (such as a shebang
|
440
|
+
# or DOCTYPE declaration) that this lexer should be used.
|
432
441
|
#
|
433
442
|
# @param [TextAnalyzer] text
|
434
443
|
# the text to be analyzed, with a couple of handy methods on it,
|
435
444
|
# like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
|
436
|
-
def self.
|
437
|
-
|
445
|
+
def self.detect?(text)
|
446
|
+
false
|
438
447
|
end
|
439
448
|
end
|
440
449
|
|
data/lib/rouge/lexers/awk.rb
CHANGED