rouge 2.2.1 → 3.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/Gemfile +4 -4
  3. data/lib/rouge.rb +9 -7
  4. data/lib/rouge/cli.rb +36 -2
  5. data/lib/rouge/guessers/disambiguation.rb +88 -0
  6. data/lib/rouge/guessers/glob_mapping.rb +3 -6
  7. data/lib/rouge/guessers/modeline.rb +4 -3
  8. data/lib/rouge/guessers/source.rb +6 -16
  9. data/lib/rouge/guessers/util.rb +32 -0
  10. data/lib/rouge/lexer.rb +20 -11
  11. data/lib/rouge/lexers/apiblueprint.rb +0 -4
  12. data/lib/rouge/lexers/awk.rb +2 -2
  13. data/lib/rouge/lexers/biml.rb +2 -2
  14. data/lib/rouge/lexers/c.rb +0 -5
  15. data/lib/rouge/lexers/coffeescript.rb +2 -2
  16. data/lib/rouge/lexers/coq.rb +0 -4
  17. data/lib/rouge/lexers/diff.rb +4 -4
  18. data/lib/rouge/lexers/digdag.rb +0 -4
  19. data/lib/rouge/lexers/erb.rb +0 -4
  20. data/lib/rouge/lexers/erlang.rb +0 -4
  21. data/lib/rouge/lexers/factor.rb +2 -2
  22. data/lib/rouge/lexers/gherkin.rb +2 -2
  23. data/lib/rouge/lexers/go.rb +0 -4
  24. data/lib/rouge/lexers/groovy.rb +2 -2
  25. data/lib/rouge/lexers/haml.rb +0 -4
  26. data/lib/rouge/lexers/haskell.rb +4 -4
  27. data/lib/rouge/lexers/html.rb +3 -3
  28. data/lib/rouge/lexers/idlang.rb +0 -6
  29. data/lib/rouge/lexers/ini.rb +0 -4
  30. data/lib/rouge/lexers/io.rb +2 -2
  31. data/lib/rouge/lexers/javascript.rb +1 -1
  32. data/lib/rouge/lexers/julia.rb +2 -2
  33. data/lib/rouge/lexers/kotlin.rb +9 -14
  34. data/lib/rouge/lexers/lasso.rb +3 -6
  35. data/lib/rouge/lexers/llvm.rb +0 -4
  36. data/lib/rouge/lexers/lua.rb +2 -2
  37. data/lib/rouge/lexers/make.rb +0 -4
  38. data/lib/rouge/lexers/matlab.rb +0 -4
  39. data/lib/rouge/lexers/moonscript.rb +2 -2
  40. data/lib/rouge/lexers/mosel.rb +3 -3
  41. data/lib/rouge/lexers/nasm.rb +0 -5
  42. data/lib/rouge/lexers/objective_c.rb +0 -14
  43. data/lib/rouge/lexers/perl.rb +2 -3
  44. data/lib/rouge/lexers/php.rb +2 -4
  45. data/lib/rouge/lexers/plist.rb +0 -4
  46. data/lib/rouge/lexers/praat.rb +2 -2
  47. data/lib/rouge/lexers/prolog.rb +0 -5
  48. data/lib/rouge/lexers/properties.rb +0 -4
  49. data/lib/rouge/lexers/puppet.rb +3 -3
  50. data/lib/rouge/lexers/python.rb +2 -2
  51. data/lib/rouge/lexers/q.rb +0 -4
  52. data/lib/rouge/lexers/r.rb +2 -2
  53. data/lib/rouge/lexers/racket.rb +5 -4
  54. data/lib/rouge/lexers/ruby.rb +2 -2
  55. data/lib/rouge/lexers/rust.rb +2 -2
  56. data/lib/rouge/lexers/sed.rb +2 -2
  57. data/lib/rouge/lexers/shell.rb +2 -2
  58. data/lib/rouge/lexers/smarty.rb +0 -11
  59. data/lib/rouge/lexers/sml.rb +0 -4
  60. data/lib/rouge/lexers/tap.rb +0 -4
  61. data/lib/rouge/lexers/tcl.rb +4 -4
  62. data/lib/rouge/lexers/tex.rb +2 -2
  63. data/lib/rouge/lexers/toml.rb +0 -4
  64. data/lib/rouge/lexers/tulip.rb +2 -3
  65. data/lib/rouge/lexers/turtle.rb +5 -14
  66. data/lib/rouge/lexers/vue.rb +0 -4
  67. data/lib/rouge/lexers/wollok.rb +0 -4
  68. data/lib/rouge/lexers/xml.rb +4 -6
  69. data/lib/rouge/lexers/yaml.rb +2 -2
  70. data/lib/rouge/version.rb +1 -1
  71. data/rouge.gemspec +1 -0
  72. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35a33896b2a596c94fc465bae5ef1e65874fd6b3
4
- data.tar.gz: 5b0c4180e0fc5748e8a35d139d3f37a8e0030d7e
3
+ metadata.gz: 105d563933ea57ff19cca03ab1fd98fc3b01ea3a
4
+ data.tar.gz: 72e6098b1419ff753ddb8bf748d6822d5fc5c1a5
5
5
  SHA512:
6
- metadata.gz: 9d461f6335a3b1df5312cc0c259d49a9a5a0e21f0752028898eebbb736913f39bf04a28a91578579ef84362280f1945eef8464a682c7c141a969817172b48de7
7
- data.tar.gz: 1f0dbb4f04334ea77858b6bcf3c3795abf82597cac61ef2a08ff08f526fe6fa47ba64a600715ec94c52bb59157ad9b48c73ad5fae600aed80a541c744270dd65
6
+ metadata.gz: 6346ab18ab19b7758e3f37eec75ddeac72a11edacd4416783dbfaccb33e3aadda9841373db11436a9fe4fbdd457a8bb2f4575f79b2c546e596b9d9cc829612e3
7
+ data.tar.gz: 9df1ed34e20dfbb2733ebef31d80792547913afdff931c556e73ff731875968233f12e1c62e91cfbaa84d4653b684aeec5e9a4778ff16fb3c5279585b974a846
data/Gemfile CHANGED
@@ -3,12 +3,12 @@ source 'http://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  gem 'bundler', '~> 1.15'
6
- gem 'rake', '~> 12.0'
6
+ gem 'rake'
7
7
 
8
- gem 'minitest', '~> 4.0'
9
- gem 'wrong'
8
+ gem 'minitest', '>= 5.0'
9
+ gem 'minitest-power_assert'
10
10
 
11
- gem 'rubocop', '~> 0.49.1' if RUBY_VERSION >= '2.0.0'
11
+ gem 'rubocop', '~> 0.49.1'
12
12
 
13
13
  # don't try to install redcarpet under jruby
14
14
  gem 'redcarpet', :platforms => :ruby
data/lib/rouge.rb CHANGED
@@ -41,13 +41,6 @@ load load_dir.join('rouge/util.rb')
41
41
  load load_dir.join('rouge/text_analyzer.rb')
42
42
  load load_dir.join('rouge/token.rb')
43
43
 
44
- load load_dir.join('rouge/guesser.rb')
45
- load load_dir.join('rouge/guessers/glob_mapping.rb')
46
- load load_dir.join('rouge/guessers/modeline.rb')
47
- load load_dir.join('rouge/guessers/filename.rb')
48
- load load_dir.join('rouge/guessers/mimetype.rb')
49
- load load_dir.join('rouge/guessers/source.rb')
50
-
51
44
  load load_dir.join('rouge/lexer.rb')
52
45
  load load_dir.join('rouge/regex_lexer.rb')
53
46
  load load_dir.join('rouge/template_lexer.rb')
@@ -57,6 +50,15 @@ Dir.glob(lexers_dir.join('*.rb')).each do |f|
57
50
  Rouge::Lexers.load_lexer(Pathname.new(f).relative_path_from(lexers_dir).to_s)
58
51
  end
59
52
 
53
+ load load_dir.join('rouge/guesser.rb')
54
+ load load_dir.join('rouge/guessers/util.rb')
55
+ load load_dir.join('rouge/guessers/glob_mapping.rb')
56
+ load load_dir.join('rouge/guessers/modeline.rb')
57
+ load load_dir.join('rouge/guessers/filename.rb')
58
+ load load_dir.join('rouge/guessers/mimetype.rb')
59
+ load load_dir.join('rouge/guessers/source.rb')
60
+ load load_dir.join('rouge/guessers/disambiguation.rb')
61
+
60
62
  load load_dir.join('rouge/formatter.rb')
61
63
  load load_dir.join('rouge/formatters/html.rb')
62
64
  load load_dir.join('rouge/formatters/html_table.rb')
data/lib/rouge/cli.rb CHANGED
@@ -13,9 +13,9 @@ module Rouge
13
13
  def file
14
14
  case input
15
15
  when '-'
16
- IO.new($stdin.fileno, 'r:utf-8')
16
+ IO.new($stdin.fileno, 'rt:bom|utf-8')
17
17
  when String
18
- File.new(input, 'r:utf-8')
18
+ File.new(input, 'rt:bom|utf-8')
19
19
  when ->(i){ i.respond_to? :read }
20
20
  input
21
21
  end
@@ -44,6 +44,7 @@ module Rouge
44
44
  yield %| help #{Help.desc}|
45
45
  yield %| style #{Style.desc}|
46
46
  yield %| list #{List.desc}|
47
+ yield %| guess #{Guess.desc}|
47
48
  yield %| version #{Version.desc}|
48
49
  yield %||
49
50
  yield %|See `rougify help <command>` for more info.|
@@ -97,6 +98,8 @@ module Rouge
97
98
  Style
98
99
  when 'list'
99
100
  List
101
+ when 'guess'
102
+ Guess
100
103
  end
101
104
  end
102
105
 
@@ -375,6 +378,37 @@ module Rouge
375
378
  end
376
379
  end
377
380
 
381
+ class Guess < CLI
382
+ def self.desc
383
+ "guess the languages of file"
384
+ end
385
+
386
+ def self.parse(args)
387
+ new(input_file: args.shift)
388
+ end
389
+
390
+ attr_reader :input_file, :input_source
391
+
392
+ def initialize(opts)
393
+ @input_file = opts[:input_file] || '-'
394
+ @input_source = FileReader.new(@input_file).read
395
+ end
396
+
397
+ def lexers
398
+ Lexer.guesses(
399
+ filename: input_file,
400
+ source: input_source,
401
+ )
402
+ end
403
+
404
+ def run
405
+ lexers.each do |l|
406
+ puts "{ tag: #{l.tag.inspect}, title: #{l.title.inspect}, desc: #{l.desc.inspect} }"
407
+ end
408
+ end
409
+ end
410
+
411
+
378
412
  private_class_method
379
413
  def self.normalize_syntax(argv)
380
414
  out = []
data/lib/rouge/guessers/disambiguation.rb ADDED
@@ -0,0 +1,88 @@
1
+ module Rouge
2
+ module Guessers
3
+ class Disambiguation < Guesser
4
+ include Util
5
+ include Lexers
6
+
7
+ def initialize(filename, source)
8
+ @filename = File.basename(filename)
9
+ @source = source
10
+ end
11
+
12
+ def filter(lexers)
13
+ return lexers if lexers.size == 1
14
+ return lexers if lexers.size == Lexer.all.size
15
+
16
+ @analyzer = TextAnalyzer.new(get_source(@source))
17
+
18
+ self.class.disambiguators.each do |disambiguator|
19
+ next unless disambiguator.match?(@filename)
20
+
21
+ filtered = disambiguator.decide!(self)
22
+ return filtered if filtered
23
+ end
24
+
25
+ return lexers
26
+ end
27
+
28
+ def contains?(text)
29
+ return @analyzer.include?(text)
30
+ end
31
+
32
+ def matches?(re)
33
+ return !!(@analyzer =~ re)
34
+ end
35
+
36
+ @disambiguators = []
37
+ def self.disambiguate(*patterns, &decider)
38
+ @disambiguators << Disambiguator.new(patterns, &decider)
39
+ end
40
+
41
+ def self.disambiguators
42
+ @disambiguators
43
+ end
44
+
45
+ class Disambiguator
46
+ include Util
47
+
48
+ def initialize(patterns, &decider)
49
+ @patterns = patterns
50
+ @decider = decider
51
+ end
52
+
53
+ def decide!(guesser)
54
+ out = guesser.instance_eval(&@decider)
55
+ case out
56
+ when Array then out
57
+ when nil then nil
58
+ else [out]
59
+ end
60
+ end
61
+
62
+ def match?(filename)
63
+ @patterns.any? { |p| test_glob(p, filename) }
64
+ end
65
+ end
66
+
67
+ disambiguate '*.pl' do
68
+ next Perl if contains?('my $')
69
+ next Prolog if contains?(':-')
70
+ next Prolog if matches?(/\A\w+(\(\w+\,\s*\w+\))*\./)
71
+ end
72
+
73
+ disambiguate '*.h' do
74
+ next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
75
+ next ObjectiveC if contains?('@"')
76
+
77
+ C
78
+ end
79
+
80
+ disambiguate '*.m' do
81
+ next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
82
+ next ObjectiveC if contains?('@"')
83
+
84
+ next Matlab if matches?(/^\s*?%/)
85
+ end
86
+ end
87
+ end
88
+ end
data/lib/rouge/guessers/glob_mapping.rb CHANGED
@@ -3,6 +3,8 @@ module Rouge
3
3
  # This class allows for custom behavior
4
4
  # with glob -> lexer name mappings
5
5
  class GlobMapping < Guesser
6
+ include Util
7
+
6
8
  def self.by_pairs(mapping, filename)
7
9
  glob_map = {}
8
10
  mapping.each do |(glob, lexer_name)|
@@ -29,18 +31,13 @@ module Rouge
29
31
 
30
32
  collect_best(lexers) do |lexer|
31
33
  score = (@glob_map[lexer.name] || []).map do |pattern|
32
- if test_pattern(pattern, basename)
34
+ if test_glob(pattern, basename)
33
35
  # specificity is better the fewer wildcards there are
34
36
  -pattern.scan(/[*?\[]/).size
35
37
  end
36
38
  end.compact.min
37
39
  end
38
40
  end
39
-
40
- private
41
- def test_pattern(pattern, path)
42
- File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
43
- end
44
41
  end
45
42
  end
46
43
  end
data/lib/rouge/guessers/modeline.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  module Rouge
2
2
  module Guessers
3
3
  class Modeline < Guesser
4
+ include Util
5
+
4
6
  # [jneen] regexen stolen from linguist
5
7
  EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
6
8
 
@@ -25,10 +27,9 @@ module Rouge
25
27
  # don't bother reading the stream if we've already decided
26
28
  return lexers if lexers.size == 1
27
29
 
28
- source_text = @source
29
- source_text = source_text.read if source_text.respond_to? :read
30
+ source_text = get_source(@source)
30
31
 
31
- lines = source_text.split(/\r?\n/)
32
+ lines = source_text.split(/\n/)
32
33
 
33
34
  search_space = (lines.first(@lines) + lines.last(@lines)).join("\n")
34
35
 
data/lib/rouge/guessers/source.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  module Rouge
2
2
  module Guessers
3
3
  class Source < Guesser
4
+ include Util
5
+
4
6
  attr_reader :source
5
7
  def initialize(source)
6
8
  @source = source
@@ -11,27 +13,15 @@ module Rouge
11
13
  # we've already filtered to 1
12
14
  return lexers if lexers.size == 1
13
15
 
14
- # If we're filtering against *all* lexers, we only use confident return
15
- # values from analyze_text. But if we've filtered down already, we can trust
16
- # the analysis more.
17
- threshold = lexers.size < 10 ? 0 : 0.5
18
-
19
- source_text = case @source
20
- when String
21
- @source
22
- when ->(s){ s.respond_to? :read }
23
- @source.read
24
- else
25
- raise 'invalid source'
26
- end
16
+ source_text = get_source(@source)
27
17
 
28
18
  Lexer.assert_utf8!(source_text)
29
19
 
30
20
  source_text = TextAnalyzer.new(source_text)
31
21
 
32
- collect_best(lexers, threshold: threshold) do |lexer|
33
- next unless lexer.methods(false).include? :analyze_text
34
- lexer.analyze_text(source_text)
22
+ collect_best(lexers) do |lexer|
23
+ next unless lexer.methods(false).include? :detect?
24
+ lexer.detect?(source_text) ? 1 : nil
35
25
  end
36
26
  end
37
27
  end
data/lib/rouge/guessers/util.rb ADDED
@@ -0,0 +1,32 @@
1
+ module Rouge
2
+ module Guessers
3
+ module Util
4
+ module SourceNormalizer
5
+ UTF8_BOM = "\xEF\xBB\xBF"
6
+ UTF8_BOM_RE = /\A#{UTF8_BOM}/
7
+
8
+ # @param [String,nil] source
9
+ # @return [String,nil]
10
+ def self.normalize(source)
11
+ source.sub(UTF8_BOM_RE, '').gsub(/\r\n/, "\n")
12
+ end
13
+ end
14
+
15
+ def test_glob(pattern, path)
16
+ File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
17
+ end
18
+
19
+ # @param [String,IO] source
20
+ # @return [String]
21
+ def get_source(source)
22
+ if source.respond_to?(:to_str)
23
+ SourceNormalizer.normalize(source.to_str)
24
+ elsif source.respond_to?(:read)
25
+ SourceNormalizer.normalize(source.read)
26
+ else
27
+ raise ArgumentError, "Invalid source: #{source.inspect}"
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
data/lib/rouge/lexer.rb CHANGED
@@ -22,7 +22,9 @@ module Rouge
22
22
  new(opts).lex(stream, &b)
23
23
  end
24
24
 
25
- # Given a string, return the correct lexer class.
25
+ # Given a name in string, return the correct lexer class.
26
+ # @param [String] name
27
+ # @return [Class<Rouge::Lexer>,nil]
26
28
  def find(name)
27
29
  registry[name.to_s]
28
30
  end
@@ -42,6 +44,7 @@ module Rouge
42
44
  # markdown lexer for highlighting internal code blocks.
43
45
  #
44
46
  def find_fancy(str, code=nil, additional_options={})
47
+
45
48
  if str && !str.include?('?') && str != 'guess'
46
49
  lexer_class = find(str)
47
50
  return lexer_class && lexer_class.new(additional_options)
@@ -109,7 +112,7 @@ module Rouge
109
112
  def demo(arg=:absent)
110
113
  return @demo = arg unless arg == :absent
111
114
 
112
- @demo = File.read(demo_file, encoding: 'utf-8')
115
+ @demo = File.read(demo_file, mode: 'rt:bom|utf-8')
113
116
  end
114
117
 
115
118
  # @return a list of all lexers.
@@ -133,6 +136,7 @@ module Rouge
133
136
  guessers << Guessers::Filename.new(filename) if filename
134
137
  guessers << Guessers::Modeline.new(source) if source
135
138
  guessers << Guessers::Source.new(source) if source
139
+ guessers << Guessers::Disambiguation.new(filename, source) if source && filename
136
140
 
137
141
  Guesser.guess(guessers, Lexer.all)
138
142
  end
@@ -147,16 +151,23 @@ module Rouge
147
151
  # The source itself, which, if guessing by mimetype or filename
148
152
  # fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
149
153
  # other hints.
154
+ # @param [Proc] fallback called if multiple lexers are detected.
155
+ # If omitted, Guesser::Ambiguous is raised.
150
156
  #
151
- # @see Lexer.analyze_text
157
+ # @see Lexer.detect?
152
158
  # @see Lexer.guesses
153
- def guess(info={})
159
+ # @return [Class<Rouge::Lexer>]
160
+ def guess(info={}, &fallback)
154
161
  lexers = guesses(info)
155
162
 
156
163
  return Lexers::PlainText if lexers.empty?
157
164
  return lexers[0] if lexers.size == 1
158
165
 
159
- raise Guesser::Ambiguous.new(lexers)
166
+ if fallback
167
+ fallback.call(lexers)
168
+ else
169
+ raise Guesser::Ambiguous.new(lexers)
170
+ end
160
171
  end
161
172
 
162
173
  def guess_by_mimetype(mt)
@@ -425,16 +436,14 @@ module Rouge
425
436
 
426
437
  # @abstract
427
438
  #
428
- # Return a number between 0 and 1 indicating the likelihood that
429
- # the text given should be lexed with this lexer. The default
430
- # implementation returns 0. Values under 0.5 will only be used
431
- # to disambiguate filename or mimetype matches.
439
+ # Return true if there is an in-text indication (such as a shebang
440
+ # or DOCTYPE declaration) that this lexer should be used.
432
441
  #
433
442
  # @param [TextAnalyzer] text
434
443
  # the text to be analyzed, with a couple of handy methods on it,
435
444
  # like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
436
- def self.analyze_text(text)
437
- 0
445
+ def self.detect?(text)
446
+ false
438
447
  end
439
448
  end
440
449
 
data/lib/rouge/lexers/apiblueprint.rb CHANGED
@@ -11,10 +11,6 @@ module Rouge
11
11
  filenames '*.apib'
12
12
  mimetypes 'text/vnd.apiblueprint'
13
13
 
14
- def self.analyze_text(text)
15
- return 1 if text.start_with?('FORMAT: 1A\n')
16
- end
17
-
18
14
  prepend :root do
19
15
  # Metadata
20
16
  rule(/(\S+)(:\s*)(.*)$/) do
data/lib/rouge/lexers/awk.rb CHANGED
@@ -10,8 +10,8 @@ module Rouge
10
10
  filenames '*.awk'
11
11
  mimetypes 'application/x-awk'
12
12
 
13
- def self.analyze_text(text)
14
- return 1 if text.shebang?('awk')
13
+ def self.detect?(text)
14
+ return true if text.shebang?('awk')
15
15
  end
16
16
 
17
17
  id = /[$a-zA-Z_][a-zA-Z0-9_]*/
data/lib/rouge/lexers/biml.rb CHANGED
@@ -8,8 +8,8 @@ module Rouge
8
8
  tag 'biml'
9
9
  filenames '*.biml'
10
10
 
11
- def self.analyze_text(text)
12
- return 1 if text =~ /<\s*Biml\b/
11
+ def self.detect?(text)
12
+ return true if text =~ /<\s*Biml\b/
13
13
  end
14
14
 
15
15
  prepend :root do