rouge 2.2.1 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72)
  1. checksums.yaml +4 -4
  2. data/Gemfile +4 -4
  3. data/lib/rouge.rb +9 -7
  4. data/lib/rouge/cli.rb +36 -2
  5. data/lib/rouge/guessers/disambiguation.rb +88 -0
  6. data/lib/rouge/guessers/glob_mapping.rb +3 -6
  7. data/lib/rouge/guessers/modeline.rb +4 -3
  8. data/lib/rouge/guessers/source.rb +6 -16
  9. data/lib/rouge/guessers/util.rb +32 -0
  10. data/lib/rouge/lexer.rb +20 -11
  11. data/lib/rouge/lexers/apiblueprint.rb +0 -4
  12. data/lib/rouge/lexers/awk.rb +2 -2
  13. data/lib/rouge/lexers/biml.rb +2 -2
  14. data/lib/rouge/lexers/c.rb +0 -5
  15. data/lib/rouge/lexers/coffeescript.rb +2 -2
  16. data/lib/rouge/lexers/coq.rb +0 -4
  17. data/lib/rouge/lexers/diff.rb +4 -4
  18. data/lib/rouge/lexers/digdag.rb +0 -4
  19. data/lib/rouge/lexers/erb.rb +0 -4
  20. data/lib/rouge/lexers/erlang.rb +0 -4
  21. data/lib/rouge/lexers/factor.rb +2 -2
  22. data/lib/rouge/lexers/gherkin.rb +2 -2
  23. data/lib/rouge/lexers/go.rb +0 -4
  24. data/lib/rouge/lexers/groovy.rb +2 -2
  25. data/lib/rouge/lexers/haml.rb +0 -4
  26. data/lib/rouge/lexers/haskell.rb +4 -4
  27. data/lib/rouge/lexers/html.rb +3 -3
  28. data/lib/rouge/lexers/idlang.rb +0 -6
  29. data/lib/rouge/lexers/ini.rb +0 -4
  30. data/lib/rouge/lexers/io.rb +2 -2
  31. data/lib/rouge/lexers/javascript.rb +1 -1
  32. data/lib/rouge/lexers/julia.rb +2 -2
  33. data/lib/rouge/lexers/kotlin.rb +9 -14
  34. data/lib/rouge/lexers/lasso.rb +3 -6
  35. data/lib/rouge/lexers/llvm.rb +0 -4
  36. data/lib/rouge/lexers/lua.rb +2 -2
  37. data/lib/rouge/lexers/make.rb +0 -4
  38. data/lib/rouge/lexers/matlab.rb +0 -4
  39. data/lib/rouge/lexers/moonscript.rb +2 -2
  40. data/lib/rouge/lexers/mosel.rb +3 -3
  41. data/lib/rouge/lexers/nasm.rb +0 -5
  42. data/lib/rouge/lexers/objective_c.rb +0 -14
  43. data/lib/rouge/lexers/perl.rb +2 -3
  44. data/lib/rouge/lexers/php.rb +2 -4
  45. data/lib/rouge/lexers/plist.rb +0 -4
  46. data/lib/rouge/lexers/praat.rb +2 -2
  47. data/lib/rouge/lexers/prolog.rb +0 -5
  48. data/lib/rouge/lexers/properties.rb +0 -4
  49. data/lib/rouge/lexers/puppet.rb +3 -3
  50. data/lib/rouge/lexers/python.rb +2 -2
  51. data/lib/rouge/lexers/q.rb +0 -4
  52. data/lib/rouge/lexers/r.rb +2 -2
  53. data/lib/rouge/lexers/racket.rb +5 -4
  54. data/lib/rouge/lexers/ruby.rb +2 -2
  55. data/lib/rouge/lexers/rust.rb +2 -2
  56. data/lib/rouge/lexers/sed.rb +2 -2
  57. data/lib/rouge/lexers/shell.rb +2 -2
  58. data/lib/rouge/lexers/smarty.rb +0 -11
  59. data/lib/rouge/lexers/sml.rb +0 -4
  60. data/lib/rouge/lexers/tap.rb +0 -4
  61. data/lib/rouge/lexers/tcl.rb +4 -4
  62. data/lib/rouge/lexers/tex.rb +2 -2
  63. data/lib/rouge/lexers/toml.rb +0 -4
  64. data/lib/rouge/lexers/tulip.rb +2 -3
  65. data/lib/rouge/lexers/turtle.rb +5 -14
  66. data/lib/rouge/lexers/vue.rb +0 -4
  67. data/lib/rouge/lexers/wollok.rb +0 -4
  68. data/lib/rouge/lexers/xml.rb +4 -6
  69. data/lib/rouge/lexers/yaml.rb +2 -2
  70. data/lib/rouge/version.rb +1 -1
  71. data/rouge.gemspec +1 -0
  72. metadata +6 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 35a33896b2a596c94fc465bae5ef1e65874fd6b3
4
- data.tar.gz: 5b0c4180e0fc5748e8a35d139d3f37a8e0030d7e
3
+ metadata.gz: 105d563933ea57ff19cca03ab1fd98fc3b01ea3a
4
+ data.tar.gz: 72e6098b1419ff753ddb8bf748d6822d5fc5c1a5
5
5
  SHA512:
6
- metadata.gz: 9d461f6335a3b1df5312cc0c259d49a9a5a0e21f0752028898eebbb736913f39bf04a28a91578579ef84362280f1945eef8464a682c7c141a969817172b48de7
7
- data.tar.gz: 1f0dbb4f04334ea77858b6bcf3c3795abf82597cac61ef2a08ff08f526fe6fa47ba64a600715ec94c52bb59157ad9b48c73ad5fae600aed80a541c744270dd65
6
+ metadata.gz: 6346ab18ab19b7758e3f37eec75ddeac72a11edacd4416783dbfaccb33e3aadda9841373db11436a9fe4fbdd457a8bb2f4575f79b2c546e596b9d9cc829612e3
7
+ data.tar.gz: 9df1ed34e20dfbb2733ebef31d80792547913afdff931c556e73ff731875968233f12e1c62e91cfbaa84d4653b684aeec5e9a4778ff16fb3c5279585b974a846
data/Gemfile CHANGED
@@ -3,12 +3,12 @@ source 'http://rubygems.org'
3
3
  gemspec
4
4
 
5
5
  gem 'bundler', '~> 1.15'
6
- gem 'rake', '~> 12.0'
6
+ gem 'rake'
7
7
 
8
- gem 'minitest', '~> 4.0'
9
- gem 'wrong'
8
+ gem 'minitest', '>= 5.0'
9
+ gem 'minitest-power_assert'
10
10
 
11
- gem 'rubocop', '~> 0.49.1' if RUBY_VERSION >= '2.0.0'
11
+ gem 'rubocop', '~> 0.49.1'
12
12
 
13
13
  # don't try to install redcarpet under jruby
14
14
  gem 'redcarpet', :platforms => :ruby
@@ -41,13 +41,6 @@ load load_dir.join('rouge/util.rb')
41
41
  load load_dir.join('rouge/text_analyzer.rb')
42
42
  load load_dir.join('rouge/token.rb')
43
43
 
44
- load load_dir.join('rouge/guesser.rb')
45
- load load_dir.join('rouge/guessers/glob_mapping.rb')
46
- load load_dir.join('rouge/guessers/modeline.rb')
47
- load load_dir.join('rouge/guessers/filename.rb')
48
- load load_dir.join('rouge/guessers/mimetype.rb')
49
- load load_dir.join('rouge/guessers/source.rb')
50
-
51
44
  load load_dir.join('rouge/lexer.rb')
52
45
  load load_dir.join('rouge/regex_lexer.rb')
53
46
  load load_dir.join('rouge/template_lexer.rb')
@@ -57,6 +50,15 @@ Dir.glob(lexers_dir.join('*.rb')).each do |f|
57
50
  Rouge::Lexers.load_lexer(Pathname.new(f).relative_path_from(lexers_dir).to_s)
58
51
  end
59
52
 
53
+ load load_dir.join('rouge/guesser.rb')
54
+ load load_dir.join('rouge/guessers/util.rb')
55
+ load load_dir.join('rouge/guessers/glob_mapping.rb')
56
+ load load_dir.join('rouge/guessers/modeline.rb')
57
+ load load_dir.join('rouge/guessers/filename.rb')
58
+ load load_dir.join('rouge/guessers/mimetype.rb')
59
+ load load_dir.join('rouge/guessers/source.rb')
60
+ load load_dir.join('rouge/guessers/disambiguation.rb')
61
+
60
62
  load load_dir.join('rouge/formatter.rb')
61
63
  load load_dir.join('rouge/formatters/html.rb')
62
64
  load load_dir.join('rouge/formatters/html_table.rb')
@@ -13,9 +13,9 @@ module Rouge
13
13
  def file
14
14
  case input
15
15
  when '-'
16
- IO.new($stdin.fileno, 'r:utf-8')
16
+ IO.new($stdin.fileno, 'rt:bom|utf-8')
17
17
  when String
18
- File.new(input, 'r:utf-8')
18
+ File.new(input, 'rt:bom|utf-8')
19
19
  when ->(i){ i.respond_to? :read }
20
20
  input
21
21
  end
@@ -44,6 +44,7 @@ module Rouge
44
44
  yield %| help #{Help.desc}|
45
45
  yield %| style #{Style.desc}|
46
46
  yield %| list #{List.desc}|
47
+ yield %| guess #{Guess.desc}|
47
48
  yield %| version #{Version.desc}|
48
49
  yield %||
49
50
  yield %|See `rougify help <command>` for more info.|
@@ -97,6 +98,8 @@ module Rouge
97
98
  Style
98
99
  when 'list'
99
100
  List
101
+ when 'guess'
102
+ Guess
100
103
  end
101
104
  end
102
105
 
@@ -375,6 +378,37 @@ module Rouge
375
378
  end
376
379
  end
377
380
 
381
+ class Guess < CLI
382
+ def self.desc
383
+ "guess the languages of file"
384
+ end
385
+
386
+ def self.parse(args)
387
+ new(input_file: args.shift)
388
+ end
389
+
390
+ attr_reader :input_file, :input_source
391
+
392
+ def initialize(opts)
393
+ @input_file = opts[:input_file] || '-'
394
+ @input_source = FileReader.new(@input_file).read
395
+ end
396
+
397
+ def lexers
398
+ Lexer.guesses(
399
+ filename: input_file,
400
+ source: input_source,
401
+ )
402
+ end
403
+
404
+ def run
405
+ lexers.each do |l|
406
+ puts "{ tag: #{l.tag.inspect}, title: #{l.title.inspect}, desc: #{l.desc.inspect} }"
407
+ end
408
+ end
409
+ end
410
+
411
+
378
412
  private_class_method
379
413
  def self.normalize_syntax(argv)
380
414
  out = []
@@ -0,0 +1,88 @@
1
+ module Rouge
2
+ module Guessers
3
+ class Disambiguation < Guesser
4
+ include Util
5
+ include Lexers
6
+
7
+ def initialize(filename, source)
8
+ @filename = File.basename(filename)
9
+ @source = source
10
+ end
11
+
12
+ def filter(lexers)
13
+ return lexers if lexers.size == 1
14
+ return lexers if lexers.size == Lexer.all.size
15
+
16
+ @analyzer = TextAnalyzer.new(get_source(@source))
17
+
18
+ self.class.disambiguators.each do |disambiguator|
19
+ next unless disambiguator.match?(@filename)
20
+
21
+ filtered = disambiguator.decide!(self)
22
+ return filtered if filtered
23
+ end
24
+
25
+ return lexers
26
+ end
27
+
28
+ def contains?(text)
29
+ return @analyzer.include?(text)
30
+ end
31
+
32
+ def matches?(re)
33
+ return !!(@analyzer =~ re)
34
+ end
35
+
36
+ @disambiguators = []
37
+ def self.disambiguate(*patterns, &decider)
38
+ @disambiguators << Disambiguator.new(patterns, &decider)
39
+ end
40
+
41
+ def self.disambiguators
42
+ @disambiguators
43
+ end
44
+
45
+ class Disambiguator
46
+ include Util
47
+
48
+ def initialize(patterns, &decider)
49
+ @patterns = patterns
50
+ @decider = decider
51
+ end
52
+
53
+ def decide!(guesser)
54
+ out = guesser.instance_eval(&@decider)
55
+ case out
56
+ when Array then out
57
+ when nil then nil
58
+ else [out]
59
+ end
60
+ end
61
+
62
+ def match?(filename)
63
+ @patterns.any? { |p| test_glob(p, filename) }
64
+ end
65
+ end
66
+
67
+ disambiguate '*.pl' do
68
+ next Perl if contains?('my $')
69
+ next Prolog if contains?(':-')
70
+ next Prolog if matches?(/\A\w+(\(\w+\,\s*\w+\))*\./)
71
+ end
72
+
73
+ disambiguate '*.h' do
74
+ next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
75
+ next ObjectiveC if contains?('@"')
76
+
77
+ C
78
+ end
79
+
80
+ disambiguate '*.m' do
81
+ next ObjectiveC if matches?(/@(end|implementation|protocol|property)\b/)
82
+ next ObjectiveC if contains?('@"')
83
+
84
+ next Matlab if matches?(/^\s*?%/)
85
+ end
86
+ end
87
+ end
88
+ end
@@ -3,6 +3,8 @@ module Rouge
3
3
  # This class allows for custom behavior
4
4
  # with glob -> lexer name mappings
5
5
  class GlobMapping < Guesser
6
+ include Util
7
+
6
8
  def self.by_pairs(mapping, filename)
7
9
  glob_map = {}
8
10
  mapping.each do |(glob, lexer_name)|
@@ -29,18 +31,13 @@ module Rouge
29
31
 
30
32
  collect_best(lexers) do |lexer|
31
33
  score = (@glob_map[lexer.name] || []).map do |pattern|
32
- if test_pattern(pattern, basename)
34
+ if test_glob(pattern, basename)
33
35
  # specificity is better the fewer wildcards there are
34
36
  -pattern.scan(/[*?\[]/).size
35
37
  end
36
38
  end.compact.min
37
39
  end
38
40
  end
39
-
40
- private
41
- def test_pattern(pattern, path)
42
- File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
43
- end
44
41
  end
45
42
  end
46
43
  end
@@ -1,6 +1,8 @@
1
1
  module Rouge
2
2
  module Guessers
3
3
  class Modeline < Guesser
4
+ include Util
5
+
4
6
  # [jneen] regexen stolen from linguist
5
7
  EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
6
8
 
@@ -25,10 +27,9 @@ module Rouge
25
27
  # don't bother reading the stream if we've already decided
26
28
  return lexers if lexers.size == 1
27
29
 
28
- source_text = @source
29
- source_text = source_text.read if source_text.respond_to? :read
30
+ source_text = get_source(@source)
30
31
 
31
- lines = source_text.split(/\r?\n/)
32
+ lines = source_text.split(/\n/)
32
33
 
33
34
  search_space = (lines.first(@lines) + lines.last(@lines)).join("\n")
34
35
 
@@ -1,6 +1,8 @@
1
1
  module Rouge
2
2
  module Guessers
3
3
  class Source < Guesser
4
+ include Util
5
+
4
6
  attr_reader :source
5
7
  def initialize(source)
6
8
  @source = source
@@ -11,27 +13,15 @@ module Rouge
11
13
  # we've already filtered to 1
12
14
  return lexers if lexers.size == 1
13
15
 
14
- # If we're filtering against *all* lexers, we only use confident return
15
- # values from analyze_text. But if we've filtered down already, we can trust
16
- # the analysis more.
17
- threshold = lexers.size < 10 ? 0 : 0.5
18
-
19
- source_text = case @source
20
- when String
21
- @source
22
- when ->(s){ s.respond_to? :read }
23
- @source.read
24
- else
25
- raise 'invalid source'
26
- end
16
+ source_text = get_source(@source)
27
17
 
28
18
  Lexer.assert_utf8!(source_text)
29
19
 
30
20
  source_text = TextAnalyzer.new(source_text)
31
21
 
32
- collect_best(lexers, threshold: threshold) do |lexer|
33
- next unless lexer.methods(false).include? :analyze_text
34
- lexer.analyze_text(source_text)
22
+ collect_best(lexers) do |lexer|
23
+ next unless lexer.methods(false).include? :detect?
24
+ lexer.detect?(source_text) ? 1 : nil
35
25
  end
36
26
  end
37
27
  end
@@ -0,0 +1,32 @@
1
+ module Rouge
2
+ module Guessers
3
+ module Util
4
+ module SourceNormalizer
5
+ UTF8_BOM = "\xEF\xBB\xBF"
6
+ UTF8_BOM_RE = /\A#{UTF8_BOM}/
7
+
8
+ # @param [String,nil] source
9
+ # @return [String,nil]
10
+ def self.normalize(source)
11
+ source.sub(UTF8_BOM_RE, '').gsub(/\r\n/, "\n")
12
+ end
13
+ end
14
+
15
+ def test_glob(pattern, path)
16
+ File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
17
+ end
18
+
19
+ # @param [String,IO] source
20
+ # @return [String]
21
+ def get_source(source)
22
+ if source.respond_to?(:to_str)
23
+ SourceNormalizer.normalize(source.to_str)
24
+ elsif source.respond_to?(:read)
25
+ SourceNormalizer.normalize(source.read)
26
+ else
27
+ raise ArgumentError, "Invalid source: #{source.inspect}"
28
+ end
29
+ end
30
+ end
31
+ end
32
+ end
@@ -22,7 +22,9 @@ module Rouge
22
22
  new(opts).lex(stream, &b)
23
23
  end
24
24
 
25
- # Given a string, return the correct lexer class.
25
+ # Given a name in string, return the correct lexer class.
26
+ # @param [String] name
27
+ # @return [Class<Rouge::Lexer>,nil]
26
28
  def find(name)
27
29
  registry[name.to_s]
28
30
  end
@@ -42,6 +44,7 @@ module Rouge
42
44
  # markdown lexer for highlighting internal code blocks.
43
45
  #
44
46
  def find_fancy(str, code=nil, additional_options={})
47
+
45
48
  if str && !str.include?('?') && str != 'guess'
46
49
  lexer_class = find(str)
47
50
  return lexer_class && lexer_class.new(additional_options)
@@ -109,7 +112,7 @@ module Rouge
109
112
  def demo(arg=:absent)
110
113
  return @demo = arg unless arg == :absent
111
114
 
112
- @demo = File.read(demo_file, encoding: 'utf-8')
115
+ @demo = File.read(demo_file, mode: 'rt:bom|utf-8')
113
116
  end
114
117
 
115
118
  # @return a list of all lexers.
@@ -133,6 +136,7 @@ module Rouge
133
136
  guessers << Guessers::Filename.new(filename) if filename
134
137
  guessers << Guessers::Modeline.new(source) if source
135
138
  guessers << Guessers::Source.new(source) if source
139
+ guessers << Guessers::Disambiguation.new(filename, source) if source && filename
136
140
 
137
141
  Guesser.guess(guessers, Lexer.all)
138
142
  end
@@ -147,16 +151,23 @@ module Rouge
147
151
  # The source itself, which, if guessing by mimetype or filename
148
152
  # fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
149
153
  # other hints.
154
+ # @param [Proc] fallback called if multiple lexers are detected.
155
+ # If omitted, Guesser::Ambiguous is raised.
150
156
  #
151
- # @see Lexer.analyze_text
157
+ # @see Lexer.detect?
152
158
  # @see Lexer.guesses
153
- def guess(info={})
159
+ # @return [Class<Rouge::Lexer>]
160
+ def guess(info={}, &fallback)
154
161
  lexers = guesses(info)
155
162
 
156
163
  return Lexers::PlainText if lexers.empty?
157
164
  return lexers[0] if lexers.size == 1
158
165
 
159
- raise Guesser::Ambiguous.new(lexers)
166
+ if fallback
167
+ fallback.call(lexers)
168
+ else
169
+ raise Guesser::Ambiguous.new(lexers)
170
+ end
160
171
  end
161
172
 
162
173
  def guess_by_mimetype(mt)
@@ -425,16 +436,14 @@ module Rouge
425
436
 
426
437
  # @abstract
427
438
  #
428
- # Return a number between 0 and 1 indicating the likelihood that
429
- # the text given should be lexed with this lexer. The default
430
- # implementation returns 0. Values under 0.5 will only be used
431
- # to disambiguate filename or mimetype matches.
439
+ # Return true if there is an in-text indication (such as a shebang
440
+ # or DOCTYPE declaration) that this lexer should be used.
432
441
  #
433
442
  # @param [TextAnalyzer] text
434
443
  # the text to be analyzed, with a couple of handy methods on it,
435
444
  # like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
436
- def self.analyze_text(text)
437
- 0
445
+ def self.detect?(text)
446
+ false
438
447
  end
439
448
  end
440
449
 
@@ -11,10 +11,6 @@ module Rouge
11
11
  filenames '*.apib'
12
12
  mimetypes 'text/vnd.apiblueprint'
13
13
 
14
- def self.analyze_text(text)
15
- return 1 if text.start_with?('FORMAT: 1A\n')
16
- end
17
-
18
14
  prepend :root do
19
15
  # Metadata
20
16
  rule(/(\S+)(:\s*)(.*)$/) do
@@ -10,8 +10,8 @@ module Rouge
10
10
  filenames '*.awk'
11
11
  mimetypes 'application/x-awk'
12
12
 
13
- def self.analyze_text(text)
14
- return 1 if text.shebang?('awk')
13
+ def self.detect?(text)
14
+ return true if text.shebang?('awk')
15
15
  end
16
16
 
17
17
  id = /[$a-zA-Z_][a-zA-Z0-9_]*/
@@ -8,8 +8,8 @@ module Rouge
8
8
  tag 'biml'
9
9
  filenames '*.biml'
10
10
 
11
- def self.analyze_text(text)
12
- return 1 if text =~ /<\s*Biml\b/
11
+ def self.detect?(text)
12
+ return true if text =~ /<\s*Biml\b/
13
13
  end
14
14
 
15
15
  prepend :root do