rouge 1.11.0 → 1.11.1

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
-   metadata.gz: 0d52689800e9b41571b3c09b3412f672a0e96de8
-   data.tar.gz: f0b4629db72f7122e8ff0760487f9193edec8f9c
+   metadata.gz: 0efe4d1a5c6a98c1b4a532ef1b703ee60ceb6a1c
+   data.tar.gz: 6fa2c0ac804354f3e058137869ab151b922c0864
  SHA512:
-   metadata.gz: cf7d9859a295ad5e9e269eecf77e3477d3bf93b062f19e0095b81135e66265375ddbfb1c986a155a4ed84921366ce2914e0ee900040c24f58d488921417a8928
-   data.tar.gz: b4de6ef45285cf75dac6d9558341955e5c29b30f06ea6a7584c99b1febde9a63915fa2a5442839fa3c7cc51da57dd51a2152adcfb98f09e83abba0524e11c359
+   metadata.gz: ab710b89b387ac5675cd31467023eff9600fd7131b51966fe72394569e1ec9f92f3bb70b8448a51e11de12bd83b9217b2b364d120ec5faa4e2a46626371d591d
+   data.tar.gz: 902c5c1d3c88a160b8e140b6f8140a53627e10334ba3e4b628c44da1c057406cc374e3e21f8cbb6b74cba04bcd5e58c54cc6714fa8fe83484ae2d0d47f73fbcd
@@ -36,6 +36,13 @@ load load_dir.join('rouge/util.rb')
  load load_dir.join('rouge/text_analyzer.rb')
  load load_dir.join('rouge/token.rb')

+ load load_dir.join('rouge/guesser.rb')
+ load load_dir.join('rouge/guessers/glob_mapping.rb')
+ load load_dir.join('rouge/guessers/modeline.rb')
+ load load_dir.join('rouge/guessers/filename.rb')
+ load load_dir.join('rouge/guessers/mimetype.rb')
+ load load_dir.join('rouge/guessers/source.rb')
+
  load load_dir.join('rouge/lexer.rb')
  load load_dir.join('rouge/regex_lexer.rb')
  load load_dir.join('rouge/template_lexer.rb')
@@ -58,3 +65,4 @@ load load_dir.join('rouge/themes/github.rb')
  load load_dir.join('rouge/themes/monokai.rb')
  load load_dir.join('rouge/themes/molokai.rb')
  load load_dir.join('rouge/themes/monokai_sublime.rb')
+ load load_dir.join('rouge/themes/gruvbox.rb')
@@ -13,9 +13,9 @@ module Rouge
    def file
      case input
      when '-'
-       $stdin
+       IO.new($stdin.fileno, 'r:utf-8')
      when String
-       File.new(input)
+       File.new(input, 'r:utf-8')
      when ->(i){ i.respond_to? :read }
        input
      end
@@ -23,7 +23,7 @@ module Rouge

    def read
      @read ||= begin
-       File.read(file, encoding: 'utf-8')
+       file.read
      rescue => e
        $stderr.puts "unable to open #{input}: #{e.message}"
        exit 1
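The two CLI hunks above stop relying on the process locale: standard input is rewrapped as `IO.new($stdin.fileno, 'r:utf-8')` and files are opened with an explicit `'r:utf-8'` mode, so the later `file.read` yields UTF-8 strings regardless of `Encoding.default_external`. A minimal sketch of the same idea (the scratch file name is made up):

    File.write('example.txt', "héllo\n")      # hypothetical scratch file
    io = File.new('example.txt', 'r:utf-8')   # same form as the CLI change above
    puts io.read.encoding                     # => UTF-8
    io.close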
@@ -0,0 +1,18 @@
+ component accessors="true" {
+
+   property type="string" name="firstName" default="";
+   property string username;
+
+   function init(){
+     return this;
+   }
+
+   public any function submitOrder( required product, coupon="", boolean results=true ){
+
+     var foo = function( required string baz, x=true, y=false ){
+       return "bar!";
+     };
+
+     return foo;
+   }
+ }
@@ -0,0 +1,3 @@
+ fun main(args: Array<String>) {
+   println("Hello, world!")
+ }
@@ -0,0 +1,14 @@
+ program FizzBuzz(output);
+ var
+   i: Integer;
+ begin
+   for i := 1 to 100 do
+     if i mod 15 = 0 then
+       WriteLn('FizzBuzz')
+     else if i mod 3 = 0 then
+       WriteLn('Fizz')
+     else if i mod 5 = 0 then
+       WriteLn('Buzz')
+     else
+       WriteLn(i)
+ end.
@@ -0,0 +1,46 @@
+ module Rouge
+   class Guesser
+     def self.guess(guessers, lexers)
+       original_size = lexers.size
+
+       guessers.each do |g|
+         new_lexers = case g
+         when Guesser then g.filter(lexers)
+         when proc { |x| x.respond_to? :call } then g.call(lexers)
+         else raise "bad guesser: #{g}"
+         end
+
+         lexers = new_lexers && new_lexers.any? ? new_lexers : lexers
+       end
+
+       # if we haven't filtered the input at *all*,
+       # then we have no idea what language it is,
+       # so we bail and return [].
+       lexers.size < original_size ? lexers : []
+     end
+
+     def collect_best(lexers, opts={}, &scorer)
+       best = []
+       best_score = opts[:threshold]
+
+       lexers.each do |lexer|
+         score = scorer.call(lexer)
+
+         next if score.nil?
+
+         if best_score.nil? || score > best_score
+           best_score = score
+           best = [lexer]
+         elsif score == best_score
+           best << lexer
+         end
+       end
+
+       best
+     end
+
+     def filter(lexers)
+       raise 'abstract'
+     end
+   end
+ end
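`Guesser.guess` threads the candidate list through each guesser in turn, keeps a guesser's output only when it is non-empty, and returns `[]` when nothing narrowed the original list at all. Both `Guesser` subclasses and plain callables are accepted. A rough usage sketch, assuming rouge 1.11.1+ is installed (the lambda and filename are illustrative):

    require 'rouge'

    guessers = [
      Rouge::Guessers::Filename.new('nginx.conf'),            # glob-based filtering
      ->(lexers) { lexers.select { |l| l.tag == 'nginx' } }    # any #call-able works too
    ]

    candidates = Rouge::Guesser.guess(guessers, Rouge::Lexer.all)
    puts candidates.map(&:tag).inspect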
@@ -0,0 +1,25 @@
+ module Rouge
+   module Guessers
+     class Filename < Guesser
+       attr_reader :fname
+       def initialize(filename)
+         @filename = filename
+       end
+
+       # returns a list of lexers that match the given filename with
+       # equal specificity (i.e. number of wildcards in the pattern).
+       # This helps disambiguate between, e.g. the Nginx lexer, which
+       # matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
+       # In this case, nginx will win because the pattern has no wildcards,
+       # while `*.conf` has one.
+       def filter(lexers)
+         mapping = {}
+         lexers.each do |lexer|
+           mapping[lexer.name] = lexer.filenames || []
+         end
+
+         GlobMapping.new(mapping, @filename).filter(lexers)
+       end
+     end
+   end
+ end
@@ -0,0 +1,46 @@
+ module Rouge
+   module Guessers
+     # This class allows for custom behavior
+     # with glob -> lexer name mappings
+     class GlobMapping < Guesser
+       def self.by_pairs(mapping, filename)
+         glob_map = {}
+         mapping.each do |(glob, lexer_name)|
+           lexer = Lexer.find(lexer_name)
+
+           # ignore unknown lexers
+           next unless lexer
+
+           glob_map[lexer.name] ||= []
+           glob_map[lexer.name] << glob
+         end
+
+         new(glob_map, filename)
+       end
+
+       attr_reader :glob_map, :filename
+       def initialize(glob_map, filename)
+         @glob_map = glob_map
+         @filename = filename
+       end
+
+       def filter(lexers)
+         basename = File.basename(filename)
+
+         collect_best(lexers) do |lexer|
+           score = (@glob_map[lexer.name] || []).map do |pattern|
+             if test_pattern(pattern, basename)
+               # specificity is better the fewer wildcards there are
+               -pattern.scan(/[*?\[]/).size
+             end
+           end.compact.min
+         end
+       end
+
+       private
+       def test_pattern(pattern, path)
+         File.fnmatch?(pattern, path, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
+       end
+     end
+   end
+ end
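Specificity here is the negated count of glob metacharacters, so a literal pattern scores 0 and beats the -1 of `*.conf`, and `collect_best` keeps only the more specific match. A quick check of the scoring expression used above:

    basename = 'nginx.conf'
    ['nginx.conf', '*.conf'].each do |pattern|
      matched = File.fnmatch?(pattern, basename, File::FNM_DOTMATCH | File::FNM_CASEFOLD)
      score   = -pattern.scan(/[*?\[]/).size if matched
      puts "#{pattern.ljust(10)} matched=#{matched} score=#{score.inspect}"
    end
    # nginx.conf matched=true score=0
    # *.conf     matched=true score=-1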
@@ -0,0 +1,14 @@
+ module Rouge
+   module Guessers
+     class Mimetype < Guesser
+       attr_reader :mimetype
+       def initialize(mimetype)
+         @mimetype = mimetype
+       end
+
+       def filter(lexers)
+         lexers.select { |lexer| lexer.mimetypes.include? @mimetype }
+       end
+     end
+   end
+ end
@@ -0,0 +1,42 @@
+ module Rouge
+   module Guessers
+     class Modeline < Guesser
+       # [jneen] regexen stolen from linguist
+       EMACS_MODELINE = /-\*-\s*(?:(?!mode)[\w-]+\s*:\s*(?:[\w+-]+)\s*;?\s*)*(?:mode\s*:)?\s*([\w+-]+)\s*(?:;\s*(?!mode)[\w-]+\s*:\s*[\w+-]+\s*)*;?\s*-\*-/i
+
+       # First form vim modeline
+       # [text]{white}{vi:|vim:|ex:}[white]{options}
+       # ex: 'vim: syntax=ruby'
+       VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i
+
+       # Second form vim modeline (compatible with some versions of Vi)
+       # [text]{white}{vi:|vim:|Vim:|ex:}[white]se[t] {options}:[text]
+       # ex: 'vim set syntax=ruby:'
+       VIM_MODELINE_2 = /(?:vim|vi|Vim|ex):\s*se(?:t)?.*\s(?:ft|filetype|syntax)=(\w+)\s?.*:/i
+
+       MODELINES = [EMACS_MODELINE, VIM_MODELINE_1, VIM_MODELINE_2]
+
+       def initialize(source, opts={})
+         @source = source
+         @lines = opts[:lines] || 5
+       end
+
+       def filter(lexers)
+         # don't bother reading the stream if we've already decided
+         return lexers if lexers.size == 1
+
+         source_text = @source
+         source_text = source_text.read if source_text.respond_to? :read
+
+         lines = source_text.split(/\r?\n/)
+
+         search_space = (lines.first(@lines) + lines.last(@lines)).join("\n")
+
+         matches = MODELINES.map { |re| re.match(search_space) }.compact
+         match_set = Set.new(matches.map { |m| m[1] })
+
+         lexers.select { |l| (Set.new([l.tag] + l.aliases) & match_set).any? }
+       end
+     end
+   end
+ end
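The modeline guesser only inspects the first and last few lines of the source and maps whatever filetype an Emacs or Vim modeline names onto a lexer tag or alias. The first Vim form, for example, captures the filetype directly; a small check of that regex:

    VIM_MODELINE_1 = /(?:vim|vi|ex):\s*(?:ft|filetype|syntax)=(\w+)\s?/i

    puts VIM_MODELINE_1.match('# vim: ft=ruby')[1]   # => "ruby"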
@@ -0,0 +1,39 @@
+ module Rouge
+   module Guessers
+     class Source < Guesser
+       attr_reader :source
+       def initialize(source)
+         @source = source
+       end
+
+       def filter(lexers)
+         # don't bother reading the input if
+         # we've already filtered to 1
+         return lexers if lexers.size == 1
+
+         # If we're filtering against *all* lexers, we only use confident return
+         # values from analyze_text. But if we've filtered down already, we can trust
+         # the analysis more.
+         threshold = lexers.size < 10 ? 0 : 0.5
+
+         source_text = case @source
+         when String
+           @source
+         when ->(s){ s.respond_to? :read }
+           @source.read
+         else
+           raise 'invalid source'
+         end
+
+         Lexer.assert_utf8!(source_text)
+
+         source_text = TextAnalyzer.new(source_text)
+
+         collect_best(lexers, threshold: threshold) do |lexer|
+           next unless lexer.methods(false).include? :analyze_text
+           lexer.analyze_text(source_text)
+         end
+       end
+     end
+   end
+ end
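`Guessers::Source` only consults lexers that define their own class-level `analyze_text`, and while the full lexer set is still in play it demands a score above 0.5 so weak heuristics cannot decide the guess on their own. A hedged sketch of such a heuristic; the lexer, its tag, and the shebang are invented for illustration:

    require 'rouge'

    class MyShellLexer < Rouge::RegexLexer
      title 'MyShell'
      tag 'myshell'

      # Receives a Rouge::TextAnalyzer; shebang? inspects the #! line.
      def self.analyze_text(text)
        return 1 if text.shebang?('mysh')   # certain match
        0                                   # no opinion
      end

      state :root do
        rule /.+/m, Text
      end
    end

    p Rouge::Lexer.guesses(source: "#!/usr/bin/env mysh\necho hi").map(&:tag)
    # => ["myshell"], assuming no other lexer claims this shebang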
@@ -109,26 +109,17 @@ module Rouge
    # to use.
    def guesses(info={})
      mimetype, filename, source = info.values_at(:mimetype, :filename, :source)
-     lexers = registry.values.uniq
-     total_size = lexers.size
-
-     lexers = filter_by_mimetype(lexers, mimetype) if mimetype
-     return lexers if lexers.size == 1
-
-     lexers = filter_by_filename(lexers, filename) if filename
-     return lexers if lexers.size == 1
-
-     if source
-       # If we're filtering against *all* lexers, we only use confident return
-       # values from analyze_text. But if we've filtered down already, we can trust
-       # the analysis more.
-       source_threshold = lexers.size < total_size ? 0 : 0.5
-       return [best_by_source(lexers, source, source_threshold)].compact
-     elsif lexers.size < total_size
-       return lexers
-     else
-       return []
-     end
+     custom_globs = info[:custom_globs]
+
+     guessers = (info[:guessers] || []).dup
+
+     guessers << Guessers::Mimetype.new(mimetype) if mimetype
+     guessers << Guessers::GlobMapping.by_pairs(custom_globs, filename) if custom_globs && filename
+     guessers << Guessers::Filename.new(filename) if filename
+     guessers << Guessers::Modeline.new(source) if source
+     guessers << Guessers::Source.new(source) if source
+
+     Guesser.guess(guessers, Lexer.all)
    end

    class AmbiguousGuess < StandardError
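After this change `Lexer.guesses` just assembles an ordered list of guesser objects (mimetype, custom globs, filename, modeline, source) and delegates to `Guesser.guess`, so the public entry point is unchanged while callers gain the `:custom_globs` and `:guessers` options. A hedged example of calling it (the filename, snippet, and glob pair are made up):

    require 'rouge'   # 1.11.1 or later

    candidates = Rouge::Lexer.guesses(
      filename: 'deploy.conf',
      source:   "# vim: ft=nginx\nserver { listen 80; }",
      custom_globs: { '*.conf' => 'nginx' }   # glob => lexer-tag pairs
    )
    puts candidates.map(&:tag).inspect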
@@ -175,71 +166,6 @@ module Rouge
    end

    private
-   def filter_by_mimetype(lexers, mt)
-     filtered = lexers.select { |lexer| lexer.mimetypes.include? mt }
-     filtered.any? ? filtered : lexers
-   end
-
-   # returns a list of lexers that match the given filename with
-   # equal specificity (i.e. number of wildcards in the pattern).
-   # This helps disambiguate between, e.g. the Nginx lexer, which
-   # matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
-   # In this case, nginx will win because the pattern has no wildcards,
-   # while `*.conf` has one.
-   def filter_by_filename(lexers, fname)
-     fname = File.basename(fname)
-
-     out = []
-     best_seen = nil
-     lexers.each do |lexer|
-       score = lexer.filenames.map do |pattern|
-         if File.fnmatch?(pattern, fname, File::FNM_DOTMATCH)
-           # specificity is better the fewer wildcards there are
-           pattern.scan(/[*?\[]/).size
-         end
-       end.compact.min
-
-       next unless score
-
-       if best_seen.nil? || score < best_seen
-         best_seen = score
-         out = [lexer]
-       elsif score == best_seen
-         out << lexer
-       end
-     end
-
-     out.any? ? out : lexers
-   end
-
-   def best_by_source(lexers, source, threshold=0)
-     source = case source
-     when String
-       source
-     when ->(s){ s.respond_to? :read }
-       source.read
-     else
-       raise 'invalid source'
-     end
-
-     assert_utf8!(source)
-
-     source = TextAnalyzer.new(source)
-
-     best_result = threshold
-     best_match = nil
-     lexers.each do |lexer|
-       result = lexer.analyze_text(source) || 0
-       return lexer if result == 1
-
-       if result > best_result
-         best_match = lexer
-         best_result = result
-       end
-     end
-
-     best_match
-   end

    protected
    # @private
@@ -0,0 +1,153 @@
+ # -*- coding: utf-8 -*- #
+
+ module Rouge
+   module Lexers
+
+     class Cfscript < RegexLexer
+       title "CFScript"
+       desc 'CFScript, the CFML scripting language'
+       tag 'cfscript'
+       aliases 'cfc'
+       filenames '*.cfc'
+
+       def self.keywords
+         @keywords ||= %w(
+           if else var xml default break switch do try catch throw in continue for return while required
+         )
+       end
+
+       def self.declarations
+         @declarations ||= %w(
+           component property function remote public package private
+         )
+       end
+
+       def self.types
+         @types ||= %w(
+           any array binary boolean component date guid numeric query string struct uuid void xml
+         )
+       end
+
+       constants = %w(application session client cookie super this variables arguments cgi)
+
+
+       operators = %w(\+\+ -- && \|\| <= >= < > == != mod eq lt gt lte gte not is and or xor eqv imp equal contains \? )
+       dotted_id = /[$a-zA-Z_][a-zA-Z0-9_.]*/
+
+       state :root do
+         mixin :comments_and_whitespace
+         rule /(?:#{operators.join('|')}|does not contain|greater than(?: or equal to)?|less than(?: or equal to)?)\b/i, Operator, :expr_start
+         rule %r([-<>+*%&|\^/!=]=?), Operator, :expr_start
+
+         rule /[(\[,]/, Punctuation, :expr_start
+         rule /;/, Punctuation, :statement
+         rule /[)\].]/, Punctuation
+
+         rule /[?]/ do
+           token Punctuation
+           push :ternary
+           push :expr_start
+         end
+
+         rule /[{}]/, Punctuation, :statement
+
+         rule /(?:#{constants.join('|')})\b/, Name::Constant
+         rule /(?:true|false|null)\b/, Keyword::Constant
+         rule /import\b/, Keyword::Namespace, :import
+         rule /(#{dotted_id})(\s*)(:)(\s*)/ do
+           groups Name, Text, Punctuation, Text
+           push :expr_start
+         end
+
+         rule /([A-Za-z_$][\w.]*)(\s*)(\()/ do |m|
+           if self.class.keywords.include? m[1]
+             token Keyword, m[1]
+             token Text, m[2]
+             token Punctuation, m[3]
+           else
+             token Name::Function, m[1]
+             token Text, m[2]
+             token Punctuation, m[3]
+           end
+         end
+
+         rule dotted_id do |m|
+           if self.class.declarations.include? m[0]
+             token Keyword::Declaration
+             push :expr_start
+           elsif self.class.keywords.include? m[0]
+             token Keyword
+             push :expr_start
+           elsif self.class.types.include? m[0]
+             token Keyword::Type
+             push :expr_start
+           else
+             token Name::Other
+           end
+         end
+
+         rule /[0-9][0-9]*\.[0-9]+([eE][0-9]+)?[fd]?/, Num::Float
+         rule /0x[0-9a-fA-F]+/, Num::Hex
+         rule /[0-9]+/, Num::Integer
+         rule /"(\\\\|\\"|[^"])*"/, Str::Double
+         rule /'(\\\\|\\'|[^'])*'/, Str::Single
+
+       end
+
+       # same as java, broken out
+       state :comments_and_whitespace do
+         rule /\s+/, Text
+         rule %r(//.*?$), Comment::Single
+         rule %r(/\*.*?\*/)m, Comment::Multiline
+       end
+
+       state :expr_start do
+         mixin :comments_and_whitespace
+
+         rule /[{]/, Punctuation, :object
+
+         rule //, Text, :pop!
+       end
+
+       state :statement do
+
+         rule /[{}]/, Punctuation
+
+         mixin :expr_start
+       end
+
+       # object literals
+       state :object do
+         mixin :comments_and_whitespace
+         rule /[}]/ do
+           token Punctuation
+           push :expr_start
+         end
+
+         rule /(#{dotted_id})(\s*)(:)/ do
+           groups Name::Other, Text, Punctuation
+           push :expr_start
+         end
+
+         rule /:/, Punctuation
+         mixin :root
+       end
+
+       # ternary expressions, where <dotted_id>: is not a label!
+       state :ternary do
+         rule /:/ do
+           token Punctuation
+           goto :expr_start
+         end
+
+         mixin :root
+       end
+
+       state :import do
+         rule /\s+/m, Text
+         rule /[a-z0-9_.]+\*?/i, Name::Namespace, :pop!
+       end
+
+     end
+   end
+ end
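The new lexer registers under the `cfscript` tag, with the `cfc` alias and a `*.cfc` filename glob, so it is picked up by the ordinary highlighting entry points. A small usage sketch, assuming rouge 1.11.1 or later:

    require 'rouge'

    source = <<-CFC
    component accessors="true" {
      function init(){ return this; }
    }
    CFC

    puts Rouge.highlight(source, 'cfscript', 'html')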