gitlab-rouge 1.9.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +23 -0
- data/LICENSE +186 -0
- data/bin/rougify +16 -0
- data/gitlab-rouge.gemspec +17 -0
- data/lib/rouge.rb +57 -0
- data/lib/rouge/cli.rb +363 -0
- data/lib/rouge/demos/apache +21 -0
- data/lib/rouge/demos/applescript +2 -0
- data/lib/rouge/demos/c +8 -0
- data/lib/rouge/demos/clojure +5 -0
- data/lib/rouge/demos/coffeescript +5 -0
- data/lib/rouge/demos/common_lisp +1 -0
- data/lib/rouge/demos/conf +4 -0
- data/lib/rouge/demos/cpp +8 -0
- data/lib/rouge/demos/csharp +5 -0
- data/lib/rouge/demos/css +4 -0
- data/lib/rouge/demos/dart +6 -0
- data/lib/rouge/demos/diff +7 -0
- data/lib/rouge/demos/elixir +1 -0
- data/lib/rouge/demos/erb +1 -0
- data/lib/rouge/demos/erlang +7 -0
- data/lib/rouge/demos/factor +5 -0
- data/lib/rouge/demos/gherkin +17 -0
- data/lib/rouge/demos/glsl +14 -0
- data/lib/rouge/demos/go +7 -0
- data/lib/rouge/demos/groovy +9 -0
- data/lib/rouge/demos/haml +5 -0
- data/lib/rouge/demos/handlebars +7 -0
- data/lib/rouge/demos/haskell +6 -0
- data/lib/rouge/demos/html +8 -0
- data/lib/rouge/demos/http +14 -0
- data/lib/rouge/demos/ini +4 -0
- data/lib/rouge/demos/io +11 -0
- data/lib/rouge/demos/java +5 -0
- data/lib/rouge/demos/javascript +1 -0
- data/lib/rouge/demos/json +1 -0
- data/lib/rouge/demos/json-doc +1 -0
- data/lib/rouge/demos/liquid +11 -0
- data/lib/rouge/demos/literate_coffeescript +3 -0
- data/lib/rouge/demos/literate_haskell +7 -0
- data/lib/rouge/demos/llvm +20 -0
- data/lib/rouge/demos/lua +12 -0
- data/lib/rouge/demos/make +6 -0
- data/lib/rouge/demos/markdown +4 -0
- data/lib/rouge/demos/matlab +6 -0
- data/lib/rouge/demos/moonscript +16 -0
- data/lib/rouge/demos/nginx +5 -0
- data/lib/rouge/demos/nim +27 -0
- data/lib/rouge/demos/objective_c +14 -0
- data/lib/rouge/demos/ocaml +12 -0
- data/lib/rouge/demos/perl +5 -0
- data/lib/rouge/demos/php +3 -0
- data/lib/rouge/demos/plaintext +1 -0
- data/lib/rouge/demos/powershell +49 -0
- data/lib/rouge/demos/prolog +9 -0
- data/lib/rouge/demos/properties +7 -0
- data/lib/rouge/demos/puppet +6 -0
- data/lib/rouge/demos/python +6 -0
- data/lib/rouge/demos/qml +9 -0
- data/lib/rouge/demos/r +8 -0
- data/lib/rouge/demos/racket +24 -0
- data/lib/rouge/demos/ruby +9 -0
- data/lib/rouge/demos/rust +12 -0
- data/lib/rouge/demos/sass +3 -0
- data/lib/rouge/demos/scala +3 -0
- data/lib/rouge/demos/scheme +4 -0
- data/lib/rouge/demos/scss +5 -0
- data/lib/rouge/demos/sed +4 -0
- data/lib/rouge/demos/shell +2 -0
- data/lib/rouge/demos/slim +17 -0
- data/lib/rouge/demos/smalltalk +6 -0
- data/lib/rouge/demos/sml +4 -0
- data/lib/rouge/demos/sql +1 -0
- data/lib/rouge/demos/swift +5 -0
- data/lib/rouge/demos/tcl +1 -0
- data/lib/rouge/demos/tex +1 -0
- data/lib/rouge/demos/toml +9 -0
- data/lib/rouge/demos/tulip +14 -0
- data/lib/rouge/demos/vb +4 -0
- data/lib/rouge/demos/viml +5 -0
- data/lib/rouge/demos/xml +2 -0
- data/lib/rouge/demos/yaml +4 -0
- data/lib/rouge/formatter.rb +50 -0
- data/lib/rouge/formatters/html.rb +117 -0
- data/lib/rouge/formatters/null.rb +19 -0
- data/lib/rouge/formatters/terminal256.rb +176 -0
- data/lib/rouge/lexer.rb +443 -0
- data/lib/rouge/lexers/apache.rb +68 -0
- data/lib/rouge/lexers/apache/keywords.yml +453 -0
- data/lib/rouge/lexers/apple_script.rb +367 -0
- data/lib/rouge/lexers/c.rb +212 -0
- data/lib/rouge/lexers/clojure.rb +112 -0
- data/lib/rouge/lexers/coffeescript.rb +174 -0
- data/lib/rouge/lexers/common_lisp.rb +345 -0
- data/lib/rouge/lexers/conf.rb +24 -0
- data/lib/rouge/lexers/cpp.rb +66 -0
- data/lib/rouge/lexers/csharp.rb +88 -0
- data/lib/rouge/lexers/css.rb +271 -0
- data/lib/rouge/lexers/dart.rb +104 -0
- data/lib/rouge/lexers/diff.rb +31 -0
- data/lib/rouge/lexers/elixir.rb +108 -0
- data/lib/rouge/lexers/erb.rb +56 -0
- data/lib/rouge/lexers/erlang.rb +118 -0
- data/lib/rouge/lexers/factor.rb +302 -0
- data/lib/rouge/lexers/gherkin.rb +137 -0
- data/lib/rouge/lexers/gherkin/keywords.rb +14 -0
- data/lib/rouge/lexers/glsl.rb +135 -0
- data/lib/rouge/lexers/go.rb +178 -0
- data/lib/rouge/lexers/groovy.rb +104 -0
- data/lib/rouge/lexers/haml.rb +228 -0
- data/lib/rouge/lexers/handlebars.rb +79 -0
- data/lib/rouge/lexers/haskell.rb +183 -0
- data/lib/rouge/lexers/html.rb +94 -0
- data/lib/rouge/lexers/http.rb +80 -0
- data/lib/rouge/lexers/ini.rb +57 -0
- data/lib/rouge/lexers/io.rb +68 -0
- data/lib/rouge/lexers/java.rb +76 -0
- data/lib/rouge/lexers/javascript.rb +297 -0
- data/lib/rouge/lexers/liquid.rb +287 -0
- data/lib/rouge/lexers/literate_coffeescript.rb +33 -0
- data/lib/rouge/lexers/literate_haskell.rb +36 -0
- data/lib/rouge/lexers/llvm.rb +84 -0
- data/lib/rouge/lexers/lua.rb +122 -0
- data/lib/rouge/lexers/lua/builtins.rb +22 -0
- data/lib/rouge/lexers/make.rb +116 -0
- data/lib/rouge/lexers/markdown.rb +154 -0
- data/lib/rouge/lexers/matlab.rb +74 -0
- data/lib/rouge/lexers/matlab/builtins.rb +11 -0
- data/lib/rouge/lexers/moonscript.rb +110 -0
- data/lib/rouge/lexers/nginx.rb +71 -0
- data/lib/rouge/lexers/nim.rb +152 -0
- data/lib/rouge/lexers/objective_c.rb +197 -0
- data/lib/rouge/lexers/ocaml.rb +111 -0
- data/lib/rouge/lexers/perl.rb +197 -0
- data/lib/rouge/lexers/php.rb +173 -0
- data/lib/rouge/lexers/php/builtins.rb +204 -0
- data/lib/rouge/lexers/plain_text.rb +25 -0
- data/lib/rouge/lexers/powershell.rb +96 -0
- data/lib/rouge/lexers/prolog.rb +64 -0
- data/lib/rouge/lexers/properties.rb +55 -0
- data/lib/rouge/lexers/puppet.rb +128 -0
- data/lib/rouge/lexers/python.rb +228 -0
- data/lib/rouge/lexers/qml.rb +72 -0
- data/lib/rouge/lexers/r.rb +56 -0
- data/lib/rouge/lexers/racket.rb +542 -0
- data/lib/rouge/lexers/ruby.rb +415 -0
- data/lib/rouge/lexers/rust.rb +191 -0
- data/lib/rouge/lexers/sass.rb +74 -0
- data/lib/rouge/lexers/sass/common.rb +180 -0
- data/lib/rouge/lexers/scala.rb +142 -0
- data/lib/rouge/lexers/scheme.rb +112 -0
- data/lib/rouge/lexers/scss.rb +34 -0
- data/lib/rouge/lexers/sed.rb +170 -0
- data/lib/rouge/lexers/shell.rb +152 -0
- data/lib/rouge/lexers/slim.rb +228 -0
- data/lib/rouge/lexers/smalltalk.rb +116 -0
- data/lib/rouge/lexers/sml.rb +347 -0
- data/lib/rouge/lexers/sql.rb +140 -0
- data/lib/rouge/lexers/swift.rb +144 -0
- data/lib/rouge/lexers/tcl.rb +192 -0
- data/lib/rouge/lexers/tex.rb +72 -0
- data/lib/rouge/lexers/toml.rb +71 -0
- data/lib/rouge/lexers/tulip.rb +75 -0
- data/lib/rouge/lexers/vb.rb +164 -0
- data/lib/rouge/lexers/viml.rb +101 -0
- data/lib/rouge/lexers/viml/keywords.rb +12 -0
- data/lib/rouge/lexers/xml.rb +59 -0
- data/lib/rouge/lexers/yaml.rb +364 -0
- data/lib/rouge/plugins/redcarpet.rb +30 -0
- data/lib/rouge/regex_lexer.rb +439 -0
- data/lib/rouge/template_lexer.rb +22 -0
- data/lib/rouge/text_analyzer.rb +48 -0
- data/lib/rouge/theme.rb +195 -0
- data/lib/rouge/themes/base16.rb +130 -0
- data/lib/rouge/themes/colorful.rb +67 -0
- data/lib/rouge/themes/github.rb +71 -0
- data/lib/rouge/themes/molokai.rb +82 -0
- data/lib/rouge/themes/monokai.rb +92 -0
- data/lib/rouge/themes/monokai_sublime.rb +90 -0
- data/lib/rouge/themes/thankful_eyes.rb +71 -0
- data/lib/rouge/token.rb +182 -0
- data/lib/rouge/util.rb +101 -0
- data/lib/rouge/version.rb +7 -0
- metadata +231 -0
data/lib/rouge/lexer.rb
ADDED
@@ -0,0 +1,443 @@
|
|
1
|
+
# -*- coding: utf-8 -*- #
|
2
|
+
|
3
|
+
# stdlib
|
4
|
+
require 'strscan'
|
5
|
+
require 'cgi'
|
6
|
+
require 'set'
|
7
|
+
|
8
|
+
module Rouge
|
9
|
+
# @abstract
|
10
|
+
# A lexer transforms text into a stream of `[token, chunk]` pairs.
|
11
|
+
class Lexer
|
12
|
+
include Token::Tokens
|
13
|
+
|
14
|
+
class << self
|
15
|
+
# Convenience entry point: builds a fresh lexer from `opts` and hands
# `stream` (plus any block) to its instance-level #lex.
#
# @see #lex
def lex(stream, opts={}, &b)
  lexer = new(opts)
  lexer.lex(stream, &b)
end
|
22
|
+
|
23
|
+
# Get the class-level default options, optionally merging `o` into them
# first. The same hash object is returned on every call.
def default_options(o={})
  (@default_options ||= {}).merge!(o)
end
|
28
|
+
|
29
|
+
# Look up a lexer class by its registered name (tag or alias).
# `name` is stringified before the lookup; nil is returned on a miss.
def find(name)
  key = name.to_s
  registry[key]
end
|
33
|
+
|
34
|
+
# Find a lexer, with fancy shiny features.
|
35
|
+
#
|
36
|
+
# * The string you pass can include CGI-style options
|
37
|
+
#
|
38
|
+
# Lexer.find_fancy('erb?parent=tex')
|
39
|
+
#
|
40
|
+
# * You can pass the special name 'guess' so we guess for you,
|
41
|
+
# and you can pass a second argument of the code to guess by
|
42
|
+
#
|
43
|
+
# Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
|
44
|
+
#
|
45
|
+
# This is used in the Redcarpet plugin as well as Rouge's own
|
46
|
+
# markdown lexer for highlighting internal code blocks.
|
47
|
+
#
|
48
|
+
def find_fancy(str, code=nil)
|
49
|
+
name, opts = str ? str.split('?', 2) : [nil, '']
|
50
|
+
|
51
|
+
# parse the options hash from a cgi-style string
|
52
|
+
opts = CGI.parse(opts || '').map do |k, vals|
|
53
|
+
[ k.to_sym, vals.empty? ? true : vals[0] ]
|
54
|
+
end
|
55
|
+
|
56
|
+
opts = Hash[opts]
|
57
|
+
|
58
|
+
lexer_class = case name
|
59
|
+
when 'guess', nil
|
60
|
+
self.guess(:source => code, :mimetype => opts[:mimetype])
|
61
|
+
when String
|
62
|
+
self.find(name)
|
63
|
+
end
|
64
|
+
|
65
|
+
lexer_class && lexer_class.new(opts)
|
66
|
+
end
|
67
|
+
|
68
|
+
# Specify or get this lexer's title. Meant to be human-readable.
#
# The first value wins: once a title is stored, later calls return it
# unchanged. When no title was ever given, the capitalized tag is used
# and stored.
#
# @param t [String, nil] the title to set, or nil to just read it
# @return [String] the stored title
def title(t=nil)
  # `||=` short-circuits, so the tag is only consulted when no title is
  # stored yet; reading an explicit title never touches a nil tag.
  @title ||= (t || tag.capitalize)
end
|
75
|
+
|
76
|
+
# Specify or get this lexer's description. Called with no argument it
# reads the stored value; any argument (including nil) replaces it.
def desc(arg=:absent)
  return @desc if arg == :absent

  @desc = arg
end
|
84
|
+
|
85
|
+
# Specify or get the path name containing a small demo for
# this lexer (can be overridden by {demo}).
#
# @param arg [String, Pathname] the path to set; omit to read
# @return [Pathname] the explicitly set path, or by default the file
#   named after this lexer's tag in the `demos` directory next to this
#   source file
def demo_file(arg=:absent)
  return @demo_file = Pathname.new(arg) unless arg == :absent

  # Only fall back to the default when nothing was set explicitly, so
  # reading never clobbers a custom path.
  @demo_file ||= Pathname.new(__FILE__).dirname.join('demos', tag)
end
|
92
|
+
|
93
|
+
# Specify or get a small demo string for this lexer.
#
# @param arg [String] the demo text to set; omit to read
# @return [String] the explicitly set demo, or else the UTF-8 contents
#   of {demo_file}, which are read once and then remembered
def demo(arg=:absent)
  return @demo = arg unless arg == :absent

  # Only hit the filesystem when no demo is stored, so reading never
  # replaces an explicitly supplied demo.
  @demo ||= File.read(demo_file, encoding: 'utf-8')
end
|
99
|
+
|
100
|
+
# @return a list of all registered lexers, each listed once even when
#   it is registered under several names.
def all
  registered = registry.values
  registered.uniq
end
|
104
|
+
|
105
|
+
# Guess which lexers could apply, based on a hash of info.
#
# Takes the same arguments as Lexer.guess, but reports ambiguity as a
# list instead of raising from here: the result is a (possibly empty)
# list of candidate lexers. Filters are applied in order: mimetype,
# filename, then source analysis.
def guesses(info={})
  mimetype = info[:mimetype]
  filename = info[:filename]
  source   = info[:source]

  everything = registry.values.uniq
  candidates = everything

  candidates = filter_by_mimetype(candidates, mimetype) if mimetype
  return candidates if candidates.size == 1

  candidates = filter_by_filename(candidates, filename) if filename
  return candidates if candidates.size == 1

  return [] unless source

  # Against the full registry only confident analyze_text scores count;
  # once the field has been narrowed, any positive score is trusted.
  threshold = candidates.size < everything.size ? 0 : 0.5
  pick = best_by_source(candidates, source, threshold)
  pick.nil? ? [] : [pick]
end
|
131
|
+
|
132
|
+
# Raised when guessing cannot settle on a single lexer. The competing
# lexers are available through #alternatives.
class AmbiguousGuess < StandardError
  attr_reader :alternatives

  def initialize(alternatives)
    @alternatives = alternatives
  end

  # Lists the tags of every competing lexer.
  def message
    tags = alternatives.map { |lexer| lexer.tag }
    "Ambiguous guess: can't decide between #{tags.inspect}"
  end
end
|
140
|
+
|
141
|
+
# Guess which single lexer to use based on a hash of info.
#
# @option info :mimetype
#   A mimetype to guess by
# @option info :filename
#   A filename to guess by
# @option info :source
#   The source itself, which, if guessing by mimetype or filename
#   fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
#   other hints.
#
# @return the only candidate, or Lexers::PlainText when there is none
# @raise [AmbiguousGuess] when several candidates remain
#
# @see Lexer.analyze_text
# @see Lexer.guesses
def guess(info={})
  candidates = guesses(info)

  case candidates.size
  when 0 then Lexers::PlainText
  when 1 then candidates[0]
  else raise AmbiguousGuess.new(candidates)
  end
end
|
162
|
+
|
163
|
+
# Shorthand for guessing with only a mimetype.
def guess_by_mimetype(mt)
  guess(:mimetype => mt)
end
|
166
|
+
|
167
|
+
# Shorthand for guessing with only a filename.
def guess_by_filename(fname)
  guess(:filename => fname)
end
|
170
|
+
|
171
|
+
# Shorthand for guessing with only the source text.
def guess_by_source(source)
  guess(:source => source)
end
|
174
|
+
|
175
|
+
private
|
176
|
+
# Keep only the lexers that declare mimetype `mt`. When none do, the
# input list is returned untouched so later filters still have
# candidates to work with.
def filter_by_mimetype(lexers, mt)
  matching = lexers.select { |candidate| candidate.mimetypes.include?(mt) }
  return lexers unless matching.any?

  matching
end
|
180
|
+
|
181
|
+
# Narrow `lexers` to those whose filename globs match `fname`, keeping
# only the most specific matches, where specificity is the number of
# wildcard characters in the matching pattern (fewer is better).
# This disambiguates, e.g., the Nginx lexer, which matches
# `nginx.conf`, from the Conf lexer, which matches `*.conf`: nginx
# wins because its pattern has no wildcards while `*.conf` has one.
# Only the basename of `fname` is matched. If nothing matches at all,
# the input list is returned unchanged.
def filter_by_filename(lexers, fname)
  basename = File.basename(fname)

  # lowest wildcard count among this lexer's matching globs, or nil
  specificity = lambda do |candidate|
    counts = candidate.filenames.map do |glob|
      next unless File.fnmatch?(glob, basename, File::FNM_DOTMATCH)

      glob.scan(/[*?\[]/).size
    end
    counts.compact.min
  end

  scored = lexers.map { |candidate| [candidate, specificity.call(candidate)] }
  scored.reject! { |_, count| count.nil? }
  return lexers if scored.empty?

  best = scored.map { |_, count| count }.min
  scored.select { |_, count| count == best }.map { |candidate, _| candidate }
end
|
212
|
+
|
213
|
+
# Pick the lexer whose analyze_text score for `source` is highest.
#
# `source` may be a String or anything responding to #read. A score of
# exactly 1 wins immediately; otherwise a lexer only counts when its
# score is strictly above the running best, which starts at
# `threshold`. A nil score is treated as 0.
#
# @return the best lexer, or nil if none beats the threshold
# @raise [RuntimeError] when `source` is neither a String nor readable
def best_by_source(lexers, source, threshold=0)
  text =
    if source.is_a?(String)
      source
    elsif source.respond_to?(:read)
      source.read
    else
      raise 'invalid source'
    end

  assert_utf8!(text)

  analyzer = TextAnalyzer.new(text)

  winner = nil
  high_score = threshold
  lexers.each do |candidate|
    score = candidate.analyze_text(analyzer) || 0
    return candidate if score == 1
    next unless score > high_score

    winner = candidate
    high_score = score
  end

  winner
end
|
241
|
+
|
242
|
+
protected
|
243
|
+
# @private
# Record `lexer` in the registry under the stringified `name`.
def register(name, lexer)
  registry.store(name.to_s, lexer)
end
|
247
|
+
|
248
|
+
public
|
249
|
+
# Used to specify or get the canonical name of this lexer class.
# Setting the tag also registers the class under that name.
#
# @example
#   class MyLexer < Lexer
#     tag 'foo'
#   end
#
#   MyLexer.tag # => 'foo'
#
#   Lexer.find('foo') # => MyLexer
def tag(t=nil)
  if t.nil?
    @tag
  else
    @tag = t.to_s
    Lexer.register(@tag, self)
  end
end
|
265
|
+
|
266
|
+
# Used to specify alternate names this lexer class may be found by.
# Each name is stringified and registered; the accumulated list of
# aliases is returned.
#
# @example
#   class Erb < Lexer
#     tag 'erb'
#     aliases 'eruby', 'rhtml'
#   end
#
#   Lexer.find('eruby') # => Erb
def aliases(*args)
  names = args.map { |arg| arg.to_s }
  names.each { |name| Lexer.register(name, self) }

  @aliases ||= []
  @aliases.concat(names)
end
|
280
|
+
|
281
|
+
# Specify a list of filename globs associated with this lexer. The
# globs accumulate across calls; the full list is returned.
#
# @example
#   class Ruby < Lexer
#     filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
#   end
def filenames(*fnames)
  @filenames ||= []
  @filenames.concat(fnames)
end
|
290
|
+
|
291
|
+
# Specify a list of mimetypes associated with this lexer. The
# mimetypes accumulate across calls; the full list is returned.
#
# @example
#   class Html < Lexer
#     mimetypes 'text/html', 'application/xhtml+xml'
#   end
def mimetypes(*mts)
  @mimetypes ||= []
  @mimetypes.concat(mts)
end
|
300
|
+
|
301
|
+
# @private
# Guard against strings whose encoding the lexers cannot handle.
# US-ASCII, UTF-8 and ASCII-8BIT pass; anything else raises.
#
# @raise [EncodingError] naming the offending encoding
def assert_utf8!(str)
  accepted = %w(US-ASCII UTF-8 ASCII-8BIT)
  return if accepted.include?(str.encoding.name)

  encoding_names = str.encoding.names.join(',')
  raise EncodingError,
    "Bad encoding: #{encoding_names}. Please convert your string to UTF-8."
end
|
309
|
+
|
310
|
+
private
|
311
|
+
# The name => lexer class table, created empty on first use.
def registry
  @registry ||= Hash.new
end
|
314
|
+
end
|
315
|
+
|
316
|
+
# -*- instance methods -*- #
|
317
|
+
|
318
|
+
# Build a lexer configured with `opts`. Individual lexers may accept
# further options; the only one handled here is `:debug`.
#
# @option opts :debug
#   Prints debug information to stdout. The particular info depends
#   on the lexer in question. In regex lexers, this will log the
#   state stack at the beginning of each step, along with each regex
#   tried and each stream consumed. Try it, it's pretty useful.
def initialize(opts={})
  # store the caller's options first so the lookup below can see them
  options(opts)

  @debug = option(:debug)
end
|
332
|
+
|
333
|
+
# Get and/or specify the options for this lexer. `o` is merged into
# the instance's own options; the return value is a new hash of the
# class defaults overlaid with the instance options.
def options(o={})
  @options ||= {}
  @options.merge!(o)

  defaults = self.class.default_options
  defaults.merge(@options)
end
|
339
|
+
|
340
|
+
# Get or specify one option for this lexer. With only a key, the
# current value is looked up; with a value, the pair is stored via
# #options.
def option(k, v=:absent)
  return options[k] if v == :absent

  options({ k => v })
end
|
348
|
+
|
349
|
+
# @deprecated
#   Instead of `debug { "foo" }`, simply `puts "foo" if @debug`.
#
# Always emits a deprecation warning, then prints the block's value
# when the `:debug` option is set. The message comes from a block so
# that costly debug strings are only built when they are printed.
#
# Guard calls with "if @debug" for best performance when debugging is
# turned off.
#
# @example
#   debug { "hello, world!" } if @debug
def debug
  warn "Lexer#debug is deprecated. Simply puts if @debug instead."
  return unless @debug

  puts yield
end
|
365
|
+
|
366
|
+
# @abstract
#
# Hook for clearing per-lex state; #lex invokes it before lexing
# unless asked to continue. The default implementation does nothing.
def reset!
  # intentionally empty
end
|
372
|
+
|
373
|
+
# Given a string, yield [token, chunk] pairs. If no block is given,
# an enumerator is returned.
#
# Empty chunks are dropped and consecutive chunks of the same token
# are merged into a single pair before being yielded.
#
# @option opts :continue
#   Continue the lex from the previous state (i.e. don't call #reset!)
def lex(string, opts={}, &b)
  return enum_for(:lex, string, opts) unless block_given?

  Lexer.assert_utf8!(string)

  reset! unless opts[:continue]

  # hold back the most recent pair so that a following chunk of the
  # same token can be appended to it
  pending_token = nil
  pending_text = nil
  stream_tokens(string) do |tok, val|
    next if val.empty?

    if tok == pending_token
      pending_text << val
    else
      b.call(pending_token, pending_text) if pending_token
      pending_token = tok
      pending_text = val
    end
  end

  # flush whatever is still held back
  b.call(pending_token, pending_text) if pending_token
end
|
403
|
+
|
404
|
+
# The tag of this lexer's class; delegated to {Lexer.tag}.
def tag
  klass = self.class
  klass.tag
end
|
408
|
+
|
409
|
+
# @abstract
#
# Yield `[token, chunk]` pairs, given a prepared input stream.
# Subclasses must override this; the base version only raises.
#
# @param [StringScanner] stream
#   the stream
def stream_tokens(stream, &b)
  raise('abstract')
end
|
419
|
+
|
420
|
+
# @abstract
#
# Score, from 0 to 1, how likely it is that `text` should be lexed
# with this lexer. This base implementation always answers 0. Scores
# below 0.5 are only used to break ties between filename or mimetype
# matches.
#
# @param [TextAnalyzer] text
#   the text to be analyzed, with a couple of handy methods on it,
#   like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
def self.analyze_text(text)
  0
end
|
433
|
+
end
|
434
|
+
|
435
|
+
module Lexers
  # Load the lexer source at `relpath` (relative to the `lexers`
  # directory next to this file) unless `const_name` is already
  # defined.
  def self.load_const(const_name, relpath)
    unless const_defined?(const_name)
      lexers_dir = Pathname.new(__FILE__).dirname.join('lexers')
      load lexers_dir.join(relpath)
    end
  end
end
|
443
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Rouge
|
4
|
+
module Lexers
|
5
|
+
# Lexer for Apache web server configuration files.
class Apache < RegexLexer
  title "Apache"
  desc 'configuration files for Apache web server'
  tag 'apache'
  mimetypes 'text/x-httpd-conf', 'text/x-apache-conf'
  filenames '.htaccess', 'httpd.conf'

  class << self
    attr_reader :keywords
  end

  # Load Apache keywords from separate YML file. The block form of
  # File.open closes the handle once the YAML has been parsed.
  @keywords = File.open(Pathname.new(__FILE__).dirname.join('apache/keywords.yml')) do |io|
    ::YAML.load(io)
  end

  # Map a word to the token type of the keyword list it appears in.
  #
  # @param token [String, nil] the word to classify
  # @return Name::Class for sections, Name::Label for directives,
  #   Literal::String::Symbol for values, and Text for anything else,
  #   so that a nil token type is never emitted into the stream
  def name_for_token(token)
    known = self.class.keywords

    if known[:sections].include? token
      Name::Class
    elsif known[:directives].include? token
      Name::Label
    elsif known[:values].include? token
      Literal::String::Symbol
    else
      Text
    end
  end

  state :whitespace do
    rule %r/\#.*?\n/, Comment
    rule %r/[\s\n]+/m, Text
  end

  state :root do
    mixin :whitespace

    # opening or closing section tag, e.g. `<VirtualHost` or `</Directory`
    rule %r/(<\/?)(\w+)/ do |m|
      groups Punctuation, name_for_token(m[2])
      push :section
    end

    # a directive name; its arguments are handled in :directive
    rule %r/\w+/ do |m|
      token name_for_token(m[0])
      push :directive
    end
  end

  state :section do
    mixin :whitespace

    # Match section arguments
    rule %r/([^>]+)?(>\n)/ do |m|
      groups Literal::String::Regex, Punctuation
      pop!
    end
  end

  state :directive do
    # Match value literals and other directive arguments
    rule %r/(\w+)*(.*?(\n|$))/ do |m|
      token name_for_token(m[1]), m[1]
      token Text, m[2]
      pop!
    end
  end
end
|
67
|
+
end
|
68
|
+
end
|