gitlab-rouge 1.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +23 -0
- data/LICENSE +186 -0
- data/bin/rougify +16 -0
- data/gitlab-rouge.gemspec +17 -0
- data/lib/rouge.rb +57 -0
- data/lib/rouge/cli.rb +363 -0
- data/lib/rouge/demos/apache +21 -0
- data/lib/rouge/demos/applescript +2 -0
- data/lib/rouge/demos/c +8 -0
- data/lib/rouge/demos/clojure +5 -0
- data/lib/rouge/demos/coffeescript +5 -0
- data/lib/rouge/demos/common_lisp +1 -0
- data/lib/rouge/demos/conf +4 -0
- data/lib/rouge/demos/cpp +8 -0
- data/lib/rouge/demos/csharp +5 -0
- data/lib/rouge/demos/css +4 -0
- data/lib/rouge/demos/dart +6 -0
- data/lib/rouge/demos/diff +7 -0
- data/lib/rouge/demos/elixir +1 -0
- data/lib/rouge/demos/erb +1 -0
- data/lib/rouge/demos/erlang +7 -0
- data/lib/rouge/demos/factor +5 -0
- data/lib/rouge/demos/gherkin +17 -0
- data/lib/rouge/demos/glsl +14 -0
- data/lib/rouge/demos/go +7 -0
- data/lib/rouge/demos/groovy +9 -0
- data/lib/rouge/demos/haml +5 -0
- data/lib/rouge/demos/handlebars +7 -0
- data/lib/rouge/demos/haskell +6 -0
- data/lib/rouge/demos/html +8 -0
- data/lib/rouge/demos/http +14 -0
- data/lib/rouge/demos/ini +4 -0
- data/lib/rouge/demos/io +11 -0
- data/lib/rouge/demos/java +5 -0
- data/lib/rouge/demos/javascript +1 -0
- data/lib/rouge/demos/json +1 -0
- data/lib/rouge/demos/json-doc +1 -0
- data/lib/rouge/demos/liquid +11 -0
- data/lib/rouge/demos/literate_coffeescript +3 -0
- data/lib/rouge/demos/literate_haskell +7 -0
- data/lib/rouge/demos/llvm +20 -0
- data/lib/rouge/demos/lua +12 -0
- data/lib/rouge/demos/make +6 -0
- data/lib/rouge/demos/markdown +4 -0
- data/lib/rouge/demos/matlab +6 -0
- data/lib/rouge/demos/moonscript +16 -0
- data/lib/rouge/demos/nginx +5 -0
- data/lib/rouge/demos/nim +27 -0
- data/lib/rouge/demos/objective_c +14 -0
- data/lib/rouge/demos/ocaml +12 -0
- data/lib/rouge/demos/perl +5 -0
- data/lib/rouge/demos/php +3 -0
- data/lib/rouge/demos/plaintext +1 -0
- data/lib/rouge/demos/powershell +49 -0
- data/lib/rouge/demos/prolog +9 -0
- data/lib/rouge/demos/properties +7 -0
- data/lib/rouge/demos/puppet +6 -0
- data/lib/rouge/demos/python +6 -0
- data/lib/rouge/demos/qml +9 -0
- data/lib/rouge/demos/r +8 -0
- data/lib/rouge/demos/racket +24 -0
- data/lib/rouge/demos/ruby +9 -0
- data/lib/rouge/demos/rust +12 -0
- data/lib/rouge/demos/sass +3 -0
- data/lib/rouge/demos/scala +3 -0
- data/lib/rouge/demos/scheme +4 -0
- data/lib/rouge/demos/scss +5 -0
- data/lib/rouge/demos/sed +4 -0
- data/lib/rouge/demos/shell +2 -0
- data/lib/rouge/demos/slim +17 -0
- data/lib/rouge/demos/smalltalk +6 -0
- data/lib/rouge/demos/sml +4 -0
- data/lib/rouge/demos/sql +1 -0
- data/lib/rouge/demos/swift +5 -0
- data/lib/rouge/demos/tcl +1 -0
- data/lib/rouge/demos/tex +1 -0
- data/lib/rouge/demos/toml +9 -0
- data/lib/rouge/demos/tulip +14 -0
- data/lib/rouge/demos/vb +4 -0
- data/lib/rouge/demos/viml +5 -0
- data/lib/rouge/demos/xml +2 -0
- data/lib/rouge/demos/yaml +4 -0
- data/lib/rouge/formatter.rb +50 -0
- data/lib/rouge/formatters/html.rb +117 -0
- data/lib/rouge/formatters/null.rb +19 -0
- data/lib/rouge/formatters/terminal256.rb +176 -0
- data/lib/rouge/lexer.rb +443 -0
- data/lib/rouge/lexers/apache.rb +68 -0
- data/lib/rouge/lexers/apache/keywords.yml +453 -0
- data/lib/rouge/lexers/apple_script.rb +367 -0
- data/lib/rouge/lexers/c.rb +212 -0
- data/lib/rouge/lexers/clojure.rb +112 -0
- data/lib/rouge/lexers/coffeescript.rb +174 -0
- data/lib/rouge/lexers/common_lisp.rb +345 -0
- data/lib/rouge/lexers/conf.rb +24 -0
- data/lib/rouge/lexers/cpp.rb +66 -0
- data/lib/rouge/lexers/csharp.rb +88 -0
- data/lib/rouge/lexers/css.rb +271 -0
- data/lib/rouge/lexers/dart.rb +104 -0
- data/lib/rouge/lexers/diff.rb +31 -0
- data/lib/rouge/lexers/elixir.rb +108 -0
- data/lib/rouge/lexers/erb.rb +56 -0
- data/lib/rouge/lexers/erlang.rb +118 -0
- data/lib/rouge/lexers/factor.rb +302 -0
- data/lib/rouge/lexers/gherkin.rb +137 -0
- data/lib/rouge/lexers/gherkin/keywords.rb +14 -0
- data/lib/rouge/lexers/glsl.rb +135 -0
- data/lib/rouge/lexers/go.rb +178 -0
- data/lib/rouge/lexers/groovy.rb +104 -0
- data/lib/rouge/lexers/haml.rb +228 -0
- data/lib/rouge/lexers/handlebars.rb +79 -0
- data/lib/rouge/lexers/haskell.rb +183 -0
- data/lib/rouge/lexers/html.rb +94 -0
- data/lib/rouge/lexers/http.rb +80 -0
- data/lib/rouge/lexers/ini.rb +57 -0
- data/lib/rouge/lexers/io.rb +68 -0
- data/lib/rouge/lexers/java.rb +76 -0
- data/lib/rouge/lexers/javascript.rb +297 -0
- data/lib/rouge/lexers/liquid.rb +287 -0
- data/lib/rouge/lexers/literate_coffeescript.rb +33 -0
- data/lib/rouge/lexers/literate_haskell.rb +36 -0
- data/lib/rouge/lexers/llvm.rb +84 -0
- data/lib/rouge/lexers/lua.rb +122 -0
- data/lib/rouge/lexers/lua/builtins.rb +22 -0
- data/lib/rouge/lexers/make.rb +116 -0
- data/lib/rouge/lexers/markdown.rb +154 -0
- data/lib/rouge/lexers/matlab.rb +74 -0
- data/lib/rouge/lexers/matlab/builtins.rb +11 -0
- data/lib/rouge/lexers/moonscript.rb +110 -0
- data/lib/rouge/lexers/nginx.rb +71 -0
- data/lib/rouge/lexers/nim.rb +152 -0
- data/lib/rouge/lexers/objective_c.rb +197 -0
- data/lib/rouge/lexers/ocaml.rb +111 -0
- data/lib/rouge/lexers/perl.rb +197 -0
- data/lib/rouge/lexers/php.rb +173 -0
- data/lib/rouge/lexers/php/builtins.rb +204 -0
- data/lib/rouge/lexers/plain_text.rb +25 -0
- data/lib/rouge/lexers/powershell.rb +96 -0
- data/lib/rouge/lexers/prolog.rb +64 -0
- data/lib/rouge/lexers/properties.rb +55 -0
- data/lib/rouge/lexers/puppet.rb +128 -0
- data/lib/rouge/lexers/python.rb +228 -0
- data/lib/rouge/lexers/qml.rb +72 -0
- data/lib/rouge/lexers/r.rb +56 -0
- data/lib/rouge/lexers/racket.rb +542 -0
- data/lib/rouge/lexers/ruby.rb +415 -0
- data/lib/rouge/lexers/rust.rb +191 -0
- data/lib/rouge/lexers/sass.rb +74 -0
- data/lib/rouge/lexers/sass/common.rb +180 -0
- data/lib/rouge/lexers/scala.rb +142 -0
- data/lib/rouge/lexers/scheme.rb +112 -0
- data/lib/rouge/lexers/scss.rb +34 -0
- data/lib/rouge/lexers/sed.rb +170 -0
- data/lib/rouge/lexers/shell.rb +152 -0
- data/lib/rouge/lexers/slim.rb +228 -0
- data/lib/rouge/lexers/smalltalk.rb +116 -0
- data/lib/rouge/lexers/sml.rb +347 -0
- data/lib/rouge/lexers/sql.rb +140 -0
- data/lib/rouge/lexers/swift.rb +144 -0
- data/lib/rouge/lexers/tcl.rb +192 -0
- data/lib/rouge/lexers/tex.rb +72 -0
- data/lib/rouge/lexers/toml.rb +71 -0
- data/lib/rouge/lexers/tulip.rb +75 -0
- data/lib/rouge/lexers/vb.rb +164 -0
- data/lib/rouge/lexers/viml.rb +101 -0
- data/lib/rouge/lexers/viml/keywords.rb +12 -0
- data/lib/rouge/lexers/xml.rb +59 -0
- data/lib/rouge/lexers/yaml.rb +364 -0
- data/lib/rouge/plugins/redcarpet.rb +30 -0
- data/lib/rouge/regex_lexer.rb +439 -0
- data/lib/rouge/template_lexer.rb +22 -0
- data/lib/rouge/text_analyzer.rb +48 -0
- data/lib/rouge/theme.rb +195 -0
- data/lib/rouge/themes/base16.rb +130 -0
- data/lib/rouge/themes/colorful.rb +67 -0
- data/lib/rouge/themes/github.rb +71 -0
- data/lib/rouge/themes/molokai.rb +82 -0
- data/lib/rouge/themes/monokai.rb +92 -0
- data/lib/rouge/themes/monokai_sublime.rb +90 -0
- data/lib/rouge/themes/thankful_eyes.rb +71 -0
- data/lib/rouge/token.rb +182 -0
- data/lib/rouge/util.rb +101 -0
- data/lib/rouge/version.rb +7 -0
- metadata +231 -0
data/lib/rouge/lexer.rb
ADDED
@@ -0,0 +1,443 @@
|
|
1
|
+
# -*- coding: utf-8 -*- #
|
2
|
+
|
3
|
+
# stdlib
|
4
|
+
require 'strscan'
|
5
|
+
require 'cgi'
|
6
|
+
require 'set'
|
7
|
+
|
8
|
+
module Rouge
|
9
|
+
# @abstract
|
10
|
+
# A lexer transforms text into a stream of `[token, chunk]` pairs.
|
11
|
+
class Lexer
|
12
|
+
include Token::Tokens
|
13
|
+
|
14
|
+
class << self
|
15
|
+
# Lexes `stream` with the given options. The lex is delegated to a
|
16
|
+
# new instance.
|
17
|
+
#
|
18
|
+
# @see #lex
|
19
|
+
# Class-level convenience wrapper: builds a fresh lexer instance from
# `opts` and hands `stream` (plus the optional block) to its #lex.
#
# @param stream the text to lex
# @param [Hash] opts options forwarded to the constructor
# @return whatever the instance-level #lex returns
def lex(stream, opts={}, &b)
  lexer = new(opts)
  lexer.lex(stream, &b)
end
|
22
|
+
|
23
|
+
# Get and/or extend the default options stored on this lexer class.
#
# @param [Hash] o entries merged (destructively) into the stored defaults
# @return [Hash] the stored defaults hash itself, after merging
def default_options(o={})
  (@default_options ||= {}).merge!(o)
end
|
28
|
+
|
29
|
+
# Given a string, return the correct lexer class.
|
30
|
+
# Look up a lexer class in the registry by name.
#
# @param name anything responding to #to_s (e.g. a String or Symbol)
# @return the registered entry, or nil when nothing is registered
#   under that name
def find(name)
  key = name.to_s
  registry[key]
end
|
33
|
+
|
34
|
+
# Find a lexer, with fancy shiny features.
|
35
|
+
#
|
36
|
+
# * The string you pass can include CGI-style options
|
37
|
+
#
|
38
|
+
# Lexer.find_fancy('erb?parent=tex')
|
39
|
+
#
|
40
|
+
# * You can pass the special name 'guess' so we guess for you,
|
41
|
+
# and you can pass a second argument of the code to guess by
|
42
|
+
#
|
43
|
+
# Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
|
44
|
+
#
|
45
|
+
# This is used in the Redcarpet plugin as well as Rouge's own
|
46
|
+
# markdown lexer for highlighting internal code blocks.
|
47
|
+
#
|
48
|
+
def find_fancy(str, code=nil)
|
49
|
+
name, opts = str ? str.split('?', 2) : [nil, '']
|
50
|
+
|
51
|
+
# parse the options hash from a cgi-style string
|
52
|
+
opts = CGI.parse(opts || '').map do |k, vals|
|
53
|
+
[ k.to_sym, vals.empty? ? true : vals[0] ]
|
54
|
+
end
|
55
|
+
|
56
|
+
opts = Hash[opts]
|
57
|
+
|
58
|
+
lexer_class = case name
|
59
|
+
when 'guess', nil
|
60
|
+
self.guess(:source => code, :mimetype => opts[:mimetype])
|
61
|
+
when String
|
62
|
+
self.find(name)
|
63
|
+
end
|
64
|
+
|
65
|
+
lexer_class && lexer_class.new(opts)
|
66
|
+
end
|
67
|
+
|
68
|
+
# Specify or get this lexer's title. Meant to be human-readable.
|
69
|
+
# Specify or get this lexer's title. Meant to be human-readable.
#
# The first value wins: once a title is stored, later calls return it
# unchanged. When no title has been stored and none is given, the
# capitalized tag is used.
#
# The fallback is computed only when it is actually needed, so reading
# an explicitly set title no longer calls `tag.capitalize` (which
# raised when the tag was still nil).
#
# @param [String, nil] t the title to store, or nil to just read it
# @return [String] the stored title
def title(t=nil)
  @title ||= (t.nil? ? tag.capitalize : t)
end
|
75
|
+
|
76
|
+
# Specify or get this lexer's description.
|
77
|
+
# Specify or get this lexer's description.
#
# @param arg the description to store; omit it to read the current one
# @return the stored description (nil if none was ever set)
def desc(arg=:absent)
  return @desc if arg == :absent

  @desc = arg
end
|
84
|
+
|
85
|
+
# Specify or get the path name containing a small demo for
|
86
|
+
# this lexer (can be overridden by {demo}).
|
87
|
+
# Specify or get the path containing a small demo for this lexer.
#
# A path specified earlier is now preserved: previously the getter
# overwrote it with the default on every read, which made the setter
# ineffective.
#
# @param arg the path to store; omit it to read the current path
# @return [Pathname] the stored path, defaulting to `demos/<tag>` next
#   to this file
def demo_file(arg=:absent)
  return @demo_file = Pathname.new(arg) unless arg == :absent

  @demo_file ||= Pathname.new(__FILE__).dirname.join('demos', tag)
end
|
92
|
+
|
93
|
+
# Specify or get a small demo string for this lexer
|
94
|
+
# Specify or get a small demo string for this lexer.
#
# A demo specified earlier is now returned as-is: previously the getter
# always re-read {demo_file} and discarded it. The file is read (as
# UTF-8) only when no demo has been stored yet.
#
# @param arg the demo text to store; omit it to read the current demo
# @return [String] the demo text
def demo(arg=:absent)
  return @demo = arg unless arg == :absent

  @demo ||= File.read(demo_file, encoding: 'utf-8')
end
|
99
|
+
|
100
|
+
# @return a list of all lexers.
|
101
|
+
# @return [Array] every registered lexer, each listed once even when
#   it is registered under several names
def all
  registered = registry.values
  registered.uniq
end
|
104
|
+
|
105
|
+
# Guess which lexer to use based on a hash of info.
|
106
|
+
#
|
107
|
+
# This accepts the same arguments as Lexer.guess, but will never throw
|
108
|
+
# an error. It will return a (possibly empty) list of potential lexers
|
109
|
+
# to use.
|
110
|
+
# Narrow the registered lexers down using the given hints.
#
# The candidates are filtered by mimetype, then by filename; as soon as
# exactly one remains it is returned. Otherwise, if source text was
# given, the single best source-based match (if any) is returned.
#
# @param [Hash] info hints under :mimetype, :filename and :source
# @return [Array] a possibly empty list of candidate lexers
def guesses(info={})
  mimetype, filename, source = info.values_at(:mimetype, :filename, :source)
  candidates = registry.values.uniq
  total_size = candidates.size

  candidates = filter_by_mimetype(candidates, mimetype) if mimetype
  return candidates if candidates.size == 1

  candidates = filter_by_filename(candidates, filename) if filename
  return candidates if candidates.size == 1

  return [] unless source

  # Against *all* lexers only confident analyze_text results count;
  # once the field has been narrowed, any positive score is trusted.
  source_threshold = candidates.size < total_size ? 0 : 0.5
  [best_by_source(candidates, source, source_threshold)].compact
end
|
131
|
+
|
132
|
+
# Raised when guessing leaves more than one candidate lexer.
class AmbiguousGuess < StandardError
  # @return the candidate lexers that could not be told apart
  attr_reader :alternatives

  # @param alternatives the candidate lexers
  def initialize(alternatives)
    @alternatives = alternatives
  end

  # @return [String] a description listing the candidates' tags
  def message
    tags = alternatives.map(&:tag)
    "Ambiguous guess: can't decide between #{tags.inspect}"
  end
end
|
140
|
+
|
141
|
+
# Guess which lexer to use based on a hash of info.
|
142
|
+
#
|
143
|
+
# @option info :mimetype
|
144
|
+
# A mimetype to guess by
|
145
|
+
# @option info :filename
|
146
|
+
# A filename to guess by
|
147
|
+
# @option info :source
|
148
|
+
# The source itself, which, if guessing by mimetype or filename
|
149
|
+
# fails, will be searched for shebangs, <!DOCTYPE ...> tags, and
|
150
|
+
# other hints.
|
151
|
+
#
|
152
|
+
# @see Lexer.analyze_text
|
153
|
+
# @see Lexer.guesses
|
154
|
+
# Guess which lexer to use based on a hash of info (see {guesses}).
#
# @param [Hash] info hints under :mimetype, :filename and :source
# @return the single matching lexer, or Lexers::PlainText when nothing
#   matched
# @raise [AmbiguousGuess] when more than one candidate remains
def guess(info={})
  candidates = guesses(info)

  case candidates.size
  when 0 then Lexers::PlainText
  when 1 then candidates[0]
  else raise AmbiguousGuess.new(candidates)
  end
end
|
162
|
+
|
163
|
+
# Shorthand for {guess} with only a :mimetype hint.
#
# @param mt the mimetype to guess by
def guess_by_mimetype(mt)
  info = { :mimetype => mt }
  guess(info)
end
|
166
|
+
|
167
|
+
# Shorthand for {guess} with only a :filename hint.
#
# @param fname the filename to guess by
def guess_by_filename(fname)
  info = { :filename => fname }
  guess(info)
end
|
170
|
+
|
171
|
+
# Shorthand for {guess} with only a :source hint.
#
# @param source the source to guess by
def guess_by_source(source)
  info = { :source => source }
  guess(info)
end
|
174
|
+
|
175
|
+
private
|
176
|
+
# Keep only the lexers that declare the given mimetype.
#
# @param [Array] lexers candidate lexers
# @param mt the mimetype to match
# @return [Array] the matching lexers, or the unchanged input list when
#   none of them matches
def filter_by_mimetype(lexers, mt)
  matching = lexers.select do |lexer|
    lexer.mimetypes.include?(mt)
  end

  return lexers unless matching.any?

  matching
end
|
180
|
+
|
181
|
+
# returns a list of lexers that match the given filename with
|
182
|
+
# equal specificity (i.e. number of wildcards in the pattern).
|
183
|
+
# This helps disambiguate between, e.g. the Nginx lexer, which
|
184
|
+
# matches `nginx.conf`, and the Conf lexer, which matches `*.conf`.
|
185
|
+
# In this case, nginx will win because the pattern has no wildcards,
|
186
|
+
# while `*.conf` has one.
|
187
|
+
# Keep the lexers whose filename globs match `fname` most specifically.
#
# Specificity is the number of wildcard characters (`*`, `?`, `[`) in a
# matching pattern: fewer is better. Each lexer is scored by its most
# specific matching pattern, and all lexers sharing the best score are
# returned in their original order.
#
# @param [Array] lexers candidate lexers
# @param [String] fname a filename or path; only its basename is matched
# @return [Array] the best-scoring lexers, or the unchanged input list
#   when no pattern matches
def filter_by_filename(lexers, fname)
  basename = File.basename(fname)

  # [lexer, fewest-wildcards score] for every lexer that matches at all
  scored = []
  lexers.each do |lexer|
    wildcard_counts = lexer.filenames.select { |pattern|
      File.fnmatch?(pattern, basename, File::FNM_DOTMATCH)
    }.map { |pattern| pattern.scan(/[*?\[]/).size }

    scored << [lexer, wildcard_counts.min] unless wildcard_counts.empty?
  end

  return lexers if scored.empty?

  best = scored.map { |_, score| score }.min
  scored.select { |_, score| score == best }.map { |lexer, _| lexer }
end
|
212
|
+
|
213
|
+
# Pick the lexer whose analyze_text rates the source highest.
#
# @param [Array] lexers candidate lexers
# @param [String, #read] source the text, or an object to read it from
# @param [Numeric] threshold score a lexer must exceed to be considered
# @return the first lexer scoring exactly 1, otherwise the
#   highest-scoring lexer above the threshold, or nil when there is none
# @raise [RuntimeError] when source is neither a String nor readable
def best_by_source(lexers, source, threshold=0)
  text =
    if source.is_a?(String)
      source
    elsif source.respond_to?(:read)
      source.read
    else
      raise 'invalid source'
    end

  assert_utf8!(text)

  analyzer = TextAnalyzer.new(text)

  winner = nil
  high_score = threshold
  lexers.each do |lexer|
    score = lexer.analyze_text(analyzer) || 0

    # a score of 1 is treated as certain: stop looking
    return lexer if score == 1

    next unless score > high_score

    winner = lexer
    high_score = score
  end

  winner
end
|
241
|
+
|
242
|
+
protected
|
243
|
+
# @private
|
244
|
+
# Record `lexer` in the registry under the string form of `name`.
#
# @param name the name to register under (converted with #to_s)
# @param lexer the lexer to store
# @return the lexer that was stored
def register(name, lexer)
  registry.store(name.to_s, lexer)
end
|
247
|
+
|
248
|
+
public
|
249
|
+
# Used to specify or get the canonical name of this lexer class.
|
250
|
+
#
|
251
|
+
# @example
|
252
|
+
# class MyLexer < Lexer
|
253
|
+
# tag 'foo'
|
254
|
+
# end
|
255
|
+
#
|
256
|
+
# MyLexer.tag # => 'foo'
|
257
|
+
#
|
258
|
+
# Lexer.find('foo') # => MyLexer
|
259
|
+
# Specify or get the canonical name of this lexer class.
#
# @param t the tag to set (stored via #to_s); omit or pass nil to read
# @return the stored tag when reading; when setting, the result of
#   registering this class with Lexer under the new tag
def tag(t=nil)
  if t.nil?
    @tag
  else
    @tag = t.to_s
    Lexer.register(@tag, self)
  end
end
|
265
|
+
|
266
|
+
# Used to specify alternate names this lexer class may be found by.
|
267
|
+
#
|
268
|
+
# @example
|
269
|
+
# class Erb < Lexer
|
270
|
+
# tag 'erb'
|
271
|
+
# aliases 'eruby', 'rhtml'
|
272
|
+
# end
|
273
|
+
#
|
274
|
+
# Lexer.find('eruby') # => Erb
|
275
|
+
# Specify alternate names this lexer class may be found by. Each name
# is registered with Lexer and appended to this class's alias list.
#
# @param args the alias names (converted with #to_s)
# @return [Array<String>] all aliases declared so far
def aliases(*args)
  names = args.map(&:to_s)
  names.each { |name| Lexer.register(name, self) }

  @aliases ||= []
  @aliases.concat(names)
end
|
280
|
+
|
281
|
+
# Specify a list of filename globs associated with this lexer.
|
282
|
+
#
|
283
|
+
# @example
|
284
|
+
# class Ruby < Lexer
|
285
|
+
# filenames '*.rb', '*.ruby', 'Gemfile', 'Rakefile'
|
286
|
+
# end
|
287
|
+
# Specify (by passing globs) or get (by passing nothing) the filename
# globs associated with this lexer.
#
# @param fnames glob patterns to append
# @return [Array] every glob declared so far
def filenames(*fnames)
  @filenames ||= []
  @filenames.concat(fnames)
end
|
290
|
+
|
291
|
+
# Specify a list of mimetypes associated with this lexer.
|
292
|
+
#
|
293
|
+
# @example
|
294
|
+
# class Html < Lexer
|
295
|
+
# mimetypes 'text/html', 'application/xhtml+xml'
|
296
|
+
# end
|
297
|
+
# Specify (by passing values) or get (by passing nothing) the mimetypes
# associated with this lexer.
#
# @param mts mimetypes to append
# @return [Array] every mimetype declared so far
def mimetypes(*mts)
  @mimetypes ||= []
  @mimetypes.concat(mts)
end
|
300
|
+
|
301
|
+
# @private
|
302
|
+
# @private
# Ensure `str` carries an encoding the lexers accept: US-ASCII, UTF-8
# or binary (ASCII-8BIT).
#
# Encoding objects are compared rather than their names, because the
# name reported for the binary encoding is not the same across Ruby
# versions ("ASCII-8BIT" vs "BINARY"); a name comparison would reject
# binary strings on the latter.
#
# @param [String] str the string to check
# @return [nil] when the encoding is acceptable
# @raise [EncodingError] for any other encoding
def assert_utf8!(str)
  encoding = str.encoding
  return if encoding == Encoding::US_ASCII ||
            encoding == Encoding::UTF_8 ||
            encoding == Encoding::ASCII_8BIT

  raise EncodingError.new(
    "Bad encoding: #{encoding.names.join(',')}. " +
    "Please convert your string to UTF-8."
  )
end
|
309
|
+
|
310
|
+
private
|
311
|
+
# @return [Hash] the name => lexer table, created empty on first use
def registry
  @registry = {} unless @registry
  @registry
end
|
314
|
+
end
|
315
|
+
|
316
|
+
# -*- instance methods -*- #
|
317
|
+
|
318
|
+
# Create a new lexer with the given options. Individual lexers may
|
319
|
+
# specify extra options. The only current globally accepted option
|
320
|
+
# is `:debug`.
|
321
|
+
#
|
322
|
+
# @option opts :debug
|
323
|
+
# Prints debug information to stdout. The particular info depends
|
324
|
+
# on the lexer in question. In regex lexers, this will log the
|
325
|
+
# state stack at the beginning of each step, along with each regex
|
326
|
+
# tried and each stream consumed. Try it, it's pretty useful.
|
327
|
+
# Create a new lexer with the given options.
#
# The options are stored through #options, and the resulting `:debug`
# setting is read back through #option and cached in @debug.
#
# @param [Hash] opts options for this lexer instance
def initialize(opts={})
  options(opts)

  debug_flag = option(:debug)
  @debug = debug_flag
end
|
332
|
+
|
333
|
+
# get and/or specify the options for this lexer.
|
334
|
+
# Get and/or specify the options for this lexer.
#
# @param [Hash] o entries merged (destructively) into this instance's
#   own options
# @return [Hash] a new hash: the class's default options overlaid with
#   this instance's options
def options(o={})
  @options ||= {}
  @options.merge!(o)

  defaults = self.class.default_options
  defaults.merge(@options)
end
|
339
|
+
|
340
|
+
# get or specify one option for this lexer
|
341
|
+
# Get or specify one option for this lexer.
#
# @param k the option key
# @param v the value to store; omit it to read the current value
# @return the option's value when reading; when writing, whatever
#   #options returns
def option(k, v=:absent)
  return options[k] if v == :absent

  options({ k => v })
end
|
348
|
+
|
349
|
+
# @deprecated
|
350
|
+
# Instead of `debug { "foo" }`, simply `puts "foo" if @debug`.
|
351
|
+
#
|
352
|
+
# Leave a debug message if the `:debug` option is set. The message
|
353
|
+
# is given as a block because some debug messages contain calculated
|
354
|
+
# information that is unnecessary for lexing in the real world.
|
355
|
+
#
|
356
|
+
# Calls to this method should be guarded with "if @debug" for best
|
357
|
+
# performance when debugging is turned off.
|
358
|
+
#
|
359
|
+
# @example
|
360
|
+
# debug { "hello, world!" } if @debug
|
361
|
+
# @deprecated
#   Instead of `debug { "foo" }`, simply `puts "foo" if @debug`.
#
# Always emits a deprecation warning; additionally prints the block's
# result when @debug is set. The block is not evaluated otherwise.
#
# @yieldreturn the message to print
def debug
  warn "Lexer#debug is deprecated. Simply puts if @debug instead."
  return unless @debug

  puts yield
end
|
365
|
+
|
366
|
+
# @abstract
|
367
|
+
#
|
368
|
+
# Called after each lex is finished. The default implementation
|
369
|
+
# is a noop.
|
370
|
+
# Hook for clearing per-lex state; #lex calls it before lexing unless
# `:continue` is given. This default implementation does nothing.
#
# @return [nil]
def reset!
  nil
end
|
372
|
+
|
373
|
+
# Given a string, yield [token, chunk] pairs. If no block is given,
|
374
|
+
# an enumerator is returned.
|
375
|
+
#
|
376
|
+
# @option opts :continue
|
377
|
+
# Continue the lex from the previous state (i.e. don't call #reset!)
|
378
|
+
# Given a string, yield [token, chunk] pairs. If no block is given,
# an enumerator is returned.
#
# Consecutive chunks with the same token are merged into one pair, and
# empty chunks are dropped.
#
# Merging never mutates a chunk received from #stream_tokens: the first
# merge builds a fresh string and only that private buffer is appended
# to afterwards. Previously the first chunk was appended to in place,
# which altered strings still referenced by the lexer and raised on
# frozen chunks.
#
# @param [String] string the text to lex
# @option opts :continue
#   Continue the lex from the previous state (i.e. don't call #reset!)
# @raise [EncodingError] via Lexer.assert_utf8! for unsupported encodings
def lex(string, opts={}, &b)
  return enum_for(:lex, string, opts) unless block_given?

  Lexer.assert_utf8!(string)

  reset! unless opts[:continue]

  # consolidate consecutive tokens of the same type
  last_token = nil
  last_val = nil
  owned = false # true once last_val is a buffer created here
  stream_tokens(string) do |tok, val|
    next if val.empty?

    if tok == last_token
      if owned
        last_val << val
      else
        last_val += val
        owned = true
      end
      next
    end

    b.call(last_token, last_val) if last_token
    last_token = tok
    last_val = val
    owned = false
  end

  b.call(last_token, last_val) if last_token
end
|
403
|
+
|
404
|
+
# delegated to {Lexer.tag}
|
405
|
+
# Instance-level shortcut for the class's tag.
#
# @return whatever the lexer class's `tag` returns
def tag
  lexer_class = self.class
  lexer_class.tag
end
|
408
|
+
|
409
|
+
# @abstract
|
410
|
+
#
|
411
|
+
# Yield `[token, chunk]` pairs, given a prepared input stream. This
|
412
|
+
# must be implemented.
|
413
|
+
#
|
414
|
+
# @param [StringScanner] stream
|
415
|
+
# the stream
|
416
|
+
# @abstract
#
# Yield `[token, chunk]` pairs for the given input. Subclasses must
# override this; the base implementation always raises.
#
# @param stream the input to tokenize
# @raise [RuntimeError] always, with the message 'abstract'
def stream_tokens(stream, &b)
  raise RuntimeError, 'abstract'
end
|
419
|
+
|
420
|
+
# @abstract
|
421
|
+
#
|
422
|
+
# Return a number between 0 and 1 indicating the likelihood that
|
423
|
+
# the text given should be lexed with this lexer. The default
|
424
|
+
# implementation returns 0. Values under 0.5 will only be used
|
425
|
+
# to disambiguate filename or mimetype matches.
|
426
|
+
#
|
427
|
+
# @param [TextAnalyzer] text
|
428
|
+
# the text to be analyzed, with a couple of handy methods on it,
|
429
|
+
# like {TextAnalyzer#shebang?} and {TextAnalyzer#doctype?}
|
430
|
+
# @abstract
#
# Rate how likely it is that `text` should be lexed with this lexer.
# This base implementation ignores the text and reports no confidence.
#
# @param text the text to be analyzed
# @return [Integer] always 0 here
def self.analyze_text(text)
  0
end
|
433
|
+
end
|
434
|
+
|
435
|
+
module Lexers
  # Load a file from the `lexers` directory next to this file, unless
  # the named constant is already defined.
  #
  # @param const_name the constant whose presence makes loading unnecessary
  # @param relpath path of the file to load, relative to `lexers/`
  # @return nil when the constant already exists, otherwise the result
  #   of `load`
  def self.load_const(const_name, relpath)
    unless const_defined?(const_name)
      lexers_dir = Pathname.new(__FILE__).dirname.join('lexers')
      load lexers_dir.join(relpath)
    end
  end
end
|
443
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'yaml'
|
2
|
+
|
3
|
+
module Rouge
|
4
|
+
module Lexers
|
5
|
+
# Lexer for Apache web server configuration files.
#
# Words are classified with the keyword lists loaded from
# `apache/keywords.yml` (keys :sections, :directives and :values).
class Apache < RegexLexer
  title "Apache"
  desc 'configuration files for Apache web server'
  tag 'apache'
  mimetypes 'text/x-httpd-conf', 'text/x-apache-conf'
  filenames '.htaccess', 'httpd.conf'

  class << self
    attr_reader :keywords
  end
  # Load Apache keywords from separate YML file. The block form of
  # File.open closes the handle once parsing is done; the bare
  # File.open used before left it open.
  @keywords = File.open(Pathname.new(__FILE__).dirname.join('apache/keywords.yml')) do |file|
    ::YAML.load(file)
  end

  # Map a word to a token type according to the keyword lists.
  #
  # @param token the word to classify
  # @return the token type for section names, directives or value
  #   literals, and Text for anything else. The Text fallback means a
  #   nil token type is no longer passed on for unknown words.
  def name_for_token(token)
    if self.class.keywords[:sections].include? token
      Name::Class
    elsif self.class.keywords[:directives].include? token
      Name::Label
    elsif self.class.keywords[:values].include? token
      Literal::String::Symbol
    else
      Text
    end
  end

  state :whitespace do
    rule /\#.*?\n/, Comment
    rule /[\s\n]+/m, Text
  end


  state :root do
    mixin :whitespace

    rule /(<\/?)(\w+)/ do |m|
      groups Punctuation, name_for_token(m[2])
      push :section
    end

    rule /\w+/ do |m|
      token name_for_token(m[0])
      push :directive
    end
  end

  state :section do
    mixin :whitespace

    # Match section arguments
    rule /([^>]+)?(>\n)/ do |m|
      groups Literal::String::Regex, Punctuation
      pop!
    end
  end

  state :directive do
    # Match value literals and other directive arguments
    rule /(\w+)*(.*?(\n|$))/ do |m|
      # m[1] is nil when the arguments do not start with a word; in
      # that case there is no leading word to emit.
      token name_for_token(m[1]), m[1] if m[1]
      token Text, m[2]
      pop!
    end
  end
end
|
67
|
+
end
|
68
|
+
end
|