rouge 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -13,6 +13,7 @@ module Rouge
13
13
  state :root do
14
14
  rule /[^<&]+/m, 'Text'
15
15
  rule /&\S*?;/, 'Name.Entity'
16
+ rule /<!DOCTYPE .*?>/i, 'Comment.Preproc'
16
17
  rule /<!\[CDATA\[.*?\]\]>/m, 'Comment.Preproc'
17
18
  rule /<!--/, 'Comment', :comment
18
19
  rule /<\?.*?\?>/m, 'Comment.Preproc' # php? really?
@@ -0,0 +1,173 @@
1
module Rouge
  module Lexers
    # Lexer for Markdown documents.
    #
    # Block-level constructs (headings, fenced and indented code, lists,
    # blockquotes, link references) and inline constructs (emphasis, links,
    # code spans, automatic links) are all matched from the :root state.
    # YAML frontmatter is delegated to the YAML lexer, inline HTML to a
    # memoized HTML lexer, and fenced code blocks to whichever lexer the
    # fence's info string names (when Lexer.find returns one).
    #
    # NOTE(review): the fenced-code rule calls CGI.parse; this file does not
    # require 'cgi' itself, so it presumably relies on it being loaded
    # elsewhere -- confirm.
    class Markdown < RegexLexer
      tag 'markdown'
      aliases 'md', 'mkd'
      filenames '*.markdown', '*.md', '*.mkd'
      mimetypes 'text/x-markdown'

      # The HTML lexer used for inline HTML. Built lazily with this lexer's
      # options and reused afterwards.
      def html
        @html ||= HTML.new(options)
      end

      # reset the embedded HTML lexer whenever this lexer starts
      start { html.reset! }

      # "escape or dot": a backslash-escaped character, or any single
      # character other than a backslash or a newline
      edot = /\\.|[^\\\n]/

      state :root do
        # YAML frontmatter
        rule(/\A(---\s*\n.*?\n?)^(---\s*$\n?)/m) { delegate YAML }

        rule /\\./, 'Literal.String.Escape'

        # Setext headings: a line of text underlined with `===` is a
        # first-level heading, one underlined with `---` is second-level.
        # The token names mirror the ATX rules below
        # (`#` => Heading, `##...` => Subheading).
        rule /^[\S ]+\n(?:---*)\n/, 'Generic.Subheading'
        rule /^[\S ]+\n(?:===*)\n/, 'Generic.Heading'

        # ATX headings
        rule /^#(?=[^#]).*?$/, 'Generic.Heading'
        rule /^##*.*?$/, 'Generic.Subheading'

        # Fenced code blocks, github style. The text after the opening fence
        # names the lexer for the body and may carry cgi-style options after
        # a `?` (for example `ruby?key=value`). When no lexer is found the
        # body is emitted as a plain backtick string.
        rule /(\n[ \t]*)(```|~~~)(.*?)(\n.*?)(\2)/m do |m|
          sublexer, opts = m[3].strip.split('?', 2)

          if sublexer
            sublexer = Lexer.find(sublexer)

            # parse the options hash from a cgi-style string
            opts = CGI.parse(opts || '').map do |k, vals|
              [ k.to_sym, vals.empty? ? true : vals[0] ]
            end

            opts = Hash[opts]

            # only instantiate when Lexer.find actually returned something
            sublexer &&= sublexer.new(opts)
          end

          token 'Text', m[1]
          token 'Punctuation', m[2]
          token 'Name.Label', m[3]
          if sublexer
            delegate sublexer, m[4]
          else
            token 'Literal.String.Backtick', m[4]
          end
          token 'Punctuation', m[5]
        end

        # indented code blocks: a blank line followed by indented lines
        # NOTE(review): as written a single leading space is enough to start
        # a code line; Markdown normally requires four spaces or a tab --
        # confirm the whitespace in this pattern is what was intended.
        rule /\n\n(( |\t).*?\n|\n)+/, 'Literal.String.Backtick'

        # inline code spans, closed by the same number of backticks
        rule /(`+)#{edot}*\1/, 'Literal.String.Backtick'

        # various uses of * are in order of precedence

        # horizontal rules: three or more `*` or `-`, optionally spaced
        rule /^(\s*[*]){3,}\s*$/, 'Punctuation'
        rule /^(\s*[-]){3,}\s*$/, 'Punctuation'

        # bulleted lists
        rule /^\s*[*+-](?=\s)/, 'Punctuation'

        # numbered lists
        rule /^\s*\d+\./, 'Punctuation'

        # blockquotes
        rule /^\s*>.*?$/, 'Generic.Traceback'

        # link references
        # [foo]: bar "baz"
        rule %r(^
          (\s*) # leading whitespace
          (\[) (#{edot}+?) (\]) # the reference
          (\s*) (:) # colon
        )x do
          group 'Text'
          group 'Punctuation'; group 'Literal.String.Symbol'; group 'Punctuation'
          group 'Text'; group 'Punctuation'

          # :url is pushed last so it is lexed first, then the optional title
          push :title
          push :url
        end

        # links and images
        rule /(!?\[)(#{edot}+?)(\])/ do
          group 'Punctuation'
          group 'Name.Variable'
          group 'Punctuation'
          push :link
        end

        rule /[*][*]#{edot}*?[*][*]/, 'Generic.Strong'
        rule /__#{edot}*?__/, 'Generic.Strong'

        rule /[*]#{edot}*?[*]/, 'Generic.Emph'
        rule /_#{edot}*?_/, 'Generic.Emph'

        # Automatic links
        rule /<.*?@.+[.].+>/, 'Name.Variable'
        rule %r[<(https?|mailto|ftp)://#{edot}*?>], 'Name.Variable'


        # plain text: anything that cannot start one of the constructs above
        rule /[^\\`\[*\n&<]+/, 'Text'

        # inline html
        rule(/&\S*;/) { delegate html }
        rule(/<#{edot}*?>/) { delegate html }
        # a stray `&` or `<` that did not form an entity or a tag
        rule /[&<]/, 'Text'

        rule /\n/, 'Text'
      end

      # What may follow the `[text]` part of a link or image: a `[ref]`
      # reference, or a parenthesized inline url with an optional title.
      state :link do
        rule /(\[)(#{edot}*?)(\])/ do
          group 'Punctuation'
          group 'Literal.String.Symbol'
          group 'Punctuation'
          pop!
        end

        rule /[(]/ do
          token 'Punctuation'
          # :inline_url is pushed last so it is lexed first
          push :inline_title
          push :inline_url
        end

        rule /[ \t]+/, 'Text'

        # nothing link-like follows: leave the state without consuming input
        rule(//) { pop! }
      end

      # The url of a link reference: either `<...>` or a bare run of
      # non-whitespace characters.
      state :url do
        rule /[ \t]+/, 'Text'

        # the url
        rule /(<)(#{edot}*?)(>)/ do
          group 'Name.Tag'
          group 'Literal.String.Other'
          group 'Name.Tag'
          pop!
        end

        rule /\S+/, 'Literal.String.Other', :pop!
      end

      # An optional link title, delimited by "...", '...' or (...).
      state :title do
        rule /"#{edot}*?"/, 'Name.Namespace'
        rule /'#{edot}*?'/, 'Name.Namespace'
        rule /[(]#{edot}*?[)]/, 'Name.Namespace'
        rule /\s*(?=["'()])/, 'Text'
        rule(//) { pop! }
      end

      # The title part of an inline link; the closing `)` ends the link.
      state :inline_title do
        rule /[)]/, 'Punctuation', :pop!
        mixin :title
      end

      # The url part of an inline link, which additionally stops at `)`.
      state :inline_url do
        rule /[^<\s)]+/, 'Literal.String.Other', :pop!
        rule /\s+/m, 'Text'
        mixin :url
      end
    end
  end
end
@@ -1,17 +1,21 @@
1
1
  module Rouge
2
2
  module Lexers
3
- class PHP < RegexLexer
3
+ class PHP < TemplateLexer
4
4
  tag 'php'
5
5
  aliases 'php', 'php3', 'php4', 'php5'
6
6
  filenames '*.php', '*.php[345]'
7
7
  mimetypes 'text/x-php'
8
8
 
9
+ default_options :parent => 'html'
10
+
9
11
  def initialize(opts={})
10
12
  # if truthy, the lexer starts highlighting with php code
11
13
  # (no <?php required)
12
14
  @start_inline = opts.delete(:start_inline)
13
15
  @funcnamehighlighting = opts.delete(:funcnamehighlighting) { true }
14
16
  @disabledmodules = opts.delete(:disabledmodules) { [] }
17
+
18
+ super(opts)
15
19
  end
16
20
 
17
21
  def builtins
@@ -48,9 +52,8 @@ module Rouge
48
52
  )
49
53
 
50
54
  state :root do
51
- rule /<\?(php)?/, 'Comment.Preproc', :php
52
- rule /.*?(?=<\?)/, 'Other'
53
- rule /</, 'Other'
55
+ rule /<\?(php|=)?/, 'Comment.Preproc', :php
56
+ rule(/.*?(?=<\?)|.*/m) { delegate parent }
54
57
  end
55
58
 
56
59
  state :php do
@@ -140,16 +143,11 @@ module Rouge
140
143
  mixin :php
141
144
  end
142
145
 
143
- def stream_tokens(source, &b)
144
- super(source) do |tok, val|
145
- if tok.name == 'Name.Other' and builtins.include? val
146
- yield [Token['Name.Builtin'], val]
147
- else
148
- yield [tok, val]
149
- end
150
- end
146
+ postprocess 'Name.Other' do |tok, val|
147
+ tok = 'Name.Builtin' if builtins.include? val
148
+
149
+ token tok, val
151
150
  end
152
151
  end
153
152
  end
154
153
  end
155
-
@@ -17,8 +17,8 @@ module Rouge
17
17
  rule %r(
18
18
  : # initial :
19
19
  @{0,2} # optional ivar, for :@foo and :@@foo
20
- [a-z_]\w*[!?] # the symbol
21
- )x, 'Literal.String.Symbol'
20
+ [a-z_]\w*[!?]? # the symbol
21
+ )xi, 'Literal.String.Symbol'
22
22
 
23
23
  # special symbols
24
24
  rule %r(:(?:\*\*|[-+]@|[/\%&\|^`~]|\[\]=?|<<|>>|<=?>|<=?|===?)),
@@ -0,0 +1,358 @@
1
module Rouge
  module Lexers
    # Indentation-aware lexer for YAML documents.
    #
    # Besides the regex state machine, the lexer keeps three pieces of state:
    #
    # * @indent_stack        - the indentation levels currently open
    # * @next_indent         - the candidate indentation level being
    #                          accumulated for the current line
    # * @block_scalar_indent - the indentation of the block scalar being
    #                          read, or nil while it is still undetermined
    class YAML < RegexLexer
      tag 'yaml'
      aliases 'yml'

      filenames '*.yaml', '*.yml'
      # NB: Tabs are forbidden in YAML, which is why you see things
      # like /[ ]+/.

      # reset the indentation levels
      def reset_indent
        debug { " yaml: reset_indent" }
        @indent_stack = [0]
        @next_indent = 0
        @block_scalar_indent = nil
      end

      # The innermost open indentation level.
      #
      # Raises a RuntimeError if the indentation stack is empty.
      def indent
        raise 'empty indent stack!' if @indent_stack.empty?
        @indent_stack.last
      end

      # true if +level+ is shallower than the current indentation
      def dedent?(level)
        level < self.indent
      end

      # true if +level+ is deeper than the current indentation
      def indent?(level)
        level > self.indent
      end

      # Save a possible indentation level, taken from the size of the last
      # match, and close every open level deeper than it.
      #
      # Returns a two-element array [text, error]: on a dedent, +text+ is the
      # part of the match up to the level that remains open and +error+ is
      # whatever extends past it; otherwise +text+ is the whole match and
      # +error+ is empty. (+opts+ is accepted but not used.)
      def save_indent(opts={})
        debug { " yaml: save_indent" }
        match = @last_match[0]
        @next_indent = match.size
        debug { " yaml: indent: #{self.indent}/#@next_indent" }
        debug { " yaml: popping indent stack - before: #@indent_stack" }
        if dedent?(@next_indent)
          @indent_stack.pop while dedent?(@next_indent)
          debug { " yaml: popping indent stack - after: #@indent_stack" }
          debug { " yaml: indent: #{self.indent}/#@next_indent" }

          # dedenting to a state not previously indented to is an error
          [match[0...self.indent], match[self.indent..-1]]
        else
          [match, '']
        end
      end

      # Extend the candidate indentation level by the size of the last match.
      def continue_indent
        debug { " yaml: continue_indent" }
        @next_indent += @last_match[0].size
      end

      # Open the candidate indentation level if it is deeper than the current
      # one. Unless opts[:implicit] is set, the last match is then counted
      # towards the candidate level as well.
      def set_indent(opts={})
        if indent < @next_indent
          @indent_stack << @next_indent
        end

        @next_indent += @last_match[0].size unless opts[:implicit]
      end

      # any character that may begin a plain (unquoted) scalar
      plain_scalar_start = /[^ \t\n\r\f\v?:,\[\]{}#&*!\|>'"%@`]/

      start { reset_indent }

      state :basic do
        rule /#.*$/, 'Comment.Single'
      end

      state :root do
        mixin :basic

        rule /\n+/, 'Text'

        # trailing or pre-comment whitespace
        rule /[ ]+(?=#|$)/, 'Text'

        rule /^%YAML\b/ do
          token 'Name.Tag'
          reset_indent
          push :yaml_directive
        end

        rule /^%TAG\b/ do
          token 'Name.Tag'
          reset_indent
          push :tag_directive
        end

        # doc-start and doc-end indicators
        rule /^(?:---|\.\.\.)(?= |$)/ do
          token 'Name.Namespace'
          reset_indent
          push :block_line
        end

        # indentation spaces
        rule /[ ]*(?!\s|$)/ do
          text, err = save_indent
          token 'Text', text
          # whatever dedents past the level that remains open
          token 'Error', err
          push :block_line; push :indentation
        end
      end

      state :indentation do
        # a blank line: leave both :indentation and :block_line
        rule(/\s*?\n/) { token 'Text'; pop! 2 }
        # whitespace preceding block collection indicators
        rule /[ ]+(?=[-:?](?:[ ]|$))/ do
          token 'Text'
          continue_indent
        end

        # block collection indicators
        rule(/[?:-](?=[ ]|$)/) { token 'Punctuation.Indicator'; set_indent }

        # the beginning of a block line
        rule(/[ ]*/) { token 'Text'; continue_indent; pop! }
      end

      # indented line in the block context
      state :block_line do
        # line end
        rule /[ ]*(?=#|$)/, 'Text', :pop!
        rule /[ ]+/, 'Text'
        # tags, anchors, and aliases
        mixin :descriptors
        # block collections and scalars
        mixin :block_nodes
        # flow collections and quoted scalars
        mixin :flow_nodes

        # a plain scalar
        rule /(?=#{plain_scalar_start}|[?:-][^ \t\n\r\f\v])/ do
          token 'Name.Variable'
          push :plain_scalar_in_block_context
        end
      end

      state :descriptors do
        # a full-form tag
        rule /!<[0-9A-Za-z;\/?:@&=+$,_.!~*'()\[\]%-]+>/, 'Keyword.Type'

        # a tag in the form '!', '!suffix' or '!handle!suffix'
        rule %r(
          !(?:[\w-]+)? #handle
          (?:![\w;/?:@&=+$,.!~*\'()\[\]%-]+)? #suffix
        )x, 'Keyword.Type'

        # an anchor
        rule /&[\w-]+/, 'Name.Label'

        # an alias
        rule /\*[\w-]+/, 'Name.Variable'
      end

      state :block_nodes do
        # implicit key
        rule /:(?=\s|$)/ do
          token 'Punctuation.Indicator'
          set_indent :implicit => true
        end

        # literal and folded scalars
        rule /[\|>]/ do
          token 'Punctuation.Indicator'
          # the header is pushed last so it is lexed before the content
          push :block_scalar_content
          push :block_scalar_header
        end
      end

      state :flow_nodes do
        rule /\[/, 'Punctuation.Indicator', :flow_sequence
        rule /\{/, 'Punctuation.Indicator', :flow_mapping
        rule /'/, 'Literal.String.Single', :single_quoted_scalar
        rule /"/, 'Literal.String.Double', :double_quoted_scalar
      end

      # rules shared by flow sequences and flow mappings
      state :flow_collection do
        rule /\s+/m, 'Text'
        mixin :basic
        rule /[?:,]/, 'Punctuation.Indicator'
        mixin :descriptors
        mixin :flow_nodes

        rule /(?=#{plain_scalar_start})/ do
          push :plain_scalar_in_flow_context
        end
      end

      state :flow_sequence do
        rule /\]/, 'Punctuation.Indicator', :pop!
        mixin :flow_collection
      end

      state :flow_mapping do
        rule /\}/, 'Punctuation.Indicator', :pop!
        mixin :flow_collection
      end

      state :block_scalar_content do
        rule /\n+/, 'Text'

        # empty lines never dedent, but they might be part of the scalar.
        rule /^[ ]+$/ do |m|
          text = m[0]
          indent_size = text.size

          indent_mark = @block_scalar_indent || indent_size

          token 'Text', text[0...indent_mark]
          token 'Name.Constant', text[indent_mark..-1]
        end

        # TODO: ^ doesn't actually seem to affect the match at all.
        # Find a way to work around this limitation.
        rule /^[ ]*/ do |m|
          token 'Text'

          indent_size = m[0].size

          dedent_level = @block_scalar_indent || self.indent
          # the first content line fixes the scalar's indentation when the
          # header did not give one explicitly
          @block_scalar_indent ||= indent_size

          if indent_size < dedent_level
            pop! 2
          end
        end

        rule /[^\n\r\f\v]+/, 'Name.Constant'
      end

      state :block_scalar_header do
        # optional indentation indicator and chomping flag, in either order
        #
        # The alternation is wrapped in a non-capturing group so that the
        # only captures are the two possible positions of the indentation
        # digit (m[1] and m[2]). A header holding just a chomping flag must
        # leave @block_scalar_indent nil so the indentation is detected from
        # the content.
        rule %r(
          (?:
            ([1-9])[+-]? | [+-]?([1-9])?
          )(?=[ ]|$)
        )x do |m|
          @block_scalar_indent = nil
          pop!; push :ignored_line
          next if m[0].empty?

          increment = m[1] || m[2]
          if increment
            @block_scalar_indent = indent + increment.to_i
          end

          token 'Punctuation.Indicator'
        end
      end

      # the remainder of a line on which only a comment may follow
      state :ignored_line do
        mixin :basic
        rule /[ ]+/, 'Text'
        rule /\n/, 'Text', :pop!
      end

      state :quoted_scalar_whitespaces do
        # leading and trailing whitespace is ignored
        rule /^[ ]+/, 'Text'
        rule /[ ]+$/, 'Text'

        rule /\n+/m, 'Text'

        rule /[ ]+/, 'Name.Variable'
      end

      state :single_quoted_scalar do
        mixin :quoted_scalar_whitespaces
        # Inside single quotes the only escape is a doubled quote; a
        # backslash is an ordinary character. This rule must precede the
        # closing-quote rule.
        rule /''/, 'Literal.String.Escape'
        rule /'/, 'Literal.String', :pop!
        rule /[^\s']+/, 'Literal.String'
      end

      state :double_quoted_scalar do
        rule /"/, 'Literal.String', :pop!
        mixin :quoted_scalar_whitespaces
        # escapes
        rule /\\[0abt\tn\nvfre "\\N_LP]/, 'Literal.String.Escape'
        rule /\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
          'Literal.String.Escape'
        rule /[^ \t\n\r\f\v"\\]+/, 'Literal.String'
      end

      # Entered after a newline inside a plain scalar, to decide whether the
      # scalar continues on the next line.
      state :plain_scalar_in_block_context_new_line do
        rule /^[ ]+\n/, 'Text'
        rule /\n+/m, 'Text'
        # a document indicator ends the scalar and the enclosing block line
        rule /^(?=---|\.\.\.)/ do
          pop! 3
        end

        # dedent detection
        rule /^[ ]*/ do |m|
          token 'Text'
          pop!

          indent_size = m[0].size

          # dedent = end of scalar
          if indent_size <= self.indent
            pop!
            # called for its effect on the indent stack; the returned
            # text/error pair is not needed here
            save_indent
            # push :block_line
            push :indentation
          end
        end
      end

      state :plain_scalar_in_block_context do
        # the : indicator ends a scalar
        rule /[ ]*(?=:[ \n]|:$)/, 'Text', :pop!
        rule /[ ]*:/, 'Literal.String'
        rule /[ ]+(?=#)/, 'Text', :pop!
        rule /[ ]+$/, 'Text'
        # check for new documents or dedents at the new line
        rule /\n+/ do
          token 'Text'
          push :plain_scalar_in_block_context_new_line
        end

        rule /[ ]+/, 'Literal.String'
        # regular non-whitespace characters
        rule /[^\s:]+/, 'Literal.String'
      end

      state :plain_scalar_in_flow_context do
        # a flow indicator or a comment ends the scalar
        rule /[ ]*(?=[,:?\[\]{}])/, 'Text', :pop!
        rule /[ ]+(?=#)/, 'Text', :pop!
        rule /^[ ]+/, 'Text'
        rule /[ ]+$/, 'Text'
        rule /\n+/, 'Text'
        rule /[ ]+/, 'Name.Variable'
        rule /[^\s,:?\[\]{}]+/, 'Name.Variable'
      end

      # the version number following %YAML
      state :yaml_directive do
        rule /([ ]+)(\d+\.\d+)/ do
          group 'Text'; group 'Number'
          pop!; push :ignored_line
        end
      end

      # the two arguments following %TAG
      state :tag_directive do
        rule %r(
          ([ ]+)(!|![\w-]*!) # prefix
          ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag handle
        )x do
          group 'Text'; group 'Keyword.Type'
          group 'Text'; group 'Keyword.Type'
          pop!; push :ignored_line
        end
      end
    end
  end
end