rouge 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,7 @@ module Rouge
13
13
  state :root do
14
14
  rule /[^<&]+/m, 'Text'
15
15
  rule /&\S*?;/, 'Name.Entity'
16
+ rule /<!DOCTYPE .*?>/i, 'Comment.Preproc'
16
17
  rule /<!\[CDATA\[.*?\]\]>/m, 'Comment.Preproc'
17
18
  rule /<!--/, 'Comment', :comment
18
19
  rule /<\?.*?\?>/m, 'Comment.Preproc' # php? really?
@@ -0,0 +1,173 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Markdown < RegexLexer
4
+ tag 'markdown'
5
+ aliases 'md', 'mkd'
6
+ filenames '*.markdown', '*.md', '*.mkd'
7
+ mimetypes 'text/x-markdown'
8
+
9
+ def html
10
+ @html ||= HTML.new(options)
11
+ end
12
+
13
+ start { html.reset! }
14
+
15
+ edot = /\\.|[^\\\n]/
16
+
17
+ state :root do
18
+ # YAML frontmatter
19
+ rule(/\A(---\s*\n.*?\n?)^(---\s*$\n?)/m) { delegate YAML }
20
+
21
+ rule /\\./, 'Literal.String.Escape'
22
+
23
+ rule /^[\S ]+\n(?:---*)\n/, 'Generic.Heading'
24
+ rule /^[\S ]+\n(?:===*)\n/, 'Generic.Subheading'
25
+
26
+ rule /^#(?=[^#]).*?$/, 'Generic.Heading'
27
+ rule /^##*.*?$/, 'Generic.Subheading'
28
+
29
+ # TODO: syntax highlight the code block, github style
30
+ rule /(\n[ \t]*)(```|~~~)(.*?)(\n.*?)(\2)/m do |m|
31
+ sublexer, opts = m[3].strip.split('?', 2)
32
+
33
+ if sublexer
34
+ sublexer = Lexer.find(sublexer)
35
+
36
+ # parse the options hash from a cgi-style string
37
+ opts = CGI.parse(opts || '').map do |k, vals|
38
+ [ k.to_sym, vals.empty? ? true : vals[0] ]
39
+ end
40
+
41
+ opts = Hash[opts]
42
+
43
+ sublexer &&= sublexer.new(opts)
44
+ end
45
+
46
+ token 'Text', m[1]
47
+ token 'Punctuation', m[2]
48
+ token 'Name.Label', m[3]
49
+ if sublexer
50
+ delegate sublexer, m[4]
51
+ else
52
+ token 'Literal.String.Backtick', m[4]
53
+ end
54
+ token 'Punctuation', m[5]
55
+ end
56
+
57
+ rule /\n\n(( |\t).*?\n|\n)+/, 'Literal.String.Backtick'
58
+
59
+ rule /(`+)#{edot}*\1/, 'Literal.String.Backtick'
60
+
61
+ # various uses of * are in order of precedence
62
+
63
+ # horizontal rules (three or more * or - alone on a line)
64
+ rule /^(\s*[*]){3,}\s*$/, 'Punctuation'
65
+ rule /^(\s*[-]){3,}\s*$/, 'Punctuation'
66
+
67
+ # bulleted lists
68
+ rule /^\s*[*+-](?=\s)/, 'Punctuation'
69
+
70
+ # numbered lists
71
+ rule /^\s*\d+\./, 'Punctuation'
72
+
73
+ # blockquotes
74
+ rule /^\s*>.*?$/, 'Generic.Traceback'
75
+
76
+ # link references
77
+ # [foo]: bar "baz"
78
+ rule %r(^
79
+ (\s*) # leading whitespace
80
+ (\[) (#{edot}+?) (\]) # the reference
81
+ (\s*) (:) # colon
82
+ )x do
83
+ group 'Text'
84
+ group 'Punctuation'; group 'Literal.String.Symbol'; group 'Punctuation'
85
+ group 'Text'; group 'Punctuation'
86
+
87
+ push :title
88
+ push :url
89
+ end
90
+
91
+ # links and images
92
+ rule /(!?\[)(#{edot}+?)(\])/ do
93
+ group 'Punctuation'
94
+ group 'Name.Variable'
95
+ group 'Punctuation'
96
+ push :link
97
+ end
98
+
99
+ rule /[*][*]#{edot}*?[*][*]/, 'Generic.Strong'
100
+ rule /__#{edot}*?__/, 'Generic.Strong'
101
+
102
+ rule /[*]#{edot}*?[*]/, 'Generic.Emph'
103
+ rule /_#{edot}*?_/, 'Generic.Emph'
104
+
105
+ # Automatic links
106
+ rule /<.*?@.+[.].+>/, 'Name.Variable'
107
+ rule %r[<(https?|mailto|ftp)://#{edot}*?>], 'Name.Variable'
108
+
109
+
110
+ rule /[^\\`\[*\n&<]+/, 'Text'
111
+
112
+ # inline html
113
+ rule(/&\S*;/) { delegate html }
114
+ rule(/<#{edot}*?>/) { delegate html }
115
+ rule /[&<]/, 'Text'
116
+
117
+ rule /\n/, 'Text'
118
+ end
119
+
120
+ state :link do
121
+ rule /(\[)(#{edot}*?)(\])/ do
122
+ group 'Punctuation'
123
+ group 'Literal.String.Symbol'
124
+ group 'Punctuation'
125
+ pop!
126
+ end
127
+
128
+ rule /[(]/ do
129
+ token 'Punctuation'
130
+ push :inline_title
131
+ push :inline_url
132
+ end
133
+
134
+ rule /[ \t]+/, 'Text'
135
+
136
+ rule(//) { pop! }
137
+ end
138
+
139
+ state :url do
140
+ rule /[ \t]+/, 'Text'
141
+
142
+ # the url
143
+ rule /(<)(#{edot}*?)(>)/ do
144
+ group 'Name.Tag'
145
+ group 'Literal.String.Other'
146
+ group 'Name.Tag'
147
+ pop!
148
+ end
149
+
150
+ rule /\S+/, 'Literal.String.Other', :pop!
151
+ end
152
+
153
+ state :title do
154
+ rule /"#{edot}*?"/, 'Name.Namespace'
155
+ rule /'#{edot}*?'/, 'Name.Namespace'
156
+ rule /[(]#{edot}*?[)]/, 'Name.Namespace'
157
+ rule /\s*(?=["'()])/, 'Text'
158
+ rule(//) { pop! }
159
+ end
160
+
161
+ state :inline_title do
162
+ rule /[)]/, 'Punctuation', :pop!
163
+ mixin :title
164
+ end
165
+
166
+ state :inline_url do
167
+ rule /[^<\s)]+/, 'Literal.String.Other', :pop!
168
+ rule /\s+/m, 'Text'
169
+ mixin :url
170
+ end
171
+ end
172
+ end
173
+ end
@@ -1,17 +1,21 @@
1
1
  module Rouge
2
2
  module Lexers
3
- class PHP < RegexLexer
3
+ class PHP < TemplateLexer
4
4
  tag 'php'
5
5
  aliases 'php', 'php3', 'php4', 'php5'
6
6
  filenames '*.php', '*.php[345]'
7
7
  mimetypes 'text/x-php'
8
8
 
9
+ default_options :parent => 'html'
10
+
9
11
  def initialize(opts={})
10
12
  # if truthy, the lexer starts highlighting with php code
11
13
  # (no <?php required)
12
14
  @start_inline = opts.delete(:start_inline)
13
15
  @funcnamehighlighting = opts.delete(:funcnamehighlighting) { true }
14
16
  @disabledmodules = opts.delete(:disabledmodules) { [] }
17
+
18
+ super(opts)
15
19
  end
16
20
 
17
21
  def builtins
@@ -48,9 +52,8 @@ module Rouge
48
52
  )
49
53
 
50
54
  state :root do
51
- rule /<\?(php)?/, 'Comment.Preproc', :php
52
- rule /.*?(?=<\?)/, 'Other'
53
- rule /</, 'Other'
55
+ rule /<\?(php|=)?/, 'Comment.Preproc', :php
56
+ rule(/.*?(?=<\?)|.*/m) { delegate parent }
54
57
  end
55
58
 
56
59
  state :php do
@@ -140,16 +143,11 @@ module Rouge
140
143
  mixin :php
141
144
  end
142
145
 
143
- def stream_tokens(source, &b)
144
- super(source) do |tok, val|
145
- if tok.name == 'Name.Other' and builtins.include? val
146
- yield [Token['Name.Builtin'], val]
147
- else
148
- yield [tok, val]
149
- end
150
- end
146
+ postprocess 'Name.Other' do |tok, val|
147
+ tok = 'Name.Builtin' if builtins.include? val
148
+
149
+ token tok, val
151
150
  end
152
151
  end
153
152
  end
154
153
  end
155
-
@@ -17,8 +17,8 @@ module Rouge
17
17
  rule %r(
18
18
  : # initial :
19
19
  @{0,2} # optional ivar, for :@foo and :@@foo
20
- [a-z_]\w*[!?] # the symbol
21
- )x, 'Literal.String.Symbol'
20
+ [a-z_]\w*[!?]? # the symbol
21
+ )xi, 'Literal.String.Symbol'
22
22
 
23
23
  # special symbols
24
24
  rule %r(:(?:\*\*|[-+]@|[/\%&\|^`~]|\[\]=?|<<|>>|<=?>|<=?|===?)),
@@ -0,0 +1,358 @@
1
+ module Rouge
2
+ module Lexers
3
+ class YAML < RegexLexer
4
+ tag 'yaml'
5
+ aliases 'yml'
6
+
7
+ filenames '*.yaml', '*.yml'
8
+ # NB: Tabs are forbidden in YAML, which is why you see things
9
+ # like /[ ]+/.
10
+
11
+ # reset the indentation levels
12
+ def reset_indent
13
+ debug { " yaml: reset_indent" }
14
+ @indent_stack = [0]
15
+ @next_indent = 0
16
+ @block_scalar_indent = nil
17
+ end
18
+
19
+ def indent
20
+ raise 'empty indent stack!' if @indent_stack.empty?
21
+ @indent_stack.last
22
+ end
23
+
24
+ def dedent?(level)
25
+ level < self.indent
26
+ end
27
+
28
+ def indent?(level)
29
+ level > self.indent
30
+ end
31
+
32
+ # Save a possible indentation level
33
+ def save_indent(opts={})
34
+ debug { " yaml: save_indent" }
35
+ match = @last_match[0]
36
+ @next_indent = match.size
37
+ debug { " yaml: indent: #{self.indent}/#@next_indent" }
38
+ debug { " yaml: popping indent stack - before: #@indent_stack" }
39
+ if dedent?(@next_indent)
40
+ @indent_stack.pop while dedent?(@next_indent)
41
+ debug { " yaml: popping indent stack - after: #@indent_stack" }
42
+ debug { " yaml: indent: #{self.indent}/#@next_indent" }
43
+
44
+ # dedenting to a state not previously indented to is an error
45
+ [match[0...self.indent], match[self.indent..-1]]
46
+ else
47
+ [match, '']
48
+ end
49
+ end
50
+
51
+ def continue_indent
52
+ debug { " yaml: continue_indent" }
53
+ @next_indent += @last_match[0].size
54
+ end
55
+
56
+ def set_indent(opts={})
57
+ if indent < @next_indent
58
+ @indent_stack << @next_indent
59
+ end
60
+
61
+ @next_indent += @last_match[0].size unless opts[:implicit]
62
+ end
63
+
64
+ plain_scalar_start = /[^ \t\n\r\f\v?:,\[\]{}#&*!\|>'"%@`]/
65
+
66
+ start { reset_indent }
67
+
68
+ state :basic do
69
+ rule /#.*$/, 'Comment.Single'
70
+ end
71
+
72
+ state :root do
73
+ mixin :basic
74
+
75
+ rule /\n+/, 'Text'
76
+
77
+ # trailing or pre-comment whitespace
78
+ rule /[ ]+(?=#|$)/, 'Text'
79
+
80
+ rule /^%YAML\b/ do
81
+ token 'Name.Tag'
82
+ reset_indent
83
+ push :yaml_directive
84
+ end
85
+
86
+ rule /^%TAG\b/ do
87
+ token 'Name.Tag'
88
+ reset_indent
89
+ push :tag_directive
90
+ end
91
+
92
+ # doc-start and doc-end indicators
93
+ rule /^(?:---|\.\.\.)(?= |$)/ do
94
+ token 'Name.Namespace'
95
+ reset_indent
96
+ push :block_line
97
+ end
98
+
99
+ # indentation spaces
100
+ rule /[ ]*(?!\s|$)/ do
101
+ text, err = save_indent
102
+ token 'Text', text
103
+ token 'Error', err
104
+ push :block_line; push :indentation
105
+ end
106
+ end
107
+
108
+ state :indentation do
109
+ rule(/\s*?\n/) { token 'Text'; pop! 2 }
110
+ # whitespace preceding block collection indicators
111
+ rule /[ ]+(?=[-:?](?:[ ]|$))/ do
112
+ token 'Text'
113
+ continue_indent
114
+ end
115
+
116
+ # block collection indicators
117
+ rule(/[?:-](?=[ ]|$)/) { token 'Punctuation.Indicator'; set_indent }
118
+
119
+ # the beginning of a block line
120
+ rule(/[ ]*/) { token 'Text'; continue_indent; pop! }
121
+ end
122
+
123
+ # indented line in the block context
124
+ state :block_line do
125
+ # line end
126
+ rule /[ ]*(?=#|$)/, 'Text', :pop!
127
+ rule /[ ]+/, 'Text'
128
+ # tags, anchors, and aliases
129
+ mixin :descriptors
130
+ # block collections and scalars
131
+ mixin :block_nodes
132
+ # flow collections and quoted scalars
133
+ mixin :flow_nodes
134
+
135
+ # a plain scalar
136
+ rule /(?=#{plain_scalar_start}|[?:-][^ \t\n\r\f\v])/ do
137
+ token 'Name.Variable'
138
+ push :plain_scalar_in_block_context
139
+ end
140
+ end
141
+
142
+ state :descriptors do
143
+ # a full-form tag
144
+ rule /!<[0-9A-Za-z;\/?:@&=+$,_.!~*'()\[\]%-]+>/, 'Keyword.Type'
145
+
146
+ # a tag in the form '!', '!suffix' or '!handle!suffix'
147
+ rule %r(
148
+ !(?:[\w-]+)? #handle
149
+ (?:![\w;/?:@&=+$,.!~*\'()\[\]%-]+)? #suffix
150
+ )x, 'Keyword.Type'
151
+
152
+ # an anchor
153
+ rule /&[\w-]+/, 'Name.Label'
154
+
155
+ # an alias
156
+ rule /\*[\w-]+/, 'Name.Variable'
157
+ end
158
+
159
+ state :block_nodes do
160
+ # implicit key
161
+ rule /:(?=\s|$)/ do
162
+ token 'Punctuation.Indicator'
163
+ set_indent :implicit => true
164
+ end
165
+
166
+ # literal and folded scalars
167
+ rule /[\|>]/ do
168
+ token 'Punctuation.Indicator'
169
+ push :block_scalar_content
170
+ push :block_scalar_header
171
+ end
172
+ end
173
+
174
+ state :flow_nodes do
175
+ rule /\[/, 'Punctuation.Indicator', :flow_sequence
176
+ rule /\{/, 'Punctuation.Indicator', :flow_mapping
177
+ rule /'/, 'Literal.String.Single', :single_quoted_scalar
178
+ rule /"/, 'Literal.String.Double', :double_quoted_scalar
179
+ end
180
+
181
+ state :flow_collection do
182
+ rule /\s+/m, 'Text'
183
+ mixin :basic
184
+ rule /[?:,]/, 'Punctuation.Indicator'
185
+ mixin :descriptors
186
+ mixin :flow_nodes
187
+
188
+ rule /(?=#{plain_scalar_start})/ do
189
+ push :plain_scalar_in_flow_context
190
+ end
191
+ end
192
+
193
+ state :flow_sequence do
194
+ rule /\]/, 'Punctuation.Indicator', :pop!
195
+ mixin :flow_collection
196
+ end
197
+
198
+ state :flow_mapping do
199
+ rule /\}/, 'Punctuation.Indicator', :pop!
200
+ mixin :flow_collection
201
+ end
202
+
203
+ state :block_scalar_content do
204
+ rule /\n+/, 'Text'
205
+
206
+ # empty lines never dedent, but they might be part of the scalar.
207
+ rule /^[ ]+$/ do |m|
208
+ text = m[0]
209
+ indent_size = text.size
210
+
211
+ indent_mark = @block_scalar_indent || indent_size
212
+
213
+ token 'Text', text[0...indent_mark]
214
+ token 'Name.Constant', text[indent_mark..-1]
215
+ end
216
+
217
+ # TODO: ^ doesn't actually seem to affect the match at all.
218
+ # Find a way to work around this limitation.
219
+ rule /^[ ]*/ do |m|
220
+ token 'Text'
221
+
222
+ indent_size = m[0].size
223
+
224
+ dedent_level = @block_scalar_indent || self.indent
225
+ @block_scalar_indent ||= indent_size
226
+
227
+ if indent_size < dedent_level
228
+ pop! 2
229
+ end
230
+ end
231
+
232
+ rule /[^\n\r\f\v]+/, 'Name.Constant'
233
+ end
234
+
235
+ state :block_scalar_header do
236
+ # optional indentation indicator and chomping flag, in either order
237
+ rule %r(
238
+ (
239
+ ([1-9])[+-]? | [+-]?([1-9])?
240
+ )(?=[ ]|$)
241
+ )x do |m|
242
+ @block_scalar_indent = nil
243
+ pop!; push :ignored_line
244
+ next if m[0].empty?
245
+
246
+ increment = m[1] || m[2]
247
+ if increment
248
+ @block_scalar_indent = indent + increment.to_i
249
+ end
250
+
251
+ token 'Punctuation.Indicator'
252
+ end
253
+ end
254
+
255
+ state :ignored_line do
256
+ mixin :basic
257
+ rule /[ ]+/, 'Text'
258
+ rule /\n/, 'Text', :pop!
259
+ end
260
+
261
+ state :quoted_scalar_whitespaces do
262
+ # leading and trailing whitespace is ignored
263
+ rule /^[ ]+/, 'Text'
264
+ rule /[ ]+$/, 'Text'
265
+
266
+ rule /\n+/m, 'Text'
267
+
268
+ rule /[ ]+/, 'Name.Variable'
269
+ end
270
+
271
+ state :single_quoted_scalar do
272
+ mixin :quoted_scalar_whitespaces
273
+ rule /\\'/, 'Literal.String.Escape'
274
+ rule /'/, 'Literal.String', :pop!
275
+ rule /[^\s']+/, 'Literal.String'
276
+ end
277
+
278
+ state :double_quoted_scalar do
279
+ rule /"/, 'Literal.String', :pop!
280
+ mixin :quoted_scalar_whitespaces
281
+ # escapes
282
+ rule /\\[0abt\tn\nvfre "\\N_LP]/, 'Literal.String.Escape'
283
+ rule /\\(?:x[0-9A-Fa-f]{2}|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/,
284
+ 'Literal.String.Escape'
285
+ rule /[^ \t\n\r\f\v"\\]+/, 'Literal.String'
286
+ end
287
+
288
+ state :plain_scalar_in_block_context_new_line do
289
+ rule /^[ ]+\n/, 'Text'
290
+ rule /\n+/m, 'Text'
291
+ rule /^(?=---|\.\.\.)/ do
292
+ pop! 3
293
+ end
294
+
295
+ # dedent detection
296
+ rule /^[ ]*/ do |m|
297
+ token 'Text'
298
+ pop!
299
+
300
+ indent_size = m[0].size
301
+
302
+ # dedent = end of scalar
303
+ if m[0].size <= self.indent
304
+ pop!
305
+ val, err = save_indent
306
+ # push :block_line
307
+ push :indentation
308
+ end
309
+ end
310
+ end
311
+
312
+ state :plain_scalar_in_block_context do
313
+ # the : indicator ends a scalar
314
+ rule /[ ]*(?=:[ \n]|:$)/, 'Text', :pop!
315
+ rule /[ ]*:/, 'Literal.String'
316
+ rule /[ ]+(?=#)/, 'Text', :pop!
317
+ rule /[ ]+$/, 'Text'
318
+ # check for new documents or dedents at the new line
319
+ rule /\n+/ do
320
+ token 'Text'
321
+ push :plain_scalar_in_block_context_new_line
322
+ end
323
+
324
+ rule /[ ]+/, 'Literal.String'
325
+ # regular non-whitespace characters
326
+ rule /[^\s:]+/, 'Literal.String'
327
+ end
328
+
329
+ state :plain_scalar_in_flow_context do
330
+ rule /[ ]*(?=[,:?\[\]{}])/, 'Text', :pop!
331
+ rule /[ ]+(?=#)/, 'Text', :pop!
332
+ rule /^[ ]+/, 'Text'
333
+ rule /[ ]+$/, 'Text'
334
+ rule /\n+/, 'Text'
335
+ rule /[ ]+/, 'Name.Variable'
336
+ rule /[^\s,:?\[\]{}]+/, 'Name.Variable'
337
+ end
338
+
339
+ state :yaml_directive do
340
+ rule /([ ]+)(\d+\.\d+)/ do
341
+ group 'Text'; group 'Number'
342
+ pop!; push :ignored_line
343
+ end
344
+ end
345
+
346
+ state :tag_directive do
347
+ rule %r(
348
+ ([ ]+)(!|![\w-]*!) # tag handle
349
+ ([ ]+)(!|!?[\w;/?:@&=+$,.!~*'()\[\]%-]+) # tag prefix
350
+ )x do
351
+ group 'Text'; group 'Keyword.Type'
352
+ group 'Text'; group 'Keyword.Type'
353
+ pop!; push :ignored_line
354
+ end
355
+ end
356
+ end
357
+ end
358
+ end