rouge 0.2.1 → 0.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile CHANGED
@@ -8,7 +8,8 @@ gem 'wrong', '~> 0.6.2'
8
8
 
9
9
  gem 'rake'
10
10
 
11
- gem 'redcarpet'
11
+ # don't try to install redcarpet under jruby
12
+ gem 'redcarpet', :platforms => :ruby
12
13
 
13
14
  # for visual tests
14
15
  gem 'sinatra'
@@ -7,7 +7,7 @@ module Rouge
7
7
  lexer = Lexer.find(lexer) unless lexer.respond_to? :lex
8
8
  raise "unknown lexer #{lexer}" unless lexer
9
9
 
10
- formatter.render(lexer.lex(text))
10
+ formatter.format(lexer.lex(text))
11
11
  end
12
12
  end
13
13
  end
@@ -39,10 +39,13 @@ load load_dir.join('rouge/lexers/viml.rb')
39
39
  load load_dir.join('rouge/lexers/javascript.rb')
40
40
  load load_dir.join('rouge/lexers/css.rb')
41
41
  load load_dir.join('rouge/lexers/html.rb')
42
- load load_dir.join('rouge/lexers/haml.rb')
43
42
  load load_dir.join('rouge/lexers/xml.rb')
44
43
  load load_dir.join('rouge/lexers/php.rb')
45
44
 
45
+ load load_dir.join('rouge/lexers/haml.rb')
46
+ load load_dir.join('rouge/lexers/sass.rb')
47
+ load load_dir.join('rouge/lexers/scss.rb')
48
+
46
49
  load load_dir.join('rouge/lexers/erb.rb')
47
50
 
48
51
  load load_dir.join('rouge/lexers/tcl.rb')
@@ -50,6 +53,8 @@ load load_dir.join('rouge/lexers/python.rb')
50
53
  load load_dir.join('rouge/lexers/ruby.rb')
51
54
  load load_dir.join('rouge/lexers/perl.rb')
52
55
  load load_dir.join('rouge/lexers/factor.rb')
56
+ load load_dir.join('rouge/lexers/clojure.rb')
57
+ load load_dir.join('rouge/lexers/groovy.rb')
53
58
 
54
59
  load load_dir.join('rouge/lexers/haskell.rb')
55
60
  load load_dir.join('rouge/lexers/scheme.rb')
@@ -18,10 +18,15 @@ module Rouge
18
18
  end
19
19
 
20
20
  # Format a token stream.
21
- def render(tokens)
21
+ def format(tokens)
22
22
  enum_for(:stream, tokens).to_a.join
23
23
  end
24
24
 
25
+ def render(tokens)
26
+ warn 'Formatter#render is deprecated, use #format instead.'
27
+ format(tokens)
28
+ end
29
+
25
30
  # @abstract
26
31
  # yield strings that, when concatenated, form the formatted output
27
32
  def stream(tokens, &b)
@@ -146,12 +146,21 @@ module Rouge
146
146
  # private
147
147
  def escape_sequence(token)
148
148
  @escape_sequences ||= {}
149
- @escape_sequences[token.name] ||= begin
150
- esc = EscapeSequence.new(theme.get_style(token))
151
- # don't highlight text backgrounds
152
- esc.style.delete(:bg) if token.name == 'Text'
153
- esc
154
- end
149
+ @escape_sequences[token.name] ||=
150
+ EscapeSequence.new(get_style(token))
151
+ end
152
+
153
+ def get_style(token)
154
+ return text_style if token.name == 'Text'
155
+
156
+ theme.get_own_style(token) || text_style
157
+ end
158
+
159
+ def text_style
160
+ style = theme.get_style(Token['Text'])
161
+ # don't highlight text backgrounds
162
+ style.delete :bg
163
+ style
155
164
  end
156
165
  end
157
166
  end
@@ -25,6 +25,40 @@ module Rouge
25
25
  registry[name.to_s]
26
26
  end
27
27
 
28
+ # Find a lexer, with fancy shiny features.
29
+ #
30
+ # * The string you pass can include CGI-style options
31
+ #
32
+ # Lexer.find_fancy('erb?parent=tex')
33
+ #
34
+ # * You can pass the special name 'guess' so we guess for you,
35
+ # and you can pass a second argument of the code to guess by
36
+ #
37
+ # Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
38
+ #
39
+ # This is used in the Redcarpet plugin as well as Rouge's own
40
+ # markdown lexer for highlighting internal code blocks.
41
+ #
42
+ def find_fancy(str, code=nil)
43
+ name, opts = str ? str.split('?', 2) : [nil, '']
44
+
45
+ # parse the options hash from a cgi-style string
46
+ opts = CGI.parse(opts || '').map do |k, vals|
47
+ [ k.to_sym, vals.empty? ? true : vals[0] ]
48
+ end
49
+
50
+ opts = Hash[opts]
51
+
52
+ lexer_class = case name
53
+ when 'guess', nil
54
+ self.guess(:source => code, :mimetype => opts[:mimetype])
55
+ when String
56
+ self.find(name)
57
+ end
58
+
59
+ lexer_class && lexer_class.new(opts)
60
+ end
61
+
28
62
  # Specify or get this lexer's description.
29
63
  def desc(arg=:absent)
30
64
  if arg == :absent
@@ -75,12 +109,14 @@ module Rouge
75
109
  fname = File.basename(fname)
76
110
  registry.values.detect do |lexer|
77
111
  lexer.filenames.any? do |pattern|
78
- File.fnmatch?(pattern, fname)
112
+ File.fnmatch?(pattern, fname, File::FNM_DOTMATCH)
79
113
  end
80
114
  end
81
115
  end
82
116
 
83
117
  def guess_by_source(source)
118
+ assert_utf8!(source)
119
+
84
120
  source = TextAnalyzer.new(source)
85
121
 
86
122
  best_result = 0
@@ -155,6 +191,15 @@ module Rouge
155
191
  (@mimetypes ||= []).concat(mts)
156
192
  end
157
193
 
194
+ # @private
195
+ def assert_utf8!(str)
196
+ return if %w(US-ASCII UTF-8).include? str.encoding.name
197
+ raise EncodingError.new(
198
+ "Bad encoding: #{str.encoding.names.join(',')}. " +
199
+ "Please convert your string to UTF-8."
200
+ )
201
+ end
202
+
158
203
  private
159
204
  def registry
160
205
  @registry ||= {}
@@ -217,8 +262,11 @@ module Rouge
217
262
  def lex(string, opts={}, &b)
218
263
  return enum_for(:lex, string) unless block_given?
219
264
 
265
+ Lexer.assert_utf8!(string)
266
+
220
267
  reset! unless opts[:continue]
221
268
 
269
+ # consolidate consecutive tokens of the same type
222
270
  last_token = nil
223
271
  last_val = nil
224
272
  stream_tokens(StringScanner.new(string)) do |tok, val|
@@ -237,6 +285,11 @@ module Rouge
237
285
  b.call(last_token, last_val) if last_token
238
286
  end
239
287
 
288
+ # delegated to {Lexer.tag}
289
+ def tag
290
+ self.class.tag
291
+ end
292
+
240
293
  # @abstract
241
294
  #
242
295
  # Yield `[token, chunk]` pairs, given a prepared input stream. This
@@ -0,0 +1,108 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Clojure < RegexLexer
4
+ desc "The Clojure programming language (clojure.org)"
5
+
6
+ tag 'clojure'
7
+ aliases 'clj'
8
+
9
+ filenames '*.clj'
10
+
11
+ mimetypes 'text/x-clojure', 'application/x-clojure'
12
+
13
+ def self.keywords
14
+ @keywords ||= Set.new %w(
15
+ fn def defn defmacro defmethod defmulti defn- defstruct if
16
+ cond let for
17
+ )
18
+ end
19
+
20
+ def self.builtins
21
+ @builtins ||= Set.new %w(
22
+ . .. * + - -> / < <= = == > >= accessor agent agent-errors
23
+ aget alength all-ns alter and append-child apply array-map
24
+ aset aset-boolean aset-byte aset-char aset-double aset-float
25
+ aset-int aset-long aset-short assert assoc await await-for bean
26
+ binding bit-and bit-not bit-or bit-shift-left bit-shift-right
27
+ bit-xor boolean branch? butlast byte cast char children
28
+ class clear-agent-errors comment commute comp comparator
29
+ complement concat conj cons constantly construct-proxy
30
+ contains? count create-ns create-struct cycle dec deref
31
+ difference disj dissoc distinct doall doc dorun doseq dosync
32
+ dotimes doto double down drop drop-while edit end? ensure eval
33
+ every? false? ffirst file-seq filter find find-doc find-ns
34
+ find-var first float flush fnseq frest gensym get-proxy-class
35
+ get hash-map hash-set identical? identity if-let import in-ns
36
+ inc index insert-child insert-left insert-right inspect-table
37
+ inspect-tree instance? int interleave intersection into
38
+ into-array iterate join key keys keyword keyword? last lazy-cat
39
+ lazy-cons left lefts line-seq list* list load load-file locking
40
+ long loop macroexpand macroexpand-1 make-array make-node map
41
+ map-invert map? mapcat max max-key memfn merge merge-with meta
42
+ min min-key name namespace neg? new newline next nil? node not
43
+ not-any? not-every? not= ns-imports ns-interns ns-map ns-name
44
+ ns-publics ns-refers ns-resolve ns-unmap nth nthrest or parse
45
+ partial path peek pop pos? pr pr-str print print-str println
46
+ println-str prn prn-str project proxy proxy-mappings quot
47
+ rand rand-int range re-find re-groups re-matcher re-matches
48
+ re-pattern re-seq read read-line reduce ref ref-set refer rem
49
+ remove remove-method remove-ns rename rename-keys repeat replace
50
+ replicate resolve rest resultset-seq reverse rfirst right
51
+ rights root rrest rseq second select select-keys send send-off
52
+ seq seq-zip seq? set short slurp some sort sort-by sorted-map
53
+ sorted-map-by sorted-set special-symbol? split-at split-with
54
+ str string? struct struct-map subs subvec symbol symbol?
55
+ sync take take-nth take-while test time to-array to-array-2d
56
+ tree-seq true? union up update-proxy val vals var-get var-set
57
+ var? vector vector-zip vector? when when-first when-let
58
+ when-not with-local-vars with-meta with-open with-out-str
59
+ xml-seq xml-zip zero? zipmap zipper'
60
+ )
61
+ end
62
+
63
+ identifier = %r([\w!$%*+,<=>?/.-]+)
64
+
65
+ def name_token(name)
66
+ return 'Keyword' if self.class.keywords.include?(name)
67
+ return 'Name.Builtin' if self.class.builtins.include?(name)
68
+ nil
69
+ end
70
+
71
+ state :root do
72
+ rule /;.*?\n/, 'Comment.Single'
73
+ rule /\s+/m, 'Text.Whitespace'
74
+
75
+ rule /-?\d+\.\d+/, 'Literal.Number.Float'
76
+ rule /-?\d+/, 'Literal.Number.Integer'
77
+ rule /0x-?[0-9a-fA-F]+/, 'Literal.Number.Hex'
78
+
79
+ rule /"(\\.|[^"])*"/, 'Literal.String'
80
+ rule /'#{identifier}/, 'Literal.String.Symbol'
81
+ rule /\\(.|[a-z]+)/i, 'Literal.String.Char'
82
+
83
+ rule /:#{identifier}/, 'Name.Constant'
84
+
85
+ rule /~@|[`\'#^~&]/, 'Operator'
86
+
87
+ rule /(\()(\s*)(#{identifier})/m do |m|
88
+ token 'Punctuation', m[1]
89
+ token 'Text.Whitespace', m[2]
90
+ token(name_token(m[3]) || 'Name.Function', m[3])
91
+ end
92
+
93
+ rule identifier do |m|
94
+ token name_token(m[0]) || 'Name.Variable'
95
+ end
96
+
97
+ # vectors
98
+ rule /[\[\]]/, 'Punctuation'
99
+
100
+ # maps
101
+ rule /[{}]/, 'Punctuation'
102
+
103
+ # parentheses
104
+ rule /[()]/, 'Punctuation'
105
+ end
106
+ end
107
+ end
108
+ end
@@ -9,93 +9,105 @@ module Rouge
9
9
 
10
10
  identifier = /[a-zA-Z0-9_-]+/
11
11
  number = /-?(?:[0-9]+(\.[0-9]+)?|\.[0-9]+)/
12
- keywords = %w(
13
- azimuth background-attachment background-color
14
- background-image background-position background-repeat
15
- background border-bottom-color border-bottom-style
16
- border-bottom-width border-left-color border-left-style
17
- border-left-width border-right border-right-color
18
- border-right-style border-right-width border-top-color
19
- border-top-style border-top-width border-bottom
20
- border-collapse border-left border-width border-color
21
- border-spacing border-style border-top border caption-side
22
- clear clip color content counter-increment counter-reset
23
- cue-after cue-before cue cursor direction display
24
- elevation empty-cells float font-family font-size
25
- font-size-adjust font-stretch font-style font-variant
26
- font-weight font height letter-spacing line-height
27
- list-style-type list-style-image list-style-position
28
- list-style margin-bottom margin-left margin-right
29
- margin-top margin marker-offset marks max-height max-width
30
- min-height min-width opacity orphans outline outline-color
31
- outline-style outline-width overflow(?:-x -y ) padding-bottom
32
- padding-left padding-right padding-top padding page
33
- page-break-after page-break-before page-break-inside
34
- pause-after pause-before pause pitch pitch-range
35
- play-during position quotes richness right size
36
- speak-header speak-numeral speak-punctuation speak
37
- speech-rate stress table-layout text-align text-decoration
38
- text-indent text-shadow text-transform top unicode-bidi
39
- vertical-align visibility voice-family volume white-space
40
- widows width word-spacing z-index bottom left
41
- above absolute always armenian aural auto avoid baseline
42
- behind below bidi-override blink block bold bolder both
43
- capitalize center-left center-right center circle
44
- cjk-ideographic close-quote collapse condensed continuous
45
- crop crosshair cross cursive dashed decimal-leading-zero
46
- decimal default digits disc dotted double e-resize embed
47
- extra-condensed extra-expanded expanded fantasy far-left
48
- far-right faster fast fixed georgian groove hebrew help
49
- hidden hide higher high hiragana-iroha hiragana icon
50
- inherit inline-table inline inset inside invert italic
51
- justify katakana-iroha katakana landscape larger large
52
- left-side leftwards level lighter line-through list-item
53
- loud lower-alpha lower-greek lower-roman lowercase ltr
54
- lower low medium message-box middle mix monospace
55
- n-resize narrower ne-resize no-close-quote no-open-quote
56
- no-repeat none normal nowrap nw-resize oblique once
57
- open-quote outset outside overline pointer portrait px
58
- relative repeat-x repeat-y repeat rgb ridge right-side
59
- rightwards s-resize sans-serif scroll se-resize
60
- semi-condensed semi-expanded separate serif show silent
61
- slow slower small-caps small-caption smaller soft solid
62
- spell-out square static status-bar super sw-resize
63
- table-caption table-cell table-column table-column-group
64
- table-footer-group table-header-group table-row
65
- table-row-group text text-bottom text-top thick thin
66
- transparent ultra-condensed ultra-expanded underline
67
- upper-alpha upper-latin upper-roman uppercase url
68
- visible w-resize wait wider x-fast x-high x-large x-loud
69
- x-low x-small x-soft xx-large xx-small yes
70
- )
71
-
72
- builtins = %w(
73
- indigo gold firebrick indianred yellow darkolivegreen
74
- darkseagreen mediumvioletred mediumorchid chartreuse
75
- mediumslateblue black springgreen crimson lightsalmon brown
76
- turquoise olivedrab cyan silver skyblue gray darkturquoise
77
- goldenrod darkgreen darkviolet darkgray lightpink teal
78
- darkmagenta lightgoldenrodyellow lavender yellowgreen thistle
79
- violet navy orchid blue ghostwhite honeydew cornflowerblue
80
- darkblue darkkhaki mediumpurple cornsilk red bisque slategray
81
- darkcyan khaki wheat deepskyblue darkred steelblue aliceblue
82
- gainsboro mediumturquoise floralwhite coral purple lightgrey
83
- lightcyan darksalmon beige azure lightsteelblue oldlace
84
- greenyellow royalblue lightseagreen mistyrose sienna
85
- lightcoral orangered navajowhite lime palegreen burlywood
86
- seashell mediumspringgreen fuchsia papayawhip blanchedalmond
87
- peru aquamarine white darkslategray ivory dodgerblue
88
- lemonchiffon chocolate orange forestgreen slateblue olive
89
- mintcream antiquewhite darkorange cadetblue moccasin
90
- limegreen saddlebrown darkslateblue lightskyblue deeppink
91
- plum aqua darkgoldenrod maroon sandybrown magenta tan
92
- rosybrown pink lightblue palevioletred mediumseagreen
93
- dimgray powderblue seagreen snow mediumblue midnightblue
94
- paleturquoise palegoldenrod whitesmoke darkorchid salmon
95
- lightslategray lawngreen lightgreen tomato hotpink
96
- lightyellow lavenderblush linen mediumaquamarine green
97
- blueviolet peachpuff
98
- )
12
+
13
+ def self.attributes
14
+ @attributes ||= Set.new %w(
15
+ azimuth background background-attachment background-color
16
+ background-image background-position background-repeat
17
+ border border-bottom border-bottom-color border-bottom-style
18
+ border-bottom-width border-collapse border-color border-left
19
+ border-left-color border-left-style border-left-width
20
+ border-right border-right-color border-right-style
21
+ border-right-width border-spacing border-style border-top
22
+ border-top-color border-top-style border-top-width
23
+ border-width bottom caption-side clear clip color content
24
+ counter-increment counter-reset cue cue-after cue-before cursor
25
+ direction display elevation empty-cells float font font-family
26
+ font-size font-size-adjust font-stretch font-style font-variant
27
+ font-weight height left letter-spacing line-height list-style
28
+ list-style-image list-style-position list-style-type margin
29
+ margin-bottom margin-left margin-right margin-top marker-offset
30
+ marks max-height max-width min-height min-width opacity orphans
31
+ outline outline-color outline-style outline-width overflow-x
32
+ overflow-y padding padding-bottom padding-left padding-right
33
+ padding-top page page-break-after page-break-before
34
+ page-break-inside pause pause-after pause-before pitch
35
+ pitch-range play-during position quotes richness right size
36
+ speak speak-header speak-numeral speak-punctuation speech-rate
37
+ src stress table-layout text-align text-decoration text-indent
38
+ text-shadow text-transform top unicode-bidi vertical-align
39
+ visibility voice-family volume white-space widows width
40
+ word-spacing z-index
41
+ )
42
+ end
43
+
44
+ def self.builtins
45
+ @builtins ||= Set.new %w(
46
+ above absolute always armenian aural auto avoid left bottom
47
+ baseline behind below bidi-override blink block bold bolder
48
+ both bottom capitalize center center-left center-right circle
49
+ cjk-ideographic close-quote collapse condensed continuous crop
50
+ cross crosshair cursive dashed decimal decimal-leading-zero
51
+ default digits disc dotted double e-resize embed expanded
52
+ extra-condensed extra-expanded fantasy far-left far-right fast
53
+ faster fixed georgian groove hebrew help hidden hide high higher
54
+ hiragana hiragana-iroha icon inherit inline inline-table inset
55
+ inside invert italic justify katakana katakana-iroha landscape
56
+ large larger left left-side leftwards level lighter line-through
57
+ list-item loud low lower lower-alpha lower-greek lower-roman
58
+ lowercase ltr medium message-box middle mix monospace n-resize
59
+ narrower ne-resize no-close-quote no-open-quote no-repeat none
60
+ normal nowrap nw-resize oblique once open-quote outset outside
61
+ overline pointer portrait px relative repeat repeat-x repeat-y
62
+ rgb ridge right right-side rightwards s-resize sans-serif scroll
63
+ se-resize semi-condensed semi-expanded separate serif show
64
+ silent slow slower small-caps small-caption smaller soft solid
65
+ spell-out square static status-bar super sw-resize table-caption
66
+ table-cell table-column table-column-group table-footer-group
67
+ table-header-group table-row table-row-group text text-bottom
68
+ text-top thick thin top transparent ultra-condensed
69
+ ultra-expanded underline upper-alpha upper-latin upper-roman
70
+ uppercase url visible w-resize wait wider x-fast x-high x-large
71
+ x-loud x-low x-small x-soft xx-large xx-small yes
72
+ )
73
+ end
74
+
75
+ def self.constants
76
+ @constants ||= Set.new %w(
77
+ indigo gold firebrick indianred yellow darkolivegreen
78
+ darkseagreen mediumvioletred mediumorchid chartreuse
79
+ mediumslateblue black springgreen crimson lightsalmon brown
80
+ turquoise olivedrab cyan silver skyblue gray darkturquoise
81
+ goldenrod darkgreen darkviolet darkgray lightpink teal
82
+ darkmagenta lightgoldenrodyellow lavender yellowgreen thistle
83
+ violet navy orchid blue ghostwhite honeydew cornflowerblue
84
+ darkblue darkkhaki mediumpurple cornsilk red bisque slategray
85
+ darkcyan khaki wheat deepskyblue darkred steelblue aliceblue
86
+ gainsboro mediumturquoise floralwhite coral purple lightgrey
87
+ lightcyan darksalmon beige azure lightsteelblue oldlace
88
+ greenyellow royalblue lightseagreen mistyrose sienna lightcoral
89
+ orangered navajowhite lime palegreen burlywood seashell
90
+ mediumspringgreen fuchsia papayawhip blanchedalmond peru
91
+ aquamarine white darkslategray ivory dodgerblue lemonchiffon
92
+ chocolate orange forestgreen slateblue olive mintcream
93
+ antiquewhite darkorange cadetblue moccasin limegreen saddlebrown
94
+ darkslateblue lightskyblue deeppink plum aqua darkgoldenrod
95
+ maroon sandybrown magenta tan rosybrown pink lightblue
96
+ palevioletred mediumseagreen dimgray powderblue seagreen snow
97
+ mediumblue midnightblue paleturquoise palegoldenrod whitesmoke
98
+ darkorchid salmon lightslategray lawngreen lightgreen tomato
99
+ hotpink lightyellow lavenderblush linen mediumaquamarine green
100
+ blueviolet peachpuff
101
+ )
102
+ end
103
+
104
+ # source: http://www.w3.org/TR/CSS21/syndata.html#vendor-keyword-history
105
+ def self.vendor_prefixes
106
+ @vendor_prefixes ||= Set.new %w(
107
+ -ah- -atsc- -hp- -khtml- -moz- -ms- -o- -rim- -ro- -tc- -wap-
108
+ -webkit- -xv- mso- prince-
109
+ )
110
+ end
99
111
 
100
112
  state :root do
101
113
  mixin :basics
@@ -116,7 +128,15 @@ module Rouge
116
128
  rule /[\[\]():\/.]/, 'Punctuation'
117
129
  rule /"(\\\\|\\"|[^"])*"/, 'Literal.String.Single'
118
130
  rule /'(\\\\|\\'|[^'])*'/, 'Literal.String.Double'
119
- rule identifier, 'Name'
131
+ rule(identifier) do |m|
132
+ if self.class.constants.include? m[0]
133
+ token 'Name.Constant'
134
+ elsif self.class.builtins.include? m[0]
135
+ token 'Name.Builtin'
136
+ else
137
+ token 'Name'
138
+ end
139
+ end
120
140
  end
121
141
 
122
142
  state :at_rule do
@@ -151,15 +171,18 @@ module Rouge
151
171
  state :stanza do
152
172
  mixin :basics
153
173
  rule /}/, 'Punctuation', :pop!
154
- rule /(#{identifier}\s*):/m do |m|
155
- if keywords.include? m[1]
156
- token 'Keyword'
157
- elsif builtins.include? m[1]
158
- token 'Name.Builtin'
174
+ rule /(#{identifier})(\s*)(:)/m do |m|
175
+ if self.class.attributes.include? m[1]
176
+ group 'Name.Label'
177
+ elsif self.class.vendor_prefixes.any? { |p| m[1].start_with?(p) }
178
+ group 'Name.Label'
159
179
  else
160
- token 'Name'
180
+ group 'Name.Property'
161
181
  end
162
182
 
183
+ group 'Text'
184
+ group 'Punctuation'
185
+
163
186
  push :stanza_value
164
187
  end
165
188
  end
@@ -0,0 +1,102 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Groovy < RegexLexer
4
+ desc 'The Groovy programming language (groovy.codehaus.org)'
5
+ tag 'groovy'
6
+ filenames '*.groovy'
7
+ mimetypes 'text/x-groovy'
8
+
9
+ ws = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
10
+
11
+ def self.keywords
12
+ @keywords ||= Set.new %w(
13
+ assert break case catch continue default do else finally for
14
+ if goto instanceof new return switch this throw try while in as
15
+ )
16
+ end
17
+
18
+ def self.declarations
19
+ @declarations ||= Set.new %w(
20
+ abstract const enum extends final implements native private
21
+ protected public static strictfp super synchronized throws
22
+ transient volatile
23
+ )
24
+ end
25
+
26
+ def self.types
27
+ @types ||= Set.new %w(
28
+ def boolean byte char double float int long short void
29
+ )
30
+ end
31
+
32
+ def self.constants
33
+ @constants ||= Set.new %w(true false null)
34
+ end
35
+
36
+ state :root do
37
+ rule %r(^
38
+ (\s*(?:\w[\w\d.\[\]]*\s+)+?) # return arguments
39
+ (\w[\w\d]*) # method name
40
+ (\s*) (\() # signature start
41
+ )x do |m|
42
+ delegate self.clone, m[1]
43
+ token 'Name.Function', m[2]
44
+ token 'Text', m[3]
45
+ token 'Operator', m[4]
46
+ end
47
+
48
+ # whitespace
49
+ rule /[^\S\n]+/, 'Text'
50
+ rule %r(//.*?\n), 'Comment.Single'
51
+ rule %r(/[*].*?[*]/), 'Comment.Multiline'
52
+ rule /@\w[\w\d.]*/, 'Name.Decorator'
53
+ rule /(class|interface)\b/, 'Keyword.Declaration', :class
54
+ rule /package\b/, 'Keyword.Namespace', :import
55
+ rule /import\b/, 'Keyword.Namespace', :import
56
+
57
+ rule /"(\\\\|\\"|[^"])*"/, 'Literal.String.Double'
58
+ rule /'(\\\\|\\'|[^'])*'/, 'Literal.String.Single'
59
+ rule %r(\$/((?!/\$).)*/\$), 'Literal.String'
60
+ rule %r(/(\\\\|\\"|[^/])*/), 'Literal.String'
61
+ rule /'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'/, 'Literal.String.Char'
62
+ rule /(\.)([a-zA-Z_][a-zA-Z0-9_]*)/ do
63
+ group 'Operator'
64
+ group 'Name.Attribute'
65
+ end
66
+
67
+ rule /[a-zA-Z_][a-zA-Z0-9_]*:/, 'Name.Label'
68
+ rule /[a-zA-Z_\$][a-zA-Z0-9_]*/ do |m|
69
+ if self.class.keywords.include? m[0]
70
+ token 'Keyword'
71
+ elsif self.class.declarations.include? m[0]
72
+ token 'Keyword.Declaration'
73
+ elsif self.class.types.include? m[0]
74
+ token 'Keyword.Type'
75
+ elsif self.class.constants.include? m[0]
76
+ token 'Keyword.Constant'
77
+ else
78
+ token 'Name'
79
+ end
80
+ end
81
+
82
+ rule %r([~^*!%&\[\](){}<>\|+=:;,./?-]), 'Operator'
83
+
84
+ # numbers
85
+ rule /\d+\.\d+([eE]\d+)?[fd]?/, 'Literal.Number.Float'
86
+ rule /0x[0-9a-f]+/, 'Literal.Number.Hex'
87
+ rule /[0-9]+L?/, 'Literal.Number.Integer'
88
+ rule /\n/, 'Text'
89
+ end
90
+
91
+ state :class do
92
+ rule /\s+/, 'Text'
93
+ rule /\w[\w\d]*/, 'Name.Class', :pop!
94
+ end
95
+
96
+ state :import do
97
+ rule /\s+/, 'Text'
98
+ rule /[\w\d.]+[*]?/, 'Name.Namespace', :pop!
99
+ end
100
+ end
101
+ end
102
+ end
@@ -3,6 +3,8 @@ module Rouge
3
3
  # A lexer for the Haml templating system for Ruby.
4
4
  # @see http://haml.info
5
5
  class Haml < RegexLexer
6
+ include Indentation
7
+
6
8
  desc "The Haml templating system for Ruby (haml.info)"
7
9
 
8
10
  tag 'haml'
@@ -56,33 +58,6 @@ module Rouge
56
58
 
57
59
  start { ruby.reset!; html.reset! }
58
60
 
59
- # push a state for the next indented block
60
- def starts_block(block_state)
61
- @block_state = block_state
62
- @block_indentation = @last_indentation || ''
63
- debug { " haml: starts_block #{block_state.inspect}" }
64
- debug { " haml: block_indentation: #{@block_indentation.inspect}" }
65
- end
66
-
67
- def indentation(indent_str)
68
- debug { " haml: indentation #{indent_str.inspect}" }
69
- debug { " haml: block_indentation: #{@block_indentation.inspect}" }
70
- @last_indentation = indent_str
71
-
72
- # if it's an indent and we know where to go next,
73
- # push that state. otherwise, push content and
74
- # clear the block state.
75
- if (@block_state &&
76
- indent_str.start_with?(@block_indentation) &&
77
- indent_str != @block_indentation
78
- )
79
- push @block_state
80
- else
81
- @block_state = @block_indentation = nil
82
- push :content
83
- end
84
- end
85
-
86
61
  identifier = /[\w:-]+/
87
62
  ruby_var = /[a-z]\w*/
88
63
 
@@ -30,29 +30,13 @@ module Rouge
30
30
 
31
31
  # TODO: syntax highlight the code block, github style
32
32
  rule /(\n[ \t]*)(```|~~~)(.*?)(\n.*?)(\2)/m do |m|
33
- sublexer, opts = m[3].strip.split('?', 2)
34
-
35
- if sublexer
36
- sublexer = Lexer.find(sublexer)
37
-
38
- # parse the options hash from a cgi-style string
39
- opts = CGI.parse(opts || '').map do |k, vals|
40
- [ k.to_sym, vals.empty? ? true : vals[0] ]
41
- end
42
-
43
- opts = Hash[opts]
44
-
45
- sublexer &&= sublexer.new(opts)
46
- end
33
+ sublexer = Lexer.find_fancy(m[3].strip, m[4])
34
+ sublexer ||= Text.new(:token => 'Literal.String.Backtick')
47
35
 
48
36
  token 'Text', m[1]
49
37
  token 'Punctuation', m[2]
50
38
  token 'Name.Label', m[3]
51
- if sublexer
52
- delegate sublexer, m[4]
53
- else
54
- token 'Literal.String.Backtick', m[4]
55
- end
39
+ delegate sublexer, m[4]
56
40
  token 'Punctuation', m[5]
57
41
  end
58
42
 
@@ -0,0 +1,228 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Sass < RegexLexer
4
+ include Indentation
5
+
6
+ tag 'sass'
7
+ filenames '*.sass'
8
+ mimetypes 'text/x-sass'
9
+
10
+ id = /[\w-]+/
11
+
12
+ state :root do
13
+ rule /[ \t]*\n/, 'Text'
14
+ rule(/[ \t]*/) { |m| token 'Text'; indentation(m[0]) }
15
+ end
16
+
17
+ state :content do
18
+ # block comments
19
+ rule %r(//.*?\n) do
20
+ token 'Comment.Single'
21
+ pop!; starts_block :single_comment
22
+ end
23
+
24
+ rule %r(/[*].*?\n) do
25
+ token 'Comment.Multiline'
26
+ pop!; starts_block :multi_comment
27
+ end
28
+
29
+ rule /@import\b/, 'Keyword', :import
30
+
31
+ mixin :content_common
32
+
33
+ rule %r(=#{id}), 'Name.Function', :value
34
+ rule %r([+]#{id}), 'Name.Decorator', :value
35
+
36
+ rule /:/, 'Name.Attribute', :old_style_attr
37
+
38
+ rule(/(?=.+?:(^a-z|$))/) { push :attribute }
39
+
40
+ rule(//) { push :selector }
41
+ end
42
+
43
+ state :single_comment do
44
+ rule /.*?\n/, 'Comment.Single', :pop!
45
+ end
46
+
47
+ state :multi_comment do
48
+ rule /.*?\n/, 'Comment.Multiline', :pop!
49
+ end
50
+
51
+ state :import do
52
+ rule /[ \t]+/, 'Text'
53
+ rule /\S+/, 'Literal.String'
54
+ rule /\n/, 'Text', :pop!
55
+ end
56
+
57
+ state :old_style_attr do
58
+ mixin :attr_common
59
+ rule(//) { pop!; push :value }
60
+ end
61
+
62
+ state :end_section do
63
+ rule(/\n/) { token 'Text'; reset_stack }
64
+ end
65
+
66
+ # shared states with SCSS
67
+ # TODO: make this less nasty to do
68
+ COMMON = proc do
69
+ state :content_common do
70
+ rule /@for\b/, 'Keyword', :for
71
+ rule /@(debug|warn|if|while)/, 'Keyword', :value
72
+ rule /(@mixin)(\s+)(#{id})/ do
73
+ group 'Keyword'; group 'Text'; group 'Name.Function'
74
+ push :value
75
+ end
76
+
77
+ rule /(@include)(\s+)(#{id})/ do
78
+ group 'Keyword'; group 'Text'; group 'Name.Decorator'
79
+ push :value
80
+ end
81
+
82
+ rule /@#{id}/, 'Keyword', :selector
83
+
84
+ # $variable: assignment
85
+ rule /([$]#{id})([ \t]*)(:)/ do
86
+ group 'Name.Variable'; group 'Text'; group 'Punctuation'
87
+ push :value
88
+ end
89
+ end
90
+
91
+ state :value do
92
+ mixin :end_section
93
+ rule /[ \t]+/, 'Text'
94
+ rule /[$]#{id}/, 'Name.Variable'
95
+ rule /url[(]/, 'Literal.String.Other', :string_url
96
+ rule /#{id}(?=\s*[(])/, 'Name.Function'
97
+
98
+ # named literals
99
+ rule /(true|false)\b/, 'Name.Pseudo'
100
+ rule /(and|or|not)\b/, 'Operator.Word'
101
+
102
+ # colors and numbers
103
+ rule /#[a-z0-9]{1,6}/i, 'Literal.Number.Hex'
104
+ rule /-?\d+(%|[a-z]+)?/, 'Literal.Number'
105
+ rule /-?\d*\.\d+(%|[a-z]+)?/, 'Literal.Number.Integer'
106
+
107
+ mixin :has_strings
108
+ mixin :has_interp
109
+
110
+ rule /[~^*!&%<>\|+=@:,.\/?-]+/, 'Operator'
111
+ rule /[\[\]()]+/, 'Punctuation'
112
+ rule %r(/[*]), 'Comment.Multiline', :inline_comment
113
+ rule %r(//[^\n]*), 'Comment.Single'
114
+
115
+ # identifiers
116
+ rule(id) do |m|
117
+ if CSS.builtins.include? m[0]
118
+ token 'Name.Builtin'
119
+ elsif CSS.constants.include? m[0]
120
+ token 'Name.Constant'
121
+ else
122
+ token 'Name'
123
+ end
124
+ end
125
+ end
126
+
127
+ state :has_interp do
128
+ rule /[#][{]/, 'Literal.String.Interpol', :interpolation
129
+ end
130
+
131
+ state :has_strings do
132
+ rule /"/, 'Literal.String.Double', :dq
133
+ rule /'/, 'Literal.String.Single', :sq
134
+ end
135
+
136
+ state :interpolation do
137
+ rule /}/, 'Literal.String.Interpol', :pop!
138
+ mixin :value
139
+ end
140
+
141
+ state :selector do
142
+ mixin :end_section
143
+
144
+ mixin :has_strings
145
+ mixin :has_interp
146
+ rule /[ \t]+/, 'Text'
147
+ rule /:/, 'Name.Decorator', :pseudo_class
148
+ rule /[.]/, 'Name.Class', :class
149
+ rule /#/, 'Name.Namespace', :id
150
+ rule id, 'Name.Tag'
151
+ rule /&/, 'Keyword'
152
+ rule /[~^*!&\[\]()<>\|+=@:;,.\/?-]/, 'Operator'
153
+ end
154
+
155
+ state :dq do
156
+ rule /"/, 'Literal.String.Double', :pop!
157
+ mixin :has_interp
158
+ rule /(\\.|#(?![{])|[^\n"#])+/, 'Literal.String.Double'
159
+ end
160
+
161
+ state :sq do
162
+ rule /'/, 'Literal.String.Single', :pop!
163
+ mixin :has_interp
164
+ rule /(\\.|#(?![{])|[^\n'#])+/, 'Literal.String.Single'
165
+ end
166
+
167
+ state :string_url do
168
+ rule /[)]/, 'Literal.String.Other', :pop!
169
+ rule /(\\.|#(?![{])|[^\n)#])+/, 'Literal.String.Other'
170
+ mixin :has_interp
171
+ end
172
+
173
+ state :selector_piece do
174
+ mixin :has_interp
175
+ rule(//) { pop! }
176
+ end
177
+
178
+ state :pseudo_class do
179
+ rule id, 'Name.Decorator'
180
+ mixin :selector_piece
181
+ end
182
+
183
+ state :class do
184
+ rule id, 'Name.Class'
185
+ mixin :selector_piece
186
+ end
187
+
188
+ state :id do
189
+ rule id, 'Name.Namespace'
190
+ mixin :selector_piece
191
+ end
192
+
193
+ state :for do
194
+ rule /(from|to|through)/, 'Operator.Word'
195
+ mixin :value
196
+ end
197
+
198
+ state :attr_common do
199
+ mixin :has_interp
200
+ rule id do |m|
201
+ if CSS.attributes.include? m[0]
202
+ token 'Name.Label'
203
+ else
204
+ token 'Name.Attribute'
205
+ end
206
+ end
207
+ end
208
+
209
+ state :attribute do
210
+ mixin :attr_common
211
+
212
+ rule /([ \t]*)(:)/ do
213
+ group 'Text'; group 'Punctuation'
214
+ push :value
215
+ end
216
+ end
217
+
218
+ state :inline_comment do
219
+ rule /(\\#|#(?=[^\n{])|\*(?=[^\n\/])|[^\n#*])+/, 'Comment.Multiline'
220
+ mixin :has_interp
221
+ rule %r([*]/), 'Comment.Multiline', :pop!
222
+ end
223
+ end
224
+
225
+ class_eval(&COMMON)
226
+ end
227
+ end
228
+ end
@@ -0,0 +1,31 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Scss < RegexLexer
4
+ desc "SCSS stylesheets (sass-lang.com)"
5
+ tag 'scss'
6
+ filenames '*.scss'
7
+ mimetypes 'text/x-scss'
8
+
9
+ state :root do
10
+ rule /\s+/, 'Text'
11
+ rule %r(//.*?\n), 'Comment.Single'
12
+ rule %r(/[*].*?[*]/)m, 'Comment.Multiline'
13
+ rule /@import\b/, 'Keyword', :value
14
+
15
+ mixin :content_common
16
+
17
+ rule(/(?=[^;{}][;}])/) { push :attribute }
18
+ rule(/(?=[^;{}:]+:[^a-z])/) { push :attribute }
19
+
20
+ rule(//) { push :selector }
21
+ end
22
+
23
+ state :end_section do
24
+ rule /\n/, 'Text'
25
+ rule(/[;{}]/) { token 'Punctuation'; reset_stack }
26
+ end
27
+
28
+ instance_eval(&Sass::COMMON)
29
+ end
30
+ end
31
+ end
@@ -7,8 +7,14 @@ module Rouge
7
7
  filenames '*.txt'
8
8
  mimetypes 'text/plain'
9
9
 
10
+ default_options :token => 'Text'
11
+
12
+ def token
13
+ @token ||= Token[option :token]
14
+ end
15
+
10
16
  def stream_tokens(stream, &b)
11
- yield Token['Text'], stream.string
17
+ yield self.token, stream.string
12
18
  end
13
19
  end
14
20
  end
@@ -2,9 +2,15 @@ module Rouge
2
2
  module Lexers
3
3
  class YAML < RegexLexer
4
4
  desc "Yaml Ain't Markup Language (yaml.org)"
5
+ mimetypes 'text/x-yaml'
5
6
  tag 'yaml'
6
7
  aliases 'yml'
7
8
 
9
+ def self.analyze_text(text)
10
+ # look for the %YAML directive
11
+ return 1 if text =~ /\A\s*%YAML/m
12
+ end
13
+
8
14
  filenames '*.yaml', '*.yml'
9
15
  # NB: Tabs are forbidden in YAML, which is why you see things
10
16
  # like /[ ]+/.
@@ -2,9 +2,6 @@
2
2
  #
3
3
  # require 'rouge/plugins/redcarpet'
4
4
 
5
- # this plugin depends on redcarpet
6
- require 'redcarpet'
7
-
8
5
  # stdlib
9
6
  require 'cgi'
10
7
 
@@ -12,33 +9,20 @@ module Rouge
12
9
  module Plugins
13
10
  module Redcarpet
14
11
  def block_code(code, language)
15
- name, opts = language ? language.split('?', 2) : [nil, '']
16
-
17
- # parse the options hash from a cgi-style string
18
- opts = CGI.parse(opts || '').map do |k, vals|
19
- [ k.to_sym, vals.empty? ? true : vals[0] ]
20
- end
21
-
22
- opts = Hash[opts]
23
-
24
- lexer_class = case name
25
- when 'guess', nil
26
- lexer = Lexer.guess(:source => code, :mimetype => opts[:mimetype])
27
- when String
28
- Lexer.find(name)
29
- end || Lexers::Text
12
+ lexer = Lexer.find_fancy(language, code) || Lexers::Text
30
13
 
31
14
  # XXX HACK: Redcarpet strips hard tabs out of code blocks,
32
15
  # so we assume you're not using leading spaces that aren't tabs,
33
16
  # and just replace them here.
34
- if lexer_class.tag == 'make'
17
+ if lexer.tag == 'make'
35
18
  code.gsub! /^ /, "\t"
36
19
  end
37
20
 
38
- lexer = lexer_class.new(opts)
39
- formatter = Formatters::HTML.new(:css_class => "highlight #{lexer_class.tag}")
21
+ formatter = Formatters::HTML.new(
22
+ :css_class => "highlight #{lexer.tag}"
23
+ )
40
24
 
41
- Rouge.highlight(code, lexer, formatter)
25
+ formatter.format(lexer.lex(code))
42
26
  end
43
27
  end
44
28
  end
@@ -76,12 +76,14 @@ module Rouge
76
76
  end
77
77
  end
78
78
 
79
- def get_style(token)
79
+ def get_own_style(token)
80
80
  token.ancestors do |anc|
81
81
  return styles[anc.name] if styles[anc.name]
82
82
  end
83
+ end
83
84
 
84
- styles['Text']
85
+ def get_style(token)
86
+ get_own_style(token) || style['Text']
85
87
  end
86
88
 
87
89
  def name(n=nil)
@@ -59,6 +59,7 @@ module Rouge
59
59
  style 'Name.Entity', :fg => '#999999', :bold => true
60
60
  style 'Text.Whitespace', :fg => '#BBBBBB'
61
61
  style 'Name.Function',
62
+ 'Name.Property',
62
63
  'Name.Attribute', :fg => :chilly
63
64
  style 'Name.Variable', :fg => :chilly, :bold => true
64
65
  end
@@ -70,4 +70,40 @@ module Rouge
70
70
  end
71
71
  alias << push
72
72
  end
73
+
74
+ # shared methods for some indentation-sensitive lexers
75
+ module Indentation
76
+ def reset!
77
+ super
78
+ @block_state = @block_indentation = nil
79
+ end
80
+
81
+ # push a state for the next indented block
82
+ def starts_block(block_state)
83
+ @block_state = block_state
84
+ @block_indentation = @last_indentation || ''
85
+ debug { " starts_block #{block_state.inspect}" }
86
+ debug { " block_indentation: #{@block_indentation.inspect}" }
87
+ end
88
+
89
+ # handle a single indented line
90
+ def indentation(indent_str)
91
+ debug { " indentation #{indent_str.inspect}" }
92
+ debug { " block_indentation: #{@block_indentation.inspect}" }
93
+ @last_indentation = indent_str
94
+
95
+ # if it's an indent and we know where to go next,
96
+ # push that state. otherwise, push content and
97
+ # clear the block state.
98
+ if (@block_state &&
99
+ indent_str.start_with?(@block_indentation) &&
100
+ indent_str != @block_indentation
101
+ )
102
+ push @block_state
103
+ else
104
+ @block_state = @block_indentation = nil
105
+ push :content
106
+ end
107
+ end
108
+ end
73
109
  end
@@ -1,5 +1,5 @@
1
1
  module Rouge
2
2
  def self.version
3
- "0.2.1"
3
+ "0.2.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rouge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-11 00:00:00.000000000 Z
12
+ date: 2012-10-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: thor
@@ -42,11 +42,13 @@ files:
42
42
  - lib/rouge/lexers/common_lisp.rb
43
43
  - lib/rouge/lexers/erb.rb
44
44
  - lib/rouge/lexers/scheme.rb
45
+ - lib/rouge/lexers/clojure.rb
45
46
  - lib/rouge/lexers/xml.rb
46
47
  - lib/rouge/lexers/shell.rb
47
48
  - lib/rouge/lexers/haml.rb
48
49
  - lib/rouge/lexers/ruby.rb
49
50
  - lib/rouge/lexers/make.rb
51
+ - lib/rouge/lexers/scss.rb
50
52
  - lib/rouge/lexers/perl.rb
51
53
  - lib/rouge/lexers/javascript.rb
52
54
  - lib/rouge/lexers/diff.rb
@@ -66,6 +68,8 @@ files:
66
68
  - lib/rouge/lexers/cpp.rb
67
69
  - lib/rouge/lexers/factor.rb
68
70
  - lib/rouge/lexers/viml/keywords.rb
71
+ - lib/rouge/lexers/groovy.rb
72
+ - lib/rouge/lexers/sass.rb
69
73
  - lib/rouge/plugins/redcarpet.rb
70
74
  - lib/rouge/themes/thankful_eyes.rb
71
75
  - lib/rouge/themes/colorful.rb