rouge 0.2.1 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -8,7 +8,8 @@ gem 'wrong', '~> 0.6.2'
8
8
 
9
9
  gem 'rake'
10
10
 
11
- gem 'redcarpet'
11
+ # don't try to install redcarpet under jruby
12
+ gem 'redcarpet', :platforms => :ruby
12
13
 
13
14
  # for visual tests
14
15
  gem 'sinatra'
@@ -7,7 +7,7 @@ module Rouge
7
7
  lexer = Lexer.find(lexer) unless lexer.respond_to? :lex
8
8
  raise "unknown lexer #{lexer}" unless lexer
9
9
 
10
- formatter.render(lexer.lex(text))
10
+ formatter.format(lexer.lex(text))
11
11
  end
12
12
  end
13
13
  end
@@ -39,10 +39,13 @@ load load_dir.join('rouge/lexers/viml.rb')
39
39
  load load_dir.join('rouge/lexers/javascript.rb')
40
40
  load load_dir.join('rouge/lexers/css.rb')
41
41
  load load_dir.join('rouge/lexers/html.rb')
42
- load load_dir.join('rouge/lexers/haml.rb')
43
42
  load load_dir.join('rouge/lexers/xml.rb')
44
43
  load load_dir.join('rouge/lexers/php.rb')
45
44
 
45
+ load load_dir.join('rouge/lexers/haml.rb')
46
+ load load_dir.join('rouge/lexers/sass.rb')
47
+ load load_dir.join('rouge/lexers/scss.rb')
48
+
46
49
  load load_dir.join('rouge/lexers/erb.rb')
47
50
 
48
51
  load load_dir.join('rouge/lexers/tcl.rb')
@@ -50,6 +53,8 @@ load load_dir.join('rouge/lexers/python.rb')
50
53
  load load_dir.join('rouge/lexers/ruby.rb')
51
54
  load load_dir.join('rouge/lexers/perl.rb')
52
55
  load load_dir.join('rouge/lexers/factor.rb')
56
+ load load_dir.join('rouge/lexers/clojure.rb')
57
+ load load_dir.join('rouge/lexers/groovy.rb')
53
58
 
54
59
  load load_dir.join('rouge/lexers/haskell.rb')
55
60
  load load_dir.join('rouge/lexers/scheme.rb')
@@ -18,10 +18,15 @@ module Rouge
18
18
  end
19
19
 
20
20
  # Format a token stream.
21
- def render(tokens)
21
+ def format(tokens)
22
22
  enum_for(:stream, tokens).to_a.join
23
23
  end
24
24
 
25
+ def render(tokens)
26
+ warn 'Formatter#render is deprecated, use #format instead.'
27
+ format(tokens)
28
+ end
29
+
25
30
  # @abstract
26
31
  # yield strings that, when concatenated, form the formatted output
27
32
  def stream(tokens, &b)
@@ -146,12 +146,21 @@ module Rouge
146
146
  # private
147
147
  def escape_sequence(token)
148
148
  @escape_sequences ||= {}
149
- @escape_sequences[token.name] ||= begin
150
- esc = EscapeSequence.new(theme.get_style(token))
151
- # don't highlight text backgrounds
152
- esc.style.delete(:bg) if token.name == 'Text'
153
- esc
154
- end
149
+ @escape_sequences[token.name] ||=
150
+ EscapeSequence.new(get_style(token))
151
+ end
152
+
153
+ def get_style(token)
154
+ return text_style if token.name == 'Text'
155
+
156
+ theme.get_own_style(token) || text_style
157
+ end
158
+
159
+ def text_style
160
+ style = theme.get_style(Token['Text'])
161
+ # don't highlight text backgrounds
162
+ style.delete :bg
163
+ style
155
164
  end
156
165
  end
157
166
  end
@@ -25,6 +25,40 @@ module Rouge
25
25
  registry[name.to_s]
26
26
  end
27
27
 
28
+ # Find a lexer, with fancy shiny features.
29
+ #
30
+ # * The string you pass can include CGI-style options
31
+ #
32
+ # Lexer.find_fancy('erb?parent=tex')
33
+ #
34
+ # * You can pass the special name 'guess' so we guess for you,
35
+ # and you can pass a second argument of the code to guess by
36
+ #
37
+ # Lexer.find_fancy('guess', "#!/bin/bash\necho Hello, world")
38
+ #
39
+ # This is used in the Redcarpet plugin as well as Rouge's own
40
+ # markdown lexer for highlighting internal code blocks.
41
+ #
42
+ def find_fancy(str, code=nil)
43
+ name, opts = str ? str.split('?', 2) : [nil, '']
44
+
45
+ # parse the options hash from a cgi-style string
46
+ opts = CGI.parse(opts || '').map do |k, vals|
47
+ [ k.to_sym, vals.empty? ? true : vals[0] ]
48
+ end
49
+
50
+ opts = Hash[opts]
51
+
52
+ lexer_class = case name
53
+ when 'guess', nil
54
+ self.guess(:source => code, :mimetype => opts[:mimetype])
55
+ when String
56
+ self.find(name)
57
+ end
58
+
59
+ lexer_class && lexer_class.new(opts)
60
+ end
61
+
28
62
  # Specify or get this lexer's description.
29
63
  def desc(arg=:absent)
30
64
  if arg == :absent
@@ -75,12 +109,14 @@ module Rouge
75
109
  fname = File.basename(fname)
76
110
  registry.values.detect do |lexer|
77
111
  lexer.filenames.any? do |pattern|
78
- File.fnmatch?(pattern, fname)
112
+ File.fnmatch?(pattern, fname, File::FNM_DOTMATCH)
79
113
  end
80
114
  end
81
115
  end
82
116
 
83
117
  def guess_by_source(source)
118
+ assert_utf8!(source)
119
+
84
120
  source = TextAnalyzer.new(source)
85
121
 
86
122
  best_result = 0
@@ -155,6 +191,15 @@ module Rouge
155
191
  (@mimetypes ||= []).concat(mts)
156
192
  end
157
193
 
194
+ # @private
195
+ def assert_utf8!(str)
196
+ return if %w(US-ASCII UTF-8).include? str.encoding.name
197
+ raise EncodingError.new(
198
+ "Bad encoding: #{str.encoding.names.join(',')}. " +
199
+ "Please convert your string to UTF-8."
200
+ )
201
+ end
202
+
158
203
  private
159
204
  def registry
160
205
  @registry ||= {}
@@ -217,8 +262,11 @@ module Rouge
217
262
  def lex(string, opts={}, &b)
218
263
  return enum_for(:lex, string) unless block_given?
219
264
 
265
+ Lexer.assert_utf8!(string)
266
+
220
267
  reset! unless opts[:continue]
221
268
 
269
+ # consolidate consecutive tokens of the same type
222
270
  last_token = nil
223
271
  last_val = nil
224
272
  stream_tokens(StringScanner.new(string)) do |tok, val|
@@ -237,6 +285,11 @@ module Rouge
237
285
  b.call(last_token, last_val) if last_token
238
286
  end
239
287
 
288
+ # delegated to {Lexer.tag}
289
+ def tag
290
+ self.class.tag
291
+ end
292
+
240
293
  # @abstract
241
294
  #
242
295
  # Yield `[token, chunk]` pairs, given a prepared input stream. This
@@ -0,0 +1,108 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Clojure < RegexLexer
4
+ desc "The Clojure programming language (clojure.org)"
5
+
6
+ tag 'clojure'
7
+ aliases 'clj'
8
+
9
+ filenames '*.clj'
10
+
11
+ mimetypes 'text/x-clojure', 'application/x-clojure'
12
+
13
+ def self.keywords
14
+ @keywords ||= Set.new %w(
15
+ fn def defn defmacro defmethod defmulti defn- defstruct if
16
+ cond let for
17
+ )
18
+ end
19
+
20
+ def self.builtins
21
+ @builtins ||= Set.new %w(
22
+ . .. * + - -> / < <= = == > >= accessor agent agent-errors
23
+ aget alength all-ns alter and append-child apply array-map
24
+ aset aset-boolean aset-byte aset-char aset-double aset-float
25
+ aset-int aset-long aset-short assert assoc await await-for bean
26
+ binding bit-and bit-not bit-or bit-shift-left bit-shift-right
27
+ bit-xor boolean branch? butlast byte cast char children
28
+ class clear-agent-errors comment commute comp comparator
29
+ complement concat conj cons constantly construct-proxy
30
+ contains? count create-ns create-struct cycle dec deref
31
+ difference disj dissoc distinct doall doc dorun doseq dosync
32
+ dotimes doto double down drop drop-while edit end? ensure eval
33
+ every? false? ffirst file-seq filter find find-doc find-ns
34
+ find-var first float flush fnseq frest gensym get-proxy-class
35
+ get hash-map hash-set identical? identity if-let import in-ns
36
+ inc index insert-child insert-left insert-right inspect-table
37
+ inspect-tree instance? int interleave intersection into
38
+ into-array iterate join key keys keyword keyword? last lazy-cat
39
+ lazy-cons left lefts line-seq list* list load load-file locking
40
+ long loop macroexpand macroexpand-1 make-array make-node map
41
+ map-invert map? mapcat max max-key memfn merge merge-with meta
42
+ min min-key name namespace neg? new newline next nil? node not
43
+ not-any? not-every? not= ns-imports ns-interns ns-map ns-name
44
+ ns-publics ns-refers ns-resolve ns-unmap nth nthrest or parse
45
+ partial path peek pop pos? pr pr-str print print-str println
46
+ println-str prn prn-str project proxy proxy-mappings quot
47
+ rand rand-int range re-find re-groups re-matcher re-matches
48
+ re-pattern re-seq read read-line reduce ref ref-set refer rem
49
+ remove remove-method remove-ns rename rename-keys repeat replace
50
+ replicate resolve rest resultset-seq reverse rfirst right
51
+ rights root rrest rseq second select select-keys send send-off
52
+ seq seq-zip seq? set short slurp some sort sort-by sorted-map
53
+ sorted-map-by sorted-set special-symbol? split-at split-with
54
+ str string? struct struct-map subs subvec symbol symbol?
55
+ sync take take-nth take-while test time to-array to-array-2d
56
+ tree-seq true? union up update-proxy val vals var-get var-set
57
+ var? vector vector-zip vector? when when-first when-let
58
+ when-not with-local-vars with-meta with-open with-out-str
59
+ xml-seq xml-zip zero? zipmap zipper'
60
+ )
61
+ end
62
+
63
+ identifier = %r([\w!$%*+,<=>?/.-]+)
64
+
65
+ def name_token(name)
66
+ return 'Keyword' if self.class.keywords.include?(name)
67
+ return 'Name.Builtin' if self.class.builtins.include?(name)
68
+ nil
69
+ end
70
+
71
+ state :root do
72
+ rule /;.*?\n/, 'Comment.Single'
73
+ rule /\s+/m, 'Text.Whitespace'
74
+
75
+ rule /-?\d+\.\d+/, 'Literal.Number.Float'
76
+ rule /-?\d+/, 'Literal.Number.Integer'
77
+ rule /0x-?[0-9a-fA-F]+/, 'Literal.Number.Hex'
78
+
79
+ rule /"(\\.|[^"])*"/, 'Literal.String'
80
+ rule /'#{identifier}/, 'Literal.String.Symbol'
81
+ rule /\\(.|[a-z]+)/i, 'Literal.String.Char'
82
+
83
+ rule /:#{identifier}/, 'Name.Constant'
84
+
85
+ rule /~@|[`\'#^~&]/, 'Operator'
86
+
87
+ rule /(\()(\s*)(#{identifier})/m do |m|
88
+ token 'Punctuation', m[1]
89
+ token 'Text.Whitespace', m[2]
90
+ token(name_token(m[3]) || 'Name.Function', m[3])
91
+ end
92
+
93
+ rule identifier do |m|
94
+ token name_token(m[0]) || 'Name.Variable'
95
+ end
96
+
97
+ # vectors
98
+ rule /[\[\]]/, 'Punctuation'
99
+
100
+ # maps
101
+ rule /[{}]/, 'Punctuation'
102
+
103
+ # parentheses
104
+ rule /[()]/, 'Punctuation'
105
+ end
106
+ end
107
+ end
108
+ end
@@ -9,93 +9,105 @@ module Rouge
9
9
 
10
10
  identifier = /[a-zA-Z0-9_-]+/
11
11
  number = /-?(?:[0-9]+(\.[0-9]+)?|\.[0-9]+)/
12
- keywords = %w(
13
- azimuth background-attachment background-color
14
- background-image background-position background-repeat
15
- background border-bottom-color border-bottom-style
16
- border-bottom-width border-left-color border-left-style
17
- border-left-width border-right border-right-color
18
- border-right-style border-right-width border-top-color
19
- border-top-style border-top-width border-bottom
20
- border-collapse border-left border-width border-color
21
- border-spacing border-style border-top border caption-side
22
- clear clip color content counter-increment counter-reset
23
- cue-after cue-before cue cursor direction display
24
- elevation empty-cells float font-family font-size
25
- font-size-adjust font-stretch font-style font-variant
26
- font-weight font height letter-spacing line-height
27
- list-style-type list-style-image list-style-position
28
- list-style margin-bottom margin-left margin-right
29
- margin-top margin marker-offset marks max-height max-width
30
- min-height min-width opacity orphans outline outline-color
31
- outline-style outline-width overflow(?:-x -y ) padding-bottom
32
- padding-left padding-right padding-top padding page
33
- page-break-after page-break-before page-break-inside
34
- pause-after pause-before pause pitch pitch-range
35
- play-during position quotes richness right size
36
- speak-header speak-numeral speak-punctuation speak
37
- speech-rate stress table-layout text-align text-decoration
38
- text-indent text-shadow text-transform top unicode-bidi
39
- vertical-align visibility voice-family volume white-space
40
- widows width word-spacing z-index bottom left
41
- above absolute always armenian aural auto avoid baseline
42
- behind below bidi-override blink block bold bolder both
43
- capitalize center-left center-right center circle
44
- cjk-ideographic close-quote collapse condensed continuous
45
- crop crosshair cross cursive dashed decimal-leading-zero
46
- decimal default digits disc dotted double e-resize embed
47
- extra-condensed extra-expanded expanded fantasy far-left
48
- far-right faster fast fixed georgian groove hebrew help
49
- hidden hide higher high hiragana-iroha hiragana icon
50
- inherit inline-table inline inset inside invert italic
51
- justify katakana-iroha katakana landscape larger large
52
- left-side leftwards level lighter line-through list-item
53
- loud lower-alpha lower-greek lower-roman lowercase ltr
54
- lower low medium message-box middle mix monospace
55
- n-resize narrower ne-resize no-close-quote no-open-quote
56
- no-repeat none normal nowrap nw-resize oblique once
57
- open-quote outset outside overline pointer portrait px
58
- relative repeat-x repeat-y repeat rgb ridge right-side
59
- rightwards s-resize sans-serif scroll se-resize
60
- semi-condensed semi-expanded separate serif show silent
61
- slow slower small-caps small-caption smaller soft solid
62
- spell-out square static status-bar super sw-resize
63
- table-caption table-cell table-column table-column-group
64
- table-footer-group table-header-group table-row
65
- table-row-group text text-bottom text-top thick thin
66
- transparent ultra-condensed ultra-expanded underline
67
- upper-alpha upper-latin upper-roman uppercase url
68
- visible w-resize wait wider x-fast x-high x-large x-loud
69
- x-low x-small x-soft xx-large xx-small yes
70
- )
71
-
72
- builtins = %w(
73
- indigo gold firebrick indianred yellow darkolivegreen
74
- darkseagreen mediumvioletred mediumorchid chartreuse
75
- mediumslateblue black springgreen crimson lightsalmon brown
76
- turquoise olivedrab cyan silver skyblue gray darkturquoise
77
- goldenrod darkgreen darkviolet darkgray lightpink teal
78
- darkmagenta lightgoldenrodyellow lavender yellowgreen thistle
79
- violet navy orchid blue ghostwhite honeydew cornflowerblue
80
- darkblue darkkhaki mediumpurple cornsilk red bisque slategray
81
- darkcyan khaki wheat deepskyblue darkred steelblue aliceblue
82
- gainsboro mediumturquoise floralwhite coral purple lightgrey
83
- lightcyan darksalmon beige azure lightsteelblue oldlace
84
- greenyellow royalblue lightseagreen mistyrose sienna
85
- lightcoral orangered navajowhite lime palegreen burlywood
86
- seashell mediumspringgreen fuchsia papayawhip blanchedalmond
87
- peru aquamarine white darkslategray ivory dodgerblue
88
- lemonchiffon chocolate orange forestgreen slateblue olive
89
- mintcream antiquewhite darkorange cadetblue moccasin
90
- limegreen saddlebrown darkslateblue lightskyblue deeppink
91
- plum aqua darkgoldenrod maroon sandybrown magenta tan
92
- rosybrown pink lightblue palevioletred mediumseagreen
93
- dimgray powderblue seagreen snow mediumblue midnightblue
94
- paleturquoise palegoldenrod whitesmoke darkorchid salmon
95
- lightslategray lawngreen lightgreen tomato hotpink
96
- lightyellow lavenderblush linen mediumaquamarine green
97
- blueviolet peachpuff
98
- )
12
+
13
+ def self.attributes
14
+ @attributes ||= Set.new %w(
15
+ azimuth background background-attachment background-color
16
+ background-image background-position background-repeat
17
+ border border-bottom border-bottom-color border-bottom-style
18
+ border-bottom-width border-collapse border-color border-left
19
+ border-left-color border-left-style border-left-width
20
+ border-right border-right-color border-right-style
21
+ border-right-width border-spacing border-style border-top
22
+ border-top-color border-top-style border-top-width
23
+ border-width bottom caption-side clear clip color content
24
+ counter-increment counter-reset cue cue-after cue-before cursor
25
+ direction display elevation empty-cells float font font-family
26
+ font-size font-size-adjust font-stretch font-style font-variant
27
+ font-weight height left letter-spacing line-height list-style
28
+ list-style-image list-style-position list-style-type margin
29
+ margin-bottom margin-left margin-right margin-top marker-offset
30
+ marks max-height max-width min-height min-width opacity orphans
31
+ outline outline-color outline-style outline-width overflow-x
32
+ overflow-y padding padding-bottom padding-left padding-right
33
+ padding-top page page-break-after page-break-before
34
+ page-break-inside pause pause-after pause-before pitch
35
+ pitch-range play-during position quotes richness right size
36
+ speak speak-header speak-numeral speak-punctuation speech-rate
37
+ src stress table-layout text-align text-decoration text-indent
38
+ text-shadow text-transform top unicode-bidi vertical-align
39
+ visibility voice-family volume white-space widows width
40
+ word-spacing z-index
41
+ )
42
+ end
43
+
44
+ def self.builtins
45
+ @builtins ||= Set.new %w(
46
+ above absolute always armenian aural auto avoid left bottom
47
+ baseline behind below bidi-override blink block bold bolder
48
+ both bottom capitalize center center-left center-right circle
49
+ cjk-ideographic close-quote collapse condensed continuous crop
50
+ cross crosshair cursive dashed decimal decimal-leading-zero
51
+ default digits disc dotted double e-resize embed expanded
52
+ extra-condensed extra-expanded fantasy far-left far-right fast
53
+ faster fixed georgian groove hebrew help hidden hide high higher
54
+ hiragana hiragana-iroha icon inherit inline inline-table inset
55
+ inside invert italic justify katakana katakana-iroha landscape
56
+ large larger left left-side leftwards level lighter line-through
57
+ list-item loud low lower lower-alpha lower-greek lower-roman
58
+ lowercase ltr medium message-box middle mix monospace n-resize
59
+ narrower ne-resize no-close-quote no-open-quote no-repeat none
60
+ normal nowrap nw-resize oblique once open-quote outset outside
61
+ overline pointer portrait px relative repeat repeat-x repeat-y
62
+ rgb ridge right right-side rightwards s-resize sans-serif scroll
63
+ se-resize semi-condensed semi-expanded separate serif show
64
+ silent slow slower small-caps small-caption smaller soft solid
65
+ spell-out square static status-bar super sw-resize table-caption
66
+ table-cell table-column table-column-group table-footer-group
67
+ table-header-group table-row table-row-group text text-bottom
68
+ text-top thick thin top transparent ultra-condensed
69
+ ultra-expanded underline upper-alpha upper-latin upper-roman
70
+ uppercase url visible w-resize wait wider x-fast x-high x-large
71
+ x-loud x-low x-small x-soft xx-large xx-small yes
72
+ )
73
+ end
74
+
75
+ def self.constants
76
+ @constants ||= Set.new %w(
77
+ indigo gold firebrick indianred yellow darkolivegreen
78
+ darkseagreen mediumvioletred mediumorchid chartreuse
79
+ mediumslateblue black springgreen crimson lightsalmon brown
80
+ turquoise olivedrab cyan silver skyblue gray darkturquoise
81
+ goldenrod darkgreen darkviolet darkgray lightpink teal
82
+ darkmagenta lightgoldenrodyellow lavender yellowgreen thistle
83
+ violet navy orchid blue ghostwhite honeydew cornflowerblue
84
+ darkblue darkkhaki mediumpurple cornsilk red bisque slategray
85
+ darkcyan khaki wheat deepskyblue darkred steelblue aliceblue
86
+ gainsboro mediumturquoise floralwhite coral purple lightgrey
87
+ lightcyan darksalmon beige azure lightsteelblue oldlace
88
+ greenyellow royalblue lightseagreen mistyrose sienna lightcoral
89
+ orangered navajowhite lime palegreen burlywood seashell
90
+ mediumspringgreen fuchsia papayawhip blanchedalmond peru
91
+ aquamarine white darkslategray ivory dodgerblue lemonchiffon
92
+ chocolate orange forestgreen slateblue olive mintcream
93
+ antiquewhite darkorange cadetblue moccasin limegreen saddlebrown
94
+ darkslateblue lightskyblue deeppink plum aqua darkgoldenrod
95
+ maroon sandybrown magenta tan rosybrown pink lightblue
96
+ palevioletred mediumseagreen dimgray powderblue seagreen snow
97
+ mediumblue midnightblue paleturquoise palegoldenrod whitesmoke
98
+ darkorchid salmon lightslategray lawngreen lightgreen tomato
99
+ hotpink lightyellow lavenderblush linen mediumaquamarine green
100
+ blueviolet peachpuff
101
+ )
102
+ end
103
+
104
+ # source: http://www.w3.org/TR/CSS21/syndata.html#vendor-keyword-history
105
+ def self.vendor_prefixes
106
+ @vendor_prefixes ||= Set.new %w(
107
+ -ah- -atsc- -hp- -khtml- -moz- -ms- -o- -rim- -ro- -tc- -wap-
108
+ -webkit- -xv- mso- prince-
109
+ )
110
+ end
99
111
 
100
112
  state :root do
101
113
  mixin :basics
@@ -116,7 +128,15 @@ module Rouge
116
128
  rule /[\[\]():\/.]/, 'Punctuation'
117
129
  rule /"(\\\\|\\"|[^"])*"/, 'Literal.String.Single'
118
130
  rule /'(\\\\|\\'|[^'])*'/, 'Literal.String.Double'
119
- rule identifier, 'Name'
131
+ rule(identifier) do |m|
132
+ if self.class.constants.include? m[0]
133
+ token 'Name.Constant'
134
+ elsif self.class.builtins.include? m[0]
135
+ token 'Name.Builtin'
136
+ else
137
+ token 'Name'
138
+ end
139
+ end
120
140
  end
121
141
 
122
142
  state :at_rule do
@@ -151,15 +171,18 @@ module Rouge
151
171
  state :stanza do
152
172
  mixin :basics
153
173
  rule /}/, 'Punctuation', :pop!
154
- rule /(#{identifier}\s*):/m do |m|
155
- if keywords.include? m[1]
156
- token 'Keyword'
157
- elsif builtins.include? m[1]
158
- token 'Name.Builtin'
174
+ rule /(#{identifier})(\s*)(:)/m do |m|
175
+ if self.class.attributes.include? m[1]
176
+ group 'Name.Label'
177
+ elsif self.class.vendor_prefixes.any? { |p| m[1].start_with?(p) }
178
+ group 'Name.Label'
159
179
  else
160
- token 'Name'
180
+ group 'Name.Property'
161
181
  end
162
182
 
183
+ group 'Text'
184
+ group 'Punctuation'
185
+
163
186
  push :stanza_value
164
187
  end
165
188
  end
@@ -0,0 +1,102 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Groovy < RegexLexer
4
+ desc 'The Groovy programming language (groovy.codehaus.org)'
5
+ tag 'groovy'
6
+ filenames '*.groovy'
7
+ mimetypes 'text/x-groovy'
8
+
9
+ ws = %r((?:\s|//.*?\n|/[*].*?[*]/)+)
10
+
11
+ def self.keywords
12
+ @keywords ||= Set.new %w(
13
+ assert break case catch continue default do else finally for
14
+ if goto instanceof new return switch this throw try while in as
15
+ )
16
+ end
17
+
18
+ def self.declarations
19
+ @declarations ||= Set.new %w(
20
+ abstract const enum extends final implements native private
21
+ protected public static strictfp super synchronized throws
22
+ transient volatile
23
+ )
24
+ end
25
+
26
+ def self.types
27
+ @types ||= Set.new %w(
28
+ def boolean byte char double float int long short void
29
+ )
30
+ end
31
+
32
+ def self.constants
33
+ @constants ||= Set.new %w(true false null)
34
+ end
35
+
36
+ state :root do
37
+ rule %r(^
38
+ (\s*(?:\w[\w\d.\[\]]*\s+)+?) # return arguments
39
+ (\w[\w\d]*) # method name
40
+ (\s*) (\() # signature start
41
+ )x do |m|
42
+ delegate self.clone, m[1]
43
+ token 'Name.Function', m[2]
44
+ token 'Text', m[3]
45
+ token 'Operator', m[4]
46
+ end
47
+
48
+ # whitespace
49
+ rule /[^\S\n]+/, 'Text'
50
+ rule %r(//.*?\n), 'Comment.Single'
51
+ rule %r(/[*].*?[*]/), 'Comment.Multiline'
52
+ rule /@\w[\w\d.]*/, 'Name.Decorator'
53
+ rule /(class|interface)\b/, 'Keyword.Declaration', :class
54
+ rule /package\b/, 'Keyword.Namespace', :import
55
+ rule /import\b/, 'Keyword.Namespace', :import
56
+
57
+ rule /"(\\\\|\\"|[^"])*"/, 'Literal.String.Double'
58
+ rule /'(\\\\|\\'|[^'])*'/, 'Literal.String.Single'
59
+ rule %r(\$/((?!/\$).)*/\$), 'Literal.String'
60
+ rule %r(/(\\\\|\\"|[^/])*/), 'Literal.String'
61
+ rule /'\\.'|'[^\\]'|'\\u[0-9a-f]{4}'/, 'Literal.String.Char'
62
+ rule /(\.)([a-zA-Z_][a-zA-Z0-9_]*)/ do
63
+ group 'Operator'
64
+ group 'Name.Attribute'
65
+ end
66
+
67
+ rule /[a-zA-Z_][a-zA-Z0-9_]*:/, 'Name.Label'
68
+ rule /[a-zA-Z_\$][a-zA-Z0-9_]*/ do |m|
69
+ if self.class.keywords.include? m[0]
70
+ token 'Keyword'
71
+ elsif self.class.declarations.include? m[0]
72
+ token 'Keyword.Declaration'
73
+ elsif self.class.types.include? m[0]
74
+ token 'Keyword.Type'
75
+ elsif self.class.constants.include? m[0]
76
+ token 'Keyword.Constant'
77
+ else
78
+ token 'Name'
79
+ end
80
+ end
81
+
82
+ rule %r([~^*!%&\[\](){}<>\|+=:;,./?-]), 'Operator'
83
+
84
+ # numbers
85
+ rule /\d+\.\d+([eE]\d+)?[fd]?/, 'Literal.Number.Float'
86
+ rule /0x[0-9a-f]+/, 'Literal.Number.Hex'
87
+ rule /[0-9]+L?/, 'Literal.Number.Integer'
88
+ rule /\n/, 'Text'
89
+ end
90
+
91
+ state :class do
92
+ rule /\s+/, 'Text'
93
+ rule /\w[\w\d]*/, 'Name.Class', :pop!
94
+ end
95
+
96
+ state :import do
97
+ rule /\s+/, 'Text'
98
+ rule /[\w\d.]+[*]?/, 'Name.Namespace', :pop!
99
+ end
100
+ end
101
+ end
102
+ end
@@ -3,6 +3,8 @@ module Rouge
3
3
  # A lexer for the Haml templating system for Ruby.
4
4
  # @see http://haml.info
5
5
  class Haml < RegexLexer
6
+ include Indentation
7
+
6
8
  desc "The Haml templating system for Ruby (haml.info)"
7
9
 
8
10
  tag 'haml'
@@ -56,33 +58,6 @@ module Rouge
56
58
 
57
59
  start { ruby.reset!; html.reset! }
58
60
 
59
- # push a state for the next indented block
60
- def starts_block(block_state)
61
- @block_state = block_state
62
- @block_indentation = @last_indentation || ''
63
- debug { " haml: starts_block #{block_state.inspect}" }
64
- debug { " haml: block_indentation: #{@block_indentation.inspect}" }
65
- end
66
-
67
- def indentation(indent_str)
68
- debug { " haml: indentation #{indent_str.inspect}" }
69
- debug { " haml: block_indentation: #{@block_indentation.inspect}" }
70
- @last_indentation = indent_str
71
-
72
- # if it's an indent and we know where to go next,
73
- # push that state. otherwise, push content and
74
- # clear the block state.
75
- if (@block_state &&
76
- indent_str.start_with?(@block_indentation) &&
77
- indent_str != @block_indentation
78
- )
79
- push @block_state
80
- else
81
- @block_state = @block_indentation = nil
82
- push :content
83
- end
84
- end
85
-
86
61
  identifier = /[\w:-]+/
87
62
  ruby_var = /[a-z]\w*/
88
63
 
@@ -30,29 +30,13 @@ module Rouge
30
30
 
31
31
  # TODO: syntax highlight the code block, github style
32
32
  rule /(\n[ \t]*)(```|~~~)(.*?)(\n.*?)(\2)/m do |m|
33
- sublexer, opts = m[3].strip.split('?', 2)
34
-
35
- if sublexer
36
- sublexer = Lexer.find(sublexer)
37
-
38
- # parse the options hash from a cgi-style string
39
- opts = CGI.parse(opts || '').map do |k, vals|
40
- [ k.to_sym, vals.empty? ? true : vals[0] ]
41
- end
42
-
43
- opts = Hash[opts]
44
-
45
- sublexer &&= sublexer.new(opts)
46
- end
33
+ sublexer = Lexer.find_fancy(m[3].strip, m[4])
34
+ sublexer ||= Text.new(:token => 'Literal.String.Backtick')
47
35
 
48
36
  token 'Text', m[1]
49
37
  token 'Punctuation', m[2]
50
38
  token 'Name.Label', m[3]
51
- if sublexer
52
- delegate sublexer, m[4]
53
- else
54
- token 'Literal.String.Backtick', m[4]
55
- end
39
+ delegate sublexer, m[4]
56
40
  token 'Punctuation', m[5]
57
41
  end
58
42
 
@@ -0,0 +1,228 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Sass < RegexLexer
4
+ include Indentation
5
+
6
+ tag 'sass'
7
+ filenames '*.sass'
8
+ mimetypes 'text/x-sass'
9
+
10
+ id = /[\w-]+/
11
+
12
+ state :root do
13
+ rule /[ \t]*\n/, 'Text'
14
+ rule(/[ \t]*/) { |m| token 'Text'; indentation(m[0]) }
15
+ end
16
+
17
+ state :content do
18
+ # block comments
19
+ rule %r(//.*?\n) do
20
+ token 'Comment.Single'
21
+ pop!; starts_block :single_comment
22
+ end
23
+
24
+ rule %r(/[*].*?\n) do
25
+ token 'Comment.Multiline'
26
+ pop!; starts_block :multi_comment
27
+ end
28
+
29
+ rule /@import\b/, 'Keyword', :import
30
+
31
+ mixin :content_common
32
+
33
+ rule %r(=#{id}), 'Name.Function', :value
34
+ rule %r([+]#{id}), 'Name.Decorator', :value
35
+
36
+ rule /:/, 'Name.Attribute', :old_style_attr
37
+
38
+ rule(/(?=.+?:(^a-z|$))/) { push :attribute }
39
+
40
+ rule(//) { push :selector }
41
+ end
42
+
43
+ state :single_comment do
44
+ rule /.*?\n/, 'Comment.Single', :pop!
45
+ end
46
+
47
+ state :multi_comment do
48
+ rule /.*?\n/, 'Comment.Multiline', :pop!
49
+ end
50
+
51
+ state :import do
52
+ rule /[ \t]+/, 'Text'
53
+ rule /\S+/, 'Literal.String'
54
+ rule /\n/, 'Text', :pop!
55
+ end
56
+
57
+ state :old_style_attr do
58
+ mixin :attr_common
59
+ rule(//) { pop!; push :value }
60
+ end
61
+
62
+ state :end_section do
63
+ rule(/\n/) { token 'Text'; reset_stack }
64
+ end
65
+
66
+ # shared states with SCSS
67
+ # TODO: make this less nasty to do
68
+ COMMON = proc do
69
+ state :content_common do
70
+ rule /@for\b/, 'Keyword', :for
71
+ rule /@(debug|warn|if|while)/, 'Keyword', :value
72
+ rule /(@mixin)(\s+)(#{id})/ do
73
+ group 'Keyword'; group 'Text'; group 'Name.Function'
74
+ push :value
75
+ end
76
+
77
+ rule /(@include)(\s+)(#{id})/ do
78
+ group 'Keyword'; group 'Text'; group 'Name.Decorator'
79
+ push :value
80
+ end
81
+
82
+ rule /@#{id}/, 'Keyword', :selector
83
+
84
+ # $variable: assignment
85
+ rule /([$]#{id})([ \t]*)(:)/ do
86
+ group 'Name.Variable'; group 'Text'; group 'Punctuation'
87
+ push :value
88
+ end
89
+ end
90
+
91
+ state :value do
92
+ mixin :end_section
93
+ rule /[ \t]+/, 'Text'
94
+ rule /[$]#{id}/, 'Name.Variable'
95
+ rule /url[(]/, 'Literal.String.Other', :string_url
96
+ rule /#{id}(?=\s*[(])/, 'Name.Function'
97
+
98
+ # named literals
99
+ rule /(true|false)\b/, 'Name.Pseudo'
100
+ rule /(and|or|not)\b/, 'Operator.Word'
101
+
102
+ # colors and numbers
103
+ rule /#[a-z0-9]{1,6}/i, 'Literal.Number.Hex'
104
+ rule /-?\d+(%|[a-z]+)?/, 'Literal.Number'
105
+ rule /-?\d*\.\d+(%|[a-z]+)?/, 'Literal.Number.Integer'
106
+
107
+ mixin :has_strings
108
+ mixin :has_interp
109
+
110
+ rule /[~^*!&%<>\|+=@:,.\/?-]+/, 'Operator'
111
+ rule /[\[\]()]+/, 'Punctuation'
112
+ rule %r(/[*]), 'Comment.Multiline', :inline_comment
113
+ rule %r(//[^\n]*), 'Comment.Single'
114
+
115
+ # identifiers
116
+ rule(id) do |m|
117
+ if CSS.builtins.include? m[0]
118
+ token 'Name.Builtin'
119
+ elsif CSS.constants.include? m[0]
120
+ token 'Name.Constant'
121
+ else
122
+ token 'Name'
123
+ end
124
+ end
125
+ end
126
+
127
+ state :has_interp do
128
+ rule /[#][{]/, 'Literal.String.Interpol', :interpolation
129
+ end
130
+
131
+ state :has_strings do
132
+ rule /"/, 'Literal.String.Double', :dq
133
+ rule /'/, 'Literal.String.Single', :sq
134
+ end
135
+
136
+ state :interpolation do
137
+ rule /}/, 'Literal.String.Interpol', :pop!
138
+ mixin :value
139
+ end
140
+
141
+ state :selector do
142
+ mixin :end_section
143
+
144
+ mixin :has_strings
145
+ mixin :has_interp
146
+ rule /[ \t]+/, 'Text'
147
+ rule /:/, 'Name.Decorator', :pseudo_class
148
+ rule /[.]/, 'Name.Class', :class
149
+ rule /#/, 'Name.Namespace', :id
150
+ rule id, 'Name.Tag'
151
+ rule /&/, 'Keyword'
152
+ rule /[~^*!&\[\]()<>\|+=@:;,.\/?-]/, 'Operator'
153
+ end
154
+
155
+ state :dq do
156
+ rule /"/, 'Literal.String.Double', :pop!
157
+ mixin :has_interp
158
+ rule /(\\.|#(?![{])|[^\n"#])+/, 'Literal.String.Double'
159
+ end
160
+
161
+ state :sq do
162
+ rule /'/, 'Literal.String.Single', :pop!
163
+ mixin :has_interp
164
+ rule /(\\.|#(?![{])|[^\n'#])+/, 'Literal.String.Single'
165
+ end
166
+
167
+ state :string_url do
168
+ rule /[)]/, 'Literal.String.Other', :pop!
169
+ rule /(\\.|#(?![{])|[^\n)#])+/, 'Literal.String.Other'
170
+ mixin :has_interp
171
+ end
172
+
173
+ state :selector_piece do
174
+ mixin :has_interp
175
+ rule(//) { pop! }
176
+ end
177
+
178
+ state :pseudo_class do
179
+ rule id, 'Name.Decorator'
180
+ mixin :selector_piece
181
+ end
182
+
183
+ state :class do
184
+ rule id, 'Name.Class'
185
+ mixin :selector_piece
186
+ end
187
+
188
+ state :id do
189
+ rule id, 'Name.Namespace'
190
+ mixin :selector_piece
191
+ end
192
+
193
+ state :for do
194
+ rule /(from|to|through)/, 'Operator.Word'
195
+ mixin :value
196
+ end
197
+
198
+ state :attr_common do
199
+ mixin :has_interp
200
+ rule id do |m|
201
+ if CSS.attributes.include? m[0]
202
+ token 'Name.Label'
203
+ else
204
+ token 'Name.Attribute'
205
+ end
206
+ end
207
+ end
208
+
209
+ state :attribute do
210
+ mixin :attr_common
211
+
212
+ rule /([ \t]*)(:)/ do
213
+ group 'Text'; group 'Punctuation'
214
+ push :value
215
+ end
216
+ end
217
+
218
+ state :inline_comment do
219
+ rule /(\\#|#(?=[^\n{])|\*(?=[^\n\/])|[^\n#*])+/, 'Comment.Multiline'
220
+ mixin :has_interp
221
+ rule %r([*]/), 'Comment.Multiline', :pop!
222
+ end
223
+ end
224
+
225
+ class_eval(&COMMON)
226
+ end
227
+ end
228
+ end
@@ -0,0 +1,31 @@
1
+ module Rouge
2
+ module Lexers
3
+ class Scss < RegexLexer
4
+ desc "SCSS stylesheets (sass-lang.com)"
5
+ tag 'scss'
6
+ filenames '*.scss'
7
+ mimetypes 'text/x-scss'
8
+
9
+ state :root do
10
+ rule /\s+/, 'Text'
11
+ rule %r(//.*?\n), 'Comment.Single'
12
+ rule %r(/[*].*?[*]/)m, 'Comment.Multiline'
13
+ rule /@import\b/, 'Keyword', :value
14
+
15
+ mixin :content_common
16
+
17
+ rule(/(?=[^;{}][;}])/) { push :attribute }
18
+ rule(/(?=[^;{}:]+:[^a-z])/) { push :attribute }
19
+
20
+ rule(//) { push :selector }
21
+ end
22
+
23
+ state :end_section do
24
+ rule /\n/, 'Text'
25
+ rule(/[;{}]/) { token 'Punctuation'; reset_stack }
26
+ end
27
+
28
+ instance_eval(&Sass::COMMON)
29
+ end
30
+ end
31
+ end
@@ -7,8 +7,14 @@ module Rouge
7
7
  filenames '*.txt'
8
8
  mimetypes 'text/plain'
9
9
 
10
+ default_options :token => 'Text'
11
+
12
+ def token
13
+ @token ||= Token[option :token]
14
+ end
15
+
10
16
  def stream_tokens(stream, &b)
11
- yield Token['Text'], stream.string
17
+ yield self.token, stream.string
12
18
  end
13
19
  end
14
20
  end
@@ -2,9 +2,15 @@ module Rouge
2
2
  module Lexers
3
3
  class YAML < RegexLexer
4
4
  desc "Yaml Ain't Markup Language (yaml.org)"
5
+ mimetypes 'text/x-yaml'
5
6
  tag 'yaml'
6
7
  aliases 'yml'
7
8
 
9
+ def self.analyze_text(text)
10
+ # look for the %YAML directive
11
+ return 1 if text =~ /\A\s*%YAML/m
12
+ end
13
+
8
14
  filenames '*.yaml', '*.yml'
9
15
  # NB: Tabs are forbidden in YAML, which is why you see things
10
16
  # like /[ ]+/.
@@ -2,9 +2,6 @@
2
2
  #
3
3
  # require 'rouge/plugins/redcarpet'
4
4
 
5
- # this plugin depends on redcarpet
6
- require 'redcarpet'
7
-
8
5
  # stdlib
9
6
  require 'cgi'
10
7
 
@@ -12,33 +9,20 @@ module Rouge
12
9
  module Plugins
13
10
  module Redcarpet
14
11
  def block_code(code, language)
15
- name, opts = language ? language.split('?', 2) : [nil, '']
16
-
17
- # parse the options hash from a cgi-style string
18
- opts = CGI.parse(opts || '').map do |k, vals|
19
- [ k.to_sym, vals.empty? ? true : vals[0] ]
20
- end
21
-
22
- opts = Hash[opts]
23
-
24
- lexer_class = case name
25
- when 'guess', nil
26
- lexer = Lexer.guess(:source => code, :mimetype => opts[:mimetype])
27
- when String
28
- Lexer.find(name)
29
- end || Lexers::Text
12
+ lexer = Lexer.find_fancy(language, code) || Lexers::Text
30
13
 
31
14
  # XXX HACK: Redcarpet strips hard tabs out of code blocks,
32
15
  # so we assume you're not using leading spaces that aren't tabs,
33
16
  # and just replace them here.
34
- if lexer_class.tag == 'make'
17
+ if lexer.tag == 'make'
35
18
  code.gsub! /^ /, "\t"
36
19
  end
37
20
 
38
- lexer = lexer_class.new(opts)
39
- formatter = Formatters::HTML.new(:css_class => "highlight #{lexer_class.tag}")
21
+ formatter = Formatters::HTML.new(
22
+ :css_class => "highlight #{lexer.tag}"
23
+ )
40
24
 
41
- Rouge.highlight(code, lexer, formatter)
25
+ formatter.format(lexer.lex(code))
42
26
  end
43
27
  end
44
28
  end
@@ -76,12 +76,14 @@ module Rouge
76
76
  end
77
77
  end
78
78
 
79
- def get_style(token)
79
+ def get_own_style(token)
80
80
  token.ancestors do |anc|
81
81
  return styles[anc.name] if styles[anc.name]
82
82
  end
83
+ end
83
84
 
84
- styles['Text']
85
+ def get_style(token)
86
+ get_own_style(token) || style['Text']
85
87
  end
86
88
 
87
89
  def name(n=nil)
@@ -59,6 +59,7 @@ module Rouge
59
59
  style 'Name.Entity', :fg => '#999999', :bold => true
60
60
  style 'Text.Whitespace', :fg => '#BBBBBB'
61
61
  style 'Name.Function',
62
+ 'Name.Property',
62
63
  'Name.Attribute', :fg => :chilly
63
64
  style 'Name.Variable', :fg => :chilly, :bold => true
64
65
  end
@@ -70,4 +70,40 @@ module Rouge
70
70
  end
71
71
  alias << push
72
72
  end
73
+
74
+ # shared methods for some indentation-sensitive lexers
75
+ module Indentation
76
+ def reset!
77
+ super
78
+ @block_state = @block_indentation = nil
79
+ end
80
+
81
+ # push a state for the next indented block
82
+ def starts_block(block_state)
83
+ @block_state = block_state
84
+ @block_indentation = @last_indentation || ''
85
+ debug { " starts_block #{block_state.inspect}" }
86
+ debug { " block_indentation: #{@block_indentation.inspect}" }
87
+ end
88
+
89
+ # handle a single indented line
90
+ def indentation(indent_str)
91
+ debug { " indentation #{indent_str.inspect}" }
92
+ debug { " block_indentation: #{@block_indentation.inspect}" }
93
+ @last_indentation = indent_str
94
+
95
+ # if it's an indent and we know where to go next,
96
+ # push that state. otherwise, push content and
97
+ # clear the block state.
98
+ if (@block_state &&
99
+ indent_str.start_with?(@block_indentation) &&
100
+ indent_str != @block_indentation
101
+ )
102
+ push @block_state
103
+ else
104
+ @block_state = @block_indentation = nil
105
+ push :content
106
+ end
107
+ end
108
+ end
73
109
  end
@@ -1,5 +1,5 @@
1
1
  module Rouge
2
2
  def self.version
3
- "0.2.1"
3
+ "0.2.2"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rouge
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.1
4
+ version: 0.2.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-11 00:00:00.000000000 Z
12
+ date: 2012-10-13 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: thor
@@ -42,11 +42,13 @@ files:
42
42
  - lib/rouge/lexers/common_lisp.rb
43
43
  - lib/rouge/lexers/erb.rb
44
44
  - lib/rouge/lexers/scheme.rb
45
+ - lib/rouge/lexers/clojure.rb
45
46
  - lib/rouge/lexers/xml.rb
46
47
  - lib/rouge/lexers/shell.rb
47
48
  - lib/rouge/lexers/haml.rb
48
49
  - lib/rouge/lexers/ruby.rb
49
50
  - lib/rouge/lexers/make.rb
51
+ - lib/rouge/lexers/scss.rb
50
52
  - lib/rouge/lexers/perl.rb
51
53
  - lib/rouge/lexers/javascript.rb
52
54
  - lib/rouge/lexers/diff.rb
@@ -66,6 +68,8 @@ files:
66
68
  - lib/rouge/lexers/cpp.rb
67
69
  - lib/rouge/lexers/factor.rb
68
70
  - lib/rouge/lexers/viml/keywords.rb
71
+ - lib/rouge/lexers/groovy.rb
72
+ - lib/rouge/lexers/sass.rb
69
73
  - lib/rouge/plugins/redcarpet.rb
70
74
  - lib/rouge/themes/thankful_eyes.rb
71
75
  - lib/rouge/themes/colorful.rb