syntax 0.5.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/data/ruby.css +18 -0
- data/data/xml.css +8 -0
- data/data/yaml.css +12 -0
- data/lib/syntax.rb +8 -1
- data/lib/syntax/common.rb +30 -6
- data/lib/syntax/convertors/abstract.rb +24 -0
- data/lib/syntax/convertors/html.rb +18 -17
- data/lib/syntax/{ruby.rb → lang/ruby.rb} +84 -19
- data/lib/syntax/{xml.rb → lang/xml.rb} +0 -0
- data/lib/syntax/{yaml.rb → lang/yaml.rb} +0 -0
- data/lib/syntax/version.rb +1 -1
- data/test/syntax/tc_ruby.rb +500 -352
- data/test/syntax/tc_xml.rb +2 -2
- data/test/syntax/tc_yaml.rb +2 -2
- data/test/tc_syntax.rb +22 -0
- metadata +14 -8
data/data/ruby.css
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
.ruby .normal {}
|
2
|
+
.ruby .comment { color: #005; font-style: italic; }
|
3
|
+
.ruby .keyword { color: #A00; font-weight: bold; }
|
4
|
+
.ruby .method { color: #077; }
|
5
|
+
.ruby .class { color: #074; }
|
6
|
+
.ruby .module { color: #050; }
|
7
|
+
.ruby .punct { color: #447; font-weight: bold; }
|
8
|
+
.ruby .symbol { color: #099; }
|
9
|
+
.ruby .string { color: #944; background: #FFE; }
|
10
|
+
.ruby .char { color: #F07; }
|
11
|
+
.ruby .ident { color: #004; }
|
12
|
+
.ruby .constant { color: #07F; }
|
13
|
+
.ruby .regex { color: #B66; background: #FEF; }
|
14
|
+
.ruby .number { color: #F99; }
|
15
|
+
.ruby .attribute { color: #7BB; }
|
16
|
+
.ruby .global { color: #7FB; }
|
17
|
+
.ruby .expr { color: #227; }
|
18
|
+
.ruby .escape { color: #277; }
|
data/data/xml.css
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
.xml .normal {}
|
2
|
+
.xml .namespace { color: #B66; font-weight: bold; }
|
3
|
+
.xml .tag { color: #F88; }
|
4
|
+
.xml .comment { color: #005; font-style: italic; }
|
5
|
+
.xml .punct { color: #447; font-weight: bold; }
|
6
|
+
.xml .string { color: #944; }
|
7
|
+
.xml .number { color: #F99; }
|
8
|
+
.xml .attribute { color: #BB7; }
|
data/data/yaml.css
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
.yaml .normal {}
|
2
|
+
.yaml .document { font-weight: bold; color: #07F; }
|
3
|
+
.yaml .type { font-weight: bold; color: #05C; }
|
4
|
+
.yaml .key { color: #F88; }
|
5
|
+
.yaml .comment { color: #005; font-style: italic; }
|
6
|
+
.yaml .punct { color: #447; font-weight: bold; }
|
7
|
+
.yaml .string { color: #944; }
|
8
|
+
.yaml .number { color: #F99; }
|
9
|
+
.yaml .time { color: #F99; }
|
10
|
+
.yaml .date { color: #F99; }
|
11
|
+
.yaml .ref { color: #944; }
|
12
|
+
.yaml .anchor { color: #944; }
|
data/lib/syntax.rb
CHANGED
@@ -21,11 +21,18 @@ module Syntax
|
|
21
21
|
# handler will be returned.
|
22
22
|
def load( syntax )
|
23
23
|
begin
|
24
|
-
require "syntax/#{syntax}"
|
24
|
+
require "syntax/lang/#{syntax}"
|
25
25
|
rescue LoadError
|
26
26
|
end
|
27
27
|
SYNTAX[ syntax ].new
|
28
28
|
end
|
29
29
|
module_function :load
|
30
30
|
|
31
|
+
# Return an array of the names of supported syntaxes.
|
32
|
+
def all
|
33
|
+
lang_dir = File.join(File.dirname(__FILE__), "syntax", "lang")
|
34
|
+
Dir["#{lang_dir}/*.rb"].map { |path| File.basename(path, ".rb") }
|
35
|
+
end
|
36
|
+
module_function :all
|
37
|
+
|
31
38
|
end
|
data/lib/syntax/common.rb
CHANGED
@@ -10,11 +10,16 @@ module Syntax
|
|
10
10
|
# the type of the lexeme that was extracted.
|
11
11
|
attr_reader :group
|
12
12
|
|
13
|
+
# the instruction associated with this token (:none, :region_open, or
|
14
|
+
# :region_close)
|
15
|
+
attr_reader :instruction
|
16
|
+
|
13
17
|
# Create a new Token representing the given text, and belonging to the
|
14
18
|
# given group.
|
15
|
-
def initialize( text, group )
|
19
|
+
def initialize( text, group, instruction = :none )
|
16
20
|
super text
|
17
21
|
@group = group
|
22
|
+
@instruction = instruction
|
18
23
|
end
|
19
24
|
|
20
25
|
end
|
@@ -25,6 +30,12 @@ module Syntax
|
|
25
30
|
# a single token.
|
26
31
|
class Tokenizer
|
27
32
|
|
33
|
+
# The current group being processed by the tokenizer
|
34
|
+
attr_reader :group
|
35
|
+
|
36
|
+
# The current chunk of text being accumulated
|
37
|
+
attr_reader :chunk
|
38
|
+
|
28
39
|
# Start tokenizing. This sets up the state in preparation for tokenization,
|
29
40
|
# such as creating a new scanner for the text and saving the callback block.
|
30
41
|
# The block will be invoked for each token extracted.
|
@@ -104,15 +115,28 @@ module Syntax
|
|
104
115
|
# After the new group is started, if +data+ is non-nil it will be appended
|
105
116
|
# to the chunk.
|
106
117
|
def start_group( gr, data=nil )
|
107
|
-
if gr != @group
|
108
|
-
@callback.call( Token.new( @chunk, @group ) )
|
109
|
-
@chunk = ""
|
110
|
-
end
|
111
|
-
|
118
|
+
flush_chunk if gr != @group
|
112
119
|
@group = gr
|
113
120
|
@chunk << data if data
|
114
121
|
end
|
115
122
|
|
123
|
+
def start_region( gr, data=nil )
|
124
|
+
flush_chunk
|
125
|
+
@group = gr
|
126
|
+
@callback.call( Token.new( data||"", @group, :region_open ) )
|
127
|
+
end
|
128
|
+
|
129
|
+
def end_region( gr, data=nil )
|
130
|
+
flush_chunk
|
131
|
+
@group = gr
|
132
|
+
@callback.call( Token.new( data||"", @group, :region_close ) )
|
133
|
+
end
|
134
|
+
|
135
|
+
def flush_chunk
|
136
|
+
@callback.call( Token.new( @chunk, @group ) ) unless @chunk.empty?
|
137
|
+
@chunk = ""
|
138
|
+
end
|
139
|
+
|
116
140
|
end
|
117
141
|
|
118
142
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'syntax'
|
2
|
+
|
3
|
+
module Syntax
|
4
|
+
module Convertors
|
5
|
+
|
6
|
+
# The abstract ancestor class for all convertors. It implements a few
|
7
|
+
# convenience methods to provide a common interface for all convertors.
|
8
|
+
class Abstract
|
9
|
+
|
10
|
+
# A convenience method for instantiating a new convertor for a
|
11
|
+
# specific syntax.
|
12
|
+
def self.for_syntax( syntax )
|
13
|
+
new( Syntax.load( syntax ) )
|
14
|
+
end
|
15
|
+
|
16
|
+
# Creates a new convertor that uses the given tokenizer.
|
17
|
+
def initialize( tokenizer )
|
18
|
+
@tokenizer = tokenizer
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
@@ -1,21 +1,10 @@
|
|
1
|
-
require 'syntax'
|
1
|
+
require 'syntax/convertors/abstract'
|
2
2
|
|
3
3
|
module Syntax
|
4
4
|
module Convertors
|
5
5
|
|
6
6
|
# A simple class for converting a text into HTML.
|
7
|
-
class HTML
|
8
|
-
|
9
|
-
# A convenience method for instantiating a new HTML convertor for a
|
10
|
-
# specific syntax.
|
11
|
-
def self.for_syntax( syntax )
|
12
|
-
new( Syntax.load( syntax ) )
|
13
|
-
end
|
14
|
-
|
15
|
-
# Creates a new HTML convertor that uses the given tokenizer.
|
16
|
-
def initialize( tokenizer )
|
17
|
-
@tokenizer = tokenizer
|
18
|
-
end
|
7
|
+
class HTML < Abstract
|
19
8
|
|
20
9
|
# Converts the given text to HTML, using spans to represent token groups
|
21
10
|
# of any type but <tt>:normal</tt> (which is always unhighlighted). If
|
@@ -23,13 +12,25 @@ module Syntax
|
|
23
12
|
def convert( text, pre=true )
|
24
13
|
html = ""
|
25
14
|
html << "<pre>" if pre
|
15
|
+
regions = []
|
26
16
|
@tokenizer.tokenize( text ) do |tok|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
17
|
+
value = html_escape(tok)
|
18
|
+
case tok.instruction
|
19
|
+
when :region_close then
|
20
|
+
regions.pop
|
21
|
+
html << "</span>"
|
22
|
+
when :region_open then
|
23
|
+
regions.push tok.group
|
24
|
+
html << "<span class=\"#{tok.group}\">#{value}"
|
25
|
+
else
|
26
|
+
if tok.group == ( regions.last || :normal )
|
27
|
+
html << value
|
28
|
+
else
|
29
|
+
html << "<span class=\"#{tok.group}\">#{value}</span>"
|
30
|
+
end
|
31
31
|
end
|
32
32
|
end
|
33
|
+
html << "</span>" while regions.pop
|
33
34
|
html << "</pre>" if pre
|
34
35
|
html
|
35
36
|
end
|
@@ -17,6 +17,8 @@ module Syntax
|
|
17
17
|
# Perform ruby-specific setup
|
18
18
|
def setup
|
19
19
|
@selector = false
|
20
|
+
@allow_operator = false
|
21
|
+
@heredocs = []
|
20
22
|
end
|
21
23
|
|
22
24
|
# Step through a single iteration of the tokenization process.
|
@@ -42,13 +44,17 @@ module Syntax
|
|
42
44
|
when check( /:"/ )
|
43
45
|
start_group :symbol, scan(/:/)
|
44
46
|
scan_delimited_region :symbol, :symbol, "", true
|
47
|
+
@allow_operator = true
|
45
48
|
when check( /:'/ )
|
46
49
|
start_group :symbol, scan(/:/)
|
47
50
|
scan_delimited_region :symbol, :symbol, "", false
|
51
|
+
@allow_operator = true
|
48
52
|
when check( /:\w/ )
|
49
53
|
start_group :symbol, scan(/:\w+[!?]?/)
|
54
|
+
@allow_operator = true
|
50
55
|
when check( /\?\\?./ )
|
51
56
|
start_group :char, scan(/\?\\?./)
|
57
|
+
@allow_operator = true
|
52
58
|
when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
|
53
59
|
if @selector || matched[-1] == ?? || matched[-1] == ?!
|
54
60
|
start_group :ident,
|
@@ -58,20 +64,32 @@ module Syntax
|
|
58
64
|
scan(/(__FILE__|__LINE__|true|false|nil|self)/)
|
59
65
|
end
|
60
66
|
@selector = false
|
67
|
+
@allow_operator = true
|
61
68
|
else
|
62
69
|
case peek(2)
|
63
70
|
when "%r"
|
64
71
|
scan_delimited_region :punct, :regex, scan( /../ ), true
|
72
|
+
@allow_operator = true
|
65
73
|
when "%w", "%q"
|
66
74
|
scan_delimited_region :punct, :string, scan( /../ ), false
|
75
|
+
@allow_operator = true
|
67
76
|
when "%s"
|
68
77
|
scan_delimited_region :punct, :symbol, scan( /../ ), false
|
78
|
+
@allow_operator = true
|
69
79
|
when "%W", "%Q", "%x"
|
70
80
|
scan_delimited_region :punct, :string, scan( /../ ), true
|
81
|
+
@allow_operator = true
|
71
82
|
when /%[^\sa-zA-Z0-9]/
|
72
83
|
scan_delimited_region :punct, :string, scan( /./ ), true
|
84
|
+
@allow_operator = true
|
73
85
|
when "<<"
|
86
|
+
saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
|
74
87
|
start_group :punct, scan( /<</ )
|
88
|
+
if saw_word
|
89
|
+
@allow_operator = false
|
90
|
+
return
|
91
|
+
end
|
92
|
+
|
75
93
|
float_right = scan( /-/ )
|
76
94
|
append "-" if float_right
|
77
95
|
if ( type = scan( /['"]/ ) )
|
@@ -86,45 +104,69 @@ module Syntax
|
|
86
104
|
end
|
87
105
|
start_group :constant, delim
|
88
106
|
start_group :punct, scan( /#{type}/ ) if type
|
89
|
-
|
90
|
-
|
107
|
+
@heredocs << [ float_right, type, delim ]
|
108
|
+
@allow_operator = true
|
91
109
|
else
|
92
110
|
case peek(1)
|
111
|
+
when /[\n\r]/
|
112
|
+
unless @heredocs.empty?
|
113
|
+
scan_heredoc(*@heredocs.shift)
|
114
|
+
else
|
115
|
+
start_group :normal, scan( /\s+/ )
|
116
|
+
end
|
117
|
+
@allow_operator = false
|
93
118
|
when /\s/
|
94
119
|
start_group :normal, scan( /\s+/ )
|
95
120
|
when "#"
|
96
|
-
start_group :comment, scan(
|
121
|
+
start_group :comment, scan( /#[^\n\r]*/ )
|
97
122
|
when /[A-Z]/
|
98
123
|
start_group :constant, scan( /\w+/ )
|
124
|
+
@allow_operator = true
|
99
125
|
when /[a-z_]/
|
100
126
|
word = scan( /\w+[?!]?/ )
|
101
127
|
if !@selector && KEYWORDS.include?( word )
|
102
128
|
start_group :keyword, word
|
129
|
+
@allow_operator = false
|
103
130
|
elsif
|
104
131
|
start_group :ident, word
|
132
|
+
@allow_operator = true
|
105
133
|
end
|
106
134
|
@selector = false
|
107
135
|
when /\d/
|
108
136
|
start_group :number,
|
109
137
|
scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
|
138
|
+
@allow_operator = true
|
110
139
|
when '"'
|
111
140
|
scan_delimited_region :punct, :string, "", true
|
141
|
+
@allow_operator = true
|
112
142
|
when '/'
|
113
|
-
|
143
|
+
if @allow_operator
|
144
|
+
start_group :punct, scan(%r{/})
|
145
|
+
@allow_operator = false
|
146
|
+
else
|
147
|
+
scan_delimited_region :punct, :regex, "", true
|
148
|
+
@allow_operator = true
|
149
|
+
end
|
114
150
|
when "'"
|
115
151
|
scan_delimited_region :punct, :string, "", false
|
152
|
+
@allow_operator = true
|
116
153
|
when "."
|
117
154
|
dots = scan( /\.{1,3}/ )
|
118
155
|
start_group :punct, dots
|
119
156
|
@selector = ( dots.length == 1 )
|
120
157
|
when /[@]/
|
121
158
|
start_group :attribute, scan( /@{1,2}\w*/ )
|
159
|
+
@allow_operator = true
|
122
160
|
when /[$]/
|
123
161
|
start_group :global, scan(/\$/)
|
124
162
|
start_group :global, scan( /\w+|./ ) if check(/./)
|
125
|
-
|
126
|
-
|
127
|
-
|
163
|
+
@allow_operator = true
|
164
|
+
when /[-!?*\/+=<>(\[\{}:;,&|%]/
|
165
|
+
start_group :punct, scan(/./)
|
166
|
+
@allow_operator = false
|
167
|
+
when /[)\]]/
|
168
|
+
start_group :punct, scan(/./)
|
169
|
+
@allow_operator = true
|
128
170
|
else
|
129
171
|
# all else just falls through this, to prevent
|
130
172
|
# infinite loops...
|
@@ -140,8 +182,21 @@ module Syntax
|
|
140
182
|
# Scan a delimited region of text. This handles the simple cases (strings
|
141
183
|
# delimited with quotes) as well as the more complex cases of %-strings
|
142
184
|
# and here-documents.
|
185
|
+
#
|
186
|
+
# * +delim_group+ is the group to use to classify the delimiters of the
|
187
|
+
# region
|
188
|
+
# * +inner_group+ is the group to use to classify the contents of the
|
189
|
+
# region
|
190
|
+
# * +starter+ is the text to use as the starting delimiter
|
191
|
+
# * +exprs+ is a boolean flag indicating whether the region is an
|
192
|
+
# interpolated string or not
|
193
|
+
# * +delim+ is the text to use as the delimiter of the region. If +nil+,
|
194
|
+
# the next character will be treated as the delimiter.
|
195
|
+
# * +heredoc+ is either +false+, meaning the region is not a heredoc, or
|
196
|
+
# <tt>:flush</tt> (meaning the delimiter must be flushed left), or
|
197
|
+
# <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
|
143
198
|
def scan_delimited_region( delim_group, inner_group, starter, exprs,
|
144
|
-
delim=nil,
|
199
|
+
delim=nil, heredoc=false )
|
145
200
|
# begin
|
146
201
|
if !delim
|
147
202
|
start_group delim_group, starter
|
@@ -152,23 +207,22 @@ module Syntax
|
|
152
207
|
when '{' then '}'
|
153
208
|
when '(' then ')'
|
154
209
|
when '[' then ']'
|
210
|
+
when '<' then '>'
|
155
211
|
else delim
|
156
212
|
end
|
157
213
|
end
|
158
214
|
|
159
|
-
|
215
|
+
start_region inner_group
|
160
216
|
|
161
217
|
items = "\\\\|"
|
162
|
-
|
163
|
-
if delim_alone
|
218
|
+
if heredoc
|
164
219
|
items << "(^"
|
165
|
-
items << '\s*' if
|
166
|
-
items << "#{delim}$
|
220
|
+
items << '\s*' if heredoc == :float
|
221
|
+
items << "#{Regexp.escape(delim)}\s*)$"
|
167
222
|
else
|
168
|
-
items << "#{delim}"
|
223
|
+
items << "#{Regexp.escape(delim)}"
|
169
224
|
end
|
170
|
-
|
171
|
-
items << "|#(\\$|@|\\{)"if exprs
|
225
|
+
items << "|#(\\$|@|\\{)" if exprs
|
172
226
|
items = Regexp.new( items )
|
173
227
|
|
174
228
|
loop do
|
@@ -186,15 +240,15 @@ module Syntax
|
|
186
240
|
case peek(1)
|
187
241
|
when "'"
|
188
242
|
scan(/./)
|
189
|
-
start_group :
|
243
|
+
start_group :escape, "\\'"
|
190
244
|
when "\\"
|
191
245
|
scan(/./)
|
192
|
-
start_group :
|
246
|
+
start_group :escape, "\\\\"
|
193
247
|
else
|
194
248
|
start_group inner_group, "\\"
|
195
249
|
end
|
196
250
|
else
|
197
|
-
start_group :
|
251
|
+
start_group :escape, "\\"
|
198
252
|
c = getch
|
199
253
|
append c
|
200
254
|
case c
|
@@ -205,6 +259,7 @@ module Syntax
|
|
205
259
|
end
|
206
260
|
end
|
207
261
|
when delim
|
262
|
+
end_region inner_group
|
208
263
|
start_group delim_group, matched
|
209
264
|
break
|
210
265
|
when /^#/
|
@@ -232,6 +287,16 @@ module Syntax
|
|
232
287
|
end
|
233
288
|
end
|
234
289
|
end
|
290
|
+
|
291
|
+
# Scan a heredoc beginning at the current position.
|
292
|
+
#
|
293
|
+
# * +float+ indicates whether the delimiter may be floated to the right
|
294
|
+
# * +type+ is +nil+, a single quote, or a double quote
|
295
|
+
# * +delim+ is the delimiter to look for
|
296
|
+
def scan_heredoc(float, type, delim)
|
297
|
+
scan_delimited_region( :constant, :string, "", type != "'",
|
298
|
+
delim, float ? :float : :flush )
|
299
|
+
end
|
235
300
|
end
|
236
301
|
|
237
302
|
SYNTAX["ruby"] = Ruby
|
File without changes
|
File without changes
|