syntax 0.5.0 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/data/ruby.css +18 -0
- data/data/xml.css +8 -0
- data/data/yaml.css +12 -0
- data/lib/syntax.rb +8 -1
- data/lib/syntax/common.rb +30 -6
- data/lib/syntax/convertors/abstract.rb +24 -0
- data/lib/syntax/convertors/html.rb +18 -17
- data/lib/syntax/{ruby.rb → lang/ruby.rb} +84 -19
- data/lib/syntax/{xml.rb → lang/xml.rb} +0 -0
- data/lib/syntax/{yaml.rb → lang/yaml.rb} +0 -0
- data/lib/syntax/version.rb +1 -1
- data/test/syntax/tc_ruby.rb +500 -352
- data/test/syntax/tc_xml.rb +2 -2
- data/test/syntax/tc_yaml.rb +2 -2
- data/test/tc_syntax.rb +22 -0
- metadata +14 -8
data/data/ruby.css
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
.ruby .normal {}
|
2
|
+
.ruby .comment { color: #005; font-style: italic; }
|
3
|
+
.ruby .keyword { color: #A00; font-weight: bold; }
|
4
|
+
.ruby .method { color: #077; }
|
5
|
+
.ruby .class { color: #074; }
|
6
|
+
.ruby .module { color: #050; }
|
7
|
+
.ruby .punct { color: #447; font-weight: bold; }
|
8
|
+
.ruby .symbol { color: #099; }
|
9
|
+
.ruby .string { color: #944; background: #FFE; }
|
10
|
+
.ruby .char { color: #F07; }
|
11
|
+
.ruby .ident { color: #004; }
|
12
|
+
.ruby .constant { color: #07F; }
|
13
|
+
.ruby .regex { color: #B66; background: #FEF; }
|
14
|
+
.ruby .number { color: #F99; }
|
15
|
+
.ruby .attribute { color: #7BB; }
|
16
|
+
.ruby .global { color: #7FB; }
|
17
|
+
.ruby .expr { color: #227; }
|
18
|
+
.ruby .escape { color: #277; }
|
data/data/xml.css
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
.xml .normal {}
|
2
|
+
.xml .namespace { color: #B66; font-weight: bold; }
|
3
|
+
.xml .tag { color: #F88; }
|
4
|
+
.xml .comment { color: #005; font-style: italic; }
|
5
|
+
.xml .punct { color: #447; font-weight: bold; }
|
6
|
+
.xml .string { color: #944; }
|
7
|
+
.xml .number { color: #F99; }
|
8
|
+
.xml .attribute { color: #BB7; }
|
data/data/yaml.css
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
.yaml .normal {}
|
2
|
+
.yaml .document { font-weight: bold; color: #07F; }
|
3
|
+
.yaml .type { font-weight: bold; color: #05C; }
|
4
|
+
.yaml .key { color: #F88; }
|
5
|
+
.yaml .comment { color: #005; font-style: italic; }
|
6
|
+
.yaml .punct { color: #447; font-weight: bold; }
|
7
|
+
.yaml .string { color: #944; }
|
8
|
+
.yaml .number { color: #F99; }
|
9
|
+
.yaml .time { color: #F99; }
|
10
|
+
.yaml .date { color: #F99; }
|
11
|
+
.yaml .ref { color: #944; }
|
12
|
+
.yaml .anchor { color: #944; }
|
data/lib/syntax.rb
CHANGED
@@ -21,11 +21,18 @@ module Syntax
|
|
21
21
|
# handler will be returned.
|
22
22
|
def load( syntax )
|
23
23
|
begin
|
24
|
-
require "syntax/#{syntax}"
|
24
|
+
require "syntax/lang/#{syntax}"
|
25
25
|
rescue LoadError
|
26
26
|
end
|
27
27
|
SYNTAX[ syntax ].new
|
28
28
|
end
|
29
29
|
module_function :load
|
30
30
|
|
31
|
+
# Return an array of the names of supported syntaxes.
|
32
|
+
def all
|
33
|
+
lang_dir = File.join(File.dirname(__FILE__), "syntax", "lang")
|
34
|
+
Dir["#{lang_dir}/*.rb"].map { |path| File.basename(path, ".rb") }
|
35
|
+
end
|
36
|
+
module_function :all
|
37
|
+
|
31
38
|
end
|
data/lib/syntax/common.rb
CHANGED
@@ -10,11 +10,16 @@ module Syntax
|
|
10
10
|
# the type of the lexeme that was extracted.
|
11
11
|
attr_reader :group
|
12
12
|
|
13
|
+
# the instruction associated with this token (:none, :region_open, or
|
14
|
+
# :region_close)
|
15
|
+
attr_reader :instruction
|
16
|
+
|
13
17
|
# Create a new Token representing the given text, and belonging to the
|
14
18
|
# given group.
|
15
|
-
def initialize( text, group )
|
19
|
+
def initialize( text, group, instruction = :none )
|
16
20
|
super text
|
17
21
|
@group = group
|
22
|
+
@instruction = instruction
|
18
23
|
end
|
19
24
|
|
20
25
|
end
|
@@ -25,6 +30,12 @@ module Syntax
|
|
25
30
|
# a single token.
|
26
31
|
class Tokenizer
|
27
32
|
|
33
|
+
# The current group being processed by the tokenizer
|
34
|
+
attr_reader :group
|
35
|
+
|
36
|
+
# The current chunk of text being accumulated
|
37
|
+
attr_reader :chunk
|
38
|
+
|
28
39
|
# Start tokenizing. This sets up the state in preparation for tokenization,
|
29
40
|
# such as creating a new scanner for the text and saving the callback block.
|
30
41
|
# The block will be invoked for each token extracted.
|
@@ -104,15 +115,28 @@ module Syntax
|
|
104
115
|
# After the new group is started, if +data+ is non-nil it will be appended
|
105
116
|
# to the chunk.
|
106
117
|
def start_group( gr, data=nil )
|
107
|
-
if gr != @group
|
108
|
-
@callback.call( Token.new( @chunk, @group ) )
|
109
|
-
@chunk = ""
|
110
|
-
end
|
111
|
-
|
118
|
+
flush_chunk if gr != @group
|
112
119
|
@group = gr
|
113
120
|
@chunk << data if data
|
114
121
|
end
|
115
122
|
|
123
|
+
def start_region( gr, data=nil )
|
124
|
+
flush_chunk
|
125
|
+
@group = gr
|
126
|
+
@callback.call( Token.new( data||"", @group, :region_open ) )
|
127
|
+
end
|
128
|
+
|
129
|
+
def end_region( gr, data=nil )
|
130
|
+
flush_chunk
|
131
|
+
@group = gr
|
132
|
+
@callback.call( Token.new( data||"", @group, :region_close ) )
|
133
|
+
end
|
134
|
+
|
135
|
+
def flush_chunk
|
136
|
+
@callback.call( Token.new( @chunk, @group ) ) unless @chunk.empty?
|
137
|
+
@chunk = ""
|
138
|
+
end
|
139
|
+
|
116
140
|
end
|
117
141
|
|
118
142
|
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
require 'syntax'
|
2
|
+
|
3
|
+
module Syntax
|
4
|
+
module Convertors
|
5
|
+
|
6
|
+
# The abstract ancestor class for all convertors. It implements a few
|
7
|
+
# convenience methods to provide a common interface for all convertors.
|
8
|
+
class Abstract
|
9
|
+
|
10
|
+
# A convenience method for instantiating a new convertor for a
|
11
|
+
# specific syntax.
|
12
|
+
def self.for_syntax( syntax )
|
13
|
+
new( Syntax.load( syntax ) )
|
14
|
+
end
|
15
|
+
|
16
|
+
# Creates a new convertor that uses the given tokenizer.
|
17
|
+
def initialize( tokenizer )
|
18
|
+
@tokenizer = tokenizer
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
end
|
@@ -1,21 +1,10 @@
|
|
1
|
-
require 'syntax'
|
1
|
+
require 'syntax/convertors/abstract'
|
2
2
|
|
3
3
|
module Syntax
|
4
4
|
module Convertors
|
5
5
|
|
6
6
|
# A simple class for converting a text into HTML.
|
7
|
-
class HTML
|
8
|
-
|
9
|
-
# A convenience method for instantiating a new HTML convertor for a
|
10
|
-
# specific syntax.
|
11
|
-
def self.for_syntax( syntax )
|
12
|
-
new( Syntax.load( syntax ) )
|
13
|
-
end
|
14
|
-
|
15
|
-
# Creates a new HTML convertor that uses the given tokenizer.
|
16
|
-
def initialize( tokenizer )
|
17
|
-
@tokenizer = tokenizer
|
18
|
-
end
|
7
|
+
class HTML < Abstract
|
19
8
|
|
20
9
|
# Converts the given text to HTML, using spans to represent token groups
|
21
10
|
# of any type but <tt>:normal</tt> (which is always unhighlighted). If
|
@@ -23,13 +12,25 @@ module Syntax
|
|
23
12
|
def convert( text, pre=true )
|
24
13
|
html = ""
|
25
14
|
html << "<pre>" if pre
|
15
|
+
regions = []
|
26
16
|
@tokenizer.tokenize( text ) do |tok|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
17
|
+
value = html_escape(tok)
|
18
|
+
case tok.instruction
|
19
|
+
when :region_close then
|
20
|
+
regions.pop
|
21
|
+
html << "</span>"
|
22
|
+
when :region_open then
|
23
|
+
regions.push tok.group
|
24
|
+
html << "<span class=\"#{tok.group}\">#{value}"
|
25
|
+
else
|
26
|
+
if tok.group == ( regions.last || :normal )
|
27
|
+
html << value
|
28
|
+
else
|
29
|
+
html << "<span class=\"#{tok.group}\">#{value}</span>"
|
30
|
+
end
|
31
31
|
end
|
32
32
|
end
|
33
|
+
html << "</span>" while regions.pop
|
33
34
|
html << "</pre>" if pre
|
34
35
|
html
|
35
36
|
end
|
@@ -17,6 +17,8 @@ module Syntax
|
|
17
17
|
# Perform ruby-specific setup
|
18
18
|
def setup
|
19
19
|
@selector = false
|
20
|
+
@allow_operator = false
|
21
|
+
@heredocs = []
|
20
22
|
end
|
21
23
|
|
22
24
|
# Step through a single iteration of the tokenization process.
|
@@ -42,13 +44,17 @@ module Syntax
|
|
42
44
|
when check( /:"/ )
|
43
45
|
start_group :symbol, scan(/:/)
|
44
46
|
scan_delimited_region :symbol, :symbol, "", true
|
47
|
+
@allow_operator = true
|
45
48
|
when check( /:'/ )
|
46
49
|
start_group :symbol, scan(/:/)
|
47
50
|
scan_delimited_region :symbol, :symbol, "", false
|
51
|
+
@allow_operator = true
|
48
52
|
when check( /:\w/ )
|
49
53
|
start_group :symbol, scan(/:\w+[!?]?/)
|
54
|
+
@allow_operator = true
|
50
55
|
when check( /\?\\?./ )
|
51
56
|
start_group :char, scan(/\?\\?./)
|
57
|
+
@allow_operator = true
|
52
58
|
when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
|
53
59
|
if @selector || matched[-1] == ?? || matched[-1] == ?!
|
54
60
|
start_group :ident,
|
@@ -58,20 +64,32 @@ module Syntax
|
|
58
64
|
scan(/(__FILE__|__LINE__|true|false|nil|self)/)
|
59
65
|
end
|
60
66
|
@selector = false
|
67
|
+
@allow_operator = true
|
61
68
|
else
|
62
69
|
case peek(2)
|
63
70
|
when "%r"
|
64
71
|
scan_delimited_region :punct, :regex, scan( /../ ), true
|
72
|
+
@allow_operator = true
|
65
73
|
when "%w", "%q"
|
66
74
|
scan_delimited_region :punct, :string, scan( /../ ), false
|
75
|
+
@allow_operator = true
|
67
76
|
when "%s"
|
68
77
|
scan_delimited_region :punct, :symbol, scan( /../ ), false
|
78
|
+
@allow_operator = true
|
69
79
|
when "%W", "%Q", "%x"
|
70
80
|
scan_delimited_region :punct, :string, scan( /../ ), true
|
81
|
+
@allow_operator = true
|
71
82
|
when /%[^\sa-zA-Z0-9]/
|
72
83
|
scan_delimited_region :punct, :string, scan( /./ ), true
|
84
|
+
@allow_operator = true
|
73
85
|
when "<<"
|
86
|
+
saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
|
74
87
|
start_group :punct, scan( /<</ )
|
88
|
+
if saw_word
|
89
|
+
@allow_operator = false
|
90
|
+
return
|
91
|
+
end
|
92
|
+
|
75
93
|
float_right = scan( /-/ )
|
76
94
|
append "-" if float_right
|
77
95
|
if ( type = scan( /['"]/ ) )
|
@@ -86,45 +104,69 @@ module Syntax
|
|
86
104
|
end
|
87
105
|
start_group :constant, delim
|
88
106
|
start_group :punct, scan( /#{type}/ ) if type
|
89
|
-
|
90
|
-
|
107
|
+
@heredocs << [ float_right, type, delim ]
|
108
|
+
@allow_operator = true
|
91
109
|
else
|
92
110
|
case peek(1)
|
111
|
+
when /[\n\r]/
|
112
|
+
unless @heredocs.empty?
|
113
|
+
scan_heredoc(*@heredocs.shift)
|
114
|
+
else
|
115
|
+
start_group :normal, scan( /\s+/ )
|
116
|
+
end
|
117
|
+
@allow_operator = false
|
93
118
|
when /\s/
|
94
119
|
start_group :normal, scan( /\s+/ )
|
95
120
|
when "#"
|
96
|
-
start_group :comment, scan(
|
121
|
+
start_group :comment, scan( /#[^\n\r]*/ )
|
97
122
|
when /[A-Z]/
|
98
123
|
start_group :constant, scan( /\w+/ )
|
124
|
+
@allow_operator = true
|
99
125
|
when /[a-z_]/
|
100
126
|
word = scan( /\w+[?!]?/ )
|
101
127
|
if !@selector && KEYWORDS.include?( word )
|
102
128
|
start_group :keyword, word
|
129
|
+
@allow_operator = false
|
103
130
|
elsif
|
104
131
|
start_group :ident, word
|
132
|
+
@allow_operator = true
|
105
133
|
end
|
106
134
|
@selector = false
|
107
135
|
when /\d/
|
108
136
|
start_group :number,
|
109
137
|
scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
|
138
|
+
@allow_operator = true
|
110
139
|
when '"'
|
111
140
|
scan_delimited_region :punct, :string, "", true
|
141
|
+
@allow_operator = true
|
112
142
|
when '/'
|
113
|
-
|
143
|
+
if @allow_operator
|
144
|
+
start_group :punct, scan(%r{/})
|
145
|
+
@allow_operator = false
|
146
|
+
else
|
147
|
+
scan_delimited_region :punct, :regex, "", true
|
148
|
+
@allow_operator = true
|
149
|
+
end
|
114
150
|
when "'"
|
115
151
|
scan_delimited_region :punct, :string, "", false
|
152
|
+
@allow_operator = true
|
116
153
|
when "."
|
117
154
|
dots = scan( /\.{1,3}/ )
|
118
155
|
start_group :punct, dots
|
119
156
|
@selector = ( dots.length == 1 )
|
120
157
|
when /[@]/
|
121
158
|
start_group :attribute, scan( /@{1,2}\w*/ )
|
159
|
+
@allow_operator = true
|
122
160
|
when /[$]/
|
123
161
|
start_group :global, scan(/\$/)
|
124
162
|
start_group :global, scan( /\w+|./ ) if check(/./)
|
125
|
-
|
126
|
-
|
127
|
-
|
163
|
+
@allow_operator = true
|
164
|
+
when /[-!?*\/+=<>(\[\{}:;,&|%]/
|
165
|
+
start_group :punct, scan(/./)
|
166
|
+
@allow_operator = false
|
167
|
+
when /[)\]]/
|
168
|
+
start_group :punct, scan(/./)
|
169
|
+
@allow_operator = true
|
128
170
|
else
|
129
171
|
# all else just falls through this, to prevent
|
130
172
|
# infinite loops...
|
@@ -140,8 +182,21 @@ module Syntax
|
|
140
182
|
# Scan a delimited region of text. This handles the simple cases (strings
|
141
183
|
# delimited with quotes) as well as the more complex cases of %-strings
|
142
184
|
# and here-documents.
|
185
|
+
#
|
186
|
+
# * +delim_group+ is the group to use to classify the delimiters of the
|
187
|
+
# region
|
188
|
+
# * +inner_group+ is the group to use to classify the contents of the
|
189
|
+
# region
|
190
|
+
# * +starter+ is the text to use as the starting delimiter
|
191
|
+
# * +exprs+ is a boolean flag indicating whether the region is an
|
192
|
+
# interpolated string or not
|
193
|
+
# * +delim+ is the text to use as the delimiter of the region. If +nil+,
|
194
|
+
# the next character will be treated as the delimiter.
|
195
|
+
# * +heredoc+ is either +false+, meaning the region is not a heredoc, or
|
196
|
+
# <tt>:flush</tt> (meaning the delimiter must be flush left), or
|
197
|
+
# <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
|
143
198
|
def scan_delimited_region( delim_group, inner_group, starter, exprs,
|
144
|
-
delim=nil,
|
199
|
+
delim=nil, heredoc=false )
|
145
200
|
# begin
|
146
201
|
if !delim
|
147
202
|
start_group delim_group, starter
|
@@ -152,23 +207,22 @@ module Syntax
|
|
152
207
|
when '{' then '}'
|
153
208
|
when '(' then ')'
|
154
209
|
when '[' then ']'
|
210
|
+
when '<' then '>'
|
155
211
|
else delim
|
156
212
|
end
|
157
213
|
end
|
158
214
|
|
159
|
-
|
215
|
+
start_region inner_group
|
160
216
|
|
161
217
|
items = "\\\\|"
|
162
|
-
|
163
|
-
if delim_alone
|
218
|
+
if heredoc
|
164
219
|
items << "(^"
|
165
|
-
items << '\s*' if
|
166
|
-
items << "#{delim}$
|
220
|
+
items << '\s*' if heredoc == :float
|
221
|
+
items << "#{Regexp.escape(delim)}\s*)$"
|
167
222
|
else
|
168
|
-
items << "#{delim}"
|
223
|
+
items << "#{Regexp.escape(delim)}"
|
169
224
|
end
|
170
|
-
|
171
|
-
items << "|#(\\$|@|\\{)"if exprs
|
225
|
+
items << "|#(\\$|@|\\{)" if exprs
|
172
226
|
items = Regexp.new( items )
|
173
227
|
|
174
228
|
loop do
|
@@ -186,15 +240,15 @@ module Syntax
|
|
186
240
|
case peek(1)
|
187
241
|
when "'"
|
188
242
|
scan(/./)
|
189
|
-
start_group :
|
243
|
+
start_group :escape, "\\'"
|
190
244
|
when "\\"
|
191
245
|
scan(/./)
|
192
|
-
start_group :
|
246
|
+
start_group :escape, "\\\\"
|
193
247
|
else
|
194
248
|
start_group inner_group, "\\"
|
195
249
|
end
|
196
250
|
else
|
197
|
-
start_group :
|
251
|
+
start_group :escape, "\\"
|
198
252
|
c = getch
|
199
253
|
append c
|
200
254
|
case c
|
@@ -205,6 +259,7 @@ module Syntax
|
|
205
259
|
end
|
206
260
|
end
|
207
261
|
when delim
|
262
|
+
end_region inner_group
|
208
263
|
start_group delim_group, matched
|
209
264
|
break
|
210
265
|
when /^#/
|
@@ -232,6 +287,16 @@ module Syntax
|
|
232
287
|
end
|
233
288
|
end
|
234
289
|
end
|
290
|
+
|
291
|
+
# Scan a heredoc beginning at the current position.
|
292
|
+
#
|
293
|
+
# * +float+ indicates whether the delimiter may be floated to the right
|
294
|
+
# * +type+ is +nil+, a single quote, or a double quote
|
295
|
+
# * +delim+ is the delimiter to look for
|
296
|
+
def scan_heredoc(float, type, delim)
|
297
|
+
scan_delimited_region( :constant, :string, "", type != "'",
|
298
|
+
delim, float ? :float : :flush )
|
299
|
+
end
|
235
300
|
end
|
236
301
|
|
237
302
|
SYNTAX["ruby"] = Ruby
|
File without changes
|
File without changes
|