purple_shoes 0.0.126 → 0.5.149

data/VERSION CHANGED
@@ -1 +1 @@
-0.0.126
+0.5.149
@@ -0,0 +1,197 @@
+require 'strscan'
+
+module HH::Syntax
+
+  # A single token extracted by a tokenizer. It is simply the lexeme
+  # itself, decorated with a 'group' attribute to identify the type of the
+  # lexeme.
+  class Token < String
+
+    # the type of the lexeme that was extracted.
+    attr_reader :group
+
+    # the instruction associated with this token (:none, :region_open, or
+    # :region_close)
+    attr_reader :instruction
+
+    # Create a new Token representing the given text, and belonging to the
+    # given group.
+    def initialize( text, group, instruction = :none )
+      super text
+      @group = group
+      @instruction = instruction
+    end
+
+  end
+
+  # The base class of all tokenizers. It sets up the scanner and manages the
+  # looping until all tokens have been extracted. It also provides convenience
+  # methods to make sure adjacent tokens of identical groups are returned as
+  # a single token.
+  class Tokenizer
+
+    # The current group being processed by the tokenizer
+    attr_reader :group
+
+    # The current chunk of text being accumulated
+    attr_reader :chunk
+
+    # Start tokenizing. This sets up the state in preparation for tokenization,
+    # such as creating a new scanner for the text and saving the callback block.
+    # The block will be invoked for each token extracted.
+    def start( text, &block )
+      @chunk = ""
+      @group = :normal
+      @callback = block
+      @text = StringScanner.new( text )
+      setup
+    end
+
+    # Subclasses may override this method to provide implementation-specific
+    # setup logic.
+    def setup
+    end
+
+    # Finish tokenizing. This flushes the buffer, yielding any remaining text
+    # to the client.
+    def finish
+      start_group nil
+      teardown
+    end
+
+    # Subclasses may override this method to provide implementation-specific
+    # teardown logic.
+    def teardown
+    end
+
+    # Subclasses must implement this method, which is called for each iteration
+    # of the tokenization process. This method may extract multiple tokens.
+    def step
+      raise NotImplementedError, "subclasses must implement #step"
+    end
+
+    # Begins tokenizing the given text, calling #step until the text has been
+    # exhausted.
+    def tokenize( text, &block )
+      start text, &block
+      step until @text.eos?
+      finish
+    end
+
+    # Specify a set of tokenizer-specific options. Each tokenizer may (or may
+    # not) publish any options, but if a tokenizer does those options may be
+    # used to specify optional behavior.
+    def set( opts={} )
+      ( @options ||= Hash.new ).update opts
+    end
+
+    # Get the value of the specified option.
+    def option(opt)
+      @options ? @options[opt] : nil
+    end
+
+    private
+
+    EOL = /(?=\r\n?|\n|$)/
+
+    # A convenience for delegating method calls to the scanner.
+    def self.delegate( sym )
+      define_method( sym ) { |*a| @text.__send__( sym, *a ) }
+    end
+
+    delegate :bol?
+    delegate :eos?
+    delegate :scan
+    delegate :scan_until
+    delegate :check
+    delegate :check_until
+    delegate :getch
+    delegate :matched
+    delegate :pre_match
+    delegate :peek
+    delegate :pos
+
+    # Access the n-th subgroup from the most recent match.
+    def subgroup(n)
+      @text[n]
+    end
+
+    # Append the given data to the currently active chunk.
+    def append( data )
+      @chunk << data
+    end
+
+    # Request that a new group be started. If the current group is the same
+    # as the group being requested, a new group will not be created. If a new
+    # group is created and the current chunk is not empty, the chunk's
+    # contents will be yielded to the client as a token, and then cleared.
+    #
+    # After the new group is started, if +data+ is non-nil it will be appended
+    # to the chunk.
+    def start_group( gr, data=nil )
+      flush_chunk if gr != @group
+      @group = gr
+      @chunk << data if data
+    end
+
+    def start_region( gr, data=nil )
+      flush_chunk
+      @group = gr
+      @callback.call( Token.new( data||"", @group, :region_open ) )
+    end
+
+    def end_region( gr, data=nil )
+      flush_chunk
+      @group = gr
+      @callback.call( Token.new( data||"", @group, :region_close ) )
+    end
+
+    def flush_chunk
+      @callback.call( Token.new( @chunk, @group ) ) unless @chunk.empty?
+      @chunk = ""
+    end
+
+    def subtokenize( syntax, text )
+      tokenizer = HH::Syntax.load( syntax )
+      tokenizer.set @options if @options
+      flush_chunk
+      tokenizer.tokenize( text, &@callback )
+    end
+  end
+
+
+  # A default tokenizer for handling syntaxes that are not explicitly handled
+  # elsewhere. It simply yields the given text as a single token.
+  class Default
+
+    # Yield the given text as a single token.
+    def tokenize( text )
+      yield Token.new( text, :normal )
+    end
+
+  end
+
+  # A hash for registering syntax implementations.
+  SYNTAX = Hash.new( Default )
+
+  # Load the implementation of the requested syntax. If the syntax cannot be
+  # found, or if it cannot be loaded for whatever reason, the Default syntax
+  # handler will be returned.
+  def load( syntax )
+    begin
+      require_relative "lang/#{syntax}"
+    rescue LoadError
+    end
+    SYNTAX[ syntax ].new
+  end
+  module_function :load
+
+  # Return an array of the names of supported syntaxes.
+  def all
+    lang_dir = File.join(File.dirname(__FILE__), "syntax", "lang")
+    Dir["#{lang_dir}/*.rb"].map { |path| File.basename(path, ".rb") }
+  end
+  module_function :all
+
+
+end
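
The file above is the generic framework: Tokenizer drives a StringScanner, accumulates adjacent lexemes of the same group into one chunk, and emits each chunk to a callback as a Token (a String tagged with a group). A minimal sketch of a custom tokenizer built on that API follows; the HashComments class and the sample input are hypothetical illustrations, not part of this gem.

# Illustrative sketch only: a subclass of the framework above just implements #step.
class HashComments < HH::Syntax::Tokenizer
  def step
    if peek(1) == "#"
      # everything from "#" to the end of the line becomes one :comment token
      start_group :comment, scan(/#[^\n\r]*/)
    else
      # any other character is accumulated into the current :normal chunk
      start_group :normal, getch
    end
  end
end

HashComments.new.tokenize("x = 1 # the answer\n") do |token|
  puts "#{token.group}: #{token.inspect}"
end
# should print roughly:
#   normal: "x = 1 "
#   comment: "# the answer"
#   normal: "\n"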
@@ -0,0 +1,316 @@
+
+module HH::Syntax
+
+  # A tokenizer for the Ruby language. It recognizes all common syntax
+  # (and some less common syntax) but because it is not a true lexer, it
+  # will make mistakes on some ambiguous cases.
+  class Ruby < Tokenizer
+
+    # The list of all identifiers recognized as keywords.
+    KEYWORDS =
+      %w{if then elsif else end begin do rescue ensure while for
+         class module def yield raise until unless and or not when
+         case super undef break next redo retry in return alias
+         defined?}
+
+    # Perform ruby-specific setup
+    def setup
+      @selector = false
+      @allow_operator = false
+      @heredocs = []
+    end
+
+    # Step through a single iteration of the tokenization process.
+    def step
+      case
+      when bol? && check( /=begin/ )
+        start_group( :comment, scan_until( /^=end#{EOL}/ ) )
+      when bol? && check( /__END__#{EOL}/ )
+        start_group( :comment, scan_until( /\Z/ ) )
+      else
+        case
+        when check( /def\s+/ )
+          start_group :keyword, scan( /def\s+/ )
+          start_group :method, scan_until( /(?=[;(\s]|#{EOL})/ )
+        when check( /class\s+/ )
+          start_group :keyword, scan( /class\s+/ )
+          start_group :class, scan_until( /(?=[;\s<]|#{EOL})/ )
+        when check( /module\s+/ )
+          start_group :keyword, scan( /module\s+/ )
+          start_group :module, scan_until( /(?=[;\s]|#{EOL})/ )
+        when check( /::/ )
+          start_group :punct, scan(/::/)
+        when check( /:"/ )
+          start_group :symbol, scan(/:/)
+          scan_delimited_region :symbol, :symbol, "", true
+          @allow_operator = true
+        when check( /:'/ )
+          start_group :symbol, scan(/:/)
+          scan_delimited_region :symbol, :symbol, "", false
+          @allow_operator = true
+        when scan( /:[_a-zA-Z@$][$@\w]*[=!?]?/ )
+          start_group :symbol, matched
+          @allow_operator = true
+        when scan( /\?(\\[^\n\r]|[^\\\n\r\s])/ )
+          start_group :char, matched
+          @allow_operator = true
+        when check( /(__FILE__|__LINE__|true|false|nil|self)[?!]?/ )
+          if @selector || matched[-1] == ?? || matched[-1] == ?!
+            start_group :ident,
+              scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
+          else
+            start_group :constant,
+              scan(/(__FILE__|__LINE__|true|false|nil|self)/)
+          end
+          @selector = false
+          @allow_operator = true
+        when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
+          start_group :number, matched
+          @allow_operator = true
+        else
+          case peek(2)
+          when "%r"
+            scan_delimited_region :punct, :regex, scan( /../ ), true
+            @allow_operator = true
+          when "%w", "%q"
+            scan_delimited_region :punct, :string, scan( /../ ), false
+            @allow_operator = true
+          when "%s"
+            scan_delimited_region :punct, :symbol, scan( /../ ), false
+            @allow_operator = true
+          when "%W", "%Q", "%x"
+            scan_delimited_region :punct, :string, scan( /../ ), true
+            @allow_operator = true
+          when /%[^\sa-zA-Z0-9]/
+            scan_delimited_region :punct, :string, scan( /./ ), true
+            @allow_operator = true
+          when "<<"
+            saw_word = ( chunk[-1,1] =~ /[\w!?]/ )
+            start_group :punct, scan( /<</ )
+            if saw_word
+              @allow_operator = false
+              return
+            end
+
+            float_right = scan( /-/ )
+            append "-" if float_right
+            if ( type = scan( /['"]/ ) )
+              append type
+              delim = scan_until( /(?=#{type})/ )
+              if delim.nil?
+                append scan_until( /\Z/ )
+                return
+              end
+            else
+              delim = scan( /\w+/ ) or return
+            end
+            start_group :constant, delim
+            start_group :punct, scan( /#{type}/ ) if type
+            @heredocs << [ float_right, type, delim ]
+            @allow_operator = true
+          else
+            case peek(1)
+            when /[\n\r]/
+              unless @heredocs.empty?
+                scan_heredoc(*@heredocs.shift)
+              else
+                start_group :normal, scan( /\s+/ )
+              end
+              @allow_operator = false
+            when /\s/
+              start_group :normal, scan( /\s+/ )
+            when "#"
+              start_group :comment, scan( /#[^\n\r]*/ )
+            when /[A-Z]/
+              start_group @selector ? :ident : :constant, scan( /\w+/ )
+              @allow_operator = true
+            when /[a-z_]/
+              word = scan( /\w+[?!]?/ )
+              if !@selector && KEYWORDS.include?( word )
+                start_group :keyword, word
+                @allow_operator = false
+              else
+                start_group :ident, word
+                @allow_operator = true
+              end
+              @selector = false
+            when /\d/
+              start_group :number,
+                scan( /[\d_]+(\.[\d_]+)?([eE][\d_]+)?/ )
+              @allow_operator = true
+            when '"'
+              scan_delimited_region :punct, :string, "", true
+              @allow_operator = true
+            when '/'
+              if @allow_operator
+                start_group :punct, scan(%r{/})
+                @allow_operator = false
+              else
+                scan_delimited_region :punct, :regex, "", true
+                @allow_operator = true
+              end
+            when "'"
+              scan_delimited_region :punct, :string, "", false
+              @allow_operator = true
+            when "."
+              dots = scan( /\.{1,3}/ )
+              start_group :punct, dots
+              @selector = ( dots.length == 1 )
+            when /[@]/
+              start_group :attribute, scan( /@{1,2}\w*/ )
+              @allow_operator = true
+            when /[$]/
+              start_group :global, scan(/\$/)
+              start_group :global, scan( /\w+|./ ) if check(/./)
+              @allow_operator = true
+            when /[-!?*\/+=<>(\[\{}:;,&|%]/
+              start_group :punct, scan(/./)
+              @allow_operator = false
+            when /[)\]]/
+              start_group :punct, scan(/./)
+              @allow_operator = true
+            else
+              # all else just falls through this, to prevent
+              # infinite loops...
+              append getch
+            end
+          end
+        end
+      end
+    end
+
+    private
+
+    # Scan a delimited region of text. This handles the simple cases (strings
+    # delimited with quotes) as well as the more complex cases of %-strings
+    # and here-documents.
+    #
+    # * +delim_group+ is the group to use to classify the delimiters of the
+    #   region
+    # * +inner_group+ is the group to use to classify the contents of the
+    #   region
+    # * +starter+ is the text to use as the starting delimiter
+    # * +exprs+ is a boolean flag indicating whether the region is an
+    #   interpolated string or not
+    # * +delim+ is the text to use as the delimiter of the region. If +nil+,
+    #   the next character will be treated as the delimiter.
+    # * +heredoc+ is either +false+, meaning the region is not a heredoc, or
+    #   <tt>:flush</tt> (meaning the delimiter must be flushed left), or
+    #   <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
+    def scan_delimited_region( delim_group, inner_group, starter, exprs,
+                               delim=nil, heredoc=false )
+      # begin
+      if !delim
+        start_group delim_group, starter
+        delim = scan( /./ )
+        append delim
+
+        delim = case delim
+                when '{' then '}'
+                when '(' then ')'
+                when '[' then ']'
+                when '<' then '>'
+                else delim
+                end
+      end
+
+      start_region inner_group
+
+      items = "\\\\|"
+      if heredoc
+        items << "(^"
+        items << '\s*' if heredoc == :float
+        items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
+      else
+        items << "#{Regexp.escape(delim)}"
+      end
+      items << "|#(\\$|@@?|\\{)" if exprs
+      items = Regexp.new( items )
+
+      loop do
+        p = pos
+        match = scan_until( items )
+        if match.nil?
+          start_group inner_group, scan_until( /\Z/ )
+          break
+        else
+          text = pre_match[p..-1]
+          start_group inner_group, text if text.length > 0
+          case matched.strip
+          when "\\"
+            unless exprs
+              case peek(1)
+              when "'"
+                scan(/./)
+                start_group :escape, "\\'"
+              when "\\"
+                scan(/./)
+                start_group :escape, "\\\\"
+              else
+                start_group inner_group, "\\"
+              end
+            else
+              start_group :escape, "\\"
+              c = getch
+              append c
+              case c
+              when 'x'
+                append scan( /[a-fA-F0-9]{1,2}/ )
+              when /[0-7]/
+                append scan( /[0-7]{0,2}/ )
+              end
+            end
+          when delim
+            end_region inner_group
+            start_group delim_group, matched
+            break
+          when /^#/
+            do_highlight = (option(:expressions) == :highlight)
+            start_region :expr if do_highlight
+            start_group :expr, matched
+            case matched[1]
+            when ?{
+              depth = 1
+              content = ""
+              while depth > 0
+                p = pos
+                c = scan_until( /[\{}]/ )
+                if c.nil?
+                  content << scan_until( /\Z/ )
+                  break
+                else
+                  depth += ( matched == "{" ? 1 : -1 )
+                  content << pre_match[p..-1]
+                  content << matched if depth > 0
+                end
+              end
+              if do_highlight
+                subtokenize "ruby", content
+                start_group :expr, "}"
+              else
+                append content + "}"
+              end
+            when ?$, ?@
+              append scan( /\w+/ )
+            end
+            end_region :expr if do_highlight
+          else raise "unexpected match on #{matched}"
+          end
+        end
+      end
+    end
+
+    # Scan a heredoc beginning at the current position.
+    #
+    # * +float+ indicates whether the delimiter may be floated to the right
+    # * +type+ is +nil+, a single quote, or a double quote
+    # * +delim+ is the delimiter to look for
+    def scan_heredoc(float, type, delim)
+      scan_delimited_region( :constant, :string, "", type != "'",
+                             delim, float ? :float : :flush )
+    end
+  end
+
+  SYNTAX["ruby"] = Ruby
+
+end
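
This second file registers the Ruby tokenizer under SYNTAX["ruby"]. A hedged usage sketch follows, assuming both files from this release are loadable; the require path is hypothetical and the printed output is approximate.

require 'hh/syntax'   # hypothetical path to the framework file shown above

tokenizer = HH::Syntax.load("ruby")   # falls back to Default if the lang file cannot be required
tokenizer.tokenize("def hello; 1 + 2 end") do |token|
  next if token.to_s.strip.empty?     # skip pure-whitespace tokens
  printf("%-8s %p\n", token.group, token.to_s)
end
# should print roughly: keyword "def ", method "hello", punct ";",
# number "1", punct "+", number "2", keyword "end"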