purple_shoes 0.0.126 → 0.5.149

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.126
1
+ 0.5.149
@@ -0,0 +1,197 @@
1
+ require 'strscan'
2
+
3
+ module HH::Syntax
4
+
5
# One lexeme produced by a tokenizer. A Token is the matched text itself
# (it is a String) tagged with the group the tokenizer classified it as.
class Token < String

  # group::       the type of the lexeme that was extracted.
  # instruction:: the instruction associated with this token (:none,
  #               :region_open, or :region_close).
  attr_reader :group, :instruction

  # Build a token holding +text+ and belonging to +group+. When no
  # +instruction+ is supplied the token carries :none.
  def initialize(text, group, instruction = :none)
    super(text)
    @group, @instruction = group, instruction
  end

end
26
+
27
# The base class of all tokenizers. It sets up the scanner and manages the
# looping until all tokens have been extracted. It also provides convenience
# methods to make sure adjacent tokens of identical groups are returned as
# a single token.
class Tokenizer

  # The current group being processed by the tokenizer
  attr_reader :group

  # The current chunk of text being accumulated
  attr_reader :chunk

  # Start tokenizing. This sets up the state in preparation for tokenization,
  # such as creating a new scanner for the text and saving the callback block.
  # The block will be invoked for each token extracted.
  def start(text, &block)
    @chunk = ""
    @group = :normal
    @callback = block
    @text = StringScanner.new(text)
    setup
  end

  # Subclasses may override this method to provide implementation-specific
  # setup logic.
  def setup
  end

  # Finish tokenizing. This flushes the buffer, yielding any remaining text
  # to the client.
  def finish
    start_group nil
    teardown
  end

  # Subclasses may override this method to provide implementation-specific
  # teardown logic.
  def teardown
  end

  # Subclasses must implement this method, which is called for each iteration
  # of the tokenization process. This method may extract multiple tokens.
  # Every call must consume input, because #tokenize keeps calling it until
  # the scanner reaches the end of the text.
  def step
    raise NotImplementedError, "subclasses must implement #step"
  end

  # Begins tokenizing the given text, calling #step until the text has been
  # exhausted.
  def tokenize(text, &block)
    start text, &block
    step until @text.eos?
    finish
  end

  # Specify a set of tokenizer-specific options. Each tokenizer may (or may
  # not) publish any options, but if a tokenizer does those options may be
  # used to specify optional behavior. Returns the merged options hash.
  def set(opts = {})
    (@options ||= Hash.new).update opts
  end

  # Get the value of the specified option, or +nil+ when no options were set.
  def option(opt)
    @options ? @options[opt] : nil
  end

  private

  # Zero-width match for the end of a line (CR, CRLF, LF or end of input).
  EOL = /(?=\r\n?|\n|$)/

  # A convenience for delegating method calls to the scanner.
  def self.delegate(sym)
    define_method(sym) { |*a| @text.__send__(sym, *a) }
  end

  delegate :bol?
  delegate :eos?
  delegate :scan
  delegate :scan_until
  delegate :check
  delegate :check_until
  delegate :getch
  delegate :matched
  delegate :pre_match
  delegate :peek
  delegate :pos

  # Access the n-th subgroup from the most recent match.
  def subgroup(n)
    @text[n]
  end

  # Append the given data to the currently active chunk. A +nil+ (typically
  # the result of a scan that did not match) is ignored, the same way
  # #start_group ignores it, instead of raising a TypeError.
  def append(data)
    @chunk << data if data
  end

  # Request that a new group be started. If the current group is the same
  # as the group being requested, a new group will not be created. If a new
  # group is created and the current chunk is not empty, the chunk's
  # contents will be yielded to the client as a token, and then cleared.
  #
  # After the new group is started, if +data+ is non-nil it will be appended
  # to the chunk.
  def start_group(gr, data = nil)
    flush_chunk if gr != @group
    @group = gr
    @chunk << data if data
  end

  # Flush the pending chunk, switch to group +gr+ and emit a :region_open
  # token carrying +data+ (or an empty string).
  def start_region(gr, data = nil)
    flush_chunk
    @group = gr
    @callback.call(Token.new(data || "", @group, :region_open))
  end

  # Flush the pending chunk, switch to group +gr+ and emit a :region_close
  # token carrying +data+ (or an empty string).
  def end_region(gr, data = nil)
    flush_chunk
    @group = gr
    @callback.call(Token.new(data || "", @group, :region_close))
  end

  # Yield the accumulated chunk (if any) to the client as a token of the
  # current group, then start a fresh, empty chunk.
  def flush_chunk
    @callback.call(Token.new(@chunk, @group)) unless @chunk.empty?
    @chunk = ""
  end

  # Tokenize +text+ with the tokenizer registered for +syntax+, forwarding
  # its tokens to this tokenizer's callback and sharing this tokenizer's
  # options.
  def subtokenize(syntax, text)
    # Fully qualified: a bare "Syntax" is not visible from inside a class
    # nested in "module HH::Syntax" unless a top-level ::Syntax exists.
    tokenizer = HH::Syntax.load(syntax)
    # The fallback tokenizer returned for unknown syntaxes may not take options.
    tokenizer.set @options if @options && tokenizer.respond_to?(:set)
    flush_chunk
    tokenizer.tokenize(text, &@callback)
  end
end
161
+
162
+
163
# A default tokenizer for handling syntaxes that are not explicitly handled
# elsewhere. It simply yields the given text as a single token.
#
# It also answers #set and #option so that it can stand in wherever a
# tokenizer returned by the syntax loader is configured before use.
class Default

  # Yield the given text as a single token of the :normal group.
  def tokenize(text)
    yield Token.new(text, :normal)
  end

  # Record tokenizer options. The default tokenizer does not act on any
  # option; they are only stored so callers can configure every tokenizer
  # the same way. Returns the merged options hash.
  def set(opts = {})
    (@options ||= {}).update(opts)
  end

  # Get the value of the specified option, or +nil+ when it was never set.
  def option(opt)
    @options ? @options[opt] : nil
  end

end
173
+
174
+ # A hash for registering syntax implementations.
175
+ SYNTAX = Hash.new( Default )
176
+
177
# Load the implementation of the requested syntax. If the syntax cannot be
# found, or if it cannot be loaded for whatever reason, the Default syntax
# handler will be returned.
#
# +syntax+ may be a String or a Symbol. Implementations are looked up under
# the exact key first and then under the name's String form, so a Symbol
# such as :ruby finds the handler registered as "ruby".
def load(syntax)
  name = syntax.to_s
  begin
    require_relative "lang/#{name}"
  rescue LoadError
    # Deliberate best effort: a missing language file simply means the
    # lookup below falls back to whatever SYNTAX already holds.
  end
  SYNTAX.fetch(syntax) { SYNTAX[name] }.new
end
187
+ module_function :load
188
+
189
# Return an array of the names of supported syntaxes: one entry per Ruby
# file in the "lang" directory next to this file, which is the directory
# #load requires language implementations from. Names carry no ".rb"
# extension.
def all
  lang_dir = File.join(File.dirname(__FILE__), "lang")
  Dir.glob(File.join(lang_dir, "*.rb")).map { |path| File.basename(path, ".rb") }
end
194
+ module_function :all
195
+
196
+
197
+ end
@@ -0,0 +1,316 @@
1
+
2
+ module HH::Syntax
3
+
4
# A tokenizer for the Ruby language. It recognizes all common syntax
# (and some less common syntax) but because it is not a true lexer, it
# will make mistakes on some ambiguous cases.
class Ruby < Tokenizer

  # The list of all identifiers recognized as keywords.
  KEYWORDS =
    %w{if then elsif else end begin do rescue ensure while for
       class module def yield raise until unless and or not when
       case super undef break next redo retry in return alias
       defined?}

  # Closing delimiter for each bracket-style opening delimiter; any other
  # delimiter closes with itself.
  CLOSING_DELIMITERS = { '{' => '}', '(' => ')', '[' => ']', '<' => '>' }

  # Lookahead patterns. StringScanner#peek counts bytes, so it can return
  # half of a multibyte character, and matching such a fragment against a
  # Regexp raises ArgumentError (invalid byte sequence). Looking ahead with
  # #check and these patterns always stops on a character boundary.
  ONE_CHAR  = /./m
  TWO_CHARS = /.{1,2}/m

  # Perform ruby-specific setup
  def setup
    @selector = false       # true right after a single "." (method call)
    @allow_operator = false # true when a following "/" is division, not a regex
    @heredocs = []          # heredocs announced on the current line, in order
  end

  # Step through a single iteration of the tokenization process.
  def step
    case
    when bol? && check(/=begin/)
      # An unterminated =begin block swallows the rest of the input. Without
      # the fallback the scanner would not advance and #tokenize would call
      # #step forever.
      start_group(:comment, scan_until(/^=end#{EOL}/) || scan_until(/\Z/))
    when bol? && check(/__END__#{EOL}/)
      start_group(:comment, scan_until(/\Z/))
    else
      case
      when check(/def\s+/)
        start_group :keyword, scan(/def\s+/)
        start_group :method, scan_until(/(?=[;(\s]|#{EOL})/)
      when check(/class\s+/)
        start_group :keyword, scan(/class\s+/)
        start_group :class, scan_until(/(?=[;\s<]|#{EOL})/)
      when check(/module\s+/)
        start_group :keyword, scan(/module\s+/)
        start_group :module, scan_until(/(?=[;\s]|#{EOL})/)
      when check(/::/)
        start_group :punct, scan(/::/)
      when check(/:"/)
        start_group :symbol, scan(/:/)
        scan_delimited_region :symbol, :symbol, "", true
        @allow_operator = true
      when check(/:'/)
        start_group :symbol, scan(/:/)
        scan_delimited_region :symbol, :symbol, "", false
        @allow_operator = true
      when scan(/:[_a-zA-Z@$][$@\w]*[=!?]?/)
        start_group :symbol, matched
        @allow_operator = true
      when scan(/\?(\\[^\n\r]|[^\\\n\r\s])/)
        start_group :char, matched
        @allow_operator = true
      when check(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
        # After a "." or with a trailing ?/! these are method names, not
        # the literal pseudo-constants.
        if @selector || matched.end_with?("?", "!")
          start_group :ident,
            scan(/(__FILE__|__LINE__|true|false|nil|self)[?!]?/)
        else
          start_group :constant,
            scan(/(__FILE__|__LINE__|true|false|nil|self)/)
        end
        @selector = false
        @allow_operator = true
      when scan(/0([bB][01]+|[oO][0-7]+|[dD][0-9]+|[xX][0-9a-fA-F]+)/)
        start_group :number, matched
        @allow_operator = true
      else
        case check(TWO_CHARS).to_s
        when "%r"
          scan_delimited_region :punct, :regex, scan(/../), true
          @allow_operator = true
        when "%w", "%q"
          scan_delimited_region :punct, :string, scan(/../), false
          @allow_operator = true
        when "%s"
          scan_delimited_region :punct, :symbol, scan(/../), false
          @allow_operator = true
        when "%W", "%Q", "%x"
          scan_delimited_region :punct, :string, scan(/../), true
          @allow_operator = true
        when /%[^\sa-zA-Z0-9]/
          scan_delimited_region :punct, :string, scan(/./), true
          @allow_operator = true
        when "<<"
          # Directly after a word character "<<" is treated as the shift /
          # append operator, otherwise as the start of a heredoc.
          saw_word = (chunk[-1, 1] =~ /[\w!?]/)
          start_group :punct, scan(/<</)
          if saw_word
            @allow_operator = false
            return
          end

          # "<<-" and "<<~" both allow an indented terminator.
          float_right = scan(/[-~]/)
          append float_right if float_right
          if (type = scan(/['"]/))
            append type
            delim = scan_until(/(?=#{type})/)
            if delim.nil?
              append scan_until(/\Z/)
              return
            end
          else
            delim = scan(/\w+/)
            return unless delim
          end
          start_group :constant, delim
          start_group :punct, scan(/#{type}/) if type
          # The heredoc body starts at the next line break; remember it.
          @heredocs << [float_right, type, delim]
          @allow_operator = true
        else
          case check(ONE_CHAR).to_s
          when /[\n\r]/
            if @heredocs.empty?
              start_group :normal, scan(/\s+/)
            else
              scan_heredoc(*@heredocs.shift)
            end
            @allow_operator = false
          when /\s/
            start_group :normal, scan(/\s+/)
          when "#"
            start_group :comment, scan(/#[^\n\r]*/)
          when /[A-Z]/
            start_group @selector ? :ident : :constant, scan(/\w+/)
            @allow_operator = true
          when /[a-z_]/
            word = scan(/\w+[?!]?/)
            if !@selector && KEYWORDS.include?(word)
              start_group :keyword, word
              @allow_operator = false
            else
              start_group :ident, word
              @allow_operator = true
            end
            @selector = false
          when /\d/
            start_group :number,
              scan(/[\d_]+(\.[\d_]+)?([eE][\d_]+)?/)
            @allow_operator = true
          when '"'
            scan_delimited_region :punct, :string, "", true
            @allow_operator = true
          when '/'
            if @allow_operator
              start_group :punct, scan(%r{/})
              @allow_operator = false
            else
              scan_delimited_region :punct, :regex, "", true
              @allow_operator = true
            end
          when "'"
            scan_delimited_region :punct, :string, "", false
            @allow_operator = true
          when "."
            dots = scan(/\.{1,3}/)
            start_group :punct, dots
            @selector = (dots.length == 1)
          when /[@]/
            start_group :attribute, scan(/@{1,2}\w*/)
            @allow_operator = true
          when /[$]/
            start_group :global, scan(/\$/)
            start_group :global, scan(/\w+|./) if check(/./)
            @allow_operator = true
          when /[-!?*\/+=<>(\[\{}:;,&|%]/
            start_group :punct, scan(/./)
            @allow_operator = false
          when /[)\]]/
            start_group :punct, scan(/./)
            @allow_operator = true
          else
            # all else just falls through this, to prevent
            # infinite loops...
            char = getch
            append char if char
          end
        end
      end
    end
  end

  private

  # Scan a delimited region of text. This handles the simple cases (strings
  # delimited with quotes) as well as the more complex cases of %-strings
  # and here-documents.
  #
  # * +delim_group+ is the group to use to classify the delimiters of the
  #   region
  # * +inner_group+ is the group to use to classify the contents of the
  #   region
  # * +starter+ is the text to use as the starting delimiter
  # * +exprs+ is a boolean flag indicating whether the region is an
  #   interpolated string or not
  # * +delim+ is the text to use as the delimiter of the region. If +nil+,
  #   the next character will be treated as the delimiter.
  # * +heredoc+ is either +false+, meaning the region is not a heredoc, or
  #   <tt>:flush</tt> (meaning the delimiter must be flushed left), or
  #   <tt>:float</tt> (meaning the delimiter doesn't have to be flush left).
  #
  # Input that ends in the middle of the region (an unterminated string, a
  # trailing backslash, an unclosed interpolation) is emitted as far as it
  # goes instead of raising.
  def scan_delimited_region(delim_group, inner_group, starter, exprs,
                            delim = nil, heredoc = false)
    unless delim
      start_group delim_group, starter
      delim = scan(/./)
      # Nothing on this line follows the opener (end of input or a line
      # break), so there is no region to scan.
      return unless delim
      append delim
      delim = CLOSING_DELIMITERS.fetch(delim, delim)
    end

    start_region inner_group

    # Alternatives that interrupt the region: a backslash, the closing
    # delimiter and, for interpolated regions, the start of an expression.
    items = "\\\\|"
    if heredoc
      items << "(^"
      items << '\s*' if heredoc == :float
      # NOTE: inside this double-quoted literal "\s" is a plain space, so
      # only spaces are tolerated between the terminator and the line end.
      items << "#{Regexp.escape(delim)}\s*?)#{EOL}"
    else
      items << Regexp.escape(delim)
    end
    items << "|#(\\$|@@?|\\{)" if exprs
    items = Regexp.new(items)

    loop do
      match = scan_until(items)
      if match.nil?
        # Unterminated region: everything that is left belongs to it.
        start_group inner_group, scan_until(/\Z/)
        break
      end

      # +match+ runs from the previous position through the end of the
      # match, so dropping the matched suffix leaves the plain text. (Byte
      # positions from #pos must not be used to index strings.)
      text = match[0, match.length - matched.length]
      start_group inner_group, text unless text.empty?

      # A heredoc terminator may be matched with surrounding whitespace;
      # everything else is compared verbatim so that a whitespace delimiter
      # is still recognized.
      found = heredoc ? matched.strip : matched
      case found
      when "\\"
        if exprs
          start_group :escape, "\\"
          escaped = getch
          if escaped # nil when the backslash is the last character
            append escaped
            digits = case escaped
                     when 'x'     then scan(/[a-fA-F0-9]{1,2}/)
                     when /[0-7]/ then scan(/[0-7]{0,2}/)
                     end
            append digits if digits
          end
        else
          # Non-interpolated regions only know \' and \\ as escapes.
          case peek(1)
          when "'"
            scan(/./)
            start_group :escape, "\\'"
          when "\\"
            scan(/./)
            start_group :escape, "\\\\"
          else
            start_group inner_group, "\\"
          end
        end
      when delim
        end_region inner_group
        start_group delim_group, matched
        break
      when /^#/
        do_highlight = (option(:expressions) == :highlight)
        start_region :expr if do_highlight
        start_group :expr, matched
        case matched[1]
        when "{"
          # Collect everything up to the brace that balances the opener.
          depth = 1
          content = ""
          while depth > 0
            piece = scan_until(/[\{}]/)
            if piece.nil?
              content << scan_until(/\Z/)
              break
            end
            depth += (matched == "{" ? 1 : -1)
            content << piece[0, piece.length - matched.length]
            content << matched if depth > 0
          end
          # Only emit the closing brace when the input really had one.
          closer = depth.zero? ? "}" : ""
          if do_highlight
            subtokenize "ruby", content
            start_group :expr, closer
          else
            append content + closer
          end
        when "$", "@"
          # "#$!" and friends have no word characters after the sigil.
          name = scan(/\w+/)
          append name if name
        end
        end_region :expr if do_highlight
      else
        raise "unexpected match on #{matched}"
      end
    end
  end

  # Scan a heredoc beginning at the current position.
  #
  # * +float+ indicates whether the delimiter may be floated to the right
  # * +type+ is +nil+, a single quote, or a double quote
  # * +delim+ is the delimiter to look for
  def scan_heredoc(float, type, delim)
    scan_delimited_region(:constant, :string, "", type != "'",
                          delim, float ? :float : :flush)
  end
end
313
+
314
+ SYNTAX["ruby"] = Ruby
315
+
316
+ end