cgialib 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,481 @@
1
+ # File: CTokenizer.rb
2
+ # Author: Jack Herrington
3
+ # Purpose: A tokenizer specialized to look for C style tokens
4
+ # Date: 12/21/02
5
+
6
+ #require "Tokenizer"
7
+
8
+ module LanguageParser
9
+
10
+ # class : CT_State
11
+ #
12
+ # The base class state object for the C-Tokenizer state machine.
13
+
14
class CT_State
  # Base class for every state object in the C tokenizer state machine.
  # A state is handed characters one at a time through #next and uses the
  # two callables given to the constructor to emit tokens and to request
  # a switch to another state.

  # Special characters that are found as delineators in C. The hash is
  # used purely as a lookup table, so it is frozen. It stays a class
  # variable because CT_NormalState reads it as @@specials.
  @@specials = {
    ";" => 1, "," => 1, ":" => 1, "{" => 1, "}" => 1,
    "(" => 1, ")" => 1, "[" => 1, "]" => 1, "%" => 1,
    "+" => 1, "-" => 1, "*" => 1, "." => 1
  }.freeze

  # initialize( newstate, addtoken )
  #
  # newstate - A callable invoked with a state class to change state
  # addtoken - A callable invoked with a token object to record it
  #
  # Initializes the state object by remembering both callables.
  def initialize(newstate, addtoken)
    @newstate = newstate
    @addtoken = addtoken
  end

  # next( ch )
  #
  # ch - The character
  #
  # All states should override this method. Returning true tells the
  # tokenizer to move on to the next character; returning false tells it
  # to dispatch the same character again (normally after a state change).
  # The base implementation simply consumes the character.
  def next(ch)
    true
  end
end
51
+
52
+ # class : CT_OldComment
53
+ #
54
+ # Handles parsing an old-style C comment (e.g. /* ... */ )
55
+
56
class CT_OldComment < CT_State
  # State that accumulates an old-style C comment ( /* ... */ ) and emits
  # it as a single CommentToken once the closing */ has been seen.

  # initialize( newstate, addtoken )
  #
  # newstate - A callable invoked with a state class to change state
  # addtoken - A callable invoked with a token object to record it
  #
  # Initializes the old-style comment state object.
  def initialize(newstate, addtoken)
    super(newstate, addtoken)

    # The opening /* was consumed before this state was entered, so the
    # buffer starts with it. dup gives this instance its own mutable
    # string to append to.
    @text = "/*".dup

    # True if the previously handled character was a star
    @last_was_star = false
  end

  # next( ch )
  #
  # ch - The character
  #
  # Appends the character to the comment. A '/' that directly follows a
  # '*' closes the comment: the token is emitted and control returns to
  # CT_NormalState. Always returns true (the character is consumed).
  def next(ch)
    # Append in place instead of building a new string per character
    @text << ch

    if ch == "/" && @last_was_star
      @addtoken.call(CommentToken.new(@text))
      @newstate.call(CT_NormalState)
    end

    # Remember whether this character could be the first half of */
    @last_was_star = (ch == "*")

    true
  end
end
113
+
114
+ # class : CT_NewComment
115
+ #
116
+ # State object for new style C comments (e.g. //)
117
+
118
class CT_NewComment < CT_State
  # State that accumulates a new-style C comment ( // ... ) up to and
  # including the terminating newline and emits it as a CommentToken.

  # initialize( newstate, addtoken )
  #
  # newstate - A callable invoked with a state class to change state
  # addtoken - A callable invoked with a token object to record it
  def initialize(newstate, addtoken)
    super(newstate, addtoken)

    # The leading // was consumed before this state was entered. dup
    # gives this instance its own mutable buffer.
    @text = "//".dup
  end

  # next( ch )
  #
  # ch - The character
  #
  # Appends the character to the comment text. On a newline the comment
  # (newline included) is emitted and control returns to CT_NormalState.
  # Always returns true (the character is consumed).
  def next(ch)
    # Append in place instead of building a new string per character
    @text << ch

    if ch == "\n"
      @addtoken.call(CommentToken.new(@text))
      @newstate.call(CT_NormalState)
    end

    true
  end
end
164
+
165
+ # class : CT_DoubleQuote
166
+ #
167
+ # Handles parsing strings
168
+
169
class CT_DoubleQuote < CT_State
  # State that accumulates a double-quoted string literal, honouring
  # backslash escapes, and emits it as a single CodeToken.

  # initialize( newstate, addtoken )
  #
  # newstate - A callable invoked with a state class to change state
  # addtoken - A callable invoked with a token object to record it
  def initialize(newstate, addtoken)
    super(newstate, addtoken)

    # The opening quote was consumed before this state was entered. dup
    # gives this instance its own mutable buffer.
    @text = "\"".dup

    # True when the previously handled character was an unescaped
    # backslash, i.e. the current character is escaped.
    @escaped = false
  end

  # next( ch )
  #
  # ch - The character
  #
  # Appends the character to the string text. An unescaped double quote
  # closes the string: the token is emitted and control returns to
  # CT_NormalState. Always returns true (the character is consumed).
  def next(ch)
    # Append in place instead of building a new string per character
    @text << ch

    if ch == "\"" && !@escaped
      @addtoken.call(CodeToken.new(@text))
      @newstate.call(CT_NormalState)
    end

    # A backslash starts an escape only when it is not itself escaped.
    # Without the !@escaped check the second backslash of "\\" would
    # wrongly escape the closing quote that follows it.
    @escaped = !@escaped && ch == "\\"

    true
  end
end
226
+
227
+ # CT_WhitespaceTokenizer
228
+ #
229
+ # Handles whitespace in the character stream
230
+
231
class CT_WhitespaceTokenizer < CT_State
  # State that gathers a run of whitespace characters and emits it as a
  # single WhitespaceToken when the first non-whitespace character shows
  # up.

  # initialize( newstate, addtoken )
  #
  # newstate - A callable invoked with a state class to change state
  # addtoken - A callable invoked with a token object to record it
  def initialize(newstate, addtoken)
    super(newstate, addtoken)

    # Start with an empty, instance-private mutable buffer
    @text = "".dup
  end

  # next( ch )
  #
  # ch - The character
  #
  # Returns true after absorbing a whitespace character. On anything
  # else the collected whitespace is emitted, control returns to
  # CT_NormalState, and false is returned so the tokenizer dispatches
  # the same character again to the new state.
  def next(ch)
    if ch =~ /\s/
      # Append in place instead of building a new string per character
      @text << ch
      return true
    end

    @addtoken.call(WhitespaceToken.new(@text))
    @newstate.call(CT_NormalState)

    false
  end
end
283
+
284
+ # class : CT_WaitingForComment
285
+ #
286
+ # Handles switching between old comments, new comments, and slashes.
287
+
288
class CT_WaitingForComment < CT_State
  # State entered right after a '/' has been seen in normal code. The
  # next character decides whether that slash opened an old-style
  # comment, a new-style comment, or was just a slash.

  # next( ch )
  #
  # ch - The character
  #
  # Returns true when ch completed a comment opener ( /* or // ) and was
  # therefore consumed. Returns false when the slash stood alone, so the
  # tokenizer dispatches ch again to CT_NormalState instead of losing it.
  def next(ch)
    if ch == "*"
      @newstate.call(CT_OldComment)
      return true
    end

    if ch == "/"
      @newstate.call(CT_NewComment)
      return true
    end

    # It was just a slash: emit it on its own and hand the current
    # character back. The result of @newstate.call must not be used as
    # the return value here; a truthy result would make the tokenizer
    # skip ch.
    @addtoken.call(CodeToken.new("/"))
    @newstate.call(CT_NormalState)

    false
  end
end
321
+
322
+ # class : CT_NormalState
323
+ #
324
+ # The default state machine to which all of the other states return.
325
+
326
class CT_NormalState < CT_State
  # The default state to which all of the other states return. It
  # collects ordinary code characters into CodeTokens and hands off to
  # the string, comment, and whitespace states when their first
  # character appears.

  # initialize( newstate, addtoken )
  #
  # newstate - A callable invoked with a state class to change state
  # addtoken - A callable invoked with a token object to record it
  def initialize(newstate, addtoken)
    super(newstate, addtoken)

    # Buffer for the code token currently being built (i.e. text that is
    # not inside a string, comment, or whitespace run). dup gives this
    # instance its own mutable string.
    @text = "".dup
  end

  # next( ch )
  #
  # ch - The character
  #
  # Returns true when the character was consumed. Returns false only for
  # whitespace, so that the tokenizer dispatches the same character
  # again to the whitespace state.
  #
  # The pending buffer is passed to addtoken even when it is empty;
  # CTokenizer#addtoken ignores tokens whose text is empty.
  def next(ch)
    if @@specials[ch]
      # A special character (e.g. ; , * + etc.) ends the current token
      # and is emitted as a token of its own.
      @addtoken.call(CodeToken.new(@text))
      # Start a fresh buffer; the emitted token keeps the old string.
      @text = "".dup
      @addtoken.call(CodeToken.new(ch))
    elsif ch == "\""
      # A double quote starts the string state.
      @addtoken.call(CodeToken.new(@text))
      @newstate.call(CT_DoubleQuote)
    elsif ch == "/"
      # A slash starts the comment switcher state.
      @addtoken.call(CodeToken.new(@text))
      @newstate.call(CT_WaitingForComment)
    elsif ch =~ /\s/
      # Whitespace starts the whitespace state. Return false so the
      # tokenizer re-runs this character against that new state.
      @addtoken.call(CodeToken.new(@text))
      @newstate.call(CT_WhitespaceTokenizer)
      return false
    else
      # Anything else belongs to the current code token. Append in place
      # instead of building a new string per character.
      @text << ch
    end

    # Continue onto the next character
    true
  end
end
406
+
407
+ # class : CTokenizer
408
+ #
409
+ # The main entry class that parses C text into a set of tokens
410
+
411
class CTokenizer < Tokenizer
  # The main entry class that parses C text into a set of tokens by
  # driving the CT_* state objects one character at a time.

  # parse( text )
  #
  # text - The C text
  #
  # Parses the C text string into tokens. Each character is dispatched
  # to the current state; the position only advances when the state
  # returns true, so a state can switch to another state and have the
  # same character dispatched again by returning false.
  #
  # NOTE(review): when the input ends, any text still buffered inside
  # the current state (for example a trailing identifier or a // comment
  # without a final newline) is not emitted, because the states expose
  # no flush hook. Confirm whether callers rely on this.
  def parse(text)
    # Start out in the normal state
    @state = CT_NormalState.new(method(:newstate), method(:addtoken))

    position = 0

    while position < text.length
      # text[position, 1] is a one-character String whether String#[]
      # with a single index returns an Integer (Ruby 1.8) or a String.
      consumed = @state.next(text[position, 1])
      position += 1 if consumed
    end
  end

  protected

  # newstate( classref )
  #
  # classref - The class of the state to switch to
  #
  # Called when a state requests a change of state. Builds an instance
  # of the given class, wired to this tokenizer's callbacks, and makes
  # it the current state.
  def newstate(classref)
    @state = classref.new(method(:newstate), method(:addtoken))
  end

  # addtoken( token )
  #
  # token - The new token
  #
  # Adds a token to the token list. Tokens whose text is empty are
  # ignored. @tokens is not set up in this class; presumably the
  # Tokenizer base class initializes it - TODO confirm.
  def addtoken(token)
    return if token.to_s.length < 1

    @tokens.push(token)
  end
end
481
+ end