cgialib 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,481 @@
1
+ # File: CTokenizer.rb
2
+ # Author: Jack Herrington
3
+ # Purpose: A tokenizer specialized to look for C style tokens
4
+ # Date: 12/21/02
5
+
6
+ #require "Tokenizer"
7
+
8
+ module LanguageParser
9
+
10
# class : CT_State
#
# Common ancestor of every state object in the C-tokenizer state machine.
# It keeps the two callbacks it is constructed with and provides a default
# character handler that simply consumes each character.

class CT_State
  # Single-character delimiters recognised in C text. Only key presence
  # matters; the value 1 is just a truthy marker. The table is kept as a
  # class variable because subclasses read it as @@specials, and it is
  # frozen so that no state can accidentally modify the shared table.
  @@specials = {
    ";" => 1, "," => 1, ":" => 1, "{" => 1, "}" => 1,
    "(" => 1, ")" => 1, "[" => 1, "]" => 1, "%" => 1,
    "+" => 1, "-" => 1, "*" => 1, "." => 1
  }.freeze

  # initialize( newstate, addtoken )
  #
  # newstate - Callable invoked with a state class to switch states
  # addtoken - Callable invoked with a token to record it
  #
  # Initializes the state object by storing both callbacks.
  def initialize(newstate, addtoken)
    @newstate = newstate
    @addtoken = addtoken
  end

  # next( ch )
  #
  # ch - The character to handle
  #
  # Every concrete state is expected to override this. A truthy return
  # tells the parser to advance to the next character; a false return
  # tells it to present the same character again. The base version
  # ignores the character and always returns true.
  def next(ch)
    true
  end
end
51
+
52
# class : CT_OldComment
#
# State that collects an old-style C comment ( /* ... */ ) and emits it
# as a single CommentToken once the closing */ has been seen.

class CT_OldComment < CT_State
  # initialize( newstate, addtoken )
  #
  # newstate - Callable invoked with a state class to switch states
  # addtoken - Callable invoked with a token to record it
  #
  # Initializes the old-style comment state object.
  def initialize(newstate, addtoken)
    super(newstate, addtoken)

    # The buffer is seeded with the opening "/*". String.new gives an
    # explicitly mutable string so characters can be appended in place.
    @text = String.new("/*")

    # True when the previously handled character was a '*'
    @last_was_star = false
  end

  # next( ch )
  #
  # ch - The character to handle
  #
  # Appends the character to the comment text. When a '/' directly
  # follows a '*', the comment is complete: the CommentToken is added and
  # the machine returns to CT_NormalState. Always returns true, so every
  # character is consumed.
  def next(ch)
    # Append in place; rebuilding the string with += for every character
    # would copy the whole comment each time.
    @text << ch

    if ch == "/" && @last_was_star
      @addtoken.call(CommentToken.new(@text))
      @newstate.call(CT_NormalState)
    end

    # Remember whether this character could start the closing "*/"
    @last_was_star = (ch == "*")

    true
  end
end
113
+
114
# class : CT_NewComment
#
# State that collects a new-style C comment ( // to end of line ) and
# emits it as a CommentToken when the newline arrives.

class CT_NewComment < CT_State
  # initialize( newstate, addtoken )
  #
  # newstate - Callable invoked with a state class to switch states
  # addtoken - Callable invoked with a token to record it
  #
  # Seeds the comment text with the leading "//".
  def initialize(newstate, addtoken)
    super(newstate, addtoken)
    @text = "//"
  end

  # next( ch )
  #
  # ch - The character to handle
  #
  # Adds the character to the comment text; a newline (which is kept in
  # the text) finishes the comment. Always returns true, so every
  # character is consumed.
  def next(ch)
    @text += ch
    close_comment if ch == "\n"
    true
  end

  private

  # Adds the collected comment as a token and returns to the normal state.
  def close_comment
    @addtoken.call(CommentToken.new(@text))
    @newstate.call(CT_NormalState)
  end
end
164
+
165
# class : CT_DoubleQuote
#
# State that collects a double-quoted string literal and emits it,
# quotes included, as a single CodeToken.

class CT_DoubleQuote < CT_State
  # initialize( newstate, addtoken )
  #
  # newstate - Callable invoked with a state class to switch states
  # addtoken - Callable invoked with a token to record it
  #
  # Seeds the text with the opening double quote and clears the
  # escape flag.
  def initialize(newstate, addtoken)
    super(newstate, addtoken)

    @text = "\""

    # True only while the previous character was a backslash that
    # itself started an escape sequence
    @escaped = false
  end

  # next( ch )
  #
  # ch - The character to handle
  #
  # Adds the character to the string text. An unescaped double quote
  # ends the literal: the CodeToken is added and the machine returns to
  # CT_NormalState. Always returns true, so every character is consumed.
  def next(ch)
    @text += ch

    if @escaped
      # This character is the target of a backslash escape, so it has no
      # special meaning. In particular an escaped backslash must not
      # escape the character after it, otherwise a literal ending in
      # \\ (such as "a\\") would never be closed.
      @escaped = false
    elsif ch == "\\"
      @escaped = true
    elsif ch == "\""
      @addtoken.call(CodeToken.new(@text))
      @newstate.call(CT_NormalState)
    end

    true
  end
end
226
+
227
# class : CT_WhitespaceTokenizer
#
# State that gathers a run of consecutive whitespace characters and
# emits the run as one WhitespaceToken.

class CT_WhitespaceTokenizer < CT_State
  # initialize( newstate, addtoken )
  #
  # newstate - Callable invoked with a state class to switch states
  # addtoken - Callable invoked with a token to record it
  #
  # Starts with an empty text buffer.
  def initialize(newstate, addtoken)
    super(newstate, addtoken)
    @text = ""
  end

  # next( ch )
  #
  # ch - The character to handle
  #
  # Whitespace is added to the buffer and consumed (returns true). Any
  # other character ends the run: the WhitespaceToken is added, the
  # machine returns to CT_NormalState, and false is returned so the
  # tokenizer presents the same character again to the new state.
  def next(ch)
    unless ch =~ /\s/
      @addtoken.call(WhitespaceToken.new(@text))
      @newstate.call(CT_NormalState)
      return false
    end

    @text += ch
    true
  end
end
283
+
284
# class : CT_WaitingForComment
#
# State entered right after a '/' has been seen. The following character
# decides whether this is an old-style comment, a new-style comment, or
# just a slash.

class CT_WaitingForComment < CT_State
  # next( ch )
  #
  # ch - The character that follows the slash
  #
  # Returns true when the character was consumed as part of a comment
  # opener, and false when it still has to be handled by the normal state.
  def next(ch)
    case ch
    when "*"
      # "/*" - the star belongs to the comment opener
      @newstate.call(CT_OldComment)
      true
    when "/"
      # "//" - the second slash belongs to the comment opener
      @newstate.call(CT_NewComment)
      true
    else
      # It was just a slash. Emit it and hand this character back to the
      # normal state. Returning false is required here: falling through
      # would return the truthy result of the state change, the parser
      # would advance, and this character would be dropped.
      @addtoken.call(CodeToken.new("/"))
      @newstate.call(CT_NormalState)
      false
    end
  end
end
321
+
322
# class : CT_NormalState
#
# The default state of the machine, to which the other states return.
# It accumulates ordinary code text and hands off to a specialised state
# when it meets a quote, a slash or whitespace.

class CT_NormalState < CT_State
  # initialize( newstate, addtoken )
  #
  # newstate - Callable invoked with a state class to switch states
  # addtoken - Callable invoked with a token to record it
  #
  # Starts with an empty buffer for code text found outside strings
  # and comments.
  def initialize(newstate, addtoken)
    super(newstate, addtoken)
    @text = ""
  end

  # next( ch )
  #
  # ch - The character to handle
  #
  # Returns true when the character was consumed. Returns false only for
  # whitespace, so that the whitespace state receives the same character.
  def next(ch)
    # Special character (e.g. ; , * +): emit the buffered text, reset the
    # buffer, then emit the special character as its own token.
    if @@specials[ch]
      emit_pending
      @text = ""
      @addtoken.call(CodeToken.new(ch))
      return true
    end

    # Opening double quote: emit the buffered text and switch to the
    # string state. The quote itself is consumed.
    if ch == "\""
      emit_pending
      @newstate.call(CT_DoubleQuote)
      return true
    end

    # Slash: emit the buffered text and let the comment switcher decide
    # what the slash means. The slash itself is consumed.
    if ch == "/"
      emit_pending
      @newstate.call(CT_WaitingForComment)
      return true
    end

    # Whitespace: emit the buffered text and switch to the whitespace
    # state without consuming the character.
    if ch =~ /\s/
      emit_pending
      @newstate.call(CT_WhitespaceTokenizer)
      return false
    end

    # Anything else is part of the current code token.
    @text += ch
    true
  end

  private

  # Passes the buffered text to the add-token callback as a CodeToken.
  def emit_pending
    @addtoken.call(CodeToken.new(@text))
  end
end
406
+
407
# class : CTokenizer
#
# The main entry class: drives the state machine over C text and
# collects the tokens the states produce.
#
# NOTE(review): @tokens is not set up anywhere in this class; presumably
# the Tokenizer superclass initializes it - confirm.

class CTokenizer < Tokenizer
  # parse( text )
  #
  # text - The C text
  #
  # Feeds the text to the state machine one character at a time, starting
  # in the normal state. A state that returns a false value is given the
  # same character again (by then the current state may have changed).
  #
  # NOTE(review): nothing is flushed after the loop, so text still
  # buffered in the active state when the input ends (for example a last
  # identifier with no delimiter after it) is not added as a token.
  def parse(text)
    @state = CT_NormalState.new(method(:newstate), method(:addtoken))

    position = 0
    until position >= text.length
      consumed = @state.next(text[position].chr)
      position += 1 if consumed
    end
  end

  protected

  # newstate( classref )
  #
  # classref - The class of the state to switch to
  #
  # Instantiates the given state class with this tokenizer's callbacks
  # and makes it the current state. Returns the new state object.
  def newstate(classref)
    @state = classref.new(method(:newstate), method(:addtoken))
  end

  # addtoken( token )
  #
  # token - The new token
  #
  # Appends the token to the token list, unless its string form is
  # empty, in which case it is ignored.
  def addtoken(token)
    @tokens.push(token) unless token.to_s.length < 1
  end
end
481
+ end