bijou 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. data/ChangeLog.txt +4 -0
  2. data/LICENSE.txt +58 -0
  3. data/README.txt +48 -0
  4. data/Rakefile +105 -0
  5. data/doc/INSTALL.rdoc +260 -0
  6. data/doc/README.rdoc +314 -0
  7. data/doc/releases/bijou-0.1.0.rdoc +60 -0
  8. data/examples/birthday/birthday.rb +34 -0
  9. data/examples/holiday/holiday.rb +61 -0
  10. data/examples/holiday/letterhead.txt +4 -0
  11. data/examples/holiday/signature.txt +9 -0
  12. data/examples/phishing/letter.txt +29 -0
  13. data/examples/phishing/letterhead.txt +4 -0
  14. data/examples/phishing/phishing.rb +21 -0
  15. data/examples/phishing/signature.txt +9 -0
  16. data/examples/profile/profile.rb +46 -0
  17. data/lib/bijou.rb +15 -0
  18. data/lib/bijou/backend.rb +542 -0
  19. data/lib/bijou/cgi/adapter.rb +201 -0
  20. data/lib/bijou/cgi/handler.rb +5 -0
  21. data/lib/bijou/cgi/request.rb +37 -0
  22. data/lib/bijou/common.rb +12 -0
  23. data/lib/bijou/component.rb +108 -0
  24. data/lib/bijou/config.rb +60 -0
  25. data/lib/bijou/console/adapter.rb +167 -0
  26. data/lib/bijou/console/handler.rb +4 -0
  27. data/lib/bijou/console/request.rb +26 -0
  28. data/lib/bijou/context.rb +431 -0
  29. data/lib/bijou/diagnostics.rb +87 -0
  30. data/lib/bijou/errorformatter.rb +322 -0
  31. data/lib/bijou/exception.rb +39 -0
  32. data/lib/bijou/filters.rb +107 -0
  33. data/lib/bijou/httprequest.rb +108 -0
  34. data/lib/bijou/httpresponse.rb +268 -0
  35. data/lib/bijou/lexer.rb +513 -0
  36. data/lib/bijou/minicgi.rb +159 -0
  37. data/lib/bijou/parser.rb +1026 -0
  38. data/lib/bijou/processor.rb +404 -0
  39. data/lib/bijou/prstringio.rb +400 -0
  40. data/lib/bijou/webrick/adapter.rb +174 -0
  41. data/lib/bijou/webrick/handler.rb +32 -0
  42. data/lib/bijou/webrick/request.rb +45 -0
  43. data/script/cgi.rb +25 -0
  44. data/script/console.rb +7 -0
  45. data/script/server.rb +7 -0
  46. data/test/t1.cfg +5 -0
  47. data/test/tc_config.rb +26 -0
  48. data/test/tc_filter.rb +25 -0
  49. data/test/tc_lexer.rb +120 -0
  50. data/test/tc_response.rb +103 -0
  51. data/test/tc_ruby.rb +62 -0
  52. data/test/tc_stack.rb +50 -0
  53. metadata +121 -0
@@ -0,0 +1,513 @@
1
+ #
2
+ # Copyright (c) 2007-2008 Todd Lucas. All rights reserved.
3
+ #
4
+ # lexer.rb - The lexer classes used by the parser
5
+ #
6
+ require 'bijou/common'
7
+ require 'bijou/diagnostics.rb'
8
+
9
+ module Bijou
10
+ module Parse
11
+
12
# Numeric codes identifying the kind of each token the lexers emit.
# Null is the "no token" sentinel; the rest are produced by next_token.
class Token
  Null, TagOpen, TagClose, Char, String, Operator = (0..5).to_a
end
20
+
21
+ #
22
+ # Contains operations and state that is shared between the different lexers.
23
+ # This simplifies the process of tracking the line and column numbers of the
24
+ # input files. Also maintains a lookahead list of characters so the lexers
25
+ # don't have to. This list is only used for named tags, which are reserved
26
+ # words, so its length may be constrained.
27
+ #
28
#
# Wraps an input stream and tracks character, line, and column positions
# for diagnostics. Also maintains a bounded lookahead buffer of the most
# recently read characters so the lexers can backtrack over named tags
# (reserved words) without managing it themselves.
#
class LexerInput
  MaxLookahead = 15 # Must be > than max name in named tags (<%name>)

  # +file+ is any IO-like object responding to getc/ungetc/close.
  # +diagnostics+ collects the warnings and errors reported while parsing.
  def initialize(file, diagnostics)
    @diagnostics = diagnostics
    @file = file
    @character = 0  # absolute character offset (1-based after first getc)
    @line = 1
    @column = 1
    @lookahead = [] # most recently read characters, newest last
    @columns = []   # column saved at each newline, restored by ungetc
  end

  def close
    @file.close
  end

  attr_reader :diagnostics, :file, :column
  attr_accessor :character, :line

  # Reads one character, updating position bookkeeping and the lookahead
  # buffer. Returns whatever the underlying stream yields: an Integer on
  # Ruby 1.8, a one-character String on 1.9+ (both respond to #chr).
  def getc
    @character += 1
    ch = @file.getc
    # BUG FIX: compare both forms — on Ruby 1.9+ getc returns a String,
    # so the old `ch == 10` / `ch == 9` tests never matched and line and
    # column tracking silently broke.
    if ch == 10 || ch == "\n"
      @line += 1
      @columns.push(@column)
      @columns.shift if @columns.length > MaxLookahead
      @column = 1
    elsif ch == 9 || ch == "\t"
      @column += 2 # REVIEW: We assume tabs of two spaces
    else
      @column += 1
    end
    @lookahead.push(ch)
    @lookahead.shift if @lookahead.length > MaxLookahead
    ch
  end

  # Pushes +ch+ back onto the stream, rewinding the position counters.
  # Raises if a newline is pushed back after the saved-column history has
  # been exhausted. NOTE(review): @column is not decremented for ordinary
  # characters (original behavior, preserved) — confirm callers only rely
  # on line numbers after backtracking.
  def ungetc(ch)
    if ch == 10 || ch == "\n"
      @line -= 1
      if @columns.length > 0
        @column = @columns.pop
      else
        raise "column lookahead underflow"
      end
    end
    @character -= 1
    @file.ungetc(ch)
  end

  # Pop a character from the lookahead list back to the file stream.
  def pop
    if @lookahead.length > 0
      ungetc(@lookahead.pop)
    else
      raise "lookahead underflow"
    end
  end
end
111
+
112
+ #
113
+ # The base class shared by the lexers. It encapsulates common functionality,
114
+ # such as token management and error reporting.
115
+ #
116
#
# The base class shared by the lexers. It encapsulates common functionality,
# such as token management (a one-token lookahead for LL(1) parsing) and
# error reporting through the input's diagnostics collector.
#
class Lexer
  attr_reader :input

  def initialize(input)
    @input = input
    @token = nil
    @text = ''
    # One token lookahead for LL(1)
    @prev_token = nil
    @prev_text = ''
    @next_token = nil
    @next_text = ''
  end

  # Current token code and its raw text.
  attr_reader :token, :text

  # The current line number being scanned, used for diagnostics.
  def line
    @input.line
  end

  # The current column number being scanned, used for diagnostics.
  def column
    @input.column
  end

  # Returns the next character from the input stream.
  def getc
    @input.getc
  end

  # Used for backtracking, puts the most recently removed character
  # back into the input stream.
  def ungetc(ch)
    @input.ungetc(ch)
  end

  # Used for backtracking, puts the most recently removed character
  # back into the input stream. The character was automatically
  # buffered when getc was called.
  def pop
    @input.pop
  end

  # Stamps message +m+ with a position, defaulting to the current one.
  def diagnostic(m, l, c)
    l = line unless l
    c = column unless c
    m.at(l, c)
    m
  end

  def warning(s, l=nil, c=nil)
    m = Bijou::Parse::Warning.new
    m << s
    @input.diagnostics.add_warning(diagnostic(m, l, c))
  end

  # BUG FIX: #error was defined twice in the original; the second
  # definition silently replaced the first and dropped the optional
  # line/column arguments. The two are merged here so explicit positions
  # are honored again, mirroring #warning.
  def error(s, l=nil, c=nil)
    m = Bijou::Parse::Error.new
    m << s
    @input.diagnostics.add_error(diagnostic(m, l, c))
  end

  def warnings
    @input.diagnostics.warnings
  end

  def errors
    @input.diagnostics.errors
  end

  # Makes +token+/+text+ current, remembering the previous pair so one
  # level of backtracking (pop_token) is possible. Clears any buffered
  # lookahead token. Returns the new current token.
  def push_token(token, text)
    @prev_token = @token
    @prev_text = @text
    @token = token
    @text = text
    @next_token = nil
    @next_text = ''
    @token
  end

  # Used for backtracking by the one token
  def pop_token
    @next_token = @token
    @next_text = @text
    @token = @prev_token
    @text = @prev_text
    @prev_token = nil
    @prev_text = ''
    @token
  end

  # Used after a pop operation
  def shift_token
    push_token @next_token, @next_text
  end

  attr_reader :prev_token

  # The buffered lookahead token, if any (set by pop_token).
  def peek_token
    @next_token
  end

  def is_token_buffered
    @next_token ? true : false
  end

  # Re-emits the buffered token if one exists; subclasses override this
  # to scan fresh tokens from the input.
  def next_token
    if is_token_buffered
      shift_token
    else
      nil
    end
  end

  # Scans to the end of a quoted string literal, accumulating the raw
  # text (including the opening quote passed in +startToken+ and any
  # escape sequences). +double+ selects whether a double or single quote
  # terminates the literal. Returns a Token::String token, or nil (after
  # reporting a warning) if EOF is reached first.
  def set_string_token(startToken, double)
    # BUG FIX: was @line/@column, which are never assigned on Lexer and
    # were therefore always nil; use the input's current position.
    start_line = line
    start_column = column

    buf = startToken # Raw string

    while ch0 = getc
      ch = ch0.chr
      buf << ch

      if ch == "\\"
        # Ignore the next character, but preserve it.
        # BUG FIX: guard against EOF immediately after a backslash.
        ch1 = getc
        buf << ch1.chr if ch1
      elsif ch == '"'
        return push_token(Token::String, buf) if double
      elsif ch == "'"
        return push_token(Token::String, buf) unless double
      end
    end

    warning("unterminated string literal", start_line, start_column)
    nil
  end
end
276
+
277
# Lexer for the top-level template text. Emits Char tokens for plain
# text and TagOpen tokens when a tag introducer (<%, <%=, <%!, <&, </%)
# is recognized.
class TextLexer < Lexer
  # When true, quoted strings and the '=>' operator are tokenized
  # (used while scanning component-call argument lists).
  attr_accessor :tokenize_arguments

  def initialize(input)
    super
    @tokenize_arguments = false
  end

  def next_token
    return shift_token if is_token_buffered

    while ch0 = getc
      ch = ch0.chr

      if ch == '"'
        return set_string_token('"', true) if @tokenize_arguments
      elsif ch == "'"
        return set_string_token("'", false) if @tokenize_arguments
      elsif ch == '='
        if @tokenize_arguments
          ch1 = getc
          if !ch1
            # End of stream.
            return push_token(Token::Char, ch)
          elsif ch1.chr == '>'
            return push_token(Token::Operator, "=>")
          end
          # BUG FIX: the lookahead character was previously dropped here,
          # silently swallowing the character after a lone '='.
          ungetc(ch1)
        end
      elsif ch == '<'
        ch1 = getc
        if !ch1
          # End of stream.
          return push_token(Token::Char, ch)
        elsif ch1.chr == '%'
          ch2 = getc
          # BUG FIX: guard against EOF before inspecting the lookahead
          # (ch2.chr raised NoMethodError when '<%' ended the stream).
          if ch2
            return push_token(Token::TagOpen, "<%=") if ch2.chr == '='
            return push_token(Token::TagOpen, "<%!") if ch2.chr == '!'
            ungetc(ch2)
          end
          return push_token(Token::TagOpen, "<%")
        elsif ch1.chr == '&'
          return push_token(Token::TagOpen, "<&")
        elsif ch1.chr == '/'
          ch2 = getc
          if ch2
            return push_token(Token::TagOpen, "</%") if ch2.chr == '%'
            ungetc(ch2)
          end
        end
        ungetc(ch1)
      end
      return push_token(Token::Char, ch)
    end
    nil
  end
end
341
+
342
# Discriminates the flavor of the tag currently being scanned:
#   Directive <%! ... %> | Output <%= ... %> | Inline <% ... %>
#   Named <%name ... > or </%name ... >      | Call <& name ... >
class TagType
  Directive, Output, Inline, Named, Call = (0..4).to_a
end
349
+
350
# Lexer for the interior of tags (<% ... %>, <& ... &>, <%name ... >).
# Recognizes string literals, the '=>' operator, and the various tag
# close sequences depending on the tag type set via #start.
class TagLexer < Lexer
  attr_accessor :tokenize_arguments

  def initialize(input)
    super(input)
    @startToken = nil
    @tagType = nil
    @tokenize_arguments = false
  end

  # NOTE(review): @stringLexer and @backend are never assigned anywhere
  # in this class, so this method appears to be dead code (or depends on
  # a collaborator setting those ivars) — confirm before relying on it.
  # Behavior intentionally left unchanged.
  def parse_string(stringType)
    @lexer = @stringLexer
    @lexer.type = stringType

    while tok = @lexer.next_token
      if tok == Token::String
        @backend.tagString(@lexer.unquoted_string, @lexer.type)
        return @lexer.raw_string
      end
    end
  end

  # Records the token text and TagType that opened the current tag.
  def start(tok, type)
    @startToken = tok
    @tagType = type
  end

  def next_token
    return shift_token if is_token_buffered

    while ch0 = getc
      ch = ch0.chr

      if ch == '"'
        return set_string_token('"', true)
      elsif ch == "'"
        return set_string_token("'", false)
      elsif ch == '='
        if @tokenize_arguments
          ch1 = getc
          # BUG FIX: guard against EOF, and push the lookahead character
          # back when it is not '>' (it was previously dropped).
          if ch1
            return push_token(Token::Operator, "=>") if ch1.chr == '>'
            ungetc(ch1)
          end
        end
      elsif ch == '%'
        ch1 = getc
        # BUG FIX: ch1.chr raised NoMethodError when '%' ended the stream.
        if ch1
          return push_token(Token::TagClose, "%>") if ch1.chr == '>'
          ungetc(ch1)
        end
      elsif ch == '&'
        ch1 = getc
        if ch1
          return push_token(Token::TagClose, "&>") if ch1.chr == '>'
          ungetc(ch1)
        end
      elsif ch == '>'
        if @tagType == TagType::Named ||
           @tagType == TagType::Directive
          return push_token(Token::TagClose, ">")
        end
        # For inline and calls, we ignore this case, because it may
        # be '=>' '>=', etc., which are valid expressions.
        # REVIEW: This can cause problems.
        # BUGBUG: When parsing <%init>, for example, we haven't set the
        # token type because we don't know it yet.
      end
      return push_token(Token::Char, ch)
    end
    nil
  end
end
439
+
440
+ #
441
+ # The string lexer is used to switch the parser into a mode in which it can
442
+ # intelligently ignore (or process) Ruby strings, including strings which
443
+ # have embedded tag close sequences like '>' and '%>'.
444
+ #
445
#
# The string lexer is used to switch the parser into a mode in which it can
# intelligently ignore (or process) Ruby strings, including strings which
# have embedded tag close sequences like '>' and '%>'.
#
class StringLexer < Lexer
  def initialize(input)
    super(input)
    @string = ""
  end

  # Quote flavor of the string literal being scanned.
  class Type
    Single = 1
    Double = 2
  end

  # Selecting a type also resets the accumulated string contents.
  def type=(t)
    @type = t
    @string = '' # reinitialize
  end

  attr_reader :type

  # The accumulated contents, without the surrounding quotes.
  def unquoted_string
    @string
  end

  # The contents re-wrapped in the quote character matching #type.
  def raw_string
    if type == StringLexer::Type::Double
      "\"#{@string}\""
    else
      "'#{@string}'"
    end
  end

  # Accumulates Char tokens into the string buffer as they are emitted,
  # then defers to the base class bookkeeping.
  def push_token(token, text)
    @string << text if token == Token::Char
    super(token, text)
  end

  def next_token
    return shift_token if is_token_buffered

    while ch0 = getc
      ch = ch0.chr

      if ch == "\\"
        # Emit the backslash, then the escaped character verbatim.
        push_token(Token::Char, ch)
        ch1 = getc
        # BUG FIX: ch1.chr raised NoMethodError when EOF immediately
        # followed a backslash.
        return nil unless ch1
        return push_token(Token::Char, ch1.chr)
      elsif ch == '"'
        return push_token(Token::String, ch) if @type == Type::Double
      elsif ch == "'"
        return push_token(Token::String, ch) if @type == Type::Single
      end
      # Non-terminating quotes and ordinary characters are plain chars.
      return push_token(Token::Char, ch)
    end
    nil
  end
end
511
+
512
+ end # module Parse
513
+ end # module Bijou