bijou 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. data/ChangeLog.txt +4 -0
  2. data/LICENSE.txt +58 -0
  3. data/README.txt +48 -0
  4. data/Rakefile +105 -0
  5. data/doc/INSTALL.rdoc +260 -0
  6. data/doc/README.rdoc +314 -0
  7. data/doc/releases/bijou-0.1.0.rdoc +60 -0
  8. data/examples/birthday/birthday.rb +34 -0
  9. data/examples/holiday/holiday.rb +61 -0
  10. data/examples/holiday/letterhead.txt +4 -0
  11. data/examples/holiday/signature.txt +9 -0
  12. data/examples/phishing/letter.txt +29 -0
  13. data/examples/phishing/letterhead.txt +4 -0
  14. data/examples/phishing/phishing.rb +21 -0
  15. data/examples/phishing/signature.txt +9 -0
  16. data/examples/profile/profile.rb +46 -0
  17. data/lib/bijou.rb +15 -0
  18. data/lib/bijou/backend.rb +542 -0
  19. data/lib/bijou/cgi/adapter.rb +201 -0
  20. data/lib/bijou/cgi/handler.rb +5 -0
  21. data/lib/bijou/cgi/request.rb +37 -0
  22. data/lib/bijou/common.rb +12 -0
  23. data/lib/bijou/component.rb +108 -0
  24. data/lib/bijou/config.rb +60 -0
  25. data/lib/bijou/console/adapter.rb +167 -0
  26. data/lib/bijou/console/handler.rb +4 -0
  27. data/lib/bijou/console/request.rb +26 -0
  28. data/lib/bijou/context.rb +431 -0
  29. data/lib/bijou/diagnostics.rb +87 -0
  30. data/lib/bijou/errorformatter.rb +322 -0
  31. data/lib/bijou/exception.rb +39 -0
  32. data/lib/bijou/filters.rb +107 -0
  33. data/lib/bijou/httprequest.rb +108 -0
  34. data/lib/bijou/httpresponse.rb +268 -0
  35. data/lib/bijou/lexer.rb +513 -0
  36. data/lib/bijou/minicgi.rb +159 -0
  37. data/lib/bijou/parser.rb +1026 -0
  38. data/lib/bijou/processor.rb +404 -0
  39. data/lib/bijou/prstringio.rb +400 -0
  40. data/lib/bijou/webrick/adapter.rb +174 -0
  41. data/lib/bijou/webrick/handler.rb +32 -0
  42. data/lib/bijou/webrick/request.rb +45 -0
  43. data/script/cgi.rb +25 -0
  44. data/script/console.rb +7 -0
  45. data/script/server.rb +7 -0
  46. data/test/t1.cfg +5 -0
  47. data/test/tc_config.rb +26 -0
  48. data/test/tc_filter.rb +25 -0
  49. data/test/tc_lexer.rb +120 -0
  50. data/test/tc_response.rb +103 -0
  51. data/test/tc_ruby.rb +62 -0
  52. data/test/tc_stack.rb +50 -0
  53. metadata +121 -0
@@ -0,0 +1,513 @@
1
+ #
2
+ # Copyright (c) 2007-2008 Todd Lucas. All rights reserved.
3
+ #
4
+ # lexer.rb - The lexer classes used by the parser
5
+ #
6
+ require 'bijou/common'
7
+ require 'bijou/diagnostics.rb'
8
+
9
+ module Bijou
10
+ module Parse
11
+
12
# Token kinds pushed by the lexers in this file.
#
#   Null     (0) - not pushed by any lexer in this file
#   TagOpen  (1) - "<%", "<%=", "<%!", "<&" or "</%"
#   TagClose (2) - "%>", "&>" or ">"
#   Char     (3) - a single ordinary character
#   String   (4) - a complete quoted string literal
#   Operator (5) - the "=>" operator
class Token
  Null, TagOpen, TagClose, Char, String, Operator = 0, 1, 2, 3, 4, 5
end
20
+
21
+ #
22
+ # Contains operations and state that is shared between the different lexers.
23
+ # This simplifies the process of tracking the line and column numbers of the
24
+ # input files. Also maintains a lookahead list of characters so the lexers
25
+ # don't have to. This list is only used for named tags, which are reserved
26
+ # words, so its length may be constrained.
27
+ #
28
class LexerInput
  # Wraps the input stream shared by the lexers. Tracks the character
  # offset, line and column of the read position, and remembers the most
  # recently read characters so they can be pushed back with #pop.

  MaxLookahead = 15 # Must exceed the longest name used in a named tag (<%name>)

  attr_reader :diagnostics, :file, :column
  attr_accessor :character, :line

  # file        - an IO-like object responding to getc, ungetc and close.
  # diagnostics - the collector the lexers report warnings and errors to.
  def initialize(file, diagnostics)
    @diagnostics = diagnostics
    @file = file
    @character = 0
    @line = 1
    @column = 1
    @lookahead = []
    @columns = []
  end

  # Closes the underlying stream.
  def close
    @file.close
  end

  # Reads one character from the stream, updating the position counters and
  # the lookahead list. Returns whatever the stream's getc returns (nil at
  # end of input).
  def getc
    @character += 1
    ch = @file.getc
    if newline?(ch)
      @line += 1
      # Remember where the finished line ended so ungetc can restore it.
      @columns.push(@column)
      @columns.shift if @columns.length > MaxLookahead
      @column = 1
    elsif tab?(ch)
      @column += 2 # REVIEW: We assume tabs of two spaces
    else
      @column += 1
    end
    @lookahead.push(ch)
    @lookahead.shift if @lookahead.length > MaxLookahead
    ch
  end

  # Pushes +ch+ back onto the stream and rewinds the position counters.
  # Raises RuntimeError when a newline is pushed back but no saved column is
  # available for the previous line.
  def ungetc(ch)
    if newline?(ch)
      @line -= 1
      raise "column lookahead underflow" if @columns.empty?
      @column = @columns.pop
    else
      # Mirror the advance made by getc so the column does not drift after
      # backtracking; never step left of the first column.
      width = tab?(ch) ? 2 : 1
      @column = [@column - width, 1].max
    end
    @character -= 1
    @file.ungetc(ch)
  end

  # Pop a character from the lookahead list back to the file stream.
  # Raises RuntimeError when the lookahead list is empty.
  def pop
    raise "lookahead underflow" if @lookahead.empty?
    ungetc(@lookahead.pop)
  end

  private

  # getc yields an Integer code on Ruby 1.8 and a one-character String on
  # later versions; accept both so line tracking works on either.
  def newline?(ch)
    ch == 10 || ch == "\n"
  end

  def tab?(ch)
    ch == 9 || ch == "\t"
  end
end
111
+
112
+ #
113
+ # The base class shared by the lexers. It encapsulates common functionality,
114
+ # such as token management and error reporting.
115
+ #
116
class Lexer
  # Common lexer machinery: character access delegated to the shared input,
  # diagnostics reporting, and a one-token backtracking buffer (LL(1)).

  attr_reader :input, :token, :text, :prev_token

  # input - the shared input object; it must respond to getc, ungetc, pop,
  #         line, column and diagnostics.
  def initialize(input)
    @input = input
    @token = nil
    @text = ''
    # One token lookahead for LL(1)
    @prev_token = nil
    @prev_text = ''
    @next_token = nil
    @next_text = ''
  end

  # The current line number being scanned, used for diagnostics.
  def line
    @input.line
  end

  # The current column number being scanned, used for diagnostics.
  def column
    @input.column
  end

  # Returns the next character from the input stream.
  def getc()
    @input.getc
  end

  # Used for backtracking, puts the given character back into the input
  # stream.
  def ungetc(ch)
    @input.ungetc(ch)
  end

  # Used for backtracking, puts the most recently read character back into
  # the input stream. The character was buffered by the input when getc was
  # called.
  def pop()
    @input.pop
  end

  # Stamps message +m+ with a position, defaulting to the current line and
  # column when +l+ or +c+ is not given. Returns +m+.
  def diagnostic(m, l, c)
    l ||= line
    c ||= column
    m.at(l, c)
    m
  end

  # Records a warning with text +s+ at line +l+, column +c+ (defaults: the
  # current position).
  def warning(s, l=nil, c=nil)
    m = Bijou::Parse::Warning.new
    m << s
    @input.diagnostics.add_warning(diagnostic(m, l, c))
  end

  # Records an error with text +s+ at line +l+, column +c+ (defaults: the
  # current position). There is a single definition so the optional position
  # arguments are honoured; a second one-argument definition would replace
  # this one and reject explicit positions.
  def error(s, l=nil, c=nil)
    m = Bijou::Parse::Error.new
    m << s
    @input.diagnostics.add_error(diagnostic(m, l, c))
  end

  def warnings()
    @input.diagnostics.warnings
  end

  def errors()
    @input.diagnostics.errors
  end

  # Makes (token, text) the current token, remembering the previous one and
  # clearing any buffered token. Returns +token+.
  def push_token(token, text)
    @prev_token = @token
    @prev_text = @text
    @token = token
    @text = text
    @next_token = nil
    @next_text = ''
    @token
  end

  # Used for backtracking by one token: the current token becomes the
  # buffered token and the previous token becomes current again.
  def pop_token
    @next_token = @token
    @next_text = @text
    @token = @prev_token
    @text = @prev_text
    @prev_token = nil
    @prev_text = ''
    @token
  end

  # Used after a pop operation; re-delivers the buffered token.
  def shift_token
    push_token @next_token, @next_text
  end

  # The buffered token, or nil when nothing is buffered.
  def peek_token
    @next_token
  end

  def is_token_buffered
    @next_token ? true : false
  end

  # The base implementation only re-delivers a buffered token; subclasses
  # override this to scan the input.
  def next_token
    is_token_buffered ? shift_token : nil
  end

  # Scans the remainder of a quoted string whose opening quote has already
  # been consumed.
  #
  # startToken - the opening quote text; it is copied, never modified.
  # double     - true when the string is closed by '"', false for "'".
  #
  # Returns the result of pushing a Token::String whose text is the raw
  # string including both quotes, or nil (after recording a warning) when
  # the input ends before the closing quote.
  def set_string_token(startToken, double)
    # Position at which scanning of the string body begins, so an
    # unterminated literal is reported where it starts rather than at the
    # end of the input.
    start_line = line
    start_column = column
    closing = double ? '"' : "'"

    buf = startToken.dup # Raw string

    while (ch0 = getc)
      ch = ch0.chr
      buf << ch

      if ch == "\\"
        # Ignore the next character, but preserve it.
        escaped = getc
        break unless escaped # input ended right after the backslash
        buf << escaped.chr
      elsif ch == closing
        return push_token(Token::String, buf)
      end
    end

    warning("unterminated string literal", start_line, start_column)
    nil
  end
end
276
+
277
class TextLexer < Lexer
  # Lexer for text outside of tags. It emits one Token::Char per ordinary
  # character and a Token::TagOpen for each tag-opening sequence. When
  # tokenize_arguments is true it additionally emits Token::String for
  # quoted strings and Token::Operator for "=>".

  attr_accessor :tokenize_arguments

  def initialize(input)
    super
    @tokenize_arguments = false
  end

  # Returns the kind of the next token (its text is available via #text),
  # or nil at end of input. A token buffered by pop_token is delivered
  # first.
  def next_token()
    return shift_token if is_token_buffered

    ch0 = getc
    return nil unless ch0
    ch = ch0.chr

    case ch
    when '"'
      return set_string_token('"', true) if @tokenize_arguments
    when "'"
      return set_string_token("'", false) if @tokenize_arguments
    when '='
      if @tokenize_arguments && follows?('>')
        return push_token(Token::Operator, "=>")
      end
    when '<'
      tag = scan_tag_open
      return push_token(Token::TagOpen, tag) if tag
    end

    push_token(Token::Char, ch)
  end

  private

  # Consumes the next character and returns true when it equals +expected+.
  # Otherwise the character is pushed back, so it is not lost, and false is
  # returned. Nothing is pushed back at end of input.
  def follows?(expected)
    nxt = getc
    return false unless nxt
    return true if nxt.chr == expected
    ungetc(nxt)
    false
  end

  # Called after '<' has been read. Returns the text of the tag-open
  # sequence ("<%=", "<%!", "<%", "<&" or "</%") or nil when '<' does not
  # start a tag; in the nil case every character read here is pushed back.
  def scan_tag_open
    ch1 = getc
    return nil unless ch1 # End of stream.

    case ch1.chr
    when '%'
      ch2 = getc
      if ch2
        return "<%=" if ch2.chr == '='
        return "<%!" if ch2.chr == '!'
        ungetc(ch2)
      end
      return "<%"
    when '&'
      return "<&"
    when '/'
      ch2 = getc
      if ch2
        return "</%" if ch2.chr == '%'
        ungetc(ch2)
      end
    end

    ungetc(ch1)
    nil
  end
end
341
+
342
# Kinds of tag, identified by their delimiters.
#
#   Directive (0) - <%! ... %>
#   Output    (1) - <%= ... %>
#   Inline    (2) - <% ... %>
#   Named     (3) - <%name ... > or </%name ... >
#   Call      (4) - <& name ... >
class TagType
  Directive, Output, Inline, Named, Call = 0, 1, 2, 3, 4
end
349
+
350
class TagLexer < Lexer
  # Lexer for the inside of a tag. It emits Token::String for quoted
  # strings, Token::TagClose for tag-closing sequences, Token::Operator for
  # "=>" (only when tokenize_arguments is true) and Token::Char for every
  # other character.

  attr_accessor :tokenize_arguments

  def initialize(input)
    super(input)
    @startToken = nil
    @tagType = nil
    @tokenize_arguments = false
  end

  # Reads tokens from @stringLexer until a Token::String arrives, passes the
  # unquoted string and its type to @backend.tagString, and returns the raw
  # (quoted) string. Returns nil if the tokens run out first.
  #
  # NOTE(review): @stringLexer and @backend are not assigned anywhere in
  # this class, so as written this method appears unusable unless they are
  # set by some other means -- confirm before relying on it.
  def parse_string(stringType)
    @lexer = @stringLexer
    @lexer.type = stringType

    while tok = @lexer.next_token
      if tok == Token::String
        @backend.tagString(@lexer.unquoted_string, @lexer.type)
        return @lexer.raw_string
      end
    end
  end

  # Records the opening token text and the TagType of the tag about to be
  # scanned; the tag type decides whether a bare '>' closes the tag.
  def start(tok, type)
    @startToken = tok
    @tagType = type
  end

  # Returns the kind of the next token (its text is available via #text),
  # or nil at end of input. A token buffered by pop_token is delivered
  # first.
  def next_token()
    return shift_token if is_token_buffered

    ch0 = getc
    return nil unless ch0
    ch = ch0.chr

    case ch
    when '"'
      return set_string_token('"', true)
    when "'"
      return set_string_token("'", false)
    when '='
      if @tokenize_arguments && follows?('>')
        return push_token(Token::Operator, "=>")
      end
    when '%'
      # Accepted whatever the opening token was; no check against
      # @startToken is made here.
      return push_token(Token::TagClose, "%>") if follows?('>')
    when '&'
      # Likewise accepted without checking that the tag opened with '<&'.
      return push_token(Token::TagClose, "&>") if follows?('>')
    when '>'
      if @tagType == TagType::Named || @tagType == TagType::Directive
        return push_token(Token::TagClose, ">")
      end
      # For inline and calls, we ignore this case, because it may
      # be '=>' '>=', etc., which are valid expressions.
      # REVIEW: This can cause problems.
      # BUGBUG: When parsing <%init>, for example, we haven't set the
      # token type because we don't know it yet.
    end

    push_token(Token::Char, ch)
  end

  private

  # Consumes the next character and returns true when it equals +expected+.
  # Otherwise the character is pushed back, so it is not lost, and false is
  # returned. Nothing is pushed back at end of input.
  def follows?(expected)
    nxt = getc
    return false unless nxt
    return true if nxt.chr == expected
    ungetc(nxt)
    false
  end
end
439
+
440
+ #
441
+ # The string lexer is used to switch the parser into a mode in which it can
442
+ # intelligently ignore (or process) Ruby strings, including strings which
443
+ # have embedded tag close sequences like '>' and '%>'.
444
+ #
445
class StringLexer < Lexer
  # Scans the body of a quoted string one character at a time, accumulating
  # the unquoted contents until the closing quote for the selected type is
  # reached.

  # Quote style of the string being scanned.
  class Type
    Single = 1
    Double = 2
  end

  attr_reader :type

  def initialize(input)
    super(input)
    @string = ""
  end

  # Selects the quote style and starts a fresh string.
  def type=(t)
    @type = t
    @string = '' # reinitialize
  end

  # The characters accumulated so far, without the surrounding quotes.
  def unquoted_string
    @string
  end

  # The accumulated characters wrapped in the quote style selected by #type.
  def raw_string
    if type == StringLexer::Type::Double
      "\"#{@string}\""
    else
      "'#{@string}'"
    end
  end

  # Appends the text of every Token::Char to the accumulated string before
  # delegating to the base class.
  def push_token(token, text)
    @string << text if token == Token::Char
    super(token, text)
  end

  # Returns Token::String when the closing quote for the current type is
  # read, Token::Char for any other character, or nil at end of input. A
  # backslash and the character after it are both accumulated as ordinary
  # characters, so an escaped quote does not end the string.
  def next_token()
    return shift_token if is_token_buffered

    ch0 = getc
    return nil unless ch0
    ch = ch0.chr

    if ch == "\\"
      backslash = push_token(Token::Char, ch)
      escaped = getc
      # Input ended right after the backslash: report the backslash alone
      # instead of failing on a nil character.
      return backslash unless escaped
      return push_token(Token::Char, escaped.chr)
    end

    closing = (ch == '"' && @type == Type::Double) ||
              (ch == "'" && @type == Type::Single)
    push_token(closing ? Token::String : Token::Char, ch)
  end
end
511
+
512
+ end # module Parse
513
+ end # module Bijou