bijou 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/ChangeLog.txt +4 -0
- data/LICENSE.txt +58 -0
- data/README.txt +48 -0
- data/Rakefile +105 -0
- data/doc/INSTALL.rdoc +260 -0
- data/doc/README.rdoc +314 -0
- data/doc/releases/bijou-0.1.0.rdoc +60 -0
- data/examples/birthday/birthday.rb +34 -0
- data/examples/holiday/holiday.rb +61 -0
- data/examples/holiday/letterhead.txt +4 -0
- data/examples/holiday/signature.txt +9 -0
- data/examples/phishing/letter.txt +29 -0
- data/examples/phishing/letterhead.txt +4 -0
- data/examples/phishing/phishing.rb +21 -0
- data/examples/phishing/signature.txt +9 -0
- data/examples/profile/profile.rb +46 -0
- data/lib/bijou.rb +15 -0
- data/lib/bijou/backend.rb +542 -0
- data/lib/bijou/cgi/adapter.rb +201 -0
- data/lib/bijou/cgi/handler.rb +5 -0
- data/lib/bijou/cgi/request.rb +37 -0
- data/lib/bijou/common.rb +12 -0
- data/lib/bijou/component.rb +108 -0
- data/lib/bijou/config.rb +60 -0
- data/lib/bijou/console/adapter.rb +167 -0
- data/lib/bijou/console/handler.rb +4 -0
- data/lib/bijou/console/request.rb +26 -0
- data/lib/bijou/context.rb +431 -0
- data/lib/bijou/diagnostics.rb +87 -0
- data/lib/bijou/errorformatter.rb +322 -0
- data/lib/bijou/exception.rb +39 -0
- data/lib/bijou/filters.rb +107 -0
- data/lib/bijou/httprequest.rb +108 -0
- data/lib/bijou/httpresponse.rb +268 -0
- data/lib/bijou/lexer.rb +513 -0
- data/lib/bijou/minicgi.rb +159 -0
- data/lib/bijou/parser.rb +1026 -0
- data/lib/bijou/processor.rb +404 -0
- data/lib/bijou/prstringio.rb +400 -0
- data/lib/bijou/webrick/adapter.rb +174 -0
- data/lib/bijou/webrick/handler.rb +32 -0
- data/lib/bijou/webrick/request.rb +45 -0
- data/script/cgi.rb +25 -0
- data/script/console.rb +7 -0
- data/script/server.rb +7 -0
- data/test/t1.cfg +5 -0
- data/test/tc_config.rb +26 -0
- data/test/tc_filter.rb +25 -0
- data/test/tc_lexer.rb +120 -0
- data/test/tc_response.rb +103 -0
- data/test/tc_ruby.rb +62 -0
- data/test/tc_stack.rb +50 -0
- metadata +121 -0
data/lib/bijou/lexer.rb
ADDED
@@ -0,0 +1,513 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2007-2008 Todd Lucas. All rights reserved.
|
3
|
+
#
|
4
|
+
# lexer.rb - The lexer classes used by the parser
|
5
|
+
#
|
6
|
+
require 'bijou/common'
|
7
|
+
require 'bijou/diagnostics.rb'
|
8
|
+
|
9
|
+
module Bijou
|
10
|
+
module Parse
|
11
|
+
|
12
|
+
# Token type constants returned by the lexers' next_token methods.
class Token
  Null = 0     # absence of a token
  TagOpen = 1  # an opening tag sequence: <%, <%=, <%!, <&, </%
  TagClose = 2 # a closing tag sequence: %>, &>, >
  Char = 3     # a single plain character
  String = 4   # a quoted string literal (raw text, including quotes)
  Operator = 5 # an operator token, e.g. =>
end
|
20
|
+
|
21
|
+
#
|
22
|
+
# Contains operations and state that is shared between the different lexers.
|
23
|
+
# This simplifies the process of tracking the line and column numbers of the
|
24
|
+
# input files. Also maintains a lookahead list of characters so the lexers
|
25
|
+
# don't have to. This list is only used for named tags, which are reserved
|
26
|
+
# words, so its length may be constrained.
|
27
|
+
#
|
28
|
+
class LexerInput
  MaxLookahead = 15 # Must be > than max name in named tags (<%name>)

  # file - the input stream (anything responding to getc/ungetc/close).
  # diagnostics - the warning/error collector shared with the lexers.
  def initialize(file, diagnostics)
    @diagnostics = diagnostics
    @file = file
    @character = 0  # absolute count of characters consumed
    @line = 1
    @column = 1
    @lookahead = [] # ring of recently read chars, so callers can backtrack via pop
    @columns = []   # column at each recent line end, restored by ungetc(newline)
  end

  def close
    @file.close
  end

  attr_reader :diagnostics, :file

  # Current position, writable so a caller can resynchronize it.
  attr_accessor :character, :line
  attr_reader :column

  # Reads one character, updating line/column/lookahead bookkeeping.
  # Returns whatever the underlying stream's getc returns (an Integer
  # code under Ruby 1.8, which this class assumes: newline == 10,
  # tab == 9), or nil at end of stream.
  def getc()
    ch = @file.getc
    # BUGFIX: at EOF this previously incremented @character/@column and
    # pushed nil onto @lookahead, corrupting position and backtracking
    # state. Leave all state untouched and just report end of stream.
    return nil if ch.nil?
    @character += 1
    #puts "getc #{@character}: #{ch.chr}"
    if ch == 10
      @line += 1
      @columns.push(@column)
      @columns.shift if @columns.length > MaxLookahead
      @column = 1
    elsif ch == 9
      @column += 2 # REVIEW: We assume tabs of two spaces
    else
      @column += 1
    end
    @lookahead.push(ch)
    @lookahead.shift if @lookahead.length > MaxLookahead
    return ch
  end

  # Puts ch back into the stream and rewinds the position counters.
  # NOTE(review): @column is only restored when ch is a newline; after
  # ungetc of an ordinary character the column stays one too high —
  # confirm callers don't rely on exact columns while backtracking.
  def ungetc(ch)
    if ch == 10
      @line -= 1
      if @columns.length > 0
        @column = @columns.pop
      else
        raise "column lookahead underflow"
      end
    end
    @character -= 1
    @file.ungetc(ch)
  end

  # Pop a character from the lookahead list back to the file stream.
  def pop
    if @lookahead.length > 0
      ungetc(@lookahead.pop)
    else
      raise "lookahead underflow"
    end
  end
end
|
111
|
+
|
112
|
+
#
|
113
|
+
# The base class shared by the lexers. It encapsulates common functionality,
|
114
|
+
# such as token management and error reporting.
|
115
|
+
#
|
116
|
+
class Lexer
  attr_reader :input

  # input - a LexerInput supplying characters and position tracking.
  def initialize(input)
    @input = input
    @token = nil # current token type (one of the Token constants)
    @text = ''   # text of the current token
    # One token of lookahead/backtracking state for LL(1) parsing.
    @prev_token = nil
    @prev_text = ''
    @next_token = nil
    @next_text = ''
  end

  # The current token type and its text.
  attr_reader :token, :text

  # The previously accepted token (one step of history).
  attr_reader :prev_token

  # The current line number being scanned, used for diagnostics.
  def line
    @input.line
  end

  # The current column number being scanned, used for diagnostics.
  def column
    @input.column
  end

  # Returns the next character from the input stream.
  def getc()
    @input.getc
  end

  # Used for backtracking; puts the most recently removed character
  # back into the input stream.
  def ungetc(ch)
    @input.ungetc(ch)
  end

  # Used for backtracking; puts the most recently read character back
  # into the input stream. The character was automatically buffered
  # when getc was called.
  def pop()
    @input.pop
  end

  # Stamps message m with position (l, c), defaulting either coordinate
  # to the current scan position when nil. Returns m.
  def diagnostic(m, l, c)
    l = line unless l
    c = column unless c
    m.at(l, c)
    m
  end

  # Records a warning at (l, c), defaulting to the current position.
  def warning(s, l=nil, c=nil)
    m = Bijou::Parse::Warning.new
    m << s
    @input.diagnostics.add_warning(diagnostic(m, l, c))
  end

  # Records an error at (l, c), defaulting to the current position.
  # BUGFIX: error was previously defined twice; the later one-argument
  # definition silently replaced this one (Ruby redefinition), so any
  # caller passing an explicit position would raise ArgumentError, and
  # errors were filed via add instead of add_error. The two definitions
  # are merged here, keeping the parameterized interface.
  def error(s, l=nil, c=nil)
    m = Bijou::Parse::Error.new
    m << s
    @input.diagnostics.add_error(diagnostic(m, l, c))
  end

  def warnings()
    @input.diagnostics.warnings
  end

  def errors()
    @input.diagnostics.errors
  end

  # Accepts (token, text) as the current token, remembering the previous
  # one so pop_token can back up a single step. Returns the token type.
  def push_token(token, text)
    @prev_token = @token
    @prev_text = @text
    @token = token
    @text = text
    @next_token = nil
    @next_text = ''
    @token
  end

  # Used for backtracking by the one token
  def pop_token
    @next_token = @token
    @next_text = @text
    @token = @prev_token
    @text = @prev_text
    @prev_token = nil
    @prev_text = ''
    @token
  end

  # Used after a pop operation
  def shift_token
    push_token @next_token, @next_text
  end

  # The buffered (popped) token, if any, without consuming it.
  def peek_token
    @next_token
  end

  def is_token_buffered
    @next_token ? true : false
  end

  # Re-delivers a popped token; returns nil when nothing is buffered.
  def next_token
    if is_token_buffered
      shift_token
    else
      nil
    end
  end

  # Scans the remainder of a quoted string. startToken is the opening
  # quote already consumed by the caller; double selects "..." vs '...'
  # termination. Pushes a Token::String whose text is the raw literal
  # including quotes; a backslash preserves the following character.
  # Returns nil (after filing a warning) if the input ends first.
  def set_string_token(startToken, double)
    # BUGFIX: previously read @line/@column, which are not instance
    # variables of Lexer (line/column are methods), so the recorded
    # start position was always nil and the warning pointed at the
    # end of input instead of the string's start.
    start_line = line
    start_column = column

    buf = startToken # Raw string

    while ch0 = getc
      ch = ch0.chr
      buf << ch

      if ch == "\\"
        # Ignore the next character, but preserve it.
        # BUGFIX: guard against EOF right after a backslash (nil.chr).
        ch1 = getc
        buf << ch1.chr if ch1
      elsif ch == '"'
        return push_token(Token::String, buf) if double
      elsif ch == "'"
        return push_token(Token::String, buf) if !double
      end
    end

    warning("unterminated string literal", start_line, start_column)
    return nil
  end
end
|
276
|
+
|
277
|
+
# Scans the top-level template text, emitting plain characters and the
# tag-open sequences (<%, <%=, <%!, <&, </%). When tokenize_arguments
# is enabled it additionally recognizes quoted strings and the =>
# operator inside call-argument lists.
class TextLexer < Lexer
  attr_accessor :tokenize_arguments

  def initialize(input)
    super
    @tokenize_arguments = false
  end

  def next_token()
    return shift_token if is_token_buffered

    while ch0 = getc
      ch = ch0.chr

      if ch == '"'
        if @tokenize_arguments
          return set_string_token('"', true)
        end
      elsif ch == "'"
        if @tokenize_arguments
          return set_string_token("'", false)
        end
      elsif ch == '='
        if @tokenize_arguments
          ch1 = getc
          if !ch1
            # End of stream.
            return push_token(Token::Char, ch)
          elsif ch1.chr == '>'
            return push_token(Token::Operator, "=>")
          end
          # BUGFIX: the lookahead character was previously consumed and
          # lost here when it wasn't '>'.
          ungetc(ch1)
        end
      elsif ch == '<'
        ch1 = getc
        if !ch1
          # End of stream.
          return push_token(Token::Char, ch)
        elsif ch1.chr == '%'
          ch2 = getc
          # BUGFIX: ch2.chr previously crashed on nil when the input
          # ended immediately after '<%'.
          if ch2
            if ch2.chr == '='
              return push_token(Token::TagOpen, "<%=")
            elsif ch2.chr == '!'
              return push_token(Token::TagOpen, "<%!")
            end
            ungetc(ch2)
          end
          return push_token(Token::TagOpen, "<%")
        elsif ch1.chr == '&'
          return push_token(Token::TagOpen, "<&")
        elsif ch1.chr == '/'
          ch2 = getc
          # BUGFIX: same EOF guard as above, for input ending after '</'.
          if ch2
            if ch2.chr == '%'
              return push_token(Token::TagOpen, "</%")
            end
            ungetc(ch2)
          end
        end
        ungetc(ch1)
      end
      return push_token(Token::Char, ch)
    end
    nil
  end
end
|
341
|
+
|
342
|
+
# Identifies which kind of template tag the TagLexer is scanning; set
# via TagLexer#start and consulted when deciding whether a bare '>'
# closes the tag (only for Named and Directive tags).
class TagType
  Directive = 0 # <%! ... %>
  Output = 1 # <%= ... %>
  Inline = 2 # <% ... %>
  Named = 3 # <%name ... > or </%name ... >
  Call = 4 # <& name ... >
end
|
349
|
+
|
350
|
+
# Scans the interior of a tag, emitting characters, quoted strings, the
# => operator (when tokenize_arguments is set), and the tag-close
# sequences (%>, &>, and bare > for named/directive tags).
class TagLexer < Lexer
  attr_accessor :tokenize_arguments

  def initialize(input)
    super(input)
    @startToken = nil
    @tagType = nil
    @tokenize_arguments = false
  end

  # NOTE(review): this method references @stringLexer and @backend,
  # which are never assigned anywhere in this class — as written it can
  # only fail with NoMethodError on nil. It appears to be dead code
  # carried over from the parser; kept verbatim pending confirmation.
  def parse_string(stringType)
    @lexer = @stringLexer
    @lexer.type = stringType

    while tok = @lexer.next_token
      # print @lexer.text # print before switching lexers
      if tok == Token::String
        @backend.tagString(@lexer.unquoted_string, @lexer.type)
        return @lexer.raw_string
      end
    end
  end

  # Records the opening token text and TagType of the tag being scanned.
  def start(tok, type)
    @startToken = tok
    @tagType = type
  end

  def next_token()
    return shift_token if is_token_buffered

    while ch0 = getc
      ch = ch0.chr
      #puts "Read: '#{ch}' #{@tagType}"

      if ch == '"'
        return set_string_token('"', true)
      elsif ch == "'"
        return set_string_token("'", false)
      elsif ch == '='
        if @tokenize_arguments
          ch1 = getc
          # BUGFIX: ch1.chr previously crashed on nil at end of stream.
          if ch1 && ch1.chr == '>'
            return push_token(Token::Operator, "=>")
          end
          # BUGFIX: the lookahead character was previously consumed and
          # lost here when it wasn't '>'.
          ungetc(ch1) if ch1
        end
      elsif ch == '%'
        ch1 = getc
        # BUGFIX: EOF guard — ch1.chr previously crashed on nil.
        if ch1
          if ch1.chr == '>'
            return push_token(Token::TagClose, "%>")
          end
          ungetc(ch1)
        end
      elsif ch == '&'
        ch1 = getc
        # BUGFIX: EOF guard — ch1.chr previously crashed on nil.
        if ch1
          if ch1.chr == '>'
            return push_token(Token::TagClose, "&>")
          end
          ungetc(ch1)
        end
      elsif ch == '>'
        if @tagType == TagType::Named ||
           @tagType == TagType::Directive
          return push_token(Token::TagClose, ">")
        else
          # For inline and calls, we ignore this case, because it may
          # be '=>' '>=', etc., which are valid expressions.
          # REVIEW: This can cause problems.
          # BUGBUG: When parsing <%init>, for example, we haven't set the
          # token type because we don't know it yet.
        end
      end
      return push_token(Token::Char, ch)
    end
    nil
  end
end
|
439
|
+
|
440
|
+
#
|
441
|
+
# The string lexer is used to switch the parser into a mode in which it can
|
442
|
+
# intelligently ignore (or process) Ruby strings, including strings which
|
443
|
+
# have embedded tag close sequences like '>' and '%>'.
|
444
|
+
#
|
445
|
+
#
# The string lexer is used to switch the parser into a mode in which it
# can intelligently skip over (or process) Ruby strings, including
# strings with embedded tag-close sequences like '>' and '%>'.
#
class StringLexer < Lexer
  def initialize(input)
    super(input)
    @string = "" # accumulated contents of the current string, sans quotes
  end

  # Which quote style terminates the current string.
  class Type
    Single = 1
    Double = 2
  end

  # Setting the type also resets the accumulated string.
  def type=(t)
    @type = t
    @string = '' # reinitialize
  end

  def type
    @type
  end

  # The string contents without the surrounding quotes.
  def unquoted_string
    @string
  end

  # The string re-wrapped in the quote characters matching its type.
  def raw_string
    if type == StringLexer::Type::Double
      "\"#{@string}\""
    else
      "'#{@string}'"
    end
  end

  # Accumulates plain characters into @string as tokens are pushed,
  # then defers to the base class bookkeeping.
  def push_token(token, text)
    @string << text if token == Token::Char
    super(token, text)
  end

  def next_token()
    return shift_token if is_token_buffered

    while ch0 = getc
      ch = ch0.chr

      if ch == "\\"
        push_token(Token::Char, ch)
        ch1 = getc
        # BUGFIX: previously crashed calling nil.chr when the input
        # ended immediately after a backslash.
        return nil unless ch1
        return push_token(Token::Char, ch1.chr)
      elsif ch == '"'
        if @type == Type::Double
          return push_token(Token::String, ch)
        end
      elsif ch == "'"
        if @type == Type::Single
          return push_token(Token::String, ch)
        end
      end
      return push_token(Token::Char, ch)
    end
    nil
  end
end
|
511
|
+
|
512
|
+
end # module Parse
|
513
|
+
end # module Bijou
|