bijou 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ChangeLog.txt +4 -0
- data/LICENSE.txt +58 -0
- data/README.txt +48 -0
- data/Rakefile +105 -0
- data/doc/INSTALL.rdoc +260 -0
- data/doc/README.rdoc +314 -0
- data/doc/releases/bijou-0.1.0.rdoc +60 -0
- data/examples/birthday/birthday.rb +34 -0
- data/examples/holiday/holiday.rb +61 -0
- data/examples/holiday/letterhead.txt +4 -0
- data/examples/holiday/signature.txt +9 -0
- data/examples/phishing/letter.txt +29 -0
- data/examples/phishing/letterhead.txt +4 -0
- data/examples/phishing/phishing.rb +21 -0
- data/examples/phishing/signature.txt +9 -0
- data/examples/profile/profile.rb +46 -0
- data/lib/bijou.rb +15 -0
- data/lib/bijou/backend.rb +542 -0
- data/lib/bijou/cgi/adapter.rb +201 -0
- data/lib/bijou/cgi/handler.rb +5 -0
- data/lib/bijou/cgi/request.rb +37 -0
- data/lib/bijou/common.rb +12 -0
- data/lib/bijou/component.rb +108 -0
- data/lib/bijou/config.rb +60 -0
- data/lib/bijou/console/adapter.rb +167 -0
- data/lib/bijou/console/handler.rb +4 -0
- data/lib/bijou/console/request.rb +26 -0
- data/lib/bijou/context.rb +431 -0
- data/lib/bijou/diagnostics.rb +87 -0
- data/lib/bijou/errorformatter.rb +322 -0
- data/lib/bijou/exception.rb +39 -0
- data/lib/bijou/filters.rb +107 -0
- data/lib/bijou/httprequest.rb +108 -0
- data/lib/bijou/httpresponse.rb +268 -0
- data/lib/bijou/lexer.rb +513 -0
- data/lib/bijou/minicgi.rb +159 -0
- data/lib/bijou/parser.rb +1026 -0
- data/lib/bijou/processor.rb +404 -0
- data/lib/bijou/prstringio.rb +400 -0
- data/lib/bijou/webrick/adapter.rb +174 -0
- data/lib/bijou/webrick/handler.rb +32 -0
- data/lib/bijou/webrick/request.rb +45 -0
- data/script/cgi.rb +25 -0
- data/script/console.rb +7 -0
- data/script/server.rb +7 -0
- data/test/t1.cfg +5 -0
- data/test/tc_config.rb +26 -0
- data/test/tc_filter.rb +25 -0
- data/test/tc_lexer.rb +120 -0
- data/test/tc_response.rb +103 -0
- data/test/tc_ruby.rb +62 -0
- data/test/tc_stack.rb +50 -0
- metadata +121 -0
data/lib/bijou/lexer.rb
ADDED
@@ -0,0 +1,513 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2007-2008 Todd Lucas. All rights reserved.
|
3
|
+
#
|
4
|
+
# lexer.rb - The lexer classes used by the parser
|
5
|
+
#
|
6
|
+
require 'bijou/common'
|
7
|
+
require 'bijou/diagnostics.rb'
|
8
|
+
|
9
|
+
module Bijou
|
10
|
+
module Parse
|
11
|
+
|
12
|
+
# Enumerates the kinds of token the lexers can produce.
class Token
  Null     = 0 # no token / uninitialized
  TagOpen  = 1 # e.g. <%, <%=, <%!, <&, </%
  TagClose = 2 # e.g. %>, &>, >
  Char     = 3 # a single plain character
  String   = 4 # a complete quoted string literal
  Operator = 5 # e.g. the '=>' argument operator
end
|
20
|
+
|
21
|
+
#
# Contains operations and state that is shared between the different lexers.
# This simplifies the process of tracking the line and column numbers of the
# input files. Also maintains a lookahead list of characters so the lexers
# don't have to. This list is only used for named tags, which are reserved
# words, so its length may be constrained.
#
class LexerInput
  MaxLookahead = 15 # Must be > than max name in named tags (<%name>)

  # file::        an IO-like object responding to getc/ungetc/close
  # diagnostics:: collector the lexers report warnings/errors into
  def initialize(file, diagnostics)
    @diagnostics = diagnostics
    @file = file
    @character = 0   # absolute count of characters consumed
    @line = 1        # current 1-based line
    @column = 1      # current 1-based column
    @lookahead = []  # recently consumed characters, for backtracking via pop
    @columns = []    # column at each recent line end, restored on ungetc
  end

  def close
    @file.close
  end

  attr_reader :diagnostics, :file

  # Absolute character position; writable so a caller can resynchronize.
  attr_accessor :character

  # Current line number; writable so a caller can resynchronize.
  attr_accessor :line

  # Current column number (read-only; maintained by getc/ungetc).
  attr_reader :column

  # Reads one character, updating line/column bookkeeping and the
  # lookahead buffer. Returns nil at end of stream.
  #
  # Handles both IO#getc conventions: Integer (Ruby 1.8) and
  # single-character String (Ruby 1.9+).
  def getc
    ch = @file.getc
    # BUG FIX: previously EOF still bumped @column/@character and pushed
    # nil into @lookahead, corrupting position state and pop().
    return nil if ch.nil?
    @character += 1
    #puts "getc #{@character}: #{ch.chr}"
    if newline?(ch)
      @line += 1
      @columns.push(@column)
      @columns.shift if @columns.length > MaxLookahead
      @column = 1
    elsif tab?(ch)
      @column += 2 # REVIEW: We assume tabs of two spaces
    else
      @column += 1
    end
    @lookahead.push(ch)
    @lookahead.shift if @lookahead.length > MaxLookahead
    ch
  end

  # Pushes +ch+ back onto the stream, reversing getc's bookkeeping.
  # NOTE(review): the column is not decremented for non-newline characters,
  # so diagnostics can point one column ahead after backtracking — confirm
  # whether that is intended.
  def ungetc(ch)
    #puts "ungetc #{@character}: #{ch.chr}"
    if newline?(ch)
      @line -= 1
      if @columns.length > 0
        @column = @columns.pop
      else
        raise "column lookahead underflow"
      end
    end
    @character -= 1
    @file.ungetc(ch)
  end

  # Pop a character from the lookahead list back to the file stream.
  def pop
    if @lookahead.length > 0
      ungetc(@lookahead.pop)
    else
      raise "lookahead underflow"
    end
  end

  private

  # True when +ch+ is a newline in either Integer or String form.
  def newline?(ch)
    ch == 10 || ch == "\n"
  end

  # True when +ch+ is a tab in either Integer or String form.
  def tab?(ch)
    ch == 9 || ch == "\t"
  end
end
|
111
|
+
|
112
|
+
#
# The base class shared by the lexers. It encapsulates common functionality,
# such as token management and error reporting.
#
class Lexer
  attr_reader :input

  def initialize(input)
    @input = input
    @token = nil
    @text = ''
    # One token lookahead for LL(1)
    @prev_token = nil
    @prev_text = ''
    @next_token = nil
    @next_text = ''
  end

  # The most recently pushed token kind and its text.
  attr_reader :token, :text

  # The current line number being scanned, used for diagnostics.
  def line
    @input.line
  end

  # The current column number being scanned, used for diagnostics.
  def column
    @input.column
  end

  # Returns the next character from the input stream.
  def getc
    @input.getc
  end

  # Used for backtracking, puts the most recently removed character
  # back into the input stream.
  def ungetc(ch)
    @input.ungetc(ch)
  end

  # Used for backtracking, puts the most recently removed character
  # back into the input stream. The character was automatically
  # buffered when getc was called.
  def pop
    @input.pop
  end

  # Stamps message +m+ with position (l, c), defaulting each to the
  # current scan position when nil. Returns +m+.
  def diagnostic(m, l, c)
    if !l; l = line; end
    if !c; c = column; end
    m.at(l, c)
    m
  end

  def warning(s, l=nil, c=nil)
    m = Bijou::Parse::Warning.new
    m << s
    @input.diagnostics.add_warning(diagnostic(m, l, c))
  end

  # Records an error, optionally at an explicit (l, c) position.
  #
  # BUG FIX: the file defined error twice; the later zero-position
  # definition silently shadowed this one, so explicit positions passed
  # by callers were discarded. Merged into a single method that keeps the
  # position parameters and mirrors warning's add_warning with add_error
  # (assumes Diagnostics#add_error exists alongside add_warning, as the
  # shadowed definition used — confirm against diagnostics.rb).
  def error(s, l=nil, c=nil)
    m = Bijou::Parse::Error.new
    m << s
    @input.diagnostics.add_error(diagnostic(m, l, c))
  end

  def warnings
    @input.diagnostics.warnings
  end

  def errors
    @input.diagnostics.errors
  end

  # Makes (token, text) current, remembering the previous pair for
  # one-token backtracking. Returns the new token.
  def push_token(token, text)
    @prev_token = @token
    @prev_text = @text
    @token = token
    @text = text
    @next_token = nil
    @next_text = ''
    @token
  end

  # Used for backtracking by the one token
  def pop_token
    @next_token = @token
    @next_text = @text
    @token = @prev_token
    @text = @prev_text
    @prev_token = nil
    @prev_text = ''
    @token
  end

  # Used after a pop operation
  def shift_token
    push_token @next_token, @next_text
  end

  def prev_token
    @prev_token
  end

  def peek_token
    @next_token
  end

  def is_token_buffered
    @next_token ? true : false
  end

  def next_token
    if is_token_buffered
      shift_token
    else
      nil
    end
  end

  # Scans the remainder of a quoted string that began with +startToken+
  # ('"' or "'"); +double+ selects which quote terminates it. Returns the
  # String token on success, or nil (with a warning) if the stream ends
  # before the closing quote.
  def set_string_token(startToken, double)
    # BUG FIX: previously read @line/@column, which are not Lexer
    # instance variables (always nil), so the unterminated-string warning
    # pointed at the current position instead of the string's start.
    start_line = line
    start_column = column

    buf = startToken.dup # Raw string (dup: don't mutate the caller's literal)

    while ch0 = getc
      ch = ch0.chr
      buf << ch

      if ch == "\\"
        # Ignore the next character, but preserve it.
        ch1 = getc
        # BUG FIX: EOF right after a backslash crashed on nil.chr.
        break if ch1.nil?
        buf << ch1.chr
      elsif ch == '"'
        if double
          return push_token(Token::String, buf)
        end
      elsif ch == "'"
        if !double
          return push_token(Token::String, buf)
        end
      end
      # push_token(Token::Char, buf)
    end

    warning("unterminated string literal", start_line, start_column)
    return nil
  end
end
|
276
|
+
|
277
|
+
class TextLexer < Lexer
|
278
|
+
attr_accessor :tokenize_arguments
|
279
|
+
|
280
|
+
def initialize(input)
|
281
|
+
super
|
282
|
+
@tokenize_arguments = false
|
283
|
+
end
|
284
|
+
|
285
|
+
def next_token()
|
286
|
+
if is_token_buffered
|
287
|
+
return shift_token
|
288
|
+
end
|
289
|
+
|
290
|
+
while ch0 = getc
|
291
|
+
ch = ch0.chr
|
292
|
+
|
293
|
+
if ch == '"'
|
294
|
+
if @tokenize_arguments
|
295
|
+
return set_string_token('"', true)
|
296
|
+
end
|
297
|
+
elsif ch == "'"
|
298
|
+
if @tokenize_arguments
|
299
|
+
return set_string_token("'", false)
|
300
|
+
end
|
301
|
+
elsif ch == '='
|
302
|
+
if @tokenize_arguments
|
303
|
+
ch1 = getc
|
304
|
+
if !ch1
|
305
|
+
# End of stream.
|
306
|
+
return push_token(Token::Char, ch)
|
307
|
+
elsif ch1.chr == '>'
|
308
|
+
return push_token(Token::Operator, "=>")
|
309
|
+
end
|
310
|
+
end
|
311
|
+
elsif ch == '<'
|
312
|
+
ch1 = getc
|
313
|
+
if !ch1
|
314
|
+
# End of stream.
|
315
|
+
return push_token(Token::Char, ch)
|
316
|
+
elsif ch1.chr == '%'
|
317
|
+
ch2 = getc
|
318
|
+
if ch2.chr == '='
|
319
|
+
return push_token(Token::TagOpen, "<%=")
|
320
|
+
elsif ch2.chr == '!'
|
321
|
+
return push_token(Token::TagOpen, "<%!")
|
322
|
+
end
|
323
|
+
ungetc(ch2)
|
324
|
+
return push_token(Token::TagOpen, "<%")
|
325
|
+
elsif ch1.chr == '&'
|
326
|
+
return push_token(Token::TagOpen, "<&")
|
327
|
+
elsif ch1.chr == '/'
|
328
|
+
ch2 = getc
|
329
|
+
if ch2.chr == '%'
|
330
|
+
return push_token(Token::TagOpen, "</%")
|
331
|
+
end
|
332
|
+
ungetc(ch2)
|
333
|
+
end
|
334
|
+
ungetc(ch1)
|
335
|
+
end
|
336
|
+
return push_token(Token::Char, ch)
|
337
|
+
end
|
338
|
+
nil
|
339
|
+
end
|
340
|
+
end
|
341
|
+
|
342
|
+
# Identifies which flavor of tag is being scanned.
class TagType
  Directive = 0 # <%! ... %>
  Output    = 1 # <%= ... %>
  Inline    = 2 # <% ... %>
  Named     = 3 # <%name ... > or </%name ... >
  Call      = 4 # <& name ... >
end
|
349
|
+
|
350
|
+
#
# Scans the interior of a tag, emitting TagClose tokens for %>, &> and
# (for named/directive tags) '>', String tokens for quoted strings, and
# Char tokens for everything else.
#
class TagLexer < Lexer
  # Enables '=>' tokenization inside tag argument lists.
  attr_accessor :tokenize_arguments

  def initialize(input)
    super(input)
    @startToken = nil
    @tagType = nil
    @tokenize_arguments = false
  end

  # NOTE(review): @stringLexer and @backend are never assigned anywhere in
  # this class, so as written this method would fail at runtime; preserved
  # verbatim pending confirmation of where they were meant to be injected.
  def parse_string(stringType)
    @lexer = @stringLexer
    @lexer.type = stringType

    while tok = @lexer.next_token
      # print @lexer.text # print before switching lexers
      if tok == Token::String
        # print "<" + @lexer.text + ">---"
        # print @lexer.text
        @backend.tagString(@lexer.unquoted_string, @lexer.type)
        return @lexer.raw_string
      end
    end
  end

  # Records the opening token text and tag type for the tag being scanned.
  def start(tok, type)
    @startToken = tok
    @tagType = type
  end

  # Returns the next token, or nil at end of stream.
  def next_token
    return shift_token if is_token_buffered

    while ch0 = getc
      ch = ch0.chr
      #puts "Read: '#{ch}' #{@tagType}"

      if ch == '"'
        return set_string_token('"', true)
      elsif ch == "'"
        return set_string_token("'", false)
      elsif ch == '='
        if @tokenize_arguments
          ch1 = getc
          if ch1 && ch1.chr == '>'
            return push_token(Token::Operator, "=>")
          end
          # BUG FIX: the lookahead character was consumed and dropped
          # (and '=' at EOF crashed on nil.chr); push it back instead.
          ungetc(ch1) if ch1
        end
      elsif ch == '%'
        ch1 = getc
        if ch1 && ch1.chr == '>'
          # if @startToken == '<%=' || @startToken == '<%' ||
          #    @startToken == '</%' || @startToken == '<%!'
          return push_token(Token::TagClose, "%>")
          # else
          #   warning("unexpected '%>' tag close")
          # end
        end
        # BUG FIX: '%' at end of stream crashed on nil.chr / ungetc(nil).
        ungetc(ch1) if ch1
      elsif ch == '&'
        ch1 = getc
        if ch1 && ch1.chr == '>'
          # if @startToken == '<&'
          return push_token(Token::TagClose, "&>")
          # else
          #   warning("unexpected '&>' tag close")
          # end
        end
        ungetc(ch1) if ch1
      elsif ch == '>'
        if @tagType == TagType::Named ||
           @tagType == TagType::Directive
          return push_token(Token::TagClose, ">")
        else
          # For inline and calls, we ignore this case, because it may
          # be '=>' '>=', etc., which are valid expressions.
          # REVIEW: This can cause problems.
          # BUGBUG: When parsing <%init>, for example, we haven't set the
          # token type because we don't know it yet.
          # print("#{@startToken} #{@tagType} ***")
        end
      end
      return push_token(Token::Char, ch)
    end
    nil
  end
end
|
439
|
+
|
440
|
+
#
# The string lexer is used to switch the parser into a mode in which it can
# intelligently ignore (or process) Ruby strings, including strings which
# have embedded tag close sequences like '>' and '%>'.
#
class StringLexer < Lexer
  def initialize(input)
    super(input)
    @string = ""
  end

  # Quote style of the string currently being scanned.
  class Type
    Single = 1
    Double = 2
  end

  # Setting the quote type also resets the accumulated string body.
  def type=(t)
    @type = t
    @string = '' # reinitialize
  end

  def type
    @type
  end

  # The accumulated string body, without the surrounding quotes.
  def unquoted_string
    @string
  end

  # The accumulated body re-wrapped in its original quote characters.
  def raw_string
    if type == StringLexer::Type::Double
      "\"#{@string}\""
    else
      "'#{@string}'"
    end
  end

  # Accumulates Char tokens into the string body, then performs the
  # normal token bookkeeping.
  def push_token(token, text)
    if token == Token::Char
      # print "(#{token}, #{text})"
      @string << text
    end
    super(token, text)
  end

  # Returns the next token: String when the matching close quote is seen,
  # Char otherwise (escaped characters are passed through). Returns nil
  # at end of stream.
  def next_token
    return shift_token if is_token_buffered

    while ch0 = getc
      ch = ch0.chr

      if ch == "\\"
        push_token(Token::Char, ch)
        ch1 = getc
        # BUG FIX: a trailing backslash at end of stream crashed on nil.chr.
        return nil if ch1.nil?
        return push_token(Token::Char, ch1.chr)
      elsif ch == '"'
        if @type == Type::Double
          return push_token(Token::String, ch)
        end
      elsif ch == "'"
        if @type == Type::Single
          return push_token(Token::String, ch)
        end
      end
      return push_token(Token::Char, ch)
    end
    nil
  end
end
|
511
|
+
|
512
|
+
end # module Parse
|
513
|
+
end # module Bijou
|