rexical 1.0.5 → 1.0.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.rdoc +19 -2
- data/COPYING +129 -0
- data/DOCUMENTATION.en.rdoc +210 -214
- data/DOCUMENTATION.ja.rdoc +2 -2
- data/Manifest.txt +2 -0
- data/README.rdoc +15 -7
- data/Rakefile +2 -28
- data/bin/rex +0 -0
- data/lib/rexical/generator.rb +147 -161
- data/lib/rexical/rexcmd.rb +4 -2
- data/lib/rexical/version.rb +5 -0
- data/lib/rexical.rb +3 -8
- data/sample/calc3.tab.rb +1 -1
- data/sample/error1.rex +1 -1
- data/sample/error2.rex +2 -2
- data/sample/sample.rex +1 -1
- data/sample/sample.rex.rb +92 -100
- data/sample/sample1.rex +4 -4
- data/sample/sample2.rex +2 -2
- data/sample/xhtmlparser.rex +15 -15
- data/test/assets/test.rex +4 -0
- data/test/rex-20060125.rb +1 -1
- data/test/rex-20060511.rb +1 -1
- data/test/test_generator.rb +59 -3
- metadata +49 -68
data/README.rdoc
CHANGED
@@ -1,30 +1,33 @@
|
|
1
1
|
= Rexical
|
2
2
|
|
3
|
-
|
3
|
+
home :: http://github.com/sparklemotion/rexical/tree/master
|
4
4
|
|
5
5
|
== DESCRIPTION
|
6
6
|
|
7
|
-
Rexical is a lexical scanner generator
|
8
|
-
|
9
|
-
|
7
|
+
Rexical is a lexical scanner generator that is used with
|
8
|
+
Racc to generate Ruby programs.
|
9
|
+
Rexical is written in Ruby.
|
10
10
|
|
11
11
|
|
12
12
|
== SYNOPSIS
|
13
13
|
|
14
|
-
|
14
|
+
Several examples of Rexical grammar files are provided in
|
15
|
+
the sample directory.
|
16
|
+
|
17
|
+
Here is an example of a lexical definition:
|
15
18
|
|
16
19
|
class Sample
|
17
20
|
macro
|
18
21
|
BLANK [\ \t]+
|
19
22
|
rule
|
20
|
-
BLANK
|
23
|
+
{BLANK} # no action
|
21
24
|
\d+ { [:digit, text.to_i] }
|
22
25
|
\w+ { [:word, text] }
|
23
26
|
\n
|
24
27
|
. { [text, text] }
|
25
28
|
end
|
26
29
|
|
27
|
-
Here is the command line usage:
|
30
|
+
Here are examples of the command line usage:
|
28
31
|
|
29
32
|
$ rex sample1.rex --stub
|
30
33
|
$ ruby sample1.rex.rb sample1.c
|
@@ -36,6 +39,9 @@ Here is the command line usage:
|
|
36
39
|
$ rex calc3.rex
|
37
40
|
$ ruby calc3.tab.rb
|
38
41
|
|
42
|
+
The description files for lexical analysis in the sample directory
|
43
|
+
are the files ending with the .rex extension.
|
44
|
+
|
39
45
|
== REQUIREMENTS
|
40
46
|
|
41
47
|
* ruby version 1.8.x or later.
|
@@ -50,3 +56,5 @@ Rexical is distributed under the terms of the GNU Lesser General
|
|
50
56
|
Public License version 2. Note that you do NOT need to follow
|
51
57
|
LGPL for your own parser (Rexical outputs). You can provide those
|
52
58
|
files under any licenses you want.
|
59
|
+
|
60
|
+
See COPYING for more details.
|
data/Rakefile
CHANGED
@@ -1,29 +1,3 @@
|
|
1
|
-
|
1
|
+
require 'minitest/test_task'
|
2
2
|
|
3
|
-
|
4
|
-
require 'hoe'
|
5
|
-
|
6
|
-
Hoe.plugin :debugging
|
7
|
-
Hoe.plugin :git
|
8
|
-
Hoe.plugins.delete :rubyforge
|
9
|
-
|
10
|
-
Hoe.spec 'rexical' do
|
11
|
-
self.readme_file = 'README.rdoc'
|
12
|
-
self.history_file = 'CHANGELOG.rdoc'
|
13
|
-
developer('Aaron Patterson', 'aaronp@rubyforge.org')
|
14
|
-
self.rubyforge_name = 'ruby-rex'
|
15
|
-
self.extra_rdoc_files = FileList['*.rdoc']
|
16
|
-
end
|
17
|
-
|
18
|
-
namespace :gem do
|
19
|
-
namespace :spec do
|
20
|
-
task :dev do
|
21
|
-
File.open("#{HOE.name}.gemspec", 'w') do |f|
|
22
|
-
HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
|
23
|
-
f.write(HOE.spec.to_ruby)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
# vim: syntax=Ruby
|
3
|
+
Minitest::TestTask.create
|
data/bin/rex
CHANGED
File without changes
|
data/lib/rexical/generator.rb
CHANGED
@@ -11,13 +11,9 @@
|
|
11
11
|
require 'strscan'
|
12
12
|
module Rexical
|
13
13
|
|
14
|
-
## ---------------------------------------------------------------------
|
15
14
|
class ParseError < StandardError ; end
|
16
15
|
|
17
|
-
## ---------------------------------------------------------------------
|
18
16
|
class Generator
|
19
|
-
|
20
|
-
## ---------------------------------------------------------------------
|
21
17
|
attr_accessor :grammar_file
|
22
18
|
attr_accessor :grammar_lines
|
23
19
|
attr_accessor :scanner_file
|
@@ -30,7 +26,6 @@ module Rexical
|
|
30
26
|
attr_accessor :independent
|
31
27
|
attr_accessor :debug
|
32
28
|
|
33
|
-
## ---------------------------------------------------------------------
|
34
29
|
def initialize(opts)
|
35
30
|
@lineno = 0
|
36
31
|
@macro = {}
|
@@ -43,22 +38,18 @@ module Rexical
|
|
43
38
|
@opt = opts
|
44
39
|
end
|
45
40
|
|
46
|
-
## ---------------------------------------------------------------------
|
47
41
|
def add_header( st )
|
48
42
|
@scanner_header += "#{st}\n"
|
49
43
|
end
|
50
44
|
|
51
|
-
## ---------------------------------------------------------------------
|
52
45
|
def add_footer( st )
|
53
46
|
@scanner_footer += "#{st}\n"
|
54
47
|
end
|
55
48
|
|
56
|
-
## ---------------------------------------------------------------------
|
57
49
|
def add_inner( st )
|
58
50
|
@scanner_inner += "#{st}\n"
|
59
51
|
end
|
60
52
|
|
61
|
-
## ---------------------------------------------------------------------
|
62
53
|
def add_option( st )
|
63
54
|
opts = st.split
|
64
55
|
opts.each do |opt|
|
@@ -69,11 +60,12 @@ module Rexical
|
|
69
60
|
@opt['--stub'] = true
|
70
61
|
when /independent/i
|
71
62
|
@opt['--independent'] = true
|
63
|
+
when /matcheos/i
|
64
|
+
@opt['--matcheos'] = true
|
72
65
|
end
|
73
66
|
end
|
74
67
|
end
|
75
68
|
|
76
|
-
## ---------------------------------------------------------------------
|
77
69
|
def add_macro( st )
|
78
70
|
ss = StringScanner.new(st)
|
79
71
|
ss.scan(/\s+/)
|
@@ -105,7 +97,6 @@ module Rexical
|
|
105
97
|
raise ParseError, "parse error in add_macro:'#{st}'"
|
106
98
|
end
|
107
99
|
|
108
|
-
## ---------------------------------------------------------------------
|
109
100
|
def add_rule( rule_state, rule_expr, rule_action=nil )
|
110
101
|
st = rule_expr.dup
|
111
102
|
@macro.each_pair do |k, e|
|
@@ -218,16 +209,11 @@ module Rexical
|
|
218
209
|
end
|
219
210
|
|
220
211
|
end # case state3
|
221
|
-
|
222
212
|
end # case state2
|
223
|
-
|
224
213
|
end # case state1
|
225
|
-
|
226
214
|
end # while
|
227
|
-
|
228
215
|
end
|
229
216
|
|
230
|
-
## ---------------------------------------------------------------------
|
231
217
|
def parse_rule(st)
|
232
218
|
st.strip!
|
233
219
|
return if st.size == 0 or st[0,1] == '#'
|
@@ -240,7 +226,6 @@ module Rexical
|
|
240
226
|
[rule_state, rule_expr, ss.post_match]
|
241
227
|
end
|
242
228
|
|
243
|
-
## ---------------------------------------------------------------------
|
244
229
|
def parse_action(st, lastmodes=[])
|
245
230
|
modes = lastmodes
|
246
231
|
mode = lastmodes[-1]
|
@@ -293,89 +278,83 @@ module Rexical
|
|
293
278
|
return modes
|
294
279
|
end
|
295
280
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
#
|
301
|
-
|
302
|
-
# from lexical definition file "%s".
|
303
|
-
#++
|
304
|
-
|
305
|
-
REX_EOT
|
306
|
-
|
307
|
-
REX_UTIL = <<-REX_EOT
|
308
|
-
require 'strscan'
|
281
|
+
REX_HEADER = <<-REX_EOT.gsub(/^ {6}/, '')
|
282
|
+
#--
|
283
|
+
# DO NOT MODIFY!!!!
|
284
|
+
# This file is automatically generated by rex %s
|
285
|
+
# from lexical definition file "%s".
|
286
|
+
#++
|
309
287
|
|
310
|
-
|
288
|
+
REX_EOT
|
311
289
|
|
312
|
-
|
313
|
-
|
314
|
-
attr_accessor :state
|
290
|
+
REX_UTIL = <<-REX_EOT
|
291
|
+
require 'strscan'
|
315
292
|
|
316
|
-
|
317
|
-
@ss = StringScanner.new(str)
|
318
|
-
@lineno = 1
|
319
|
-
@state = nil
|
320
|
-
end
|
293
|
+
class ScanError < StandardError ; end
|
321
294
|
|
322
|
-
|
323
|
-
|
324
|
-
|
295
|
+
attr_reader :lineno
|
296
|
+
attr_reader :filename
|
297
|
+
attr_accessor :state
|
325
298
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
299
|
+
def scan_setup(str)
|
300
|
+
@ss = StringScanner.new(str)
|
301
|
+
@lineno = 1
|
302
|
+
@state = nil
|
303
|
+
end
|
331
304
|
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
scan_setup(f.read)
|
336
|
-
end
|
337
|
-
end
|
305
|
+
def action
|
306
|
+
yield
|
307
|
+
end
|
338
308
|
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
309
|
+
def scan_str(str)
|
310
|
+
scan_setup(str)
|
311
|
+
do_parse
|
312
|
+
end
|
313
|
+
alias :scan :scan_str
|
343
314
|
|
344
|
-
|
315
|
+
def load_file( filename )
|
316
|
+
@filename = filename
|
317
|
+
File.open(filename, "r") do |f|
|
318
|
+
scan_setup(f.read)
|
319
|
+
end
|
320
|
+
end
|
345
321
|
|
346
|
-
|
322
|
+
def scan_file( filename )
|
323
|
+
load_file(filename)
|
324
|
+
do_parse
|
325
|
+
end
|
347
326
|
|
348
|
-
|
349
|
-
exit if ARGV.size != 1
|
350
|
-
filename = ARGV.shift
|
351
|
-
rex = %s.new
|
352
|
-
begin
|
353
|
-
rex.load_file filename
|
354
|
-
while token = rex.next_token
|
355
|
-
p token
|
356
|
-
end
|
357
|
-
rescue
|
358
|
-
$stderr.printf %s, rex.filename, rex.lineno, $!.message
|
359
|
-
end
|
360
|
-
end
|
361
|
-
REX_EOT
|
327
|
+
REX_EOT
|
362
328
|
|
363
|
-
|
329
|
+
REX_STUB = <<-REX_EOT
|
364
330
|
|
331
|
+
if __FILE__ == $0
|
332
|
+
exit if ARGV.size != 1
|
333
|
+
filename = ARGV.shift
|
334
|
+
rex = %s.new
|
335
|
+
begin
|
336
|
+
rex.load_file filename
|
337
|
+
while token = rex.next_token
|
338
|
+
p token
|
339
|
+
end
|
340
|
+
rescue
|
341
|
+
$stderr.printf %s, rex.filename, rex.lineno, $!.message
|
342
|
+
end
|
343
|
+
end
|
344
|
+
REX_EOT
|
365
345
|
|
366
346
|
def scanner_io
|
367
347
|
unless scanner_file = @opt['--output-file']
|
368
348
|
scanner_file = grammar_file + ".rb"
|
369
349
|
end
|
370
|
-
|
350
|
+
File.open(scanner_file, 'wb')
|
371
351
|
end
|
372
352
|
private :scanner_io
|
373
353
|
|
374
354
|
def write_scanner f = scanner_io
|
375
|
-
## scan flag
|
376
355
|
flag = ""
|
377
356
|
flag += "i" if @opt['--ignorecase']
|
378
|
-
|
357
|
+
|
379
358
|
f.printf REX_HEADER, Rexical::VERSION, grammar_file
|
380
359
|
|
381
360
|
unless @opt['--independent']
|
@@ -391,120 +370,127 @@ REX_EOT
|
|
391
370
|
f.puts "class #{@class_name} < Racc::Parser"
|
392
371
|
end
|
393
372
|
|
394
|
-
## utility method
|
395
373
|
f.print REX_UTIL
|
396
374
|
|
397
|
-
|
375
|
+
eos_check = @opt["--matcheos"] ? "" : "return if @ss.eos?"
|
398
376
|
|
377
|
+
## scanner method
|
399
378
|
f.print <<-REX_EOT
|
400
379
|
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
# skips empty actions
|
405
|
-
until token = _next_token or @ss.eos?; end
|
406
|
-
token
|
407
|
-
end
|
408
|
-
|
409
|
-
def _next_token
|
410
|
-
text = @ss.peek(1)
|
411
|
-
@lineno += 1 if text == "\\n"
|
412
|
-
token = case @state
|
413
|
-
REX_EOT
|
380
|
+
def next_token
|
381
|
+
#{eos_check}
|
414
382
|
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
383
|
+
# skips empty actions
|
384
|
+
until token = _next_token or @ss.eos?; end
|
385
|
+
token
|
386
|
+
end
|
387
|
+
|
388
|
+
def _next_token
|
389
|
+
text = @ss.peek(1)
|
390
|
+
@lineno += 1 if text == "\\n"
|
391
|
+
token = case @state
|
419
392
|
REX_EOT
|
420
|
-
rules.each do |rule|
|
421
|
-
exclusive_state, start_state, rule_expr, rule_action = *rule
|
422
|
-
if es == exclusive_state
|
423
393
|
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
394
|
+
exclusive_states.each do |es|
|
395
|
+
if es.nil?
|
396
|
+
f.printf <<-REX_EOT
|
397
|
+
when #{(["nil"] + rules.collect{ |rule| rule[1].nil? ? "nil" : rule[1] }).uniq.join(', ')}
|
398
|
+
REX_EOT
|
399
|
+
else
|
400
|
+
f.printf <<-REX_EOT
|
401
|
+
when #{es}
|
402
|
+
REX_EOT
|
403
|
+
end
|
404
|
+
f.printf <<-REX_EOT
|
405
|
+
case
|
406
|
+
REX_EOT
|
429
407
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
when (text = @ss.scan(/#{rule_expr}/#{flag}))
|
434
|
-
action #{rule_action}
|
408
|
+
rules.each do |rule|
|
409
|
+
exclusive_state, start_state, rule_expr, rule_action = *rule
|
410
|
+
if es == exclusive_state
|
435
411
|
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
412
|
+
if rule_action
|
413
|
+
if start_state
|
414
|
+
f.print <<-REX_EOT
|
415
|
+
when((state == #{start_state}) and (text = @ss.scan(/#{rule_expr}/#{flag})))
|
416
|
+
action #{rule_action}
|
417
|
+
|
418
|
+
REX_EOT
|
419
|
+
else
|
420
|
+
f.print <<-REX_EOT
|
421
|
+
when (text = @ss.scan(/#{rule_expr}/#{flag}))
|
422
|
+
action #{rule_action}
|
443
423
|
|
444
|
-
|
424
|
+
REX_EOT
|
425
|
+
end
|
445
426
|
else
|
446
|
-
|
447
|
-
|
448
|
-
|
427
|
+
if start_state
|
428
|
+
f.print <<-REX_EOT
|
429
|
+
when (@state == #{start_state}) && (text = @ss.scan(/#{rule_expr}/#{flag}))
|
430
|
+
;
|
431
|
+
|
432
|
+
REX_EOT
|
433
|
+
else
|
434
|
+
f.print <<-REX_EOT
|
435
|
+
when (text = @ss.scan(/#{rule_expr}/#{flag}))
|
436
|
+
;
|
449
437
|
|
450
|
-
|
438
|
+
REX_EOT
|
439
|
+
end
|
451
440
|
end
|
452
|
-
end
|
453
441
|
|
442
|
+
end
|
443
|
+
end # rules.each
|
444
|
+
|
445
|
+
if @opt["--matcheos"]
|
446
|
+
eos_check = <<-REX_EOT
|
447
|
+
when @@ss.scan(/$/)
|
448
|
+
;
|
449
|
+
REX_EOT
|
450
|
+
else
|
451
|
+
eos_check = ""
|
454
452
|
end
|
455
|
-
end
|
456
|
-
f.print <<-REX_EOT
|
457
|
-
else
|
458
|
-
text = @ss.string[@ss.pos .. -1]
|
459
|
-
raise ScanError, "can not match: '" + text + "'"
|
460
|
-
end # if
|
461
453
|
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
454
|
+
f.print <<-REX_EOT
|
455
|
+
#{eos_check}
|
456
|
+
else
|
457
|
+
text = @ss.string[@ss.pos .. -1]
|
458
|
+
raise ScanError, "can not match: '" + text + "'"
|
459
|
+
end # if
|
460
|
+
|
461
|
+
REX_EOT
|
462
|
+
end # exclusive_states.each
|
463
|
+
|
470
464
|
f.print <<-REX_EOT
|
471
|
-
|
465
|
+
else
|
466
|
+
raise ScanError, "undefined state: '" + state.to_s + "'"
|
467
|
+
end # case state
|
472
468
|
REX_EOT
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
469
|
+
|
470
|
+
if @opt['--debug']
|
471
|
+
f.print <<-REX_EOT
|
472
|
+
p token
|
473
|
+
REX_EOT
|
474
|
+
end
|
475
|
+
|
476
|
+
f.print <<-REX_EOT
|
477
|
+
token
|
478
|
+
end # def _next_token
|
477
479
|
|
478
480
|
REX_EOT
|
479
481
|
|
480
|
-
## inner method
|
481
482
|
@scanner_inner.each_line do |s|
|
482
483
|
f.print s
|
483
484
|
end
|
484
485
|
f.puts "end # class"
|
485
486
|
|
486
|
-
## footer
|
487
487
|
@scanner_footer.each_line do |s|
|
488
488
|
f.print s
|
489
|
-
end
|
489
|
+
end
|
490
490
|
|
491
|
-
## stub main
|
492
491
|
f.printf REX_STUB, @class_name, '"%s:%d:%s\n"' if @opt['--stub']
|
493
492
|
f.close
|
494
493
|
|
495
|
-
end ## def
|
496
|
-
end ## class
|
497
|
-
end ## module
|
498
|
-
|
499
|
-
|
500
|
-
## ---------------------------------------------------------------------
|
501
|
-
## test
|
502
|
-
|
503
|
-
if __FILE__ == $0
|
504
|
-
rex = Rexical::Generator.new(nil)
|
505
|
-
rex.grammar_file = "sample.rex"
|
506
|
-
rex.read_grammar
|
507
|
-
rex.parse
|
508
|
-
rex.write_scanner
|
509
|
-
end
|
510
|
-
|
494
|
+
end ## def write_scanner
|
495
|
+
end ## class Generator
|
496
|
+
end ## module Rexical
|
data/lib/rexical/rexcmd.rb
CHANGED
@@ -22,6 +22,7 @@ o -s --stub - append stub code for debug
|
|
22
22
|
o -i --ignorecase - ignore char case
|
23
23
|
o -C --check-only - syntax check only
|
24
24
|
o - --independent - independent mode
|
25
|
+
o - --matcheos - allow match against end of string
|
25
26
|
o -d --debug - print debug information
|
26
27
|
o -h --help - print this message and quit
|
27
28
|
o - --version - print version and quit
|
@@ -65,7 +66,8 @@ EOT
|
|
65
66
|
@cmd = File.basename($0, ".rb")
|
66
67
|
tmp = OPTIONS.lines.collect do |line|
|
67
68
|
next if /\A\s*\z/ === line
|
68
|
-
disp, sopt, lopt, takearg, doc = line.strip.split(/\s+/, 5)
|
69
|
+
# disp, sopt, lopt, takearg, doc
|
70
|
+
_, sopt, lopt, takearg, _ = line.strip.split(/\s+/, 5)
|
69
71
|
a = []
|
70
72
|
a.push lopt unless lopt == '-'
|
71
73
|
a.push sopt unless sopt == '-'
|
@@ -83,7 +85,7 @@ EOT
|
|
83
85
|
"#{@cmd}: #{name} given twice" if @opt.key? name
|
84
86
|
@opt[name] = arg.empty? ? true : arg
|
85
87
|
end
|
86
|
-
rescue GetoptLong::
|
88
|
+
rescue GetoptLong::AmbiguousOption, GetoptLong::InvalidOption,
|
87
89
|
GetoptLong::MissingArgument, GetoptLong::NeedlessArgument
|
88
90
|
usage $!.message
|
89
91
|
end
|
data/lib/rexical.rb
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
module Rexical
|
5
|
-
VERSION = "1.0.5"
|
6
|
-
Copyright = 'Copyright (c) 2005-2006 ARIMA Yasuhiro'
|
7
|
-
Mailto = 'arima.yasuhiro@nifty.com'
|
8
|
-
end
|
1
|
+
require_relative "rexical/generator"
|
2
|
+
require_relative "rexical/rexcmd"
|
3
|
+
require_relative "rexical/version"
|
data/sample/calc3.tab.rb
CHANGED
data/sample/error1.rex
CHANGED
data/sample/error2.rex
CHANGED