rexical 1.0.5 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.rdoc +19 -2
- data/COPYING +129 -0
- data/DOCUMENTATION.en.rdoc +210 -214
- data/DOCUMENTATION.ja.rdoc +2 -2
- data/Manifest.txt +2 -0
- data/README.rdoc +15 -7
- data/Rakefile +2 -28
- data/bin/rex +0 -0
- data/lib/rexical/generator.rb +147 -161
- data/lib/rexical/rexcmd.rb +4 -2
- data/lib/rexical/version.rb +5 -0
- data/lib/rexical.rb +3 -8
- data/sample/calc3.tab.rb +1 -1
- data/sample/error1.rex +1 -1
- data/sample/error2.rex +2 -2
- data/sample/sample.rex +1 -1
- data/sample/sample.rex.rb +92 -100
- data/sample/sample1.rex +4 -4
- data/sample/sample2.rex +2 -2
- data/sample/xhtmlparser.rex +15 -15
- data/test/assets/test.rex +4 -0
- data/test/rex-20060125.rb +1 -1
- data/test/rex-20060511.rb +1 -1
- data/test/test_generator.rb +59 -3
- metadata +49 -68
data/README.rdoc
CHANGED
@@ -1,30 +1,33 @@
|
|
1
1
|
= Rexical
|
2
2
|
|
3
|
-
|
3
|
+
home :: http://github.com/sparklemotion/rexical/tree/master
|
4
4
|
|
5
5
|
== DESCRIPTION
|
6
6
|
|
7
|
-
Rexical is a lexical scanner generator
|
8
|
-
|
9
|
-
|
7
|
+
Rexical is a lexical scanner generator that is used with
|
8
|
+
Racc to generate Ruby programs.
|
9
|
+
Rexical is written in Ruby.
|
10
10
|
|
11
11
|
|
12
12
|
== SYNOPSIS
|
13
13
|
|
14
|
-
|
14
|
+
Several examples of Rexical grammar files are provided in
|
15
|
+
the sample directory.
|
16
|
+
|
17
|
+
Here is an example of a lexical definition:
|
15
18
|
|
16
19
|
class Sample
|
17
20
|
macro
|
18
21
|
BLANK [\ \t]+
|
19
22
|
rule
|
20
|
-
BLANK
|
23
|
+
{BLANK} # no action
|
21
24
|
\d+ { [:digit, text.to_i] }
|
22
25
|
\w+ { [:word, text] }
|
23
26
|
\n
|
24
27
|
. { [text, text] }
|
25
28
|
end
|
26
29
|
|
27
|
-
Here is the command line usage:
|
30
|
+
Here are examples of the command line usage:
|
28
31
|
|
29
32
|
$ rex sample1.rex --stub
|
30
33
|
$ ruby sample1.rex.rb sample1.c
|
@@ -36,6 +39,9 @@ Here is the command line usage:
|
|
36
39
|
$ rex calc3.rex
|
37
40
|
$ ruby calc3.tab.rb
|
38
41
|
|
42
|
+
The description files for lexical analysis in the sample directory
|
43
|
+
are the files ending with the .rex extension.
|
44
|
+
|
39
45
|
== REQUIREMENTS
|
40
46
|
|
41
47
|
* ruby version 1.8.x or later.
|
@@ -50,3 +56,5 @@ Rexical is distributed under the terms of the GNU Lesser General
|
|
50
56
|
Public License version 2. Note that you do NOT need to follow
|
51
57
|
LGPL for your own parser (Rexical outputs). You can provide those
|
52
58
|
files under any licenses you want.
|
59
|
+
|
60
|
+
See COPYING for more details.
|
data/Rakefile
CHANGED
@@ -1,29 +1,3 @@
|
|
1
|
-
|
1
|
+
require 'minitest/test_task'
|
2
2
|
|
3
|
-
|
4
|
-
require 'hoe'
|
5
|
-
|
6
|
-
Hoe.plugin :debugging
|
7
|
-
Hoe.plugin :git
|
8
|
-
Hoe.plugins.delete :rubyforge
|
9
|
-
|
10
|
-
Hoe.spec 'rexical' do
|
11
|
-
self.readme_file = 'README.rdoc'
|
12
|
-
self.history_file = 'CHANGELOG.rdoc'
|
13
|
-
developer('Aaron Patterson', 'aaronp@rubyforge.org')
|
14
|
-
self.rubyforge_name = 'ruby-rex'
|
15
|
-
self.extra_rdoc_files = FileList['*.rdoc']
|
16
|
-
end
|
17
|
-
|
18
|
-
namespace :gem do
|
19
|
-
namespace :spec do
|
20
|
-
task :dev do
|
21
|
-
File.open("#{HOE.name}.gemspec", 'w') do |f|
|
22
|
-
HOE.spec.version = "#{HOE.version}.#{Time.now.strftime("%Y%m%d%H%M%S")}"
|
23
|
-
f.write(HOE.spec.to_ruby)
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
# vim: syntax=Ruby
|
3
|
+
Minitest::TestTask.create
|
data/bin/rex
CHANGED
File without changes
|
data/lib/rexical/generator.rb
CHANGED
@@ -11,13 +11,9 @@
|
|
11
11
|
require 'strscan'
|
12
12
|
module Rexical
|
13
13
|
|
14
|
-
## ---------------------------------------------------------------------
|
15
14
|
class ParseError < StandardError ; end
|
16
15
|
|
17
|
-
## ---------------------------------------------------------------------
|
18
16
|
class Generator
|
19
|
-
|
20
|
-
## ---------------------------------------------------------------------
|
21
17
|
attr_accessor :grammar_file
|
22
18
|
attr_accessor :grammar_lines
|
23
19
|
attr_accessor :scanner_file
|
@@ -30,7 +26,6 @@ module Rexical
|
|
30
26
|
attr_accessor :independent
|
31
27
|
attr_accessor :debug
|
32
28
|
|
33
|
-
## ---------------------------------------------------------------------
|
34
29
|
def initialize(opts)
|
35
30
|
@lineno = 0
|
36
31
|
@macro = {}
|
@@ -43,22 +38,18 @@ module Rexical
|
|
43
38
|
@opt = opts
|
44
39
|
end
|
45
40
|
|
46
|
-
## ---------------------------------------------------------------------
|
47
41
|
def add_header( st )
|
48
42
|
@scanner_header += "#{st}\n"
|
49
43
|
end
|
50
44
|
|
51
|
-
## ---------------------------------------------------------------------
|
52
45
|
def add_footer( st )
|
53
46
|
@scanner_footer += "#{st}\n"
|
54
47
|
end
|
55
48
|
|
56
|
-
## ---------------------------------------------------------------------
|
57
49
|
def add_inner( st )
|
58
50
|
@scanner_inner += "#{st}\n"
|
59
51
|
end
|
60
52
|
|
61
|
-
## ---------------------------------------------------------------------
|
62
53
|
def add_option( st )
|
63
54
|
opts = st.split
|
64
55
|
opts.each do |opt|
|
@@ -69,11 +60,12 @@ module Rexical
|
|
69
60
|
@opt['--stub'] = true
|
70
61
|
when /independent/i
|
71
62
|
@opt['--independent'] = true
|
63
|
+
when /matcheos/i
|
64
|
+
@opt['--matcheos'] = true
|
72
65
|
end
|
73
66
|
end
|
74
67
|
end
|
75
68
|
|
76
|
-
## ---------------------------------------------------------------------
|
77
69
|
def add_macro( st )
|
78
70
|
ss = StringScanner.new(st)
|
79
71
|
ss.scan(/\s+/)
|
@@ -105,7 +97,6 @@ module Rexical
|
|
105
97
|
raise ParseError, "parse error in add_macro:'#{st}'"
|
106
98
|
end
|
107
99
|
|
108
|
-
## ---------------------------------------------------------------------
|
109
100
|
def add_rule( rule_state, rule_expr, rule_action=nil )
|
110
101
|
st = rule_expr.dup
|
111
102
|
@macro.each_pair do |k, e|
|
@@ -218,16 +209,11 @@ module Rexical
|
|
218
209
|
end
|
219
210
|
|
220
211
|
end # case state3
|
221
|
-
|
222
212
|
end # case state2
|
223
|
-
|
224
213
|
end # case state1
|
225
|
-
|
226
214
|
end # while
|
227
|
-
|
228
215
|
end
|
229
216
|
|
230
|
-
## ---------------------------------------------------------------------
|
231
217
|
def parse_rule(st)
|
232
218
|
st.strip!
|
233
219
|
return if st.size == 0 or st[0,1] == '#'
|
@@ -240,7 +226,6 @@ module Rexical
|
|
240
226
|
[rule_state, rule_expr, ss.post_match]
|
241
227
|
end
|
242
228
|
|
243
|
-
## ---------------------------------------------------------------------
|
244
229
|
def parse_action(st, lastmodes=[])
|
245
230
|
modes = lastmodes
|
246
231
|
mode = lastmodes[-1]
|
@@ -293,89 +278,83 @@ module Rexical
|
|
293
278
|
return modes
|
294
279
|
end
|
295
280
|
|
296
|
-
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
#
|
301
|
-
|
302
|
-
# from lexical definition file "%s".
|
303
|
-
#++
|
304
|
-
|
305
|
-
REX_EOT
|
306
|
-
|
307
|
-
REX_UTIL = <<-REX_EOT
|
308
|
-
require 'strscan'
|
281
|
+
REX_HEADER = <<-REX_EOT.gsub(/^ {6}/, '')
|
282
|
+
#--
|
283
|
+
# DO NOT MODIFY!!!!
|
284
|
+
# This file is automatically generated by rex %s
|
285
|
+
# from lexical definition file "%s".
|
286
|
+
#++
|
309
287
|
|
310
|
-
|
288
|
+
REX_EOT
|
311
289
|
|
312
|
-
|
313
|
-
|
314
|
-
attr_accessor :state
|
290
|
+
REX_UTIL = <<-REX_EOT
|
291
|
+
require 'strscan'
|
315
292
|
|
316
|
-
|
317
|
-
@ss = StringScanner.new(str)
|
318
|
-
@lineno = 1
|
319
|
-
@state = nil
|
320
|
-
end
|
293
|
+
class ScanError < StandardError ; end
|
321
294
|
|
322
|
-
|
323
|
-
|
324
|
-
|
295
|
+
attr_reader :lineno
|
296
|
+
attr_reader :filename
|
297
|
+
attr_accessor :state
|
325
298
|
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
330
|
-
|
299
|
+
def scan_setup(str)
|
300
|
+
@ss = StringScanner.new(str)
|
301
|
+
@lineno = 1
|
302
|
+
@state = nil
|
303
|
+
end
|
331
304
|
|
332
|
-
|
333
|
-
|
334
|
-
|
335
|
-
scan_setup(f.read)
|
336
|
-
end
|
337
|
-
end
|
305
|
+
def action
|
306
|
+
yield
|
307
|
+
end
|
338
308
|
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
309
|
+
def scan_str(str)
|
310
|
+
scan_setup(str)
|
311
|
+
do_parse
|
312
|
+
end
|
313
|
+
alias :scan :scan_str
|
343
314
|
|
344
|
-
|
315
|
+
def load_file( filename )
|
316
|
+
@filename = filename
|
317
|
+
File.open(filename, "r") do |f|
|
318
|
+
scan_setup(f.read)
|
319
|
+
end
|
320
|
+
end
|
345
321
|
|
346
|
-
|
322
|
+
def scan_file( filename )
|
323
|
+
load_file(filename)
|
324
|
+
do_parse
|
325
|
+
end
|
347
326
|
|
348
|
-
|
349
|
-
exit if ARGV.size != 1
|
350
|
-
filename = ARGV.shift
|
351
|
-
rex = %s.new
|
352
|
-
begin
|
353
|
-
rex.load_file filename
|
354
|
-
while token = rex.next_token
|
355
|
-
p token
|
356
|
-
end
|
357
|
-
rescue
|
358
|
-
$stderr.printf %s, rex.filename, rex.lineno, $!.message
|
359
|
-
end
|
360
|
-
end
|
361
|
-
REX_EOT
|
327
|
+
REX_EOT
|
362
328
|
|
363
|
-
|
329
|
+
REX_STUB = <<-REX_EOT
|
364
330
|
|
331
|
+
if __FILE__ == $0
|
332
|
+
exit if ARGV.size != 1
|
333
|
+
filename = ARGV.shift
|
334
|
+
rex = %s.new
|
335
|
+
begin
|
336
|
+
rex.load_file filename
|
337
|
+
while token = rex.next_token
|
338
|
+
p token
|
339
|
+
end
|
340
|
+
rescue
|
341
|
+
$stderr.printf %s, rex.filename, rex.lineno, $!.message
|
342
|
+
end
|
343
|
+
end
|
344
|
+
REX_EOT
|
365
345
|
|
366
346
|
def scanner_io
|
367
347
|
unless scanner_file = @opt['--output-file']
|
368
348
|
scanner_file = grammar_file + ".rb"
|
369
349
|
end
|
370
|
-
|
350
|
+
File.open(scanner_file, 'wb')
|
371
351
|
end
|
372
352
|
private :scanner_io
|
373
353
|
|
374
354
|
def write_scanner f = scanner_io
|
375
|
-
## scan flag
|
376
355
|
flag = ""
|
377
356
|
flag += "i" if @opt['--ignorecase']
|
378
|
-
|
357
|
+
|
379
358
|
f.printf REX_HEADER, Rexical::VERSION, grammar_file
|
380
359
|
|
381
360
|
unless @opt['--independent']
|
@@ -391,120 +370,127 @@ REX_EOT
|
|
391
370
|
f.puts "class #{@class_name} < Racc::Parser"
|
392
371
|
end
|
393
372
|
|
394
|
-
## utility method
|
395
373
|
f.print REX_UTIL
|
396
374
|
|
397
|
-
|
375
|
+
eos_check = @opt["--matcheos"] ? "" : "return if @ss.eos?"
|
398
376
|
|
377
|
+
## scanner method
|
399
378
|
f.print <<-REX_EOT
|
400
379
|
|
401
|
-
|
402
|
-
|
403
|
-
|
404
|
-
# skips empty actions
|
405
|
-
until token = _next_token or @ss.eos?; end
|
406
|
-
token
|
407
|
-
end
|
408
|
-
|
409
|
-
def _next_token
|
410
|
-
text = @ss.peek(1)
|
411
|
-
@lineno += 1 if text == "\\n"
|
412
|
-
token = case @state
|
413
|
-
REX_EOT
|
380
|
+
def next_token
|
381
|
+
#{eos_check}
|
414
382
|
|
415
|
-
|
416
|
-
|
417
|
-
|
418
|
-
|
383
|
+
# skips empty actions
|
384
|
+
until token = _next_token or @ss.eos?; end
|
385
|
+
token
|
386
|
+
end
|
387
|
+
|
388
|
+
def _next_token
|
389
|
+
text = @ss.peek(1)
|
390
|
+
@lineno += 1 if text == "\\n"
|
391
|
+
token = case @state
|
419
392
|
REX_EOT
|
420
|
-
rules.each do |rule|
|
421
|
-
exclusive_state, start_state, rule_expr, rule_action = *rule
|
422
|
-
if es == exclusive_state
|
423
393
|
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
394
|
+
exclusive_states.each do |es|
|
395
|
+
if es.nil?
|
396
|
+
f.printf <<-REX_EOT
|
397
|
+
when #{(["nil"] + rules.collect{ |rule| rule[1].nil? ? "nil" : rule[1] }).uniq.join(', ')}
|
398
|
+
REX_EOT
|
399
|
+
else
|
400
|
+
f.printf <<-REX_EOT
|
401
|
+
when #{es}
|
402
|
+
REX_EOT
|
403
|
+
end
|
404
|
+
f.printf <<-REX_EOT
|
405
|
+
case
|
406
|
+
REX_EOT
|
429
407
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
when (text = @ss.scan(/#{rule_expr}/#{flag}))
|
434
|
-
action #{rule_action}
|
408
|
+
rules.each do |rule|
|
409
|
+
exclusive_state, start_state, rule_expr, rule_action = *rule
|
410
|
+
if es == exclusive_state
|
435
411
|
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
|
440
|
-
|
441
|
-
|
442
|
-
|
412
|
+
if rule_action
|
413
|
+
if start_state
|
414
|
+
f.print <<-REX_EOT
|
415
|
+
when((state == #{start_state}) and (text = @ss.scan(/#{rule_expr}/#{flag})))
|
416
|
+
action #{rule_action}
|
417
|
+
|
418
|
+
REX_EOT
|
419
|
+
else
|
420
|
+
f.print <<-REX_EOT
|
421
|
+
when (text = @ss.scan(/#{rule_expr}/#{flag}))
|
422
|
+
action #{rule_action}
|
443
423
|
|
444
|
-
|
424
|
+
REX_EOT
|
425
|
+
end
|
445
426
|
else
|
446
|
-
|
447
|
-
|
448
|
-
|
427
|
+
if start_state
|
428
|
+
f.print <<-REX_EOT
|
429
|
+
when (@state == #{start_state}) && (text = @ss.scan(/#{rule_expr}/#{flag}))
|
430
|
+
;
|
431
|
+
|
432
|
+
REX_EOT
|
433
|
+
else
|
434
|
+
f.print <<-REX_EOT
|
435
|
+
when (text = @ss.scan(/#{rule_expr}/#{flag}))
|
436
|
+
;
|
449
437
|
|
450
|
-
|
438
|
+
REX_EOT
|
439
|
+
end
|
451
440
|
end
|
452
|
-
end
|
453
441
|
|
442
|
+
end
|
443
|
+
end # rules.each
|
444
|
+
|
445
|
+
if @opt["--matcheos"]
|
446
|
+
eos_check = <<-REX_EOT
|
447
|
+
when @@ss.scan(/$/)
|
448
|
+
;
|
449
|
+
REX_EOT
|
450
|
+
else
|
451
|
+
eos_check = ""
|
454
452
|
end
|
455
|
-
end
|
456
|
-
f.print <<-REX_EOT
|
457
|
-
else
|
458
|
-
text = @ss.string[@ss.pos .. -1]
|
459
|
-
raise ScanError, "can not match: '" + text + "'"
|
460
|
-
end # if
|
461
453
|
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
454
|
+
f.print <<-REX_EOT
|
455
|
+
#{eos_check}
|
456
|
+
else
|
457
|
+
text = @ss.string[@ss.pos .. -1]
|
458
|
+
raise ScanError, "can not match: '" + text + "'"
|
459
|
+
end # if
|
460
|
+
|
461
|
+
REX_EOT
|
462
|
+
end # exclusive_states.each
|
463
|
+
|
470
464
|
f.print <<-REX_EOT
|
471
|
-
|
465
|
+
else
|
466
|
+
raise ScanError, "undefined state: '" + state.to_s + "'"
|
467
|
+
end # case state
|
472
468
|
REX_EOT
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
469
|
+
|
470
|
+
if @opt['--debug']
|
471
|
+
f.print <<-REX_EOT
|
472
|
+
p token
|
473
|
+
REX_EOT
|
474
|
+
end
|
475
|
+
|
476
|
+
f.print <<-REX_EOT
|
477
|
+
token
|
478
|
+
end # def _next_token
|
477
479
|
|
478
480
|
REX_EOT
|
479
481
|
|
480
|
-
## inner method
|
481
482
|
@scanner_inner.each_line do |s|
|
482
483
|
f.print s
|
483
484
|
end
|
484
485
|
f.puts "end # class"
|
485
486
|
|
486
|
-
## footer
|
487
487
|
@scanner_footer.each_line do |s|
|
488
488
|
f.print s
|
489
|
-
end
|
489
|
+
end
|
490
490
|
|
491
|
-
## stub main
|
492
491
|
f.printf REX_STUB, @class_name, '"%s:%d:%s\n"' if @opt['--stub']
|
493
492
|
f.close
|
494
493
|
|
495
|
-
end ## def
|
496
|
-
end ## class
|
497
|
-
end ## module
|
498
|
-
|
499
|
-
|
500
|
-
## ---------------------------------------------------------------------
|
501
|
-
## test
|
502
|
-
|
503
|
-
if __FILE__ == $0
|
504
|
-
rex = Rexical::Generator.new(nil)
|
505
|
-
rex.grammar_file = "sample.rex"
|
506
|
-
rex.read_grammar
|
507
|
-
rex.parse
|
508
|
-
rex.write_scanner
|
509
|
-
end
|
510
|
-
|
494
|
+
end ## def write_scanner
|
495
|
+
end ## class Generator
|
496
|
+
end ## module Rexical
|
data/lib/rexical/rexcmd.rb
CHANGED
@@ -22,6 +22,7 @@ o -s --stub - append stub code for debug
|
|
22
22
|
o -i --ignorecase - ignore char case
|
23
23
|
o -C --check-only - syntax check only
|
24
24
|
o - --independent - independent mode
|
25
|
+
o - --matcheos - allow match against end of string
|
25
26
|
o -d --debug - print debug information
|
26
27
|
o -h --help - print this message and quit
|
27
28
|
o - --version - print version and quit
|
@@ -65,7 +66,8 @@ EOT
|
|
65
66
|
@cmd = File.basename($0, ".rb")
|
66
67
|
tmp = OPTIONS.lines.collect do |line|
|
67
68
|
next if /\A\s*\z/ === line
|
68
|
-
disp, sopt, lopt, takearg, doc = line.strip.split(/\s+/, 5)
|
69
|
+
# disp, sopt, lopt, takearg, doc
|
70
|
+
_, sopt, lopt, takearg, _ = line.strip.split(/\s+/, 5)
|
69
71
|
a = []
|
70
72
|
a.push lopt unless lopt == '-'
|
71
73
|
a.push sopt unless sopt == '-'
|
@@ -83,7 +85,7 @@ EOT
|
|
83
85
|
"#{@cmd}: #{name} given twice" if @opt.key? name
|
84
86
|
@opt[name] = arg.empty? ? true : arg
|
85
87
|
end
|
86
|
-
rescue GetoptLong::
|
88
|
+
rescue GetoptLong::AmbiguousOption, GetoptLong::InvalidOption,
|
87
89
|
GetoptLong::MissingArgument, GetoptLong::NeedlessArgument
|
88
90
|
usage $!.message
|
89
91
|
end
|
data/lib/rexical.rb
CHANGED
@@ -1,8 +1,3 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
module Rexical
|
5
|
-
VERSION = "1.0.5"
|
6
|
-
Copyright = 'Copyright (c) 2005-2006 ARIMA Yasuhiro'
|
7
|
-
Mailto = 'arima.yasuhiro@nifty.com'
|
8
|
-
end
|
1
|
+
require_relative "rexical/generator"
|
2
|
+
require_relative "rexical/rexcmd"
|
3
|
+
require_relative "rexical/version"
|
data/sample/calc3.tab.rb
CHANGED
data/sample/error1.rex
CHANGED
data/sample/error2.rex
CHANGED