rltk 1.1.0 → 1.2.0
- data/README +6 -2
- data/Rakefile +28 -48
- data/lib/rltk/ast.rb +36 -12
- data/lib/rltk/cfg.rb +2 -2
- data/lib/rltk/lexer.rb +101 -87
- data/lib/rltk/parser.rb +1 -1
- data/lib/rltk/version.rb +8 -0
- data/test/tc_lexer.rb +11 -0
- metadata +65 -9
data/README
CHANGED
@@ -132,6 +132,10 @@ A RLTK::Lexer may be told to select either the first substring that is found to
 	...
 	end
 
+=== Match Data
+
+Because it isn't RLTK's job to tell you how to write lexers and parsers, the MatchData object from a pattern match is available inside the Lexer::Environment object via the +match+ accessor.
+
 == Parsers
 
 To create a parser using RLTK simply subclass RLTK::Parser, define the productions of the grammar you wish to parse, and call +finalize+. During finalization RLTK will build an LALR(1) parsing table, which may contain conflicts that can't be resolved with LALR(1) lookahead sets or precedence/associativity information. Traditionally, when parser generators such as *YACC* encounter conflicts during parsing table generation they will resolve shift/reduce conflicts in favor of shifts and reduce/reduce conflicts in favor of the production that was defined first. This means that the generated parsers can't handle ambiguous grammars.

@@ -222,7 +226,7 @@ The parsing environment is the context in which the proc objects associated with
 
 === Instantiating Parsers
 
-In addition to using the RLTK::Parser::parse class method you may also instantiate parser objects. The only difference then is that the parsing environment used between subsequent calls to +object.
+In addition to using the RLTK::Parser::parse class method you may also instantiate parser objects. The only difference then is that the parsing environment used between subsequent calls to +object.parse+ is the same object, and therefor allows you to keep persistent state.
 
 === Finalization Options
 

@@ -259,7 +263,7 @@ Calls to RLTK::Parser::ParserCore.parse may raise one of four exceptions:
 
 === Error Productions
 
-<b>Warning
+<b>Warning: this is the lest tested feature of RLTK. If you encounter any problems while using it, please let me know so I can fix any bugs as soon as possible.</b>
 
 When an RLTK parser encounters a token for which there are no more valid tokens (and it is on the last parse stack / possible parse-tree path) it will enter error handling mode. In this mode the parser pops states and input off of the parse stack (the parser is a pushdown automaton after all) until it finds a state that has a shift action for the +ERROR+ terminal. A dummy +ERROR+ terminal is then placed onto the parse stack and the shift action is taken. This error token will have the position information of the token that caused the parser to enter error handling mode.
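The new +match+ accessor described in the README addition above can be seen in use in the MatchDataLexer test added to test/tc_lexer.rb further down in this diff. As an illustration only (the class name and token types below are hypothetical, not part of the gem), a rule's action block can pull capture groups out of the MatchData object:

    require 'rltk/lexer'

    class AssignmentLexer < RLTK::Lexer
      # Capture groups from the rule's pattern are reachable through the
      # Environment's +match+ accessor inside the action block.
      rule(/([a-z]+)\s*=\s*([0-9]+)/) { [:ASSIGN, [match[1], match[2]]] }

      rule(/\s/)  # No block: the matched whitespace is discarded.
    end

    # AssignmentLexer.lex('x = 42') should produce one :ASSIGN token whose
    # value is ['x', '42'], followed by the :EOS token the lexer appends.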
data/Rakefile
CHANGED
@@ -8,60 +8,40 @@
 ##############
 
 require 'rake/testtask'
-require '
-require 'rdoc/task'
+require 'bundler'
 
-
-
-	t.main = 'README'
-	t.rdoc_dir = 'doc'
+begin
+	require 'rdoc/task'
 
-
-
-
-
-
-
-
+	RDoc::Task.new do |t|
+		t.title = 'The Ruby Language Toolkit'
+		t.main = 'README'
+		t.rdoc_dir = 'doc'
+
+		t.rdoc_files.include('README', 'lib/*.rb', 'lib/rltk/*.rb', 'lib/rltk/**/*.rb')
+	end
 
-
-
-# get over-ridden.
-task :test do
-	exec "ruby -C \"test\" -e \"require 'ts_rltk.rb'\""
+rescue LoadError
+	warn 'RDoc is not installed.'
 end
 
-
-
-
-
-
-
-
-	s.description =
-		'The Ruby Language Toolkit provides classes for creating' +
-		'context-free grammars, lexers, parsers, and abstract syntax trees.'
-
-	s.files = [
-		'LICENSE',
-		'AUTHORS',
-		'README',
-		'Rakefile',
-		] +
-		Dir.glob('lib/rltk/**/*.rb')
-
-
-	s.require_path = 'lib'
-
-	s.author = 'Chris Wailes'
-	s.email = 'chris.wailes@gmail.com'
-	s.homepage = 'http://github.com/chriswailes/RLTK'
-	s.license = 'University of Illinois/NCSA Open Source License'
-
-	s.test_files = Dir.glob('test/tc_*.rb')
+begin
+	require 'rcov/rcovtask'
+
+	Rcov::RcovTask.new do |t|
+		t.libs << 'test'
+		t.rcov_opts << '--exclude gems,ruby'
+		t.test_files = FileList['test/tc_*.rb']
 	end
+
+rescue LoadError
+	warn 'Rcov not installed.'
 end
 
-
-	t.
+Rake::TestTask.new do |t|
+	t.libs << 'test'
+	t.loader = :testrb
+	t.test_files = FileList['test/ts_rltk.rb']
 end
+
+Bundler::GemHelper.install_tasks
data/lib/rltk/ast.rb
CHANGED
@@ -90,26 +90,50 @@ module RLTK # :nodoc:
 			end
 
 			if type.is_a?(Class)
-
-
-
+				if set_parent
+					define_method((name.to_s + '=').to_sym) do |value|
+						if value.is_a?(type) or value == nil
+							self.instance_variable_set(ivar_name, value)
 
-
-
-
+							value.parent = self if value
+						else
+							raise TypeMismatch.new(type, value.class)
+						end
+					end
+
+				else
+					define_method((name.to_s + '=').to_sym) do |value|
+						if value.is_a?(type) or value == nil
+							self.instance_variable_set(ivar_name, value)
+
+						else
+							raise TypeMismatch.new(type, value.class)
+						end
 					end
 				end
 
 			else
 				type = type.first
 
-
-
-
+				if set_parent
+					define_method((name.to_s + '=').to_sym) do |value|
+						if value.inject(true) { |m, o| m and o.is_a?(type) }
+							self.instance_variable_set(ivar_name, value)
 
-
-
-
+							value.each { |c| c.parent = self }
+						else
+							raise TypeMismatch.new(type, value.class)
+						end
+					end
+
+				else
+					define_method((name.to_s + '=').to_sym) do |value|
+						if value.inject(true) { |m, o| m and o.is_a?(type) }
+							self.instance_variable_set(ivar_name, value)
+
+						else
+							raise TypeMismatch.new(type, value.class)
+						end
 					end
 				end
 
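The setters generated by the define_method calls above are what back RLTK::ASTNode's typed child declarations. The sketch below is illustrative only: the +child+ class method and the example classes are assumed from the ASTNode API and are not part of this diff.

    require 'rltk/ast'

    class Expression < RLTK::ASTNode; end

    class Addition < Expression
      # Assumed API: +child+ declares a typed child and generates a setter
      # like the define_method blocks above.  Assigning a non-Expression
      # value raises TypeMismatch; assigning an Expression also sets the
      # child's +parent+ when the set_parent branch is used.
      child :left,  Expression
      child :right, Expression
    end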
data/lib/rltk/cfg.rb
CHANGED
@@ -1,11 +1,11 @@
+# encoding: utf-8
+
 # Author:      Chris Wailes <chris.wailes@gmail.com>
 # Project:     Ruby Language Toolkit
 # Date:        2011/03/24
 # Description: This file contains the a class representing a context-free
 #              grammar.
 
-# encoding: utf-8
-
 ############
 # Requires #
 ############
data/lib/rltk/lexer.rb
CHANGED
@@ -113,106 +113,108 @@ module RLTK # :nodoc:
 			# return the array of tokens generated by the lexer with a token
 			# of type EOS (End of Stream) appended to the end.
 			def lex(string, env, file_name = nil)
-
-
+				# Offset from start of stream.
+				stream_offset = 0
+
+				# Offset from the start of the line.
+				line_offset = 0
+				line_number = 1
 
-
-
-
-
-
-
-
-
-
+				# Empty token list.
+				tokens = Array.new
+
+				# The scanner.
+				scanner = StringScanner.new(string)
+
+				# Start scanning the input string.
+				until scanner.eos?
+					match = nil
 
-					#
-
-
-
-
-
-
-
-
-
-
-
-						if not match or match.first.length < txt.length
-							match = [txt, rule]
-
-							break if @match_type == :first
-						end
+					# If the match_type is set to :longest all of the
+					# rules for the current state need to be scanned
+					# and the longest match returned. If the
+					# match_type is :first, we only need to scan until
+					# we find a match.
+					@rules[env.state].each do |rule|
+						if (rule.flags - env.flags).empty?
+							if txt = scanner.check(rule.pattern)
+								if not match or match.first.length < txt.length
+									match = [txt, rule]
+
+									break if @match_type == :first
 								end
 							end
 						end
+					end
+
+					if match
+						rule = match.last
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-						if (newlines = txt.count("\n")) > 0
-							line_number += newlines
-							line_offset = 0
-						else
-							line_offset += txt.length()
-						end
+						txt = scanner.scan(rule.pattern)
+						type, value = env.rule_exec(rule.pattern.match(txt), txt, &rule.action)
+
+						if type
+							pos = StreamPosition.new(stream_offset, line_number, line_offset, txt.length, file_name)
+							tokens << Token.new(type, value, pos)
+						end
+
+						# Advance our stat counters.
+						stream_offset += txt.length
+
+						if (newlines = txt.count("\n")) > 0
+							line_number += newlines
+							line_offset = 0
 						else
-
-							raise(error, 'Unable to match string with any of the given rules')
+							line_offset += txt.length()
 						end
+					else
+						error = LexingError.new(stream_offset, line_number, line_offset, scanner.post_match)
+						raise(error, 'Unable to match string with any of the given rules')
 					end
-
-				return tokens << Token.new(:EOS)
-			end
-
-			# A wrapper function that calls ParserCore.lex on the
-			# contents of a file.
-			def lex_file(file_name, evn)
-				File.open(file_name, 'r') { |f| lex(f.read, env, file_name) }
 				end
 
-
-
-
-
-
+				return tokens << Token.new(:EOS)
+			end
+
+			# A wrapper function that calls ParserCore.lex on the
+			# contents of a file.
+			def lex_file(file_name, env)
+				File.open(file_name, 'r') { |f| lex(f.read, env, file_name) }
+			end
+
+			# Used to tell a lexer to use the first match found instead
+			# of the longest match found.
+			def match_first
+				@match_type = :first
+			end
+
+			# This method is used to define a new lexing rule. The
+			# first argument is the regular expression used to match
+			# substrings of the input. The second argument is the state
+			# to which the rule belongs. Flags that need to be set for
+			# the rule to be considered are specified by the third
+			# argument. The last argument is a block that returns a
+			# type and value to be used in constructing a Token. If no
+			# block is specified the matched substring will be
+			# discarded and lexing will continue.
+			def rule(pattern, state = :default, flags = [], &action)
+				# If no action is given we will set it to an empty
+				# action.
+				action ||= Proc.new() {}
 
-
-			# first argument is the regular expression used to match
-			# substrings of the input. The second argument is the state
-			# to which the rule belongs. Flags that need to be set for
-			# the rule to be considered are specified by the third
-			# argument. The last argument is a block that returns a
-			# type and value to be used in constructing a Token. If no
-			# block is specified the matched substring will be
-			# discarded and lexing will continue.
-			def rule(pattern, state = :default, flags = [], &action)
-				# If no action is given we will set it to an empty
-				# action.
-				action ||= Proc.new() {}
-
-				r = Rule.new(pattern, action, state, flags)
-
-				if state == :ALL then @rules.each_key { |k| @rules[k] << r } else @rules[state] << r end
-			end
+				pattern = Regexp.new(pattern) if pattern.is_a?(String)
 
-
+				r = Rule.new(pattern, action, state, flags)
 
-
-
-
-
+				if state == :ALL then @rules.each_key { |k| @rules[k] << r } else @rules[state] << r end
+			end
+
+			alias :r :rule
+
+			# Changes the starting state of the lexer.
+			def start(state)
+				@start_state = state
+			end
 		end
 
 		# All actions passed to LexerCore.rule are evaluated inside an

@@ -224,12 +226,24 @@ module RLTK # :nodoc:
 			# The flags currently set in this environment.
 			attr_reader :flags
 
+			# The Match object generated by a rule's regular expression.
+			attr_accessor :match
+
 			# Instantiates a new Environment object.
-			def initialize(start_state)
+			def initialize(start_state, match = nil)
 				@state = [start_state]
+				@match = match
 				@flags = Array.new
 			end
 
+			# This function will instance_exec a block for a rule after
+			# setting the match value.
+			def rule_exec(match, txt, &block)
+				self.match = match
+
+				self.instance_exec(txt, &block)
+			end
+
 			# Pops a state from the state stack.
 			def pop_state
 				@state.pop
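As a usage sketch of the rule machinery rewritten above (the lexer below is hypothetical and not part of the gem): +match_first+ switches the scan from the default longest-match behavior implemented in +lex+ to taking the first rule, in definition order, whose pattern matches.

    require 'rltk/lexer'

    class KeywordLexer < RLTK::Lexer
      match_first  # Stop at the first matching rule.

      rule(/if/)     { :IF }
      rule(/[a-z]+/) { |t| [:IDENT, t] }
      rule(/\s/)     # No block: whitespace is discarded.
    end

    # Under the default longest-match behavior 'iffy' would lex as
    # :IDENT('iffy'); with match_first it lexes as :IF followed by
    # :IDENT('fy'), because the /if/ rule is checked first.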
data/lib/rltk/parser.rb
CHANGED
@@ -1010,7 +1010,7 @@ module RLTK # :nodoc:
 					# Build the lookahead set.
 					@states.each do |state1|
 						if self.check_reachability(state1, state0, production.rhs)
-							lookahead |=
+							lookahead |= self.grammar_prime.follow_set("#{state1.id}_#{production.lhs}".to_sym)
 						end
 					end
 
data/lib/rltk/version.rb
ADDED
data/test/tc_lexer.rb
CHANGED
@@ -70,6 +70,10 @@ class StateLexer < RLTK::Lexer
 	rule(/./, :comment)
 end
 
+class MatchDataLexer < RLTK::Lexer
+	rule(/a(b*)(c+)/) { [:FOO, match[1,2]] }
+end
+
 class LexerTester < Test::Unit::TestCase
 	def test_calc
 		expected =

@@ -200,6 +204,13 @@ class LexerTester < Test::Unit::TestCase
 		assert_equal(expected, actual)
 	end
 
+	def test_match_data
+		expected = [RLTK::Token.new(:FOO, ['', 'ccc']), RLTK::Token.new(:EOS)]
+		actual = MatchDataLexer.lex('accc')
+
+		assert_equal(expected, actual)
+	end
+
 	def test_state
 		expected =
 			[
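A brief aside on the expected value in the new test_match_data test above (plain Ruby behavior, not RLTK-specific): MatchData#[] called with a start index and a length slices the capture groups, which is why matching 'accc' against /a(b*)(c+)/ yields ['', 'ccc'].

    m = /a(b*)(c+)/.match('accc')
    m[1, 2]  #=> ["", "ccc"]  (capture 1 is the empty b-run, capture 2 is 'ccc')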
metadata
CHANGED
@@ -1,13 +1,13 @@
 --- !ruby/object:Gem::Specification
 name: rltk
 version: !ruby/object:Gem::Version
-  hash:
+  hash: 31
   prerelease:
   segments:
   - 1
-  -
+  - 2
   - 0
-  version: 1.
+  version: 1.2.0
 platform: ruby
 authors:
 - Chris Wailes

@@ -15,11 +15,66 @@ autorequire:
 bindir: bin
 cert_chain: []
 
-date:
-dependencies:
-
-
-
+date: 2012-03-08 00:00:00 Z
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: builder
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id001
+- !ruby/object:Gem::Dependency
+  name: rake
+  prerelease: false
+  requirement: &id002 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id002
+- !ruby/object:Gem::Dependency
+  name: rcov
+  prerelease: false
+  requirement: &id003 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id003
+- !ruby/object:Gem::Dependency
+  name: rdoc
+  prerelease: false
+  requirement: &id004 !ruby/object:Gem::Requirement
+    none: false
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        hash: 3
+        segments:
+        - 0
+        version: "0"
+  type: :development
+  version_requirements: *id004
+description: The Ruby Language Toolkit provides classes for creating context-free grammars, lexers, parsers, and abstract syntax trees.
+email: chris.wailes+rltk@gmail.com
 executables: []
 
 extensions: []

@@ -41,6 +96,7 @@ files:
 - lib/rltk/parsers/postfix_calc.rb
 - lib/rltk/parsers/infix_calc.rb
 - lib/rltk/parsers/prefix_calc.rb
+- lib/rltk/version.rb
 - test/tc_ast.rb
 - test/tc_token.rb
 - test/tc_cfg.rb

@@ -75,7 +131,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
 requirements: []
 
 rubyforge_project:
-rubygems_version: 1.8.
+rubygems_version: 1.8.11
 signing_key:
 specification_version: 3
 summary: The Ruby Language Toolkit