rltk 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/AUTHORS +1 -0
- data/LICENSE +27 -0
- data/README +386 -0
- data/Rakefile +67 -0
- data/lib/rltk/ast.rb +264 -0
- data/lib/rltk/cfg.rb +491 -0
- data/lib/rltk/lexer.rb +298 -0
- data/lib/rltk/lexers/calculator.rb +41 -0
- data/lib/rltk/lexers/ebnf.rb +40 -0
- data/lib/rltk/parser.rb +1354 -0
- data/lib/rltk/parsers/infix_calc.rb +43 -0
- data/lib/rltk/parsers/postfix_calc.rb +34 -0
- data/lib/rltk/parsers/prefix_calc.rb +34 -0
- data/lib/rltk/token.rb +66 -0
- data/test/tc_ast.rb +85 -0
- data/test/tc_cfg.rb +149 -0
- data/test/tc_lexer.rb +217 -0
- data/test/tc_parser.rb +275 -0
- data/test/tc_token.rb +34 -0
- metadata +87 -0
data/lib/rltk/lexer.rb
ADDED
@@ -0,0 +1,298 @@
|
|
1
|
+
# Author: Chris Wailes <chris.wailes@gmail.com>
|
2
|
+
# Project: Ruby Language Toolkit
|
3
|
+
# Date: 2011/01/17
|
4
|
+
# Description: This file contains the base class for lexers that use RLTK.
|
5
|
+
|
6
|
+
############
|
7
|
+
# Requires #
|
8
|
+
############
|
9
|
+
|
10
|
+
# Standard Library
|
11
|
+
require 'strscan'
|
12
|
+
|
13
|
+
# Ruby Language Toolkit
|
14
|
+
require 'rltk/token'
|
15
|
+
|
16
|
+
#######################
|
17
|
+
# Classes and Modules #
|
18
|
+
#######################
|
19
|
+
|
20
|
+
module RLTK # :nodoc:
|
21
|
+
|
22
|
+
# A LexingError exception is raised when an input stream contains a
|
23
|
+
# substring that isn't matched by any of a lexer's rules.
|
24
|
+
# A LexingError is raised when an input stream contains a substring that
# isn't matched by any of a lexer's rules.
#
# NOTE(review): this inherits from Exception rather than StandardError,
# so a bare `rescue` will NOT catch it.  Kept as-is for backward
# compatibility with existing callers.
class LexingError < Exception
	# Character offset from the start of the input stream at which
	# lexing failed.
	attr_reader :stream_offset

	# 1-based line number at which lexing failed.
	attr_reader :line_number

	# Character offset from the start of the current line.
	attr_reader :line_offset

	# The unmatched remainder of the input stream.
	attr_reader :remainder

	# Records where in the stream lexing failed and what input was left.
	# (Readers added; the original stored these without exposing them.)
	def initialize(stream_offset, line_number, line_offset, remainder)
		@stream_offset = stream_offset
		@line_number   = line_number
		@line_offset   = line_offset
		@remainder     = remainder
	end

	# Returns the default exception message followed by the unmatched
	# remainder of the input.
	def to_s
		"#{super()}: #{@remainder}"
	end
end
|
36
|
+
|
37
|
+
# The Lexer class may be sub-classed to produce new lexers. These lexers
|
38
|
+
# have a lot of features, and are described in the main documentation.
|
39
|
+
class Lexer
|
40
|
+
|
41
|
+
# Called when the Lexer class is sub-classed, this method adds a
|
42
|
+
# LexerCore to the new class, and installs some needed class and
|
43
|
+
# instance methods.
|
44
|
+
# Called when the Lexer class is sub-classed.  Installs a fresh
# LexerCore on the new class along with the class- and instance-level
# helpers that delegate lexing to that core.
def Lexer.inherited(klass)
	klass.class_exec do
		@core = LexerCore.new

		# Returns this class's LexerCore object.
		def self.core
			@core
		end

		# Lexes the given string using a newly instantiated environment.
		def self.lex(str)
			@core.lex(str, self::Environment.new(@core.start_state))
		end

		# Lexes the contents of the given file using a newly
		# instantiated environment.
		def self.lex_file(file_name)
			@core.lex_file(file_name, self::Environment.new(@core.start_state))
		end

		# Routes DSL calls made on the subclass (rule, start,
		# match_first, ...) to the LexerCore object.
		def self.method_missing(method, *args, &proc)
			@core.send(method, *args, &proc)
		end

		# Keeps respond_to? consistent with the method_missing
		# delegation above.  (The original omitted this, so
		# Klass.respond_to?(:rule) was falsely negative.)
		def self.respond_to_missing?(method, include_private = false)
			@core.respond_to?(method, include_private) || super
		end

		# Instantiates a new lexer and creates an environment to be
		# used for subsequent calls.
		def initialize
			@env = self.class::Environment.new(self.class.core.start_state)
		end

		# Returns the environment used by an instantiated lexer.
		def env
			@env
		end

		# Lexes a string using the encapsulated environment.
		def lex(string)
			self.class.core.lex(string, @env)
		end

		# Lexes a file using the encapsulated environment.
		def lex_file(file_name)
			self.class.core.lex_file(file_name, @env)
		end
	end
end
|
93
|
+
|
94
|
+
#################
|
95
|
+
# Inner Classes #
|
96
|
+
#################
|
97
|
+
|
98
|
+
# The LexerCore class provides most of the functionality of the Lexer
|
99
|
+
# class. A LexerCore is instantiated for each subclass of Lexer,
|
100
|
+
# thereby allowing multiple lexers to be defined inside a single Ruby
|
101
|
+
# program.
|
102
|
+
# The LexerCore class provides most of the functionality of the Lexer
# class.  A LexerCore is instantiated for each subclass of Lexer,
# thereby allowing multiple lexers to be defined inside a single Ruby
# program.
class LexerCore
	# The state the lexer starts in (:default unless changed via #start).
	attr_reader :start_state

	# Instantiate a new LexerCore object with no rules, longest-match
	# semantics, and :default as the start state.
	def initialize
		@match_type  = :longest
		@rules       = Hash.new { |h, k| h[k] = Array.new }
		@start_state = :default
	end

	# Lex _string_, using _env_ as the environment.  Returns the array
	# of tokens generated by the lexer with a token of type EOS (End of
	# Stream) appended to the end.  Raises a LexingError when no rule in
	# the current state matches at the current position.
	def lex(string, env, file_name = nil)
		# Character offset from the start of the stream.
		stream_offset = 0

		# Character offset from the start of the current line, and the
		# 1-based line number.
		line_offset = 0
		line_number = 1

		tokens  = Array.new
		scanner = StringScanner.new(string)

		until scanner.eos?
			match = nil

			# When @match_type is :longest every rule for the current
			# state is checked and the longest match wins (ties go to
			# the earlier rule).  When it is :first, scanning stops at
			# the first matching rule.
			@rules[env.state].each do |rule|
				# A rule only applies when all of its flags are set in
				# the environment.
				if (rule.flags - env.flags).empty?
					if txt = scanner.check(rule.pattern)
						if not match or match.first.length < txt.length
							match = [txt, rule]

							break if @match_type == :first
						end
					end
				end
			end

			if match
				rule = match.last

				txt = scanner.scan(rule.pattern)

				# The action decides the token: it may return a type, a
				# [type, value] pair, or nil to discard the match.
				type, value = env.instance_exec(txt, &rule.action)

				if type
					pos = StreamPosition.new(stream_offset, line_number, line_offset, txt.length, file_name)
					tokens << Token.new(type, value, pos)
				end

				# Advance our position counters.
				stream_offset += txt.length

				if (newlines = txt.count("\n")) > 0
					line_number += newlines

					# FIX: resume the column after the last newline in
					# the matched text.  The original reset this to 0,
					# which misreported positions whenever one match
					# contained characters after its last newline.
					line_offset = txt.length - (txt.rindex("\n") + 1)
				else
					line_offset += txt.length
				end
			else
				# FIX: report scanner.rest as the unmatched remainder.
				# The original used scanner.post_match, which is nil
				# when the failure occurs before any successful scan.
				error = LexingError.new(stream_offset, line_number, line_offset, scanner.rest)
				raise(error, 'Unable to match string with any of the given rules')
			end
		end

		return tokens << Token.new(:EOS)
	end

	# A wrapper function that calls LexerCore.lex on the contents of a
	# file, recording the file name in each token's StreamPosition.
	def lex_file(file_name, env)
		# FIX: the parameter was misspelled `evn` while the body read
		# `env`, so every call to this method raised NameError.
		File.open(file_name, 'r') { |f| lex(f.read, env, file_name) }
	end

	# Used to tell a lexer to use the first match found instead of the
	# longest match found.
	def match_first
		@match_type = :first
	end

	# Defines a new lexing rule.  _pattern_ is the regular expression
	# used to match substrings of the input; _state_ is the lexer state
	# the rule belongs to (:ALL adds it to every currently known state);
	# _flags_ must all be set in the environment for the rule to be
	# considered.  The block should return a type (and optionally a
	# value) used to construct a Token; with no block the matched
	# substring is discarded and lexing continues.
	def rule(pattern, state = :default, flags = [], &action)
		# With no action the match is silently discarded (the empty
		# action returns nil, which produces no token).
		action ||= Proc.new() {}

		r = Rule.new(pattern, action, state, flags)

		if state == :ALL then @rules.each_key { |k| @rules[k] << r } else @rules[state] << r end
	end

	alias :r :rule

	# Changes the starting state of the lexer.
	def start(state)
		@start_state = state
	end
end
|
217
|
+
|
218
|
+
# All actions passed to LexerCore.rule are evaluated inside an
|
219
|
+
# instance of the Environment class or its subclass (which must have
|
220
|
+
# the same name). This class provides functions for manipulating
|
221
|
+
# lexer state and flags.
|
222
|
+
# All rule actions are evaluated inside an instance of this class (or a
# subclass bearing the same name).  It maintains the lexer's state
# stack and its set of flags, and exposes helpers for manipulating both.
class Environment

	# The flags currently set in this environment.
	attr_reader :flags

	# Builds an environment whose state stack contains only
	# +start_state+ and whose flag set is empty.
	def initialize(start_state)
		@state = [start_state]
		@flags = []
	end

	# Discards the top entry of the state stack.  Always returns nil.
	def pop_state
		@state.pop
		nil
	end

	# Makes +new_state+ the current state by pushing it onto the state
	# stack.  Always returns nil.
	def push_state(new_state)
		@state.push(new_state)
		nil
	end

	# Replaces the top of the state stack with +new_state+.  Always
	# returns nil.
	def set_state(new_state)
		@state[@state.length - 1] = new_state
		nil
	end

	# The current state (the top of the state stack).
	def state
		@state.last
	end

	# Adds +flag+ to the flag set unless it is already present.  Always
	# returns nil.
	def set_flag(flag)
		@flags << flag unless @flags.include?(flag)
		nil
	end

	# Removes +flag+ from the flag set.  Always returns nil.
	def unset_flag(flag)
		@flags.delete(flag)
		nil
	end

	# Empties the flag set.  Always returns nil.
	def clear_flags
		@flags = []
		nil
	end
end
|
282
|
+
|
283
|
+
# The Rule class is used simply for data encapsulation.
|
284
|
+
# A simple value object bundling everything a lexer needs to know about
# one rule: the pattern to match, the action to run on a match, the
# state the rule belongs to, and the flags that must be set for it to
# be considered.
class Rule
	attr_reader :action, :pattern, :flags

	# Captures the rule's pattern, action, state, and flags.
	def initialize(pattern, action, state, flags)
		@pattern, @action, @state, @flags = pattern, action, state, flags
	end
end
|
297
|
+
end
|
298
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
# Author: Chris Wailes <chris.wailes@gmail.com>
|
2
|
+
# Project: Ruby Language Toolkit
|
3
|
+
# Date: 2011/03/04
|
4
|
+
# Description: This file contains a lexer for a simple calculator.
|
5
|
+
|
6
|
+
############
|
7
|
+
# Requires #
|
8
|
+
############
|
9
|
+
|
10
|
+
# Ruby Language Toolkit
|
11
|
+
require 'rltk/lexer'
|
12
|
+
|
13
|
+
#######################
|
14
|
+
# Classes and Modules #
|
15
|
+
#######################
|
16
|
+
|
17
|
+
module RLTK # :nodoc:
|
18
|
+
module Lexers # :nodoc:
|
19
|
+
|
20
|
+
# The Calculator lexer is a simple lexer for use with several of the
|
21
|
+
# provided parsers.
|
22
|
+
# A simple lexer for arithmetic expressions, used by the calculator
# parsers shipped with RLTK.  Each rule call registers a pattern with
# the LexerCore installed on this class; rules with no block discard
# the matched text.
class Calculator < Lexer

	#################
	# Default State #
	#################

	# Binary operators.
	rule(/\+/) { :PLS }
	rule(/-/) { :SUB }
	rule(/\*/) { :MUL }
	rule(/\//) { :DIV }

	# Grouping parentheses.
	rule(/\(/) { :LPAREN }
	rule(/\)/) { :RPAREN }

	# Integer literals; the token value is the parsed Integer.
	rule(/[0-9]+/) { |t| [:NUM, t.to_i] }

	# Whitespace: no block, so the match is discarded.
	rule(/\s/)
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# Author: Chris Wailes <chris.wailes@gmail.com>
|
2
|
+
# Project: Ruby Language Toolkit
|
3
|
+
# Date: 2011/01/20
|
4
|
+
# Description: This file contains a lexer for Extended Backus–Naur Form.
|
5
|
+
|
6
|
+
############
|
7
|
+
# Requires #
|
8
|
+
############
|
9
|
+
|
10
|
+
# Ruby Language Toolkit
|
11
|
+
require 'rltk/lexer'
|
12
|
+
|
13
|
+
#######################
|
14
|
+
# Classes and Modules #
|
15
|
+
#######################
|
16
|
+
|
17
|
+
module RLTK # :nodoc:
|
18
|
+
|
19
|
+
# The RLTK::Lexers module contains the lexers that are included as part of
|
20
|
+
# the RLTK project.
|
21
|
+
module Lexers
|
22
|
+
|
23
|
+
# The EBNF lexer is used by the RLTK::CFG class.
|
24
|
+
# The EBNF lexer is used by the RLTK::CFG class to tokenize grammar
# right-hand sides.  Lowercase identifiers become nonterminals,
# uppercase identifiers become terminals.
class EBNF < Lexer

	#################
	# Default State #
	#################

	# EBNF repetition/optionality operators; note the token types are
	# the operator symbols themselves (:*, :+, :'?').
	rule(/\*/) { :* }
	rule(/\+/) { :+ }
	rule(/\?/) { :'?' }

	# Identifiers: lowercase are nonterminals, uppercase are terminals;
	# the token value is the name as a Symbol.
	rule(/[a-z0-9_]+/) { |t| [:NONTERM, t.to_sym] }
	rule(/[A-Z0-9_]+/) { |t| [:TERM, t.to_sym] }

	# Whitespace: no block, so the match is discarded.
	rule(/\s/)
end
|
39
|
+
end
|
40
|
+
end
|