rltk 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/rltk/lexer.rb ADDED
@@ -0,0 +1,298 @@
1
+ # Author: Chris Wailes <chris.wailes@gmail.com>
2
+ # Project: Ruby Language Toolkit
3
+ # Date: 2011/01/17
4
+ # Description: This file contains the base class for lexers that use RLTK.
5
+
6
+ ############
7
+ # Requires #
8
+ ############
9
+
10
+ # Standard Library
11
+ require 'strscan'
12
+
13
+ # Ruby Language Toolkit
14
+ require 'rltk/token'
15
+
16
+ #######################
17
+ # Classes and Modules #
18
+ #######################
19
+
20
+ module RLTK # :nodoc:
21
+
22
# A LexingError exception is raised when an input stream contains a
# substring that isn't matched by any of a lexer's rules.
#
# FIX: subclass StandardError instead of Exception so that a bare
# `rescue` handles it and signal/exit exceptions are never masked.
class LexingError < StandardError
	# Expose the position data so error handlers can report exactly
	# where lexing failed.
	attr_reader :stream_offset	# Offset (in characters) from the start of the stream.
	attr_reader :line_number	# 1-based line number on which lexing failed.
	attr_reader :line_offset	# Offset from the start of that line.
	attr_reader :remainder	# The unmatched remainder of the input.

	# Instantiates a new LexingError, recording where in the stream
	# the failure occurred and what input was left unmatched.
	def initialize(stream_offset, line_number, line_offset, remainder)
		@stream_offset = stream_offset
		@line_number   = line_number
		@line_offset   = line_offset
		@remainder     = remainder
	end

	# Appends the unmatched remainder to the default message.
	def to_s
		"#{super()}: #{@remainder}"
	end
end
36
+
37
+ # The Lexer class may be sub-classed to produce new lexers. These lexers
38
+ # have a lot of features, and are described in the main documentation.
39
+ class Lexer
40
+
41
# Called when the Lexer class is sub-classed.  This hook installs a
# fresh LexerCore on the new class along with the class- and
# instance-level helper methods that delegate to it.
def Lexer.inherited(klass)
	# Preserve any other inherited-hook behavior up the chain.
	super

	klass.class_exec do
		@core = LexerCore.new

		# Returns this class's LexerCore object.
		def self.core
			@core
		end

		# Lexes the given string using a newly instantiated
		# environment.
		def self.lex(str)
			@core.lex(str, self::Environment.new(@core.start_state))
		end

		# Lexes the contents of the given file using a newly
		# instantiated environment.
		def self.lex_file(file_name)
			@core.lex_file(file_name, self::Environment.new(@core.start_state))
		end

		# Routes method calls on the new subclass to the LexerCore
		# object.  This is what makes the rule/r/start/match_first
		# DSL available at class-definition time.
		def self.method_missing(method, *args, &proc)
			@core.send(method, *args, &proc)
		end

		# FIX: keep respond_to? consistent with the method_missing
		# delegation above.
		def self.respond_to_missing?(method, include_private = false)
			@core.respond_to?(method, include_private) || super
		end

		# Instantiates a new lexer and creates an environment to be
		# used for subsequent calls.
		def initialize
			@env = self.class::Environment.new(self.class.core.start_state)
		end

		# Returns the environment used by an instantiated lexer.
		def env
			@env
		end

		# Lexes a string using the encapsulated environment.
		def lex(string)
			self.class.core.lex(string, @env)
		end

		# Lexes a file using the encapsulated environment.
		def lex_file(file_name)
			self.class.core.lex_file(file_name, @env)
		end
	end
end
93
+
94
+ #################
95
+ # Inner Classes #
96
+ #################
97
+
98
# The LexerCore class provides most of the functionality of the Lexer
# class. A LexerCore is instantiated for each subclass of Lexer,
# thereby allowing multiple lexers to be defined inside a single Ruby
# program.
class LexerCore
	# The state in which lexing begins (:default unless changed via #start).
	attr_reader :start_state

	# Instantiate a new LexerCore object.
	def initialize
		@match_type  = :longest
		# Each state lazily gets its own rule list.
		@rules       = Hash.new { |h, k| h[k] = Array.new }
		@start_state = :default
	end

	# Lex _string_, using _env_ as the environment. This method will
	# return the array of tokens generated by the lexer with a token
	# of type EOS (End of Stream) appended to the end.
	#
	# Raises a LexingError if no rule matches at the current position.
	def lex(string, env, file_name = nil)
		# Offset from start of stream.
		stream_offset = 0

		# Offset from the start of the line.
		line_offset = 0
		line_number = 1

		# Empty token list.
		tokens = Array.new

		# The scanner.
		scanner = StringScanner.new(string)

		# Start scanning the input string.
		until scanner.eos?
			match = nil

			# If the match_type is set to :longest all of the
			# rules for the current state need to be scanned
			# and the longest match returned. If the
			# match_type is :first, we only need to scan until
			# we find a match.
			@rules[env.state].each do |rule|
				# A rule only applies when all of its flags are set.
				if (rule.flags - env.flags).empty?
					if txt = scanner.check(rule.pattern)
						if not match or match.first.length < txt.length
							match = [txt, rule]

							break if @match_type == :first
						end
					end
				end
			end

			if match
				rule = match.last

				# Consume the matched text and run the rule's action in
				# the environment; the action returns [type, value].
				txt = scanner.scan(rule.pattern)
				type, value = env.instance_exec(txt, &rule.action)

				# Actions that return nil (e.g. whitespace rules)
				# produce no token.
				if type
					pos = StreamPosition.new(stream_offset, line_number, line_offset, txt.length, file_name)
					tokens << Token.new(type, value, pos)
				end

				# Advance our stat counters.
				stream_offset += txt.length

				if (newlines = txt.count("\n")) > 0
					line_number += newlines
					# FIX: the offset within the new line is the text
					# remaining after the last newline, not always 0.
					line_offset = txt.length - (txt.rindex("\n") + 1)
				else
					line_offset += txt.length
				end
			else
				# FIX: use scanner.rest (the unmatched remainder).
				# scanner.post_match is nil until a match has occurred.
				error = LexingError.new(stream_offset, line_number, line_offset, scanner.rest)
				raise(error, 'Unable to match string with any of the given rules')
			end
		end

		return tokens << Token.new(:EOS)
	end

	# A wrapper function that calls LexerCore.lex on the contents of a
	# file.
	#
	# FIX: the parameter was previously named +evn+ while the body
	# referenced +env+, raising a NameError whenever this method was
	# called.
	def lex_file(file_name, env)
		File.open(file_name, 'r') { |f| lex(f.read, env, file_name) }
	end

	# Used to tell a lexer to use the first match found instead
	# of the longest match found.
	def match_first
		@match_type = :first
	end

	# This method is used to define a new lexing rule. The
	# first argument is the regular expression used to match
	# substrings of the input. The second argument is the state
	# to which the rule belongs. Flags that need to be set for
	# the rule to be considered are specified by the third
	# argument. The last argument is a block that returns a
	# type and value to be used in constructing a Token. If no
	# block is specified the matched substring will be
	# discarded and lexing will continue.
	def rule(pattern, state = :default, flags = [], &action)
		# If no action is given we will set it to an empty
		# action.
		action ||= Proc.new {}

		r = Rule.new(pattern, action, state, flags)

		# :ALL rules apply to every state defined so far.
		if state == :ALL then @rules.each_key { |k| @rules[k] << r } else @rules[state] << r end
	end

	alias :r :rule

	# Changes the starting state of the lexer.
	def start(state)
		@start_state = state
	end
end
217
+
218
# All actions passed to LexerCore.rule are evaluated inside an
# instance of the Environment class or its subclass (which must have
# the same name).  This class provides functions for manipulating
# lexer state and flags.
class Environment

	# The flags currently set in this environment.
	attr_reader :flags

	# Instantiates a new Environment whose state stack contains only
	# _start_state_ and whose flag list is empty.
	def initialize(start_state)
		@state = [start_state]
		@flags = Array.new
	end

	# Pops a state from the state stack.  Always returns nil.
	def pop_state
		@state.pop
		nil
	end

	# Pushes a new state onto the state stack.  Always returns nil.
	def push_state(state)
		@state.push(state)
		nil
	end

	# Replaces the value on the top of the state stack.  Always
	# returns nil.
	def set_state(state)
		@state[-1] = state
		nil
	end

	# Returns the current (top-of-stack) state.
	def state
		@state.last
	end

	# Sets a flag in the current environment; duplicate flags are
	# ignored.  Always returns nil.
	def set_flag(flag)
		@flags << flag unless @flags.include?(flag)
		nil
	end

	# Unsets a flag in the current environment.  Always returns nil.
	def unset_flag(flag)
		@flags.delete(flag)
		nil
	end

	# Unsets all flags in the current environment.  Always returns
	# nil.
	def clear_flags
		@flags = Array.new
		nil
	end
end
282
+
283
# The Rule class is used simply for data encapsulation.
class Rule
	# The action, pattern, and flags are read back by LexerCore while
	# lexing; the state is recorded but has no public reader.
	attr_reader :action, :pattern, :flags

	# Instantiates a new Rule object, capturing the pattern to match,
	# the action block to run, the state the rule belongs to, and the
	# flags that must be set for it to apply.
	def initialize(pattern, action, state, flags)
		@pattern = pattern
		@action  = action
		@state   = state
		@flags   = flags
	end
end
297
+ end
298
+ end
@@ -0,0 +1,41 @@
1
+ # Author: Chris Wailes <chris.wailes@gmail.com>
2
+ # Project: Ruby Language Toolkit
3
+ # Date: 2011/03/04
4
+ # Description: This file contains a lexer for a simple calculator.
5
+
6
+ ############
7
+ # Requires #
8
+ ############
9
+
10
+ # Ruby Language Toolkit
11
+ require 'rltk/lexer'
12
+
13
+ #######################
14
+ # Classes and Modules #
15
+ #######################
16
+
17
module RLTK # :nodoc:
	module Lexers # :nodoc:

		# The Calculator lexer is a simple lexer for use with several of
		# the provided parsers.
		class Calculator < Lexer

			#################
			# Default State #
			#################

			# Arithmetic operators.
			rule(/\+/) { :PLS }
			rule(/-/)  { :SUB }
			rule(/\*/) { :MUL }
			rule(/\//) { :DIV }

			# Grouping.
			rule(/\(/) { :LPAREN }
			rule(/\)/) { :RPAREN }

			# Integer literals carry their numeric value.
			rule(/[0-9]+/) { |text| [:NUM, text.to_i] }

			# Whitespace is discarded (no action block given).
			rule(/\s/)
		end
	end
end
@@ -0,0 +1,40 @@
1
+ # Author: Chris Wailes <chris.wailes@gmail.com>
2
+ # Project: Ruby Language Toolkit
3
+ # Date: 2011/01/20
4
+ # Description: This file contains a lexer for Extended Backus–Naur Form.
5
+
6
+ ############
7
+ # Requires #
8
+ ############
9
+
10
+ # Ruby Language Toolkit
11
+ require 'rltk/lexer'
12
+
13
+ #######################
14
+ # Classes and Modules #
15
+ #######################
16
+
17
module RLTK # :nodoc:

	# The RLTK::Lexers module contains the lexers that are included as
	# part of the RLTK project.
	module Lexers

		# The EBNF lexer is used by the RLTK::CFG class.
		class EBNF < Lexer

			#################
			# Default State #
			#################

			# Repetition operators.
			rule(/\*/) { :* }
			rule(/\+/) { :+ }
			rule(/\?/) { :'?' }

			# Lower-case identifiers are nonterminals; upper-case
			# identifiers are terminals.
			rule(/[a-z0-9_]+/) { |text| [:NONTERM, text.to_sym] }
			rule(/[A-Z0-9_]+/) { |text| [:TERM, text.to_sym] }

			# Whitespace is discarded (no action block given).
			rule(/\s/)
		end
	end
end