llip 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/MIT-LICENSE +21 -0
- data/Manifest.txt +45 -0
- data/README.txt +148 -0
- data/Rakefile +66 -0
- data/examples/ariteval/ariteval.rb +132 -0
- data/examples/ariteval/evaluator.rb +61 -0
- data/examples/ariteval/exp.rb +104 -0
- data/lib/llip.rb +6 -0
- data/lib/llip/abstract_parser.rb +170 -0
- data/lib/llip/abstract_scanner.rb +83 -0
- data/lib/llip/buffer.rb +35 -0
- data/lib/llip/llip_error.rb +43 -0
- data/lib/llip/parser.rb +93 -0
- data/lib/llip/production_compiler.rb +168 -0
- data/lib/llip/production_specification.rb +79 -0
- data/lib/llip/recursive_production_compiler.rb +35 -0
- data/lib/llip/regexp_abstract_scanner.rb +116 -0
- data/lib/llip/regexp_parser.rb +197 -0
- data/lib/llip/regexp_scanner.rb +33 -0
- data/lib/llip/regexp_specification.rb +210 -0
- data/lib/llip/token.rb +47 -0
- data/lib/llip/visitable.rb +37 -0
- data/spec/ariteval/ariteval_spec.rb +111 -0
- data/spec/ariteval/evaluator_spec.rb +106 -0
- data/spec/ariteval/exp_spec.rb +232 -0
- data/spec/llip/abstract_parser_spec.rb +273 -0
- data/spec/llip/abstract_scanner_spec.rb +152 -0
- data/spec/llip/buffer_spec.rb +60 -0
- data/spec/llip/llip_error_spec.rb +77 -0
- data/spec/llip/parser_spec.rb +163 -0
- data/spec/llip/production_compiler_spec.rb +271 -0
- data/spec/llip/production_specification_spec.rb +75 -0
- data/spec/llip/recursive_production_compiler_spec.rb +86 -0
- data/spec/llip/regexp_abstract_scanner_spec.rb +320 -0
- data/spec/llip/regexp_parser_spec.rb +265 -0
- data/spec/llip/regexp_scanner_spec.rb +40 -0
- data/spec/llip/regexp_specification_spec.rb +734 -0
- data/spec/llip/token_spec.rb +70 -0
- data/spec/llip/visitable_spec.rb +38 -0
- data/spec/spec_helper.rb +10 -0
- metadata +110 -0
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'visitable'
|
2
|
+
|
3
|
+
# Leaf expression node that wraps a single value and can be visited
# through the Visitable mixin.
class NumExp
  include Visitable

  # The wrapped value, exactly as it was given to the constructor.
  attr_reader :value

  def initialize(value)
    @value = value
  end

  # String form of the wrapped value.
  def to_s
    value.to_s
  end
end
|
17
|
+
|
18
|
+
# Leaf expression node that wraps a single value (an identifier, going by
# the class name) and can be visited through the Visitable mixin.
class IdentExp
  include Visitable

  # The wrapped value, exactly as it was given to the constructor.
  attr_reader :value

  def initialize(value)
    @value = value
  end

  # String form of the wrapped value.
  def to_s
    value.to_s
  end
end
|
31
|
+
|
32
|
+
# Expression node pairing a name with a value; rendered as "( name = value )".
class AssignIdentExp
  include Visitable

  # +name+ is the left-hand side, +value+ the right-hand side.
  attr_reader :name, :value

  def initialize(name,value)
    @name = name
    @value = value
  end

  # Renders the assignment surrounded by parentheses.
  def to_s
    "( #{name} = #{value} )"
  end
end
|
49
|
+
|
50
|
+
# Base class of the binary expression nodes. It stores the two operands;
# +op+ is only exposed here and is never assigned by this class, so it
# stays nil unless a subclass sets @op.
class OpExp
  include Visitable

  attr_reader :op, :left, :right

  def initialize(left,right)
    @left = left
    @right = right
  end

  # Renders the node as "( left op right )".
  def to_s
    "( #{left} #{op} #{right} )"
  end

  # Two nodes are equal when they are of exactly the same class and both
  # operands compare equal.
  def ==(other)
    other.class == self.class && left == other.left && right == other.right
  end
end
|
73
|
+
|
74
|
+
# Binary node whose operator is "+".
class PlusExp < OpExp
  def initialize(left,right)
    super(left, right)
    @op = "+"
  end
end
|
80
|
+
|
81
|
+
# Binary node whose operator is "-".
class MinusExp < OpExp
  def initialize(left,right)
    super(left, right)
    @op = "-"
  end
end
|
87
|
+
|
88
|
+
# Binary node whose operator is "*".
class MulExp < OpExp
  def initialize(left,right)
    super(left, right)
    @op = "*"
  end
end
|
94
|
+
|
95
|
+
# Binary node whose operator is "/".
class DivExp < OpExp
  def initialize(left,right)
    super(left, right)
    @op = "/"
  end
end
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
data/lib/llip.rb
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/production_specification'
|
2
|
+
require File.dirname(__FILE__) + '/production_compiler'
|
3
|
+
require File.dirname(__FILE__) + '/recursive_production_compiler'
|
4
|
+
require File.dirname(__FILE__) + '/llip_error'
|
5
|
+
|
6
|
+
module LLIP

  # This class hides all the complexity of generating and building a parser.
  # After subclassing it, it's possible to use all the methods defined in
  # AbstractParser::ClassMethods to specify the productions.
  class AbstractParser

    # Every subclass gets the production DSL as class methods.
    def self.inherited(other)
      other.extend(ClassMethods)
    end

    def initialize
      @hash = {}
    end

    # The productions declared on the parser class.
    def productions
      self.class.productions
    end

    # Parse the tokens generated by the scanner until it reaches the end.
    # See AbstractScanner to know how to develop a scanner.
    #
    # This placeholder always raises; the real method is generated by
    # ClassMethods#compile_scope.
    def parse(scanner)
      raise "This method hasn't been compiled yet."
    end

    # Reads a value from the parser's own key/value store.
    def [](key)
      @hash[key]
    end

    # Writes a value into the parser's own key/value store.
    def []=(key,value)
      @hash[key] = value
    end

    # It raises a ParserError instead of a RuntimeError if no exception is given.
    #
    # It's public so that it can be called from the production definitions,
    # which gives them ParserError as their default exception.
    #
    # The call is forwarded untouched to Kernel#raise when the first argument
    # already responds to +exception+, or when there is no current token to
    # attach to the error. Otherwise the first argument is used as the message
    # and the optional second one as the backtrace.
    def raise(*args)
      if args.first.respond_to?(:exception) || !@scanner.respond_to?(:current) || @scanner.current == nil
        super(*args)
      else
        error = ParserError.new(@scanner.current, args.shift)
        # Default to the caller's backtrace so the error does not point here.
        backtrace = args.shift || caller(1)
        error.set_backtrace(backtrace)
        super error
      end
    end

    module ClassMethods

      # Contains the evaluated code, it's useful for debugging.
      attr_reader :code

      # :call-seq:
      #   autocompile(true)
      #   autocompile(false)
      #
      # Set the autocompile flag true or false. The default is *true*.
      # If this flag is turned on every production is automatically evaluated and converted into code.
      # Otherwise you can compile it using AbstractParser::ClassMethods#compile.
      #
      # Called without an argument it returns the current flag.
      def autocompile(autocompile=nil)
        if autocompile.nil?
          @autocompile = true if @autocompile.nil?
        else
          @autocompile = autocompile
        end
        # The compilers must exist before anything gets compiled on the fly.
        init_compile if @autocompile
        @autocompile
      end

      # Add a production to the parser, the block must accept an argument which is
      # a new ProductionSpecification.
      # The ProductionSpecification name is set to the first parameter and its mode to the second if it exists.
      # A ProductionSpecification is compiled to a method named +parse_name+.
      #
      # Calling it again with the same name yields the already existing
      # specification. Returns +name+.
      def production(name,mode=nil) # :yields: production_specification
        key = name.to_sym
        productions[key] ||= LLIP::ProductionSpecification.new(key)
        productions[key].mode = mode if mode
        yield productions[key]
        compile_production(productions[key]) if autocompile
        name
      end

      # Return a hash containing all the specified productions.
      def productions
        @productions ||= {}
      end

      # Return/set the scope, which is the first production to be called.
      # The scope is mandatory to generate the parse method.
      #
      # Raises ArgumentError if the given name is an empty string.
      def scope(name=nil)
        if name
          raise ArgumentError.new("The scope must be a not empty string") if name == ""
          @scope = name
          compile_scope if autocompile
        end
        @scope
      end

      # Compile all the productions and set the code attribute correctly.
      #
      # Raises a RuntimeError if no scope has been set or if the scope does not
      # name a declared production.
      def compile

        init_compile

        # First check the scope. The +productions+ accessor is used rather than
        # the raw instance variable, which is still nil when no production has
        # been declared.
        if @scope.nil? || !productions.key?(@scope.to_sym)
          raise "You must give a legal scope"
        end

        compile_scope

        # Compile and eval all the productions.
        productions.each_value { |prod| compile_production(prod) }

        class_eval(@code)
        @compiled = true
      end

      # Returns a boolean which specifies if the parser has been compiled.
      def compiled
        @compiled ||= false
      end

      private

      # Generates the +parse+ method, which starts from the scope production
      # and complains if tokens are left over at the end.
      def compile_scope
        scope_code = <<-CODE
          def parse(scanner)
            @scanner = scanner
            @scanner.next
            result = parse_#{@scope}
            raise "The parsing terminating without processing all tokens, the exceeding token is '\#{@scanner.current}'" unless @scanner.current.nil?
            result
          end
        CODE

        class_eval(scope_code) if autocompile
        @code << scope_code
      end

      # Compiles a single production with the compiler matching its mode and
      # appends the generated source to +code+.
      def compile_production(prod)
        compiler =
          if prod.mode == :single
            @single_compiler
          elsif prod.mode == :recursive
            @recursive_compiler
          else
            raise "Unknow compile mode(#{prod.mode})for production #{prod.name}"
          end

        compiler.compile(prod)
        @code << "\n\n"
        @code << compiler.code

        class_eval(compiler.code) if autocompile
        compiler.reset
      end

      # Creates the code accumulator and the two compilers, only once.
      def init_compile
        unless @code
          @code = ""
          @single_compiler = LLIP::ProductionCompiler.new
          @recursive_compiler = LLIP::RecursiveProductionCompiler.new
        end
      end
    end
  end
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/token'
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
module LLIP

  # Base class of all the scanners. It takes care of the common chores:
  # wrapping the input into an IO-like object, reading one (possibly
  # multibyte) char at a time and keeping track of line and column.
  # It is +Abstract+ because +next+ has to be supplied by a subclass.
  class AbstractScanner

    # The data being scanned: nil or an IO-like object. Set it through scan.
    attr_reader :source

    # The last token generated by the scanner.
    attr_reader :current

    # The current line of the input. It starts at -1 and is incremented for
    # every $-0 that is read (and once by every call to scan).
    attr_reader :current_line

    # The position of the current char inside the line. It starts at -1, is
    # incremented for every char read and goes back to -1 on every $-0.
    attr_reader :current_char

    def initialize(source=nil)
      @current_line = -1
      @current_char = -1
      scan(source) if source
      @current = Token.new
    end

    # :call-seq:
    #   scan(IO)
    #   scan(String)
    #
    # Prepares the scanner to read from +source+. Anything that does not
    # respond to both +readchar+ and +eof?+ is wrapped into a StringIO.
    # The resulting object is stored in the source attribute, the first char
    # is pre-read and the scanner itself is returned.
    def scan(source)
      io_like = source.respond_to?(:readchar) && source.respond_to?(:eof?)
      @source = io_like ? source : StringIO.new(source)
      @current_line += 1
      @current_char = -1
      read_next
      self
    end

    # Abstract: a subclass must read the chars needed to build a Token, store
    # it in current and return it. If a char doesn't match the first char of
    # any token it should raise LLIPError; if the first char matches but the
    # following ones don't, it should raise UnvalidTokenError.
    def next
      raise NotImplementedError.new
    end

    protected

    # Matches a string that ends with an incomplete multibyte sequence.
    # NOTE(review): the literal is byte-oriented - confirm it is accepted by
    # the Ruby version this file is run with.
    UTF8_MB_PATTERN = /[\xc0-\xdf]$|[\xe0-\xef]$|[\xe0-\xef][\x80-\xbf]$/ #it's a little hack

    # Reads one char from the source into @next_char, appending further reads
    # while the char still looks incomplete, and updates the line/char
    # counters. Returns the char, or nil (also stored in @next_char) at EOF.
    def read_next
      return @next_char = nil if @source.eof?

      @next_char = @source.readchar.chr
      @next_char << @source.readchar.chr while @next_char =~ UTF8_MB_PATTERN

      if @next_char == $-0
        @current_line += 1
        @current_char = -1
      else
        @current_char += 1
      end

      @next_char
    end
  end
end
|
data/lib/llip/buffer.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
|
2
|
+
module LLIP
  # Wraps a scanner and adds token lookahead to it: tokens requested through
  # lookahead are kept in an internal queue and handed out by next before the
  # wrapped scanner is asked for new ones.
  class Buffer

    # The wrapped scanner; it must respond to +scan+ and +next+.
    attr_accessor :scanner

    # The last token returned by next.
    attr_reader :current

    def initialize(scanner)
      @scanner = scanner
      @current = nil
      @buffer = nil
    end

    # Starts scanning +text+ with the wrapped scanner and returns self.
    #
    # Tokens buffered from a previous input are discarded, otherwise they
    # would be returned as if they belonged to the new one.
    def scan(text)
      @buffer = nil
      @scanner.scan(text)
      self
    end

    # Returns the next token and stores it in current. Buffered tokens are
    # consumed first; once the buffer is empty the wrapped scanner is asked.
    def next
      # An empty buffer (e.g. left behind by lookahead(0)) holds nothing to
      # hand out, so it is treated like a missing one.
      @buffer = nil if @buffer && @buffer.empty?
      return @current = @scanner.next unless @buffer

      @current = @buffer.shift
      @buffer = nil if @buffer.empty?
      @current
    end

    # Returns the n-th token ahead of the current one (n starts at 1) without
    # consuming it. The tokens needed are read from the wrapped scanner and
    # kept until next hands them out.
    def lookahead(n)
      @buffer ||= []
      @buffer << @scanner.next while @buffer.size < n
      @buffer[n - 1]
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
|
2
|
+
module LLIP

  # Base exception of the LLIP library.
  # Its string form is prefixed with the line and the char of the token that
  # caused the exception.
  #
  # A subclass can supply its own text either by passing it to the
  # constructor or by overriding the message.
  class LLIPError < StandardError

    # The token that caused the exception.
    attr_reader :token

    def initialize(token,msg=nil)
      super(msg)
      @token = token
    end

    # Keeps the plain message reachable once to_s is redefined below.
    alias_method :internal_message, :to_s

    # The plain message preceded by the position of the token and the name
    # of the exception class.
    def to_s
      position = "At line #{token.line} char #{token.char}"
      "#{position} a #{self.class.name} occurred: #{internal_message}"
    end
  end

  # Error built from a token alone; its message reports the token's value
  # and name.
  class UnvalidTokenError < LLIPError

    def initialize(token)
      text = "the current token '#{token.value}' doesn't match with the regular expression #{token.name}."
      super(token, text)
    end
  end

  # Subclass of LLIPError that adds no behaviour of its own.
  class ParserError < LLIPError
  end

  # ParserError whose message names the token and the production it was
  # not allowed in.
  class NotAllowedTokenError < ParserError
    def initialize(token,production)
      text = "the token '#{token.value}' matched by the regexp '#{token.name}' isn't allowed in production #{production}."
      super(token, text)
    end
  end
end
|
data/lib/llip/parser.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/abstract_parser'
|
2
|
+
require File.dirname(__FILE__) + '/regexp_abstract_scanner'
|
3
|
+
require File.dirname(__FILE__) + '/regexp_parser'
|
4
|
+
require File.dirname(__FILE__) + '/regexp_scanner'
|
5
|
+
require File.dirname(__FILE__) + '/buffer'
|
6
|
+
require 'forwardable'
|
7
|
+
|
8
|
+
module LLIP

  # It's a +facade+ of the LLIP library.
  #
  # Subclass it, then use the class methods production, scope and token to
  # describe the parser and its scanner.
  class Parser

    # Gives every subclass the DSL and its own parser and scanner classes.
    def self.inherited(other)
      other.extend(ClassMethods)
      other.send(:init_parser)
    end

    # An instance of the class returned by LLIP::Parser::ClassMethods#parser.
    attr_reader :parser

    # An instance of the class returned by LLIP::Parser::ClassMethods#scanner,
    # wrapped into a Buffer when the lookahead is enabled.
    attr_reader :scanner

    def initialize
      @parser = self.class.parser.new
      raw_scanner = self.class.scanner.new
      @scanner = self.class.lookahead ? Buffer.new(raw_scanner) : raw_scanner
    end

    # Parse the source using the parser and the scanner.
    #
    # See AbstractScanner#scan to know what is a valid source.
    def parse(source)
      prepared = @scanner.scan(source)
      @parser.parse(prepared)
    end

    module ClassMethods

      # A class descending from AbstractParser which will contain all the productions.
      # The messages :production and :scope are redirected to it.
      # See AbstractParser::ClassMethods#production, AbstractParser::ClassMethods#scope and ProductionSpecification.
      attr_reader :parser

      # A class descending from RegexpAbstractScanner which will contain all the token definitions.
      # The simplest way to add one is token.
      attr_reader :scanner

      # It's a RegexpParser.
      attr_reader :regexp_parser

      # It's a RegexpScanner.
      attr_reader :regexp_scanner

      extend Forwardable

      def_delegator :@parser, :production
      def_delegator :@parser, :scope

      # It uses _regexp_parser_ and _regexp_scanner_ to compile a regular
      # expression string and registers the result with the scanner class.
      # The string must follow the grammar specified in RegexpParser.
      #
      # The first argument is the name with which all the Token derived from
      # this regular expression will be marked. It must be a symbol.
      # Returns self.
      def token(name,string)
        scanned = @regexp_scanner.scan(string)
        specification = @regexp_parser.parse(scanned)
        specification.name = name
        @scanner.add_regexp(specification)
        self
      end

      # :call-seq:
      #   lookahead
      #   lookahead(true)
      #
      # Returns the lookahead flag, setting it first when an argument is
      # given. If the flag is true, a Buffer will be used during parsing.
      def lookahead(lookahead = nil)
        if !lookahead.nil?
          @lookahead = lookahead
        end
        @lookahead
      end

      private

      def init_parser # :nodoc:
        @lookahead = false

        @regexp_parser = RegexpParser.new
        @regexp_scanner = RegexpScanner.new

        @scanner = Class.new(RegexpAbstractScanner)
        @parser = Class.new(AbstractParser)
      end
    end
  end
end
|