llip 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/MIT-LICENSE +21 -0
- data/Manifest.txt +45 -0
- data/README.txt +148 -0
- data/Rakefile +66 -0
- data/examples/ariteval/ariteval.rb +132 -0
- data/examples/ariteval/evaluator.rb +61 -0
- data/examples/ariteval/exp.rb +104 -0
- data/lib/llip.rb +6 -0
- data/lib/llip/abstract_parser.rb +170 -0
- data/lib/llip/abstract_scanner.rb +83 -0
- data/lib/llip/buffer.rb +35 -0
- data/lib/llip/llip_error.rb +43 -0
- data/lib/llip/parser.rb +93 -0
- data/lib/llip/production_compiler.rb +168 -0
- data/lib/llip/production_specification.rb +79 -0
- data/lib/llip/recursive_production_compiler.rb +35 -0
- data/lib/llip/regexp_abstract_scanner.rb +116 -0
- data/lib/llip/regexp_parser.rb +197 -0
- data/lib/llip/regexp_scanner.rb +33 -0
- data/lib/llip/regexp_specification.rb +210 -0
- data/lib/llip/token.rb +47 -0
- data/lib/llip/visitable.rb +37 -0
- data/spec/ariteval/ariteval_spec.rb +111 -0
- data/spec/ariteval/evaluator_spec.rb +106 -0
- data/spec/ariteval/exp_spec.rb +232 -0
- data/spec/llip/abstract_parser_spec.rb +273 -0
- data/spec/llip/abstract_scanner_spec.rb +152 -0
- data/spec/llip/buffer_spec.rb +60 -0
- data/spec/llip/llip_error_spec.rb +77 -0
- data/spec/llip/parser_spec.rb +163 -0
- data/spec/llip/production_compiler_spec.rb +271 -0
- data/spec/llip/production_specification_spec.rb +75 -0
- data/spec/llip/recursive_production_compiler_spec.rb +86 -0
- data/spec/llip/regexp_abstract_scanner_spec.rb +320 -0
- data/spec/llip/regexp_parser_spec.rb +265 -0
- data/spec/llip/regexp_scanner_spec.rb +40 -0
- data/spec/llip/regexp_specification_spec.rb +734 -0
- data/spec/llip/token_spec.rb +70 -0
- data/spec/llip/visitable_spec.rb +38 -0
- data/spec/spec_helper.rb +10 -0
- metadata +110 -0
@@ -0,0 +1,104 @@
|
|
1
|
+
require 'visitable'
|
2
|
+
|
3
|
+
# Expression node that wraps a single value.
class NumExp
  include Visitable

  # The wrapped value, exactly as it was handed to the constructor.
  attr_reader :value

  # value:: the object to wrap; it is stored untouched.
  def initialize(value)
    @value = value
  end

  # The string form of the wrapped value.
  def to_s
    value.to_s
  end
end
|
17
|
+
|
18
|
+
# Expression node that wraps the value of an identifier.
class IdentExp
  include Visitable

  # The wrapped value, exactly as it was handed to the constructor.
  attr_reader :value

  # value:: the object to wrap; it is stored untouched.
  def initialize(value)
    @value = value
  end

  # The string form of the wrapped value.
  def to_s
    value.to_s
  end
end
|
31
|
+
|
32
|
+
# Expression node pairing a name with the value assigned to it.
class AssignIdentExp
  include Visitable

  # +name+ is the left-hand side, +value+ the right-hand side.
  attr_reader :name, :value

  # name::  the left-hand side of the assignment.
  # value:: the right-hand side of the assignment.
  def initialize(name, value)
    @name, @value = name, value
  end

  # Renders the assignment as "( name = value )".
  def to_s
    "( #{name} = #{value} )"
  end
end
|
49
|
+
|
50
|
+
# Expression node with a left operand, a right operand and an operator.
#
# OpExp itself never assigns @op: the subclasses set it after calling the
# constructor defined here.
class OpExp
  include Visitable

  # +op+ is the operator, +left+ and +right+ are the two operands.
  attr_reader :op, :left, :right

  # left::  the left operand.
  # right:: the right operand.
  def initialize(left, right)
    @left, @right = left, right
  end

  # Renders the operation as "( left op right )".
  def to_s
    "( #{left} #{op} #{right} )"
  end

  # Two operations are equal when they are instances of exactly the same
  # class and both their operands are equal (compared with ==).
  def ==(other)
    return false unless other.class == self.class

    left == other.left && right == other.right
  end
end
|
73
|
+
|
74
|
+
# OpExp whose operator is "+".
class PlusExp < OpExp
  # Stores both operands through OpExp#initialize, then sets the operator.
  def initialize(left, right)
    super(left, right)
    @op = "+"
  end
end
|
80
|
+
|
81
|
+
# OpExp whose operator is "-".
class MinusExp < OpExp
  # Stores both operands through OpExp#initialize, then sets the operator.
  def initialize(left, right)
    super(left, right)
    @op = "-"
  end
end
|
87
|
+
|
88
|
+
# OpExp whose operator is "*".
class MulExp < OpExp
  # Stores both operands through OpExp#initialize, then sets the operator.
  def initialize(left, right)
    super(left, right)
    @op = "*"
  end
end
|
94
|
+
|
95
|
+
# OpExp whose operator is "/".
class DivExp < OpExp
  # Stores both operands through OpExp#initialize, then sets the operator.
  def initialize(left, right)
    super(left, right)
    @op = "/"
  end
end
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
data/lib/llip.rb
ADDED
@@ -0,0 +1,170 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/production_specification'
|
2
|
+
require File.dirname(__FILE__) + '/production_compiler'
|
3
|
+
require File.dirname(__FILE__) + '/recursive_production_compiler'
|
4
|
+
require File.dirname(__FILE__) + '/llip_error'
|
5
|
+
|
6
|
+
module LLIP
|
7
|
+
|
8
|
+
# This class hide all the complexity of generating an building a parser.
|
9
|
+
# Ater subclassing it, it's possible to use all the methods defined in
|
10
|
+
# AbstractParser::ClassMethods to specify the productions.
|
11
|
+
class AbstractParser
|
12
|
+
|
13
|
+
def self.inherited(other)
|
14
|
+
other.extend(ClassMethods)
|
15
|
+
end
|
16
|
+
|
17
|
+
def initialize
|
18
|
+
@hash = {}
|
19
|
+
end
|
20
|
+
|
21
|
+
def productions
|
22
|
+
self.class.productions
|
23
|
+
end
|
24
|
+
|
25
|
+
# Parse the token generated from the scanner until it reaches the end.
|
26
|
+
# See AbstractScanner to know how to develop a scanner.
|
27
|
+
def parse(scanner)
|
28
|
+
raise "This method hasn't been compiled yet."
|
29
|
+
end
|
30
|
+
|
31
|
+
def [](key)
|
32
|
+
@hash[key]
|
33
|
+
end
|
34
|
+
|
35
|
+
def []=(key,value)
|
36
|
+
@hash[key] = value
|
37
|
+
end
|
38
|
+
|
39
|
+
# It raises a ParserError instead of a RuntimeError if no exception is given.
|
40
|
+
#
|
41
|
+
# It's public so it's important to call it from the production definitions, to have the exception set to ParserError.
|
42
|
+
def raise(*args)
|
43
|
+
if args.first.respond_to? :exception or not @scanner.respond_to? :current or @scanner.current == nil
|
44
|
+
super(*args)
|
45
|
+
else
|
46
|
+
error = ParserError.new(@scanner.current,args.shift)
|
47
|
+
backtrace = args.shift
|
48
|
+
backtrace ||= caller(1)
|
49
|
+
error.set_backtrace(backtrace)
|
50
|
+
super error
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
module ClassMethods
|
55
|
+
|
56
|
+
# Contains the evaluated code, it's useful for debugging.
|
57
|
+
attr_reader :code
|
58
|
+
|
59
|
+
# :call-seq:
|
60
|
+
# autocompile(true)
|
61
|
+
# autocompile(false)
|
62
|
+
#
|
63
|
+
# Set the autocompile flag true or false. The default is *true*.
|
64
|
+
# If this flag is turned on every production is automatically evaulated and converted into code.
|
65
|
+
# Otherwise you can compile it using AbstractParser::ClassMethods#compile.
|
66
|
+
def autocompile(autocompile=nil)
|
67
|
+
if not autocompile.nil?
|
68
|
+
@autocompile = autocompile
|
69
|
+
else
|
70
|
+
@autocompile = true if @autocompile.nil?
|
71
|
+
end
|
72
|
+
init_compile if @autocompile
|
73
|
+
@autocompile
|
74
|
+
end
|
75
|
+
|
76
|
+
# Add a production to the parser, the block must accept an argument which is
|
77
|
+
# a new ProductionSpecification.
|
78
|
+
# The ProductionSpecficiation name is set to the first parameter and its mode to the second if exists.
|
79
|
+
# A ProductionSpecification is compiled to a method named +parse_name+
|
80
|
+
def production(name,mode=nil) # :yields: production_specification
|
81
|
+
productions[name.to_sym] ||= LLIP::ProductionSpecification.new(name.to_sym)
|
82
|
+
productions[name.to_sym].mode = mode if mode
|
83
|
+
yield productions[name.to_sym]
|
84
|
+
compile_production(productions[name.to_sym]) if autocompile
|
85
|
+
name
|
86
|
+
end
|
87
|
+
|
88
|
+
# Return an hash containing all the specified productions
|
89
|
+
def productions
|
90
|
+
@productions ||= {}
|
91
|
+
end
|
92
|
+
|
93
|
+
# Return/set the scope, which is the first production to be called.
|
94
|
+
# The scope is mandatory to generate the parse method.
|
95
|
+
def scope(name=nil)
|
96
|
+
if name
|
97
|
+
raise ArgumentError.new("The scope must be a not empty string") if name == ""
|
98
|
+
@scope = name
|
99
|
+
compile_scope if autocompile
|
100
|
+
end
|
101
|
+
@scope
|
102
|
+
end
|
103
|
+
|
104
|
+
# Compile all the productions and sets the code attribute correctly.
|
105
|
+
def compile
|
106
|
+
|
107
|
+
init_compile
|
108
|
+
|
109
|
+
#first check the scope
|
110
|
+
if @scope.nil? or not @productions.has_key? @scope.to_sym
|
111
|
+
raise "You must give a legal scope"
|
112
|
+
end
|
113
|
+
|
114
|
+
compile_scope
|
115
|
+
|
116
|
+
#compile and eval all the productions
|
117
|
+
@productions.values.each { |prod| compile_production(prod) }
|
118
|
+
|
119
|
+
class_eval(@code)
|
120
|
+
@compiled = true
|
121
|
+
end
|
122
|
+
|
123
|
+
# Returns a boolean which specify if the parser has been compiled
|
124
|
+
def compiled
|
125
|
+
@compiled ||= false
|
126
|
+
end
|
127
|
+
|
128
|
+
private
|
129
|
+
def compile_scope
|
130
|
+
scope_code = <<-CODE
|
131
|
+
def parse(scanner)
|
132
|
+
@scanner = scanner
|
133
|
+
@scanner.next
|
134
|
+
result = parse_#{@scope}
|
135
|
+
raise "The parsing terminating without processing all tokens, the exceeding token is '\#{@scanner.current}'" unless @scanner.current.nil?
|
136
|
+
result
|
137
|
+
end
|
138
|
+
CODE
|
139
|
+
|
140
|
+
class_eval(scope_code) if autocompile
|
141
|
+
@code << scope_code
|
142
|
+
end
|
143
|
+
|
144
|
+
def compile_production(prod)
|
145
|
+
if prod.mode == :single
|
146
|
+
compiler = @single_compiler
|
147
|
+
elsif prod.mode == :recursive
|
148
|
+
compiler = @recursive_compiler
|
149
|
+
else
|
150
|
+
raise "Unknow compile mode(#{prod.mode})for production #{prod.name}"
|
151
|
+
end
|
152
|
+
|
153
|
+
compiler.compile(prod)
|
154
|
+
@code << "\n\n"
|
155
|
+
@code << compiler.code
|
156
|
+
|
157
|
+
class_eval(compiler.code) if autocompile
|
158
|
+
compiler.reset
|
159
|
+
end
|
160
|
+
|
161
|
+
def init_compile
|
162
|
+
unless @code
|
163
|
+
@code = ""
|
164
|
+
@single_compiler = LLIP::ProductionCompiler.new
|
165
|
+
@recursive_compiler = LLIP::RecursiveProductionCompiler.new
|
166
|
+
end
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
@@ -0,0 +1,83 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/token'
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
module LLIP

  # It's the base class of all the scanners. It handles common issues like reading a multibyte char or setting up correctly an IO to read from.
  # This class is +Abstract+ because it lacks the +next+ method, which a subclass must add.
  class AbstractScanner

    # It contains the data scanned by the scanner. It can be nil or an IO.
    # To set it, use scan.
    attr_reader :source

    # It contains the last token generated by the scanner
    attr_reader :current

    # It contains the current line of the input file.
    # Its default is -1; it's incremented by every call to scan and for every $-0 the AbstractScanner reads.
    attr_reader :current_line

    # It contains the current char of the input file, it's automatically incremented for every read char.
    # Its default is -1.
    # For every $-0 read, it's re-initialized to -1.
    attr_reader :current_char

    # source:: an optional source which is passed to scan.
    def initialize(source=nil)
      @current_line = -1
      @current_char = -1
      scan(source) if source
      @current = Token.new
    end

    # :call-seq:
    #   scan(IO)
    #   scan(String)
    #
    # It initializes the scanner to scan an IO. Everything which doesn't respond to both +readchar+ and +eof?+ is wrapped into a StringIO.
    # It memorizes the IO in the source attribute, reads the first char and returns the scanner itself.
    def scan(source)
      @source = source
      @source = StringIO.new(source) unless @source.respond_to?(:readchar) && @source.respond_to?(:eof?)
      @current_line += 1
      @current_char = -1
      read_next
      self
    end

    # This method is abstract and it must be implemented by a subclass.
    # This method must read the chars to produce a Token, set it to current and return it. If a char doesn't match the first char of a token, this method should raise LLIPError.
    # If a char matches the first char of a token, but the next ones don't, this method should raise an UnvalidTokenError.
    def next
      raise NotImplementedError.new
    end

    protected

    # It's used to recognize if a sequence of bytes is the incomplete beginning of a UTF-8 multibyte char:
    # a lead byte of a 2, 3 or 4 bytes char which is still missing some of its continuation bytes.
    #
    # The +n+ flag makes it a binary pattern: without it Ruby >= 1.9 rejects the \xNN escapes ("invalid multibyte escape").
    UTF8_MB_PATTERN = /[\xc0-\xdf]$|[\xe0-\xef][\x80-\xbf]?$|[\xf0-\xf7][\x80-\xbf]{0,2}$/n

    # It reads one char from the source and memorizes it in the internal variable @next_char. It handles multibyte chars correctly,
    # both when the source returns single bytes and when it already returns whole chars.
    # If EOF is reached, it will return nil.
    def read_next
      if @source.eof?
        @next_char = nil
      else
        @next_char = @source.readchar.chr
        # a byte-wise source returns a multibyte char one piece at a time
        while incomplete_multibyte?(@next_char)
          @next_char << @source.readchar.chr
        end

        if @next_char == $-0
          @current_line += 1
          @current_char = -1
        else
          @current_char += 1
        end

        @next_char
      end
    end

    private

    # Tells if +char+ ends with an incomplete UTF-8 sequence.
    # The check is done on the raw bytes, so that the binary pattern can be matched against a string of any encoding.
    def incomplete_multibyte?(char)
      bytes = char.respond_to?(:force_encoding) ? char.dup.force_encoding('BINARY') : char
      !(bytes =~ UTF8_MB_PATTERN).nil?
    end

  end
end
|
data/lib/llip/buffer.rb
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
|
2
|
+
module LLIP

  # It wraps a scanner, adding to it the possibility of looking at the
  # next tokens without consuming them.
  class Buffer

    # The wrapped scanner.
    attr_accessor :scanner

    # The last token returned by next.
    attr_reader :current

    # scanner:: the object to wrap; +scan+ and +next+ are called on it.
    def initialize(scanner)
      @scanner = scanner
      @current = nil
      @buffer = nil
    end

    # It makes the wrapped scanner scan +text+ and returns the buffer itself.
    #
    # The tokens looked ahead from the previous source are discarded,
    # otherwise next would return them in place of the tokens of +text+.
    def scan(text)
      @buffer = nil
      @scanner.scan(text)
      self
    end

    # It returns the next token and memorizes it in current.
    # The token comes from the ones already looked ahead, if there are any,
    # otherwise from the wrapped scanner.
    def next
      # an empty buffer (left for example by lookahead(0)) has nothing to
      # give: shifting it would return a nil which isn't a token of the scanner
      return @current = @scanner.next if @buffer.nil? || @buffer.empty?

      @current = @buffer.shift
      @buffer = nil if @buffer.empty?
      @current
    end

    # It returns the n-th token after the current one without consuming it:
    # lookahead(1) is the token that the following call to next will return.
    # The tokens are read from the wrapped scanner only if they haven't been
    # looked ahead yet.
    #
    # +n+ is expected to be at least 1.
    def lookahead(n)
      @buffer ||= []
      @buffer << @scanner.next while @buffer.size < n
      @buffer[n-1]
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
|
2
|
+
module LLIP

  # Root of all the exceptions raised by LLIP.
  # Every message is prefixed with a header reporting the line and the char
  # of the token which caused the exception.
  #
  # A subclass can provide its own message either passing it to the
  # constructor or overriding the :message method.
  class LLIPError < StandardError

    # The token that caused the exception
    attr_reader :token

    # token:: the token that caused the exception; +line+ and +char+ are
    #         called on it to build the header.
    # msg::   the message following the header.
    def initialize(token, msg = nil)
      super(msg)
      @token = token
    end

    # The message without the header.
    alias_method :internal_message, :to_s

    # The message preceded by the position of the token and the name of
    # the exception class.
    def to_s
      position = "At line #{token.line} char #{token.char}"
      "#{position} a #{self.class.name} occurred: #{internal_message}"
    end
  end

  # Its message reports the value of the token and the name of the regular
  # expression that the value doesn't match.
  class UnvalidTokenError < LLIPError

    def initialize(token)
      super(token, "the current token '#{token.value}' doesn't match with the regular expression #{token.name}.")
    end
  end

  # Base class of the exceptions raised while parsing.
  class ParserError < LLIPError
  end

  # Its message reports a token which isn't allowed in a production.
  class NotAllowedTokenError < ParserError

    # production:: what is reported in the message as the production.
    def initialize(token, production)
      super(token, "the token '#{token.value}' matched by the regexp '#{token.name}' isn't allowed in production #{production}.")
    end
  end
end
|
data/lib/llip/parser.rb
ADDED
@@ -0,0 +1,93 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/abstract_parser'
|
2
|
+
require File.dirname(__FILE__) + '/regexp_abstract_scanner'
|
3
|
+
require File.dirname(__FILE__) + '/regexp_parser'
|
4
|
+
require File.dirname(__FILE__) + '/regexp_scanner'
|
5
|
+
require File.dirname(__FILE__) + '/buffer'
|
6
|
+
require 'forwardable'
|
7
|
+
|
8
|
+
module LLIP

  # +Facade+ of the LLIP library.
  #
  # Subclass it, then build the parser and its scanner with the class
  # methods production, scope and token.
  class Parser

    # Subclass hook: gives the new subclass the ClassMethods and a fresh
    # parser class, scanner class and regexp tools of its own.
    def self.inherited(other)
      other.extend(ClassMethods)
      other.send(:init_parser)
    end

    # The parser of the Parser subclass. It's an instance of the class returned by LLIP::Parser::ClassMethods.parser.
    attr_reader :parser

    # The scanner of the Parser subclass. It's an instance of the class returned by LLIP::Parser::ClassMethods.scanner,
    # wrapped into a Buffer if the lookahead is on.
    attr_reader :scanner

    # Instantiates the parser and the scanner defined on the class.
    def initialize
      definition = self.class
      @parser = definition.parser.new
      plain_scanner = definition.scanner.new
      @scanner = definition.lookahead ? Buffer.new(plain_scanner) : plain_scanner
    end

    # Makes the scanner scan the source, then hands the result to the parser.
    #
    # See AbstractScanner#scan to know what is a valid source.
    def parse(source)
      prepared = @scanner.scan(source)
      @parser.parse(prepared)
    end

    module ClassMethods

      # A class descending from AbstractParser which will contain all the productions.
      # The messages :production and :scope are redirected to it.
      # See AbstractParser::ClassMethods#production, AbstractParser::ClassMethods#scope and ProductionSpecification.
      attr_reader :parser

      # A class descending from RegexpAbstractScanner which will contain all the token definitions.
      # The simple way to add one is token.
      attr_reader :scanner

      # It's a RegexpParser
      attr_reader :regexp_parser

      # It's a RegexpScanner
      attr_reader :regexp_scanner

      extend Forwardable

      def_delegator :@parser, :production
      def_delegator :@parser, :scope

      # Compiles a regular expression string using _regexp_scanner_ and _regexp_parser_, names the result and adds it to the scanner class.
      # A correct regular expression string must follow the grammar specified in RegexpParser.
      #
      # The first argument is the name with which all the Token derived by this regular expression will be marked. It must be a symbol.
      #
      # It returns the class itself.
      def token(name, string)
        scanned = @regexp_scanner.scan(string)
        specification = @regexp_parser.parse(scanned)
        specification.name = name
        @scanner.add_regexp(specification)
        self
      end

      # :call-seq:
      #   lookahead
      #   lookahead(true)
      #
      # Reads or sets the lookahead behaviour. If the lookahead is set to true, a Buffer will be used during parsing.
      def lookahead(lookahead = nil)
        lookahead.nil? ? @lookahead : (@lookahead = lookahead)
      end

      private

      # Creates the anonymous parser and scanner classes and the regexp
      # tools; the lookahead starts switched off.
      def init_parser # :nodoc:
        @lookahead = false

        @regexp_parser = RegexpParser.new
        @regexp_scanner = RegexpScanner.new

        @scanner = Class.new(RegexpAbstractScanner)
        @parser = Class.new(AbstractParser)
      end
    end
  end
end
|