llip 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/History.txt +4 -0
  2. data/MIT-LICENSE +21 -0
  3. data/Manifest.txt +45 -0
  4. data/README.txt +148 -0
  5. data/Rakefile +66 -0
  6. data/examples/ariteval/ariteval.rb +132 -0
  7. data/examples/ariteval/evaluator.rb +61 -0
  8. data/examples/ariteval/exp.rb +104 -0
  9. data/lib/llip.rb +6 -0
  10. data/lib/llip/abstract_parser.rb +170 -0
  11. data/lib/llip/abstract_scanner.rb +83 -0
  12. data/lib/llip/buffer.rb +35 -0
  13. data/lib/llip/llip_error.rb +43 -0
  14. data/lib/llip/parser.rb +93 -0
  15. data/lib/llip/production_compiler.rb +168 -0
  16. data/lib/llip/production_specification.rb +79 -0
  17. data/lib/llip/recursive_production_compiler.rb +35 -0
  18. data/lib/llip/regexp_abstract_scanner.rb +116 -0
  19. data/lib/llip/regexp_parser.rb +197 -0
  20. data/lib/llip/regexp_scanner.rb +33 -0
  21. data/lib/llip/regexp_specification.rb +210 -0
  22. data/lib/llip/token.rb +47 -0
  23. data/lib/llip/visitable.rb +37 -0
  24. data/spec/ariteval/ariteval_spec.rb +111 -0
  25. data/spec/ariteval/evaluator_spec.rb +106 -0
  26. data/spec/ariteval/exp_spec.rb +232 -0
  27. data/spec/llip/abstract_parser_spec.rb +273 -0
  28. data/spec/llip/abstract_scanner_spec.rb +152 -0
  29. data/spec/llip/buffer_spec.rb +60 -0
  30. data/spec/llip/llip_error_spec.rb +77 -0
  31. data/spec/llip/parser_spec.rb +163 -0
  32. data/spec/llip/production_compiler_spec.rb +271 -0
  33. data/spec/llip/production_specification_spec.rb +75 -0
  34. data/spec/llip/recursive_production_compiler_spec.rb +86 -0
  35. data/spec/llip/regexp_abstract_scanner_spec.rb +320 -0
  36. data/spec/llip/regexp_parser_spec.rb +265 -0
  37. data/spec/llip/regexp_scanner_spec.rb +40 -0
  38. data/spec/llip/regexp_specification_spec.rb +734 -0
  39. data/spec/llip/token_spec.rb +70 -0
  40. data/spec/llip/visitable_spec.rb +38 -0
  41. data/spec/spec_helper.rb +10 -0
  42. metadata +110 -0
@@ -0,0 +1,104 @@
1
+ require 'visitable'
2
+
3
# Expression node that wraps a single value (a number, judging by the
# class name). It mixes in Visitable.
class NumExp
  include Visitable

  # The value given to the constructor, stored untouched.
  attr_reader :value

  # value:: the object this node wraps.
  def initialize(value)
    @value = value
  end

  # Returns the wrapped value converted with its own +to_s+.
  def to_s
    @value.to_s
  end
end
17
+
18
# Expression node that wraps a single value (an identifier, judging by the
# class name). It mixes in Visitable.
class IdentExp
  include Visitable

  # The value given to the constructor, stored untouched.
  attr_reader :value

  # value:: the object this node wraps.
  def initialize(value)
    @value = value
  end

  # Returns the wrapped value converted with its own +to_s+.
  def to_s
    @value.to_s
  end
end
31
+
32
# Expression node pairing a name with a value; it is rendered as
# "( name = value )". It mixes in Visitable.
class AssignIdentExp
  include Visitable

  # The left-hand side of the rendered assignment.
  attr_reader :name

  # The right-hand side of the rendered assignment.
  attr_reader :value

  # name::  the object shown before the "=" sign.
  # value:: the object shown after the "=" sign.
  def initialize(name, value)
    @name = name
    @value = value
  end

  # Returns the parenthesised textual form of the assignment.
  def to_s
    "( #{@name} = #{@value} )"
  end
end
49
+
50
# Base class of the binary expression nodes. It stores the two operands;
# the operator symbol (+op+) is not set here, subclasses assign it.
class OpExp
  include Visitable

  # The operator string; nil unless a subclass sets @op.
  attr_reader :op

  # The left operand.
  attr_reader :left

  # The right operand.
  attr_reader :right

  # left::  the left operand.
  # right:: the right operand.
  def initialize(left, right)
    @left = left
    @right = right
  end

  # Returns the expression rendered as "( left op right )".
  def to_s
    "( #{left.to_s} #{op} #{right.to_s} )"
  end

  # Two nodes are equal when they are instances of exactly the same class
  # and both operands compare equal with ==.
  def ==(other)
    other.class == self.class && left == other.left && right == other.right
  end
end
73
+
74
# Binary expression node whose operator is "+".
class PlusExp < OpExp
  # Stores both operands through OpExp and then fixes the operator.
  def initialize(left, right)
    super(left, right)
    @op = "+"
  end
end
80
+
81
# Binary expression node whose operator is "-".
class MinusExp < OpExp
  # Stores both operands through OpExp and then fixes the operator.
  def initialize(left, right)
    super(left, right)
    @op = "-"
  end
end
87
+
88
# Binary expression node whose operator is "*".
class MulExp < OpExp
  # Stores both operands through OpExp and then fixes the operator.
  def initialize(left, right)
    super(left, right)
    @op = "*"
  end
end
94
+
95
# Binary expression node whose operator is "/".
class DivExp < OpExp
  # Stores both operands through OpExp and then fixes the operator.
  def initialize(left, right)
    super(left, right)
    @op = "/"
  end
end
101
+
102
+
103
+
104
+
@@ -0,0 +1,6 @@
1
+ # :include: README.txt
2
module LLIP
  # Version of the library. Frozen so the shared constant cannot be
  # mutated in place by accident.
  VERSION = "0.1.0".freeze
end
5
+
6
+ require File.dirname(__FILE__) + '/llip/parser'
@@ -0,0 +1,170 @@
1
+ require File.dirname(__FILE__) + '/production_specification'
2
+ require File.dirname(__FILE__) + '/production_compiler'
3
+ require File.dirname(__FILE__) + '/recursive_production_compiler'
4
+ require File.dirname(__FILE__) + '/llip_error'
5
+
6
module LLIP

  # This class hides all the complexity of generating and building a parser.
  # After subclassing it, all the methods defined in
  # AbstractParser::ClassMethods can be used to specify the productions.
  class AbstractParser

    # Every subclass is extended with ClassMethods, which provides the
    # class-level DSL (production, scope, compile, ...).
    def self.inherited(other)
      other.extend(ClassMethods)
    end

    def initialize
      @hash = {}
    end

    # The productions declared on the parser class.
    def productions
      self.class.productions
    end

    # Parse the tokens generated by the scanner until it reaches the end.
    # See AbstractScanner to know how to develop a scanner.
    #
    # This placeholder always raises; the real method is generated when the
    # scope is compiled (see ClassMethods#scope and ClassMethods#compile).
    def parse(scanner)
      raise "This method hasn't been compiled yet."
    end

    # Reads a value from the per-instance store.
    def [](key)
      @hash[key]
    end

    # Writes a value into the per-instance store.
    def []=(key, value)
      @hash[key] = value
    end

    # It raises a ParserError instead of a RuntimeError if no exception is given.
    #
    # It's public so that it can be called from the production definitions,
    # which makes their errors ParserError instances.
    #
    # The arguments are forwarded untouched to Kernel#raise when:
    # * no argument is given (a bare +raise+ must keep re-raising the
    #   exception currently being handled),
    # * the first argument is an exception class or instance,
    # * there is no scanner yet, or the scanner has no current token.
    #
    # Otherwise the first argument is used as the message of a ParserError
    # built on the scanner's current token, and the optional second argument
    # as its backtrace (defaulting to the caller's backtrace).
    def raise(*args)
      if args.empty? || args.first.respond_to?(:exception) ||
         !@scanner.respond_to?(:current) || @scanner.current.nil?
        super(*args)
      else
        error = ParserError.new(@scanner.current, args.shift)
        error.set_backtrace(args.shift || caller(1))
        super(error)
      end
    end

    module ClassMethods

      # Contains the evaluated code, it's useful for debugging.
      attr_reader :code

      # :call-seq:
      # autocompile(true)
      # autocompile(false)
      #
      # Set the autocompile flag true or false. The default is *true*.
      # If this flag is turned on every production is automatically evaluated and converted into code.
      # Otherwise you can compile it using AbstractParser::ClassMethods#compile.
      #
      # Called without argument it returns the current flag.
      def autocompile(autocompile = nil)
        if autocompile.nil?
          @autocompile = true if @autocompile.nil?
        else
          @autocompile = autocompile
        end
        # The compilers must exist before the first production is compiled.
        init_compile if @autocompile
        @autocompile
      end

      # Add a production to the parser, the block must accept an argument which is
      # a new ProductionSpecification.
      # The ProductionSpecification name is set to the first parameter and its mode to the second if it exists.
      # A ProductionSpecification is compiled to a method named +parse_name+.
      #
      # Returns the given name.
      def production(name, mode = nil) # :yields: production_specification
        key = name.to_sym
        productions[key] ||= LLIP::ProductionSpecification.new(key)
        productions[key].mode = mode if mode
        yield productions[key]
        compile_production(productions[key]) if autocompile
        name
      end

      # Return a hash containing all the specified productions.
      def productions
        @productions ||= {}
      end

      # Return/set the scope, which is the first production to be called.
      # The scope is mandatory to generate the parse method.
      #
      # Raises ArgumentError if the given name is an empty string.
      def scope(name = nil)
        if name
          raise ArgumentError.new("The scope must be a not empty string") if name == ""
          @scope = name
          compile_scope if autocompile
        end
        @scope
      end

      # Compile all the productions and set the code attribute correctly.
      #
      # Raises a RuntimeError if the scope is missing or doesn't name a
      # declared production.
      def compile
        init_compile

        # First check the scope. The productions accessor is used instead of
        # the raw instance variable so that a parser without any production
        # gets the intended error rather than a NoMethodError on nil.
        if @scope.nil? || !productions.key?(@scope.to_sym)
          raise "You must give a legal scope"
        end

        compile_scope

        # compile and eval all the productions
        productions.each_value { |prod| compile_production(prod) }

        class_eval(@code)
        @compiled = true
      end

      # Returns a boolean which specifies if the parser has been compiled.
      def compiled
        @compiled ||= false
      end

      private

      # Generates the +parse+ instance method, which starts the scanner, calls
      # the scope production and fails if some token is left unprocessed.
      def compile_scope
        scope_code = <<-CODE
          def parse(scanner)
            @scanner = scanner
            @scanner.next
            result = parse_#{@scope}
            raise "The parsing terminating without processing all tokens, the exceeding token is '\#{@scanner.current}'" unless @scanner.current.nil?
            result
          end
        CODE

        class_eval(scope_code) if autocompile
        @code << scope_code
      end

      # Compiles one production with the compiler matching its mode and
      # appends the generated code to the code attribute.
      def compile_production(prod)
        if prod.mode == :single
          compiler = @single_compiler
        elsif prod.mode == :recursive
          compiler = @recursive_compiler
        else
          raise "Unknow compile mode(#{prod.mode})for production #{prod.name}"
        end

        compiler.compile(prod)
        @code << "\n\n"
        @code << compiler.code

        class_eval(compiler.code) if autocompile
        compiler.reset
      end

      # Lazily creates the code buffer and the two production compilers.
      def init_compile
        unless @code
          @code = String.new
          @single_compiler = LLIP::ProductionCompiler.new
          @recursive_compiler = LLIP::RecursiveProductionCompiler.new
        end
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require File.dirname(__FILE__) + '/token'
2
+ require 'stringio'
3
+
4
module LLIP

  # It's the base class of all the scanners. It handles common issues like reading a multibyte char or correctly setting up an IO to read from.
  # This class is +Abstract+ because it lacks a working +next+ method, which a subclass must add.
  class AbstractScanner

    # It contains the data scanned by the scanner. It can be nil or an IO.
    # To set it, use scan.
    attr_reader :source

    # It contains the last token generated by the scanner.
    attr_reader :current

    # It contains the current line of the input file.
    # Its default is -1, and it's incremented every time the AbstractScanner reads $-0.
    attr_reader :current_line

    # It contains the current char of the input file, it's automatically incremented for every read char.
    # Its default is -1.
    # For every $-0 read, it's re-initialized to -1.
    attr_reader :current_char

    # source:: an optional IO or String; when given it is passed to scan.
    def initialize(source = nil)
      @current_line = -1
      @current_char = -1
      scan(source) if source
      @current = Token.new
    end

    # :call-seq:
    # scan(IO)
    # scan(String)
    #
    # It initializes the scanner to scan an IO. If a String is given, it is automatically converted into a StringIO.
    # It memorizes the IO in the source attribute, increments the current line, resets the current char
    # and reads the first char. Returns the scanner itself.
    def scan(source)
      @source = source
      # Anything that can't be read char by char is wrapped in a StringIO.
      @source = StringIO.new(source) unless @source.respond_to?(:readchar) && @source.respond_to?(:eof?)
      @current_line += 1
      @current_char = -1
      read_next
      self
    end

    # This method is abstract and it must be implemented by a subclass.
    # This method must read the chars to produce a Token, set it to current and return it. If a char doesn't match the first char of a token, this method should raise LLIPError.
    # If a char matches the first char of a token, but the next ones don't, this method should raise an UnvalidTokenError.
    def next
      raise NotImplementedError.new
    end

    protected

    # Matches a string ending with an unfinished 2 or 3 byte UTF-8 sequence.
    # The "n" flag makes the byte escapes acceptable to Ruby 1.9 and later,
    # which reject the unflagged literal. read_next no longer uses it (it
    # inspects the bytes directly); it is kept only so the constant stays defined.
    UTF8_MB_PATTERN = /[\xc0-\xdf]$|[\xe0-\xef]$|[\xe0-\xef][\x80-\xbf]$/n

    # It reads one char from the source and memorizes it in the internal variable @next_char. It handles multibyte chars correctly:
    # when the source hands out single bytes, the bytes of a 2, 3 or 4 byte UTF-8 sequence are glued together.
    # A sequence cut short by the end of the source is returned as it is.
    # If EOF is reached, it will return nil.
    def read_next
      if @source.eof?
        @next_char = nil
      else
        @next_char = @source.readchar.chr
        while incomplete_utf8_sequence?(@next_char) && !@source.eof?
          @next_char << @source.readchar.chr
        end

        if @next_char == $-0
          @current_line += 1
          @current_char = -1
        else
          @current_char += 1
        end

        @next_char
      end
    end

    private

    # Tells whether +char+ starts with a UTF-8 lead byte announcing more
    # bytes than +char+ currently holds.
    def incomplete_utf8_sequence?(char)
      bytes = char.unpack("C*")
      lead = bytes.first
      expected =
        if lead.nil? || lead < 0xc0 then 1 # ASCII or a stray continuation byte
        elsif lead < 0xe0 then 2
        elsif lead < 0xf0 then 3
        elsif lead < 0xf8 then 4
        else 1                             # not a valid UTF-8 lead byte
        end
      bytes.size < expected
    end

  end
end
@@ -0,0 +1,35 @@
1
+
2
module LLIP
  # Wraps a scanner and adds token lookahead on top of it: tokens requested
  # through lookahead are kept in an internal queue and handed out by next
  # before the scanner is asked for new ones.
  class Buffer

    # The wrapped scanner; it must respond to +scan+ and +next+.
    attr_accessor :scanner

    # The last token returned by next (nil before the first call).
    attr_reader :current

    # scanner:: the scanner the tokens are read from.
    def initialize(scanner)
      @scanner = scanner
      @current = nil
      @buffer = nil
    end

    # Makes the wrapped scanner scan +text+ and returns the buffer itself.
    #
    # Pending lookahead tokens are dropped: they belong to the previous
    # input and must not be returned while the new one is read.
    def scan(text)
      @buffer = nil
      @scanner.scan(text)
      self
    end

    # Returns the next token and memorizes it in current. Tokens already
    # fetched by lookahead are consumed first, then the scanner is used.
    def next
      return @current = @scanner.next if @buffer.nil? || @buffer.empty?

      @current = @buffer.shift
      @buffer = nil if @buffer.empty?
      @current
    end

    # Returns the n-th token after the current one without consuming it
    # (lookahead(1) is the token the following call to next will return).
    #
    # Raises ArgumentError unless +n+ is a positive Integer: any other
    # distance doesn't name a token ahead of the current one.
    def lookahead(n)
      unless n.is_a?(Integer) && n > 0
        raise ArgumentError, "the lookahead distance must be a positive integer (got #{n.inspect})"
      end

      @buffer ||= []
      @buffer << @scanner.next while @buffer.size < n
      @buffer[n - 1]
    end
  end
end
@@ -0,0 +1,43 @@
1
+
2
module LLIP

  # It's the base Exception for all the exceptions of LLIP.
  # It adds a header to all the messages with the line and the char of the token
  # that caused the exception.
  #
  # To subclass it for a class-specific message, pass the message to the
  # constructor or override the :message method.
  #
  class LLIPError < StandardError

    # The token that caused the exception
    attr_reader :token

    # token:: the token that caused the exception.
    # msg::   the optional message; it's passed to StandardError.
    def initialize(token, msg = nil)
      super(msg)
      @token = token
    end

    # The message without the position header.
    alias :internal_message :to_s

    # Returns the message preceded by the position of the token.
    #
    # When the token can't tell its position (it's nil, or it doesn't respond
    # to +line+ and +char+) the position is left out, so that building the
    # message never raises an exception of its own.
    def to_s
      if token.respond_to?(:line) && token.respond_to?(:char)
        "At line #{token.line} char #{token.char} a #{self.class.name} occurred: #{internal_message}"
      else
        "A #{self.class.name} occurred: #{internal_message}"
      end
    end
  end

  # Error whose message says that the value of the token doesn't match the
  # regular expression named by the token.
  class UnvalidTokenError < LLIPError

    # token:: the offending token; its +value+ and +name+ are put in the message.
    def initialize(token)
      super token, "the current token '#{token.value}' doesn't match with the regular expression #{token.name}."
    end
  end

  # Base class of the errors related to parsing.
  class ParserError < LLIPError
  end

  # Error whose message says that a token isn't allowed in a production.
  class NotAllowedTokenError < ParserError

    # token::      the offending token; its +value+ and +name+ are put in the message.
    # production:: the production named in the message.
    def initialize(token, production)
      super token, "the token '#{token.value}' matched by the regexp '#{token.name}' isn't allowed in production #{production}."
    end
  end
end
@@ -0,0 +1,93 @@
1
+ require File.dirname(__FILE__) + '/abstract_parser'
2
+ require File.dirname(__FILE__) + '/regexp_abstract_scanner'
3
+ require File.dirname(__FILE__) + '/regexp_parser'
4
+ require File.dirname(__FILE__) + '/regexp_scanner'
5
+ require File.dirname(__FILE__) + '/buffer'
6
+ require 'forwardable'
7
+
8
module LLIP

  # It's a +facade+ of the LLIP library.
  #
  # To use it, subclass it and then call production, scope and token on the
  # subclass to build the parser and its scanner.
  class Parser

    # Every subclass receives the ClassMethods DSL and its own freshly
    # created parser and scanner classes.
    def self.inherited(other)
      other.extend(ClassMethods)
      other.send(:init_parser)
    end

    # The parser of the Parser subclass. It's an instance of the class returned by LLIP::Parser::ClassMethods.parser.
    attr_reader :parser

    # The scanner of the Parser subclass. It's an instance of the class returned by LLIP::Parser::ClassMethods.scanner,
    # wrapped in a Buffer when the lookahead is turned on.
    attr_reader :scanner

    def initialize
      definition = self.class
      @parser = definition.parser.new
      plain_scanner = definition.scanner.new
      @scanner = definition.lookahead ? Buffer.new(plain_scanner) : plain_scanner
    end

    # Parse the source using the parser and the scanner.
    #
    # See AbstractScanner#scan to know what is a valid source.
    def parse(source)
      prepared = @scanner.scan(source)
      @parser.parse(prepared)
    end

    module ClassMethods

      # A class descending from AbstractParser which will contain all the productions.
      # The messages :production and :scope are redirected to it.
      # See AbstractParser::ClassMethods#production, AbstractParser::ClassMethods#scope and ProductionSpecification.
      attr_reader :parser

      # A class descending from RegexpAbstractScanner which will contain all the token definitions.
      # To add one in a simple way use token.
      attr_reader :scanner

      # It's a RegexpParser
      attr_reader :regexp_parser

      # It's a RegexpScanner
      attr_reader :regexp_scanner

      extend Forwardable

      def_delegator :@parser, :production
      def_delegator :@parser, :scope

      # It uses _regexp_parser_ and _regexp_scanner_ to compile a correct regular expression string into a RegexpSpecification.
      # A correct regular expression string must follow the grammar specified in RegexpParser.
      #
      # The first argument is the name with which all the Tokens derived from this regular expression will be marked. It must be a symbol.
      #
      # Returns the receiver, so that calls can be chained.
      def token(name, string)
        specification = @regexp_parser.parse(@regexp_scanner.scan(string))
        specification.name = name
        @scanner.add_regexp(specification)
        self
      end

      # :call-seq:
      # lookahead
      # lookahead(true)
      #
      # It allows to set the lookahead behaviour. If the lookahead is set to true, a Buffer will be used during parsing.
      # Without argument it returns the current setting.
      def lookahead(lookahead = nil)
        lookahead.nil? ? @lookahead : (@lookahead = lookahead)
      end

      private

      # Creates the anonymous parser and scanner classes, the helpers used to
      # compile the token definitions, and turns the lookahead off.
      def init_parser # :nodoc:
        @parser = Class.new(AbstractParser)
        @scanner = Class.new(RegexpAbstractScanner)

        @regexp_scanner = RegexpScanner.new
        @regexp_parser = RegexpParser.new

        @lookahead = false
      end
    end
  end
end