llip 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/History.txt +4 -0
  2. data/MIT-LICENSE +21 -0
  3. data/Manifest.txt +45 -0
  4. data/README.txt +148 -0
  5. data/Rakefile +66 -0
  6. data/examples/ariteval/ariteval.rb +132 -0
  7. data/examples/ariteval/evaluator.rb +61 -0
  8. data/examples/ariteval/exp.rb +104 -0
  9. data/lib/llip.rb +6 -0
  10. data/lib/llip/abstract_parser.rb +170 -0
  11. data/lib/llip/abstract_scanner.rb +83 -0
  12. data/lib/llip/buffer.rb +35 -0
  13. data/lib/llip/llip_error.rb +43 -0
  14. data/lib/llip/parser.rb +93 -0
  15. data/lib/llip/production_compiler.rb +168 -0
  16. data/lib/llip/production_specification.rb +79 -0
  17. data/lib/llip/recursive_production_compiler.rb +35 -0
  18. data/lib/llip/regexp_abstract_scanner.rb +116 -0
  19. data/lib/llip/regexp_parser.rb +197 -0
  20. data/lib/llip/regexp_scanner.rb +33 -0
  21. data/lib/llip/regexp_specification.rb +210 -0
  22. data/lib/llip/token.rb +47 -0
  23. data/lib/llip/visitable.rb +37 -0
  24. data/spec/ariteval/ariteval_spec.rb +111 -0
  25. data/spec/ariteval/evaluator_spec.rb +106 -0
  26. data/spec/ariteval/exp_spec.rb +232 -0
  27. data/spec/llip/abstract_parser_spec.rb +273 -0
  28. data/spec/llip/abstract_scanner_spec.rb +152 -0
  29. data/spec/llip/buffer_spec.rb +60 -0
  30. data/spec/llip/llip_error_spec.rb +77 -0
  31. data/spec/llip/parser_spec.rb +163 -0
  32. data/spec/llip/production_compiler_spec.rb +271 -0
  33. data/spec/llip/production_specification_spec.rb +75 -0
  34. data/spec/llip/recursive_production_compiler_spec.rb +86 -0
  35. data/spec/llip/regexp_abstract_scanner_spec.rb +320 -0
  36. data/spec/llip/regexp_parser_spec.rb +265 -0
  37. data/spec/llip/regexp_scanner_spec.rb +40 -0
  38. data/spec/llip/regexp_specification_spec.rb +734 -0
  39. data/spec/llip/token_spec.rb +70 -0
  40. data/spec/llip/visitable_spec.rb +38 -0
  41. data/spec/spec_helper.rb +10 -0
  42. metadata +110 -0
@@ -0,0 +1,104 @@
1
+ require 'visitable'
2
+
3
# Expression node that wraps a single value (a number, judging by the
# class name). It can be traversed through the Visitable mixin.
class NumExp
  include Visitable

  # The wrapped value, exactly as it was handed to the constructor.
  attr_reader :value

  # value:: the object this node stands for.
  def initialize(value)
    @value = value
  end

  # The textual form of the wrapped value.
  def to_s
    value.to_s
  end
end
17
+
18
# Expression node that wraps a single value (an identifier, judging by
# the class name). It can be traversed through the Visitable mixin.
class IdentExp
  include Visitable

  # The wrapped value, exactly as it was handed to the constructor.
  attr_reader :value

  # value:: the object this node stands for.
  def initialize(value)
    @value = value
  end

  # The textual form of the wrapped value.
  def to_s
    value.to_s
  end
end
31
+
32
# Expression node pairing a name with a value; it is rendered as an
# assignment. It can be traversed through the Visitable mixin.
class AssignIdentExp
  include Visitable

  # Left-hand side of the assignment.
  attr_reader :name

  # Right-hand side of the assignment.
  attr_reader :value

  # name::  what is being assigned to.
  # value:: what is being assigned.
  def initialize(name, value)
    @name = name
    @value = value
  end

  # Renders the node as "( name = value )".
  def to_s
    "( #{name} = #{value} )"
  end
end
49
+
50
# Base class of the binary operation nodes. It stores the two operands;
# the operator symbol (+op+) is left unset here and is filled in by the
# subclasses.
class OpExp
  include Visitable

  # The operator, as set by a subclass (nil on a bare OpExp).
  attr_reader :op

  # The operands.
  attr_reader :left, :right

  # left::  the left operand.
  # right:: the right operand.
  def initialize(left, right)
    @left = left
    @right = right
  end

  # Renders the node as "( left op right )".
  def to_s
    "( #{left} #{op} #{right} )"
  end

  # Two operation nodes are equal when they are of exactly the same class
  # and both their operands are equal.
  def ==(other)
    other.class == self.class && left == other.left && right == other.right
  end
end
73
+
74
# Binary operation node whose operator is "+".
class PlusExp < OpExp
  def initialize(left, right)
    super(left, right)
    @op = "+"
  end
end
80
+
81
# Binary operation node whose operator is "-".
class MinusExp < OpExp
  def initialize(left, right)
    super(left, right)
    @op = "-"
  end
end
87
+
88
# Binary operation node whose operator is "*".
class MulExp < OpExp
  def initialize(left, right)
    super(left, right)
    @op = "*"
  end
end
94
+
95
# Binary operation node whose operator is "/".
class DivExp < OpExp
  def initialize(left, right)
    super(left, right)
    @op = "/"
  end
end
101
+
102
+
103
+
104
+
@@ -0,0 +1,6 @@
1
+ # :include: README.txt
2
module LLIP
  # Version string of the library.
  VERSION = "0.1.0"
end
5
+
6
+ require File.dirname(__FILE__) + '/llip/parser'
@@ -0,0 +1,170 @@
1
+ require File.dirname(__FILE__) + '/production_specification'
2
+ require File.dirname(__FILE__) + '/production_compiler'
3
+ require File.dirname(__FILE__) + '/recursive_production_compiler'
4
+ require File.dirname(__FILE__) + '/llip_error'
5
+
6
module LLIP

  # This class hides all the complexity of generating and building a parser.
  # After subclassing it, all the methods defined in
  # AbstractParser::ClassMethods can be used to specify the productions.
  class AbstractParser

    # Every subclass receives the class-level DSL (production, scope, ...).
    def self.inherited(other)
      super
      other.extend(ClassMethods)
    end

    def initialize
      # Free-form storage exposed through #[] and #[]=.
      @hash = {}
    end

    # The productions specified on the class of this parser.
    def productions
      self.class.productions
    end

    # Parse the tokens generated by the scanner until it reaches the end.
    # See AbstractScanner to know how to develop a scanner.
    #
    # This placeholder always raises: the working implementation is generated
    # by ClassMethods#compile_scope once a scope has been given.
    def parse(scanner)
      raise "This method hasn't been compiled yet."
    end

    # Read a value from the parser's free-form storage.
    def [](key)
      @hash[key]
    end

    # Store a value in the parser's free-form storage.
    def []=(key, value)
      @hash[key] = value
    end

    # It raises a ParserError instead of a RuntimeError if no exception is given.
    #
    # It's public, so it can be called from the production definitions to have
    # the exception set to ParserError.
    #
    # The call is handed to Kernel#raise untouched when the first argument is
    # already an exception (or exception class), or when there is no current
    # token to attach to the error. Otherwise the first argument is used as
    # message and the optional second one as backtrace.
    def raise(*args)
      if args.first.respond_to?(:exception) || !@scanner.respond_to?(:current) || @scanner.current.nil?
        super(*args)
      else
        error = ParserError.new(@scanner.current, args.shift)
        # Without an explicit backtrace, point at whoever called this method.
        backtrace = args.shift || caller(1)
        error.set_backtrace(backtrace)
        super error
      end
    end

    module ClassMethods

      # Contains the evaluated code, it's useful for debugging.
      attr_reader :code

      # :call-seq:
      # autocompile(true)
      # autocompile(false)
      #
      # Set the autocompile flag true or false. The default is *true*.
      # If this flag is turned on every production is automatically evaluated and converted into code.
      # Otherwise you can compile it using AbstractParser::ClassMethods#compile.
      #
      # Called without argument it returns the current flag (initializing it
      # to true on first use).
      def autocompile(autocompile = nil)
        if autocompile.nil?
          @autocompile = true if @autocompile.nil?
        else
          @autocompile = autocompile
        end
        init_compile if @autocompile
        @autocompile
      end

      # Add a production to the parser, the block must accept an argument which is
      # a new ProductionSpecification.
      # The ProductionSpecification name is set to the first parameter and its mode to the second if it exists.
      # A ProductionSpecification is compiled to a method named +parse_name+.
      #
      # Calling it again with the same name re-opens the existing specification.
      # Returns +name+.
      def production(name, mode = nil) # :yields: production_specification
        key = name.to_sym
        productions[key] ||= LLIP::ProductionSpecification.new(key)
        productions[key].mode = mode if mode
        yield productions[key]
        compile_production(productions[key]) if autocompile
        name
      end

      # Return a hash containing all the specified productions, keyed by symbol.
      def productions
        @productions ||= {}
      end

      # Return/set the scope, which is the first production to be called.
      # The scope is mandatory to generate the parse method.
      #
      # Raises ArgumentError if an empty string is given.
      def scope(name = nil)
        if name
          raise ArgumentError.new("The scope must be a not empty string") if name == ""
          @scope = name
          compile_scope if autocompile
        end
        @scope
      end

      # Compile all the productions and set the code attribute correctly.
      #
      # Raises a RuntimeError if no scope was given or if the scope doesn't
      # name a known production.
      def compile
        init_compile

        # First check the scope. The lazy +productions+ accessor is used so
        # that a parser without any production reports an illegal scope
        # instead of failing on a nil hash.
        if @scope.nil? || !productions.key?(@scope.to_sym)
          raise "You must give a legal scope"
        end

        compile_scope

        # Compile all the productions, then evaluate the collected code.
        productions.each_value { |prod| compile_production(prod) }

        class_eval(@code)
        @compiled = true
      end

      # Returns a boolean which specifies if the parser has been compiled
      # through #compile.
      def compiled
        @compiled ||= false
      end

      private

      # Generate the +parse+ method, which starts from the scope production
      # and complains if some tokens are left over. The code is appended to
      # +code+ and, with autocompile on, evaluated immediately.
      def compile_scope
        scope_code = <<-CODE
          def parse(scanner)
            @scanner = scanner
            @scanner.next
            result = parse_#{@scope}
            raise "The parsing terminating without processing all tokens, the exceeding token is '\#{@scanner.current}'" unless @scanner.current.nil?
            result
          end
        CODE

        class_eval(scope_code) if autocompile
        @code << scope_code
      end

      # Turn a single production into code with the compiler matching its
      # mode (:single or :recursive). The code is appended to +code+ and,
      # with autocompile on, evaluated immediately.
      def compile_production(prod)
        if prod.mode == :single
          compiler = @single_compiler
        elsif prod.mode == :recursive
          compiler = @recursive_compiler
        else
          raise "Unknow compile mode(#{prod.mode})for production #{prod.name}"
        end

        compiler.compile(prod)
        @code << "\n\n"
        @code << compiler.code

        class_eval(compiler.code) if autocompile
        # The compilers are shared between productions, so clean them up.
        compiler.reset
      end

      # Create the code buffer and the two compilers, only once.
      def init_compile
        unless @code
          @code = ""
          @single_compiler = LLIP::ProductionCompiler.new
          @recursive_compiler = LLIP::RecursiveProductionCompiler.new
        end
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require File.dirname(__FILE__) + '/token'
2
+ require 'stringio'
3
+
4
module LLIP

  # It's the base class of all the scanners. It handles common issues like reading a multibyte char or setting up correctly an IO to read from.
  # This class is +Abstract+ because it lacks the +next+ method, which a subclass must add.
  class AbstractScanner

    # It contains the data scanned by the scanner. It can be nil or an IO.
    # To set it, use scan.
    attr_reader :source

    # It contains the last token generated by the scanner.
    attr_reader :current

    # It contains the current line of the input file.
    # Its default is -1, and it's incremented every $-0 the AbstractScanner reads.
    attr_reader :current_line

    # It contains the current char of the input file, it's automatically incremented for every read char.
    # Its default is -1.
    # For every $-0 read, it's re-initialized to -1.
    attr_reader :current_char

    # source:: an optional IO or String, handed to scan when given.
    def initialize(source = nil)
      @current_line = -1
      @current_char = -1
      scan(source) if source
      @current = Token.new
    end

    # :call-seq:
    # scan(IO)
    # scan(String)
    #
    # It initializes the scanner to scan an IO. If a String is given, it is automatically converted into a StringIO.
    # It memorizes the IO in the source attribute, reads the first char and returns the scanner itself.
    def scan(source)
      @source = source
      # Anything that can't be read char by char is wrapped in a StringIO.
      @source = StringIO.new(source) unless @source.respond_to?(:readchar) && @source.respond_to?(:eof?)
      @current_line += 1
      @current_char = -1
      read_next
      self
    end

    # This method is abstract and it must be implemented by a subclass.
    # This method must read the chars to produce a Token, set it to current and return it. If a char doesn't match the first char of a token, this method should raise LLIPError.
    # If a char matches the first char of a token, but the next ones don't, this method should raise an UnvalidTokenError.
    def next
      raise NotImplementedError.new
    end

    protected

    # It's used to recognize if the bytes read so far are an incomplete
    # multibyte char (it's a little hack). The /n flag makes it a binary
    # regexp: without it the raw byte escapes are rejected as an
    # "invalid multibyte escape" on encoding-aware Rubies.
    UTF8_MB_PATTERN = /[\xc0-\xdf]$|[\xe0-\xef]$|[\xe0-\xef][\x80-\xbf]$/n

    # It reads one char from the source and memorizes it in the internal variable @next_char. It handles multibyte chars correctly.
    # If EOF is reached, it will return nil.
    def read_next
      if @source.eof?
        @next_char = nil
      else
        @next_char = @source.readchar.chr
        # A byte-oriented source hands out a multibyte char one byte at a
        # time: keep reading until the sequence is complete.
        @next_char << @source.readchar.chr while incomplete_multibyte?(@next_char)

        if @next_char == $-0
          @current_line += 1
          @current_char = -1
        else
          @current_char += 1
        end

        @next_char
      end
    end

    # True if +char+ ends with the beginning of a multibyte sequence.
    # The check is done on the raw bytes, so that the binary pattern can be
    # applied whatever the encoding of the source is.
    def incomplete_multibyte?(char)
      bytes = char.respond_to?(:b) ? char.b : char
      !(bytes =~ UTF8_MB_PATTERN).nil?
    end

  end
end
@@ -0,0 +1,35 @@
1
+
2
module LLIP

  # It wraps a scanner to add look-ahead: tokens read in advance are kept in
  # an internal queue and handed out by next before the scanner is asked for
  # new ones.
  class Buffer

    # The wrapped scanner.
    attr_accessor :scanner

    # The last token returned by next.
    attr_reader :current

    # scanner:: the object the tokens are read from; it must respond to
    #           +next+ and +scan+.
    def initialize(scanner)
      @scanner = scanner
      @current = nil
      @buffer = nil
    end

    # Make the wrapped scanner scan +text+ and return the buffer itself.
    # Tokens looked ahead on a previous source are thrown away, otherwise
    # they would be handed out as if they belonged to the new one.
    def scan(text)
      @scanner.scan(text)
      @buffer = nil
      self
    end

    # Return the next token and memorize it in current. Looked-ahead tokens
    # are consumed first, then the scanner is asked for a new one.
    def next
      # An empty queue (left e.g. by lookahead(0)) counts as no queue at all.
      if @buffer.nil? || @buffer.empty?
        @buffer = nil
        return @current = @scanner.next
      end

      @current = @buffer.shift
      @buffer = nil if @buffer.empty?
      @current
    end

    # Return the n-th token after the current one (1 is the token that the
    # following call to next will return) without consuming it.
    def lookahead(n)
      @buffer ||= []
      @buffer << @scanner.next while @buffer.size < n
      @buffer[n - 1]
    end
  end
end
@@ -0,0 +1,43 @@
1
+
2
module LLIP

  # Root of the LLIP exception hierarchy.
  # Every message gets a header reporting the line and the char of the token
  # which caused the exception.
  #
  # A subclass can customize the message either by passing it to the
  # constructor or by overriding the :message method.
  class LLIPError < StandardError

    # The token that caused the exception.
    attr_reader :token

    # token:: the offending token; it must respond to +line+ and +char+.
    # msg::   the message without header (optional).
    def initialize(token, msg = nil)
      super(msg)
      @token = token
    end

    # The message without the position header.
    alias_method :internal_message, :to_s

    # The message preceded by the position header.
    def to_s
      "At line #{token.line} char #{token.char} a #{self.class.name} occurred: #{internal_message}"
    end
  end

  # Raised for a token whose value doesn't match its regular expression.
  class UnvalidTokenError < LLIPError

    # token:: the offending token; +value+ and +name+ are used in the message.
    def initialize(token)
      super(token, "the current token '#{token.value}' doesn't match with the regular expression #{token.name}.")
    end
  end

  # Base class of the errors raised while parsing.
  class ParserError < LLIPError
  end

  # Raised for a token which a production can't accept.
  class NotAllowedTokenError < ParserError

    # token::      the offending token; +value+ and +name+ are used in the message.
    # production:: the production that refused the token.
    def initialize(token, production)
      super(token, "the token '#{token.value}' matched by the regexp '#{token.name}' isn't allowed in production #{production}.")
    end
  end
end
@@ -0,0 +1,93 @@
1
+ require File.dirname(__FILE__) + '/abstract_parser'
2
+ require File.dirname(__FILE__) + '/regexp_abstract_scanner'
3
+ require File.dirname(__FILE__) + '/regexp_parser'
4
+ require File.dirname(__FILE__) + '/regexp_scanner'
5
+ require File.dirname(__FILE__) + '/buffer'
6
+ require 'forwardable'
7
+
8
module LLIP

  # It's a +facade+ of the LLIP library.
  #
  # To use it, subclass it and then build the parser and its scanner with the
  # class methods production, scope and token.
  class Parser

    # Every subclass gets the class-level DSL and its own, freshly created
    # parser and scanner classes.
    def self.inherited(other)
      other.extend(ClassMethods)
      other.send(:init_parser)
    end

    # The parser of the Parser subclass: an instance of the class returned by
    # LLIP::Parser::ClassMethods.parser.
    attr_reader :parser

    # The scanner of the Parser subclass: an instance of the class returned by
    # LLIP::Parser::ClassMethods.scanner, wrapped in a Buffer if the lookahead
    # is turned on.
    attr_reader :scanner

    def initialize
      @parser = self.class.parser.new
      plain_scanner = self.class.scanner.new
      @scanner = self.class.lookahead ? Buffer.new(plain_scanner) : plain_scanner
    end

    # Parse the source using the parser and the scanner.
    #
    # See AbstractScanner#scan to know what a valid source is.
    def parse(source)
      scanning = @scanner.scan(source)
      @parser.parse(scanning)
    end

    module ClassMethods

      extend Forwardable

      # A class descending from AbstractParser which will contain all the productions.
      # The messages :production and :scope are redirected to it.
      # See AbstractParser::ClassMethods#production, AbstractParser::ClassMethods#scope and ProductionSpecification.
      attr_reader :parser

      # A class descending from RegexpAbstractScanner which will contain all the token definitions.
      # The simple way to add one is token.
      attr_reader :scanner

      # It's a RegexpParser.
      attr_reader :regexp_parser

      # It's a RegexpScanner.
      attr_reader :regexp_scanner

      def_delegator :@parser, :production
      def_delegator :@parser, :scope

      # It uses _regexp_parser_ and _regexp_scanner_ to compile a correct regular expression string into a RegexpSpecification,
      # which is then added to the scanner.
      # A correct regular expression string must follow the grammar specified in RegexpParser.
      #
      # The first argument is the name which all the Token derived from this regular expression will be marked with. It must be a symbol.
      #
      # Returns the class itself.
      def token(name, string)
        specification = @regexp_parser.parse(@regexp_scanner.scan(string))
        specification.name = name
        @scanner.add_regexp(specification)
        self
      end

      # :call-seq:
      # lookahead
      # lookahead(true)
      #
      # It allows to set the lookahead behaviour. If the lookahead is set to true, a Buffer will be used during parsing.
      # It always returns the current setting.
      def lookahead(lookahead = nil)
        @lookahead = lookahead unless lookahead.nil?
        @lookahead
      end

      private

      # Create the per-class parser and scanner classes and the tools used to
      # compile the token definitions. The lookahead starts turned off.
      def init_parser # :nodoc:
        @parser = Class.new(AbstractParser)
        @scanner = Class.new(RegexpAbstractScanner)

        @regexp_scanner = RegexpScanner.new
        @regexp_parser = RegexpParser.new

        @lookahead = false
      end
    end
  end
end
+ end