llip 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (42) hide show
  1. data/History.txt +4 -0
  2. data/MIT-LICENSE +21 -0
  3. data/Manifest.txt +45 -0
  4. data/README.txt +148 -0
  5. data/Rakefile +66 -0
  6. data/examples/ariteval/ariteval.rb +132 -0
  7. data/examples/ariteval/evaluator.rb +61 -0
  8. data/examples/ariteval/exp.rb +104 -0
  9. data/lib/llip.rb +6 -0
  10. data/lib/llip/abstract_parser.rb +170 -0
  11. data/lib/llip/abstract_scanner.rb +83 -0
  12. data/lib/llip/buffer.rb +35 -0
  13. data/lib/llip/llip_error.rb +43 -0
  14. data/lib/llip/parser.rb +93 -0
  15. data/lib/llip/production_compiler.rb +168 -0
  16. data/lib/llip/production_specification.rb +79 -0
  17. data/lib/llip/recursive_production_compiler.rb +35 -0
  18. data/lib/llip/regexp_abstract_scanner.rb +116 -0
  19. data/lib/llip/regexp_parser.rb +197 -0
  20. data/lib/llip/regexp_scanner.rb +33 -0
  21. data/lib/llip/regexp_specification.rb +210 -0
  22. data/lib/llip/token.rb +47 -0
  23. data/lib/llip/visitable.rb +37 -0
  24. data/spec/ariteval/ariteval_spec.rb +111 -0
  25. data/spec/ariteval/evaluator_spec.rb +106 -0
  26. data/spec/ariteval/exp_spec.rb +232 -0
  27. data/spec/llip/abstract_parser_spec.rb +273 -0
  28. data/spec/llip/abstract_scanner_spec.rb +152 -0
  29. data/spec/llip/buffer_spec.rb +60 -0
  30. data/spec/llip/llip_error_spec.rb +77 -0
  31. data/spec/llip/parser_spec.rb +163 -0
  32. data/spec/llip/production_compiler_spec.rb +271 -0
  33. data/spec/llip/production_specification_spec.rb +75 -0
  34. data/spec/llip/recursive_production_compiler_spec.rb +86 -0
  35. data/spec/llip/regexp_abstract_scanner_spec.rb +320 -0
  36. data/spec/llip/regexp_parser_spec.rb +265 -0
  37. data/spec/llip/regexp_scanner_spec.rb +40 -0
  38. data/spec/llip/regexp_specification_spec.rb +734 -0
  39. data/spec/llip/token_spec.rb +70 -0
  40. data/spec/llip/visitable_spec.rb +38 -0
  41. data/spec/spec_helper.rb +10 -0
  42. metadata +110 -0
@@ -0,0 +1,168 @@
1
+
2
module LLIP

  # It's the main class which handles the dynamic generation of the source code
  # of a <tt>parse_<name></tt> method. The generated text is accumulated in #code.
  #
  # The generated method reads <tt>@scanner</tt> and calls <tt>productions</tt> on the
  # object it is evaluated in, so it is meant to be evaluated inside a parser.
  class ProductionCompiler

    # It contains the source code produced so far.
    attr_reader :code

    def initialize
      reset
    end

    # It initializes the compiler for a new generation: the previous code is
    # discarded and the header of <tt>parse_<name></tt> is emitted, together with
    # the call to the default block of the production.
    #
    # It returns the compiler itself, so the calls can be chained.
    def start(name)
      reset
      @name_str = name
      @name = str_to_sym(name)
      @code << <<-CODE
        def parse_#{name}
          result = productions[#{@name}].default.call(@scanner,self)
      CODE
      self
    end

    # :call-seq:
    #   token(Array)
    #   token(Symbol)
    #   token(String)
    #
    # If the argument is a Symbol or a String, the produced code will match them through ==.
    # If the argument is an Array, lookaheads will be used, so the scanner must support
    # lookaheads (or use a Buffer which supports them).
    #
    # It raises an ArgumentError if the Array is empty or if a token name is neither
    # a String nor a Symbol.
    #
    # It returns the compiler itself, so the calls can be chained.
    def token(tokens)
      lookaheads = ""

      if tokens.kind_of? Array
        raise ArgumentError, "at least one token name is required" if tokens.empty?

        names = tokens.map { |tk| build_token_name(tk) }
        token_identifier = "[#{names.join(',')}]"
        name = names.first
        # The first name is matched against the current token, the n-th following
        # name against the n-th lookahead.
        names[1..-1].each_with_index do |lookahead_name, index|
          lookaheads << " and @scanner.lookahead(#{index + 1}) == #{lookahead_name}"
        end
      else
        name = build_token_name(tokens)
        token_identifier = name
      end

      # @else is "" for the first branch and "els" afterwards, so the branches
      # form a single if/elsif chain.
      @code << <<-CODE
          #{@else}if @scanner.current == #{name}#{lookaheads}
            result = productions[#{@name}].tokens[#{token_identifier}].call(result,@scanner,self)
      CODE
      @else = "els"
      self
    end

    # It closes the method definition. The else clause is built only if at least
    # one token has been added.
    def end(raise_on_error=true)
      build_else(raise_on_error) if @else != ""
      build_end
    end

    # It resets the compiler
    def reset
      @code = ""
      @name = nil
      @else = ""
    end

    # It takes a ProductionSpecification and then calls its compiling methods by itself.
    # It takes care to order all the productions the right way.
    def compile(production)
      start(production.name)
      sort_production(production).each { |tk| token(tk) }
      self.end(production.raise_on_error)
    end

    # It returns the keys of <tt>production.tokens</tt> in the order in which they
    # must be tested:
    # 1. the tokens which aren't involved in any lookahead;
    # 2. the lookahead sequences, the longest first;
    # 3. the single tokens which are also the head of a lookahead sequence;
    # 4. <tt>:everything</tt>, if present, always as the last one.
    def sort_production(production) # :nodoc:
      tokens = production.tokens

      lk_tk = []
      tokens.keys.each do |tk|
        next unless tk.kind_of? Array
        lk_tk << tk
        lk_tk << tk[0] if tokens.has_key? tk[0]
      end

      not_lk_tk = tokens.keys - lk_tk
      lk_tk.uniq!

      # The original position is part of the sort key to keep the order
      # deterministic between elements with the same rank.
      ranked = []
      lk_tk.each_with_index do |tk, position|
        rank = tk.kind_of?(Array) ? -tk.size : 1
        ranked << [rank, position, tk]
      end
      lk_tk = ranked.sort_by { |rank, position, _tk| [rank, position] }.map { |entry| entry.last }

      ordered = not_lk_tk + lk_tk
      ordered << :everything if ordered.delete(:everything)
      ordered
    end

    protected
    # :call-seq:
    #   str_to_sym(object) => ":\"#{object.to_s}\""
    #
    # It returns the source code of a Symbol literal. The name is escaped, so
    # quotes and backslashes inside it don't break the generated code.
    def str_to_sym(string)
      ":#{string.to_s.inspect}"
    end

    # :call-seq:
    #   build_token_name(string) => "'#{string}'"
    #   build_token_name(symbol) => ":\"#{symbol.to_s}\""
    #
    # It returns the source code of the literal used to match a token.
    # Backslashes and single quotes inside a String are escaped.
    # It raises an ArgumentError if the argument is neither a String nor a Symbol.
    def build_token_name(string)
      if string.kind_of? String
        # The block form is used to avoid the special meaning of the backslash
        # inside a gsub replacement string.
        "'" + string.gsub(/[\\']/) { |char| "\\" + char } + "'"
      elsif string.kind_of? Symbol
        str_to_sym(string)
      else
        raise ArgumentError, "a token must be a String or a Symbol, got #{string.inspect}"
      end
    end

    # It builds the else clause in the method definition and closes the if statement.
    # It accepts a raise_on_error parameter to specify if it has to raise or not.
    def build_else(raise_on_error=true)
      if raise_on_error
        @code << <<-CODE
          else
            raise NotAllowedTokenError.new(@scanner.current,#{@name})
        CODE
      end
      @code << "\nend\n"
      @else = ""
    end

    # It closes the method definition and sets the return value
    def build_end
      @code << <<-CODE
          return result
        end
      CODE
    end

  end
end
@@ -0,0 +1,79 @@
1
+
2
module LLIP


  # A ProductionSpecification contains all that is needed to transform it into live code.
  # This transformation is done by ProductionCompiler or RecursiveProductionCompiler.
  #
  # The flow of the execution of a production is:
  # 1. The default block is called and its result is stored in a +result+ var.
  # 2. The current token is matched against every key of the ProductionSpecification#tokens
  #    hash, and if this match is positive the associated block is executed.
  #    The result is stored inside the +result+ var.
  #    If nothing matches and the ProductionSpecification#mode is :single and ProductionSpecification#raise_on_error
  #    is true an exception must be raised. If nothing matches and the ProductionSpecification#mode is
  #    :recursive the production must return the +result+ var.
  # 3. If the ProductionSpecification#mode is :single, the production must return
  #    the +result+ var. If the ProductionSpecification#mode is :recursive, the step
  #    2 is going to be executed until it recognizes a Token.
  class ProductionSpecification

    # The block used when no block has been specified. It accepts any number of
    # arguments, because it's called both as a default block (scanner, parser)
    # and as a token block (result, scanner, parser), and it always returns nil.
    NIL_BLOCK = lambda { |*_args| nil }

    # The production name.
    attr_reader :name

    # It's a hash which has as keys the tokens to recognize and as values the blocks to be executed with them.
    # They are specified through ProductionSpecification#token.
    attr_reader :tokens

    # The mode of the production. It can be :single or :recursive.
    attr_accessor :mode

    # This attribute specifies if the production should raise an exception if the current token hasn't been recognized.
    # It's important only for :single productions.
    attr_accessor :raise_on_error

    def initialize(name)
      @name = name
      @tokens = {}
      @mode = :single
      @default = NIL_BLOCK
      @raise_on_error = true
    end

    # :call-seq:
    #   token(*token_name) { |result, scanner, parser| ... }
    #
    # The block specified through this method will be executed when the token with the specified name is matched.
    # If more than one name is given, the parser should automatically use lookahead and match all the tokens.
    #
    # This name is going to be matched for equality with a Token.
    #
    # If the last argument responds to +call+, it's used instead of the block.
    # If neither is given, NIL_BLOCK is stored.
    #
    # The arguments of the block will be filled by:
    # * The +result+ argument contains the result of a previous called block inside this production.
    # * The +scanner+ is an instance of a class descending from AbstractScanner. It's the scanner used by the parser.
    #   It's important to call +next+ on this scanner to make it build the next token.
    # * The +parser+ is an instance of a class descending from AbstractParser. It's the caller of the production.
    #   It's necessary to call other productions.
    #
    # It returns the specification itself, so the calls can be chained.
    def token(*args,&block) # :yields: result,scanner,parser
      names = args.flatten
      block = names.pop if names.last.respond_to? :call
      # A single name is stored as it is, more names are stored as an Array.
      key = names.size == 1 ? names.first : names
      @tokens[key] = block || NIL_BLOCK
      self
    end

    # :call-seq:
    #   default() { |scanner, parser| ... }
    #
    # The specified block is going to be executed before any token is recognized.
    # The default is NIL_BLOCK.
    #
    # It always returns the current default block, so calling it without
    # arguments works as a reader.
    def default(block=nil,&b)
      block ||= b
      @default = block if block
      @default
    end

  end
end
@@ -0,0 +1,35 @@
1
+ require File.dirname(__FILE__) + '/production_compiler'
2
+
3
module LLIP

  # It modifies ProductionCompiler to add support for a recursive behaviour:
  # the chain of token tests is wrapped in a loop which goes on until the scanner
  # has no current token or no token of the production matches.
  class RecursiveProductionCompiler < ProductionCompiler

    # It emits the method header through ProductionCompiler#start and then it
    # opens the loop.
    #
    # It returns the compiler itself, as ProductionCompiler#start does.
    def start(name)
      super
      @code << <<-CODE
          while not @scanner.current.nil?
      CODE
      self
    end

    protected
    # It builds the else clause and closes the if statement.
    #
    # In a recursive production an unmatched token is never an error: it ends the
    # loop. So the break is always emitted and +raise_on_error+ is ignored;
    # without the break the generated loop would never terminate on an
    # unmatched token. The parameter is kept to be compatible with
    # ProductionCompiler#build_else.
    def build_else(raise_on_error=true)
      @code << <<-CODE
            else
              break
      CODE
      @code << "\nend\n"
      @else = ""
    end

    # It closes the loop and then the method definition through
    # ProductionCompiler#build_end.
    def build_end
      @code << <<-CODE
          end
      CODE

      super
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require File.dirname(__FILE__) + '/regexp_specification'
2
+ require File.dirname(__FILE__) + '/abstract_scanner'
3
+ require File.dirname(__FILE__) + '/llip_error'
4
+
5
module LLIP

  # The RegexpAbstractScanner is the main abstract scanner of LLIP.
  # To have a real scanner, just subclass it and add some regular expressions.
  #
  # See ClassMethods to know how.
  class RegexpAbstractScanner < AbstractScanner

    # Every subclass is extended with ClassMethods.
    def self.inherited(other)
      super
      other.extend(ClassMethods)
    end

    # The scanning table of the class is built the first time a scanner is instantiated.
    def initialize(*args)
      super
      self.class.build unless self.class.built?
    end

    # It scans the next token, it stores it as the current one and returns it.
    #
    # If there are no more chars, the token has :nil as name and nil as value.
    #
    # It raises a LLIPError if no regular expression starts with the next char,
    # and an UnvalidTokenError if the scanning stops in a state which isn't final.
    def next
      return @current = Token.new(:nil,nil,@current_line,@current_char) unless @next_char

      # The position of the token is the position of its first char.
      line = @current_line
      char = @current_char

      regexp = self.class.scanning_table[@next_char]
      unless regexp
        token = Token.new(:nil,@next_char,line,char)
        raise LLIPError.new(token,"there isn't a regular expression which starts with #{@next_char}")
      end

      # It follows the transitions until a char isn't accepted or the input ends.
      state = regexp.init
      string = ""
      while state[@next_char] != :error and @next_char
        state = state[@next_char]
        string << @next_char
        read_next
      end

      token = Token.new(state.regexp.name,string,line,char)
      if state.final?
        @current = token
      else
        raise UnvalidTokenError.new(token)
      end
    end

    module ClassMethods

      # It's where all the regular expressions are stored. The keys are the starting_chars of the RegexpSpecification.
      # While the table can be modified directly, it's recommended to use the add_regexp method.
      def scanning_table
        @scanning_table ||= Hash.new
      end

      # It allows to add a RegularExpression to the scanner and it makes sure that all the specified tokens don't collide.
      #
      # If a RegexpSpecification has starting_chars == :everything, it's set to the default value of the scanning_table.
      #
      # Every starting char which is already in the table is associated with the
      # result of RegexpSpecification.mix between the new regexp and the one
      # already stored for that char.
      #
      # It returns the receiver, so the calls can be chained.
      def add_regexp(regexp)
        starting_chars = regexp.starting_chars
        if starting_chars.kind_of? Symbol
          scanning_table.default = regexp
        else
          common_chars = starting_chars.select { |c| scanning_table.has_key? c }
          (starting_chars - common_chars).each { |c| scanning_table[c] = regexp }

          # Every colliding regexp is mixed only once, and the result is shared
          # between all the chars which pointed to it.
          mixed = []
          common_chars.each do |c|
            colliding = scanning_table[c]
            pair = mixed.find { |existing, _result| existing.equal?(colliding) }
            unless pair
              pair = [colliding, RegexpSpecification.mix(regexp,colliding)]
              mixed << pair
            end
            scanning_table[c] = pair.last
          end
        end

        # If the table has already been built, the fix must be applied again.
        build if @built

        self
      end

      # It fixes a problem with all the regexps that end with ".*" or ".+".
      # If such a regexp is given without calling this method,
      # all the successive chars are going to be included by that regexp.
      # This method adds :error in the last state of that regexp for all
      # starting chars in the scanner.
      #
      # This method is automatically called when a new scanner is instantiated.
      def build
        regexps = scanning_table.values.uniq
        regexps << scanning_table.default if scanning_table.default

        # A last state which has itself as error is the one to fix.
        fixable = []
        regexps.each do |regexp|
          regexp.last.each do |state|
            fixable << state if state.error == state
          end
        end

        starting_chars = scanning_table.keys
        fixable.each do |state|
          starting_chars.each do |char|
            state[char] = :error
          end
        end
        @built = true
        self
      end

      # It returns true if the build method has been called.
      def built?
        @built = false if @built.nil?
        @built
      end
    end
  end
end
@@ -0,0 +1,197 @@
1
+ require File.dirname(__FILE__) + '/abstract_parser'
2
+
3
module LLIP

  # It's a parser for regular expressions. It correctly builds a RegexpSpecification given a valid regular expression string.
  #
  # === Grammar
  #
  # VN = { EXP , ELEMENT}
  #
  # char = every character
  #
  # symb = { ( , ) , . , * , + , \ , |}
  #
  # VT = char U symb
  #
  # In every production "or" has been used instead of "|" to avoid confusion.
  #
  # P = {
  #   EXP -> META EXP
  #   EXP -> META or EXP
  #   EXP -> META
  #   META -> ELEMENT*
  #   META -> ELEMENT+
  #   META -> ELEMENT
  #   ELEMENT -> char or . or \symb
  #   ELEMENT -> (EXP)
  # }
  #
  # or in EBNF format
  #
  # P' = {
  #   EXP ::= META{[|]EXP}
  #   META ::= ELEMENT[* or +]
  #   ELEMENT ::= char or . or \symb or (EXP)
  # }
  #
  class LLIP::RegexpParser < LLIP::AbstractParser

    # It maps the char which follows a backslash to the char it stands for.
    # Looking up a key which isn't in the table raises a RuntimeError.
    SPECIALS_TABLE = Hash.new { |_hash, key| raise "Unknown special #{key}" }
    SPECIALS_TABLE.update(
      "n" => "\n",
      "r" => "\r",
      "t" => "\t"
    )

    scope(:scope)

    # The starting production: it creates the RegexpSpecification, parses the
    # whole expression and marks the last states as final.
    production(:scope,:single) do |p|
      p.default do |scanner,parser|
        parser[:regexp] = RegexpSpecification.new
        parser[:last] = [parser[:regexp].add_state]

        parser.parse_exp

        parser[:regexp].last.each { |s| s.final= true }
        parser[:last].each { |s| s.final = true }
        parser[:regexp]
      end
    end

    production(:exp,:recursive) do |p|

      p.default do |scanner,parser|
        parser.parse_meta.last
      end

      p.token("|") do |result,scanner,parser|
        scanner.next
        parser[:last] = result
        parser.parse_meta.last
      end

      # Every token which can start an element continues the sequence.
      p.token(:char) do |result,scanner,parser|
        parser.parse_meta
        result
      end

      p.token(".") do |result,scanner,parser|
        parser.parse_meta
        result
      end

      p.token("(") do |result,scanner,parser|
        parser.parse_meta
        result
      end

      p.token("\\") do |result,scanner,parser|
        parser.parse_meta
        result
      end
    end

    production(:meta,:single) do |p|
      # An element which isn't followed by "*" or "+" is not an error.
      p.raise_on_error = false

      p.default do |scanner,parser|
        MetaAccessor.new(parser[:last],parser.parse_element)
      end

      p.token("*") do |meta,scanner,parser|
        if meta.results == :everything
          parser[:last].last.error = parser[:last].last
        else
          if meta.results.kind_of? Array
            meta.results.each do |c|
              parser[:last].each { |s| s[c] = meta.last.last[c] }
            end
          else
            parser[:last].last[meta.results] = parser[:last].last
          end
          parser[:last].concat(meta.last)
        end
        scanner.next
        meta
      end

      p.token("+") do |meta,scanner,parser|
        if meta.results == :everything
          parser[:last].last.error = parser[:last].last
          parser[:last] = [parser[:last].last]
        else
          if meta.results.kind_of? Array
            meta.results.each do |c|
              parser[:last].each { |s| s[c] = meta.last.last[c] }
            end
          else
            parser[:last].last[meta.results] = parser[:last].last
          end
        end
        scanner.next
        meta
      end
    end

    production(:element,:single) do |p|

      p.token(:char) do |result, scanner, parser|
        parser.add_char(parser,scanner)
      end

      p.token(".") do |result, scanner, parser|
        r = parser[:regexp].add_state
        parser[:last].last.error = r
        parser[:last] << r
        scanner.next
        :everything
      end

      p.token("\\") do |result,scanner,parser|
        if scanner.next == :symbol
          # An escaped symbol is added as a plain char.
          parser.add_char(parser,scanner)
        else
          # Every other escaped char must be in the SPECIALS_TABLE.
          parser.add_char(parser,scanner,SPECIALS_TABLE[scanner.current.value])
        end
      end

      p.token("(") do |result,scanner,parser|
        scanner.next
        first_state = parser[:last].last
        parser.parse_exp

        unless scanner.current == ")"
          raise "Every '(' must be followed by a ')'"
        end

        scanner.next
        parser[:last] = first_state.last
        first_state.keys
      end
    end

    # It adds a new state to the regexp, reachable through +char+ from every
    # state in <tt>parser[:last]</tt>. The new state becomes the only last state,
    # and the scanner is moved to the next token.
    #
    # It returns +char+.
    def add_char(parser, scanner, char=scanner.current.value)
      r = parser[:regexp].add_state
      parser[:last].each { |s| s[char] = r }
      parser[:regexp].add_state(r)
      parser[:last] = [r]
      scanner.next
      char
    end

    # It holds the result of an element (+results+) together with the value
    # <tt>parser[:last]</tt> had when the element was parsed (+last+).
    class MetaAccessor

      attr_accessor :results
      attr_accessor :last

      def initialize(last,results)
        @results = results
        @last = last
      end

    end
  end
end