llip 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/History.txt +4 -0
  2. data/MIT-LICENSE +21 -0
  3. data/Manifest.txt +45 -0
  4. data/README.txt +148 -0
  5. data/Rakefile +66 -0
  6. data/examples/ariteval/ariteval.rb +132 -0
  7. data/examples/ariteval/evaluator.rb +61 -0
  8. data/examples/ariteval/exp.rb +104 -0
  9. data/lib/llip.rb +6 -0
  10. data/lib/llip/abstract_parser.rb +170 -0
  11. data/lib/llip/abstract_scanner.rb +83 -0
  12. data/lib/llip/buffer.rb +35 -0
  13. data/lib/llip/llip_error.rb +43 -0
  14. data/lib/llip/parser.rb +93 -0
  15. data/lib/llip/production_compiler.rb +168 -0
  16. data/lib/llip/production_specification.rb +79 -0
  17. data/lib/llip/recursive_production_compiler.rb +35 -0
  18. data/lib/llip/regexp_abstract_scanner.rb +116 -0
  19. data/lib/llip/regexp_parser.rb +197 -0
  20. data/lib/llip/regexp_scanner.rb +33 -0
  21. data/lib/llip/regexp_specification.rb +210 -0
  22. data/lib/llip/token.rb +47 -0
  23. data/lib/llip/visitable.rb +37 -0
  24. data/spec/ariteval/ariteval_spec.rb +111 -0
  25. data/spec/ariteval/evaluator_spec.rb +106 -0
  26. data/spec/ariteval/exp_spec.rb +232 -0
  27. data/spec/llip/abstract_parser_spec.rb +273 -0
  28. data/spec/llip/abstract_scanner_spec.rb +152 -0
  29. data/spec/llip/buffer_spec.rb +60 -0
  30. data/spec/llip/llip_error_spec.rb +77 -0
  31. data/spec/llip/parser_spec.rb +163 -0
  32. data/spec/llip/production_compiler_spec.rb +271 -0
  33. data/spec/llip/production_specification_spec.rb +75 -0
  34. data/spec/llip/recursive_production_compiler_spec.rb +86 -0
  35. data/spec/llip/regexp_abstract_scanner_spec.rb +320 -0
  36. data/spec/llip/regexp_parser_spec.rb +265 -0
  37. data/spec/llip/regexp_scanner_spec.rb +40 -0
  38. data/spec/llip/regexp_specification_spec.rb +734 -0
  39. data/spec/llip/token_spec.rb +70 -0
  40. data/spec/llip/visitable_spec.rb +38 -0
  41. data/spec/spec_helper.rb +10 -0
  42. metadata +110 -0
@@ -0,0 +1,168 @@
1
+
2
module LLIP

  # It's the main class which handles the dynamic generation of source code.
  #
  # A ProductionCompiler produces, as a String, the definition of a
  # +parse_<name>+ method. The generated method first calls the +default+
  # block of the production and then dispatches on <tt>@scanner.current</tt>
  # (plus lookaheads, when requested) to the block registered for the matching
  # token. The text is only accumulated in #code: this class never evaluates it.
  #
  # The usual entry point is #compile. #start, #token and #end can also be
  # chained by hand.
  class ProductionCompiler

    # It contains the code produced so far.
    attr_reader :code

    def initialize
      reset
    end

    # It initializes the compiler for a new generation: every previous state
    # is discarded and the header of +parse_<name>+ is emitted.
    #
    # Returns +self+ so that calls can be chained.
    def start(name)
      reset
      @name_str = name
      @name = str_to_sym(name)
      @code << <<-CODE
        def parse_#{name}
          result = productions[#{@name}].default.call(@scanner,self)
      CODE
      self
    end

    # :call-seq:
    #   token(Array)
    #   token(Symbol)
    #   token(String)
    #
    # If the argument is a Symbol or a String, the produced code will match it through ==.
    # If the argument is an Array, lookaheads will be used, so the scanner must
    # support lookaheads (or use a Buffer which supports them): the first
    # element is compared with <tt>@scanner.current</tt>, the n-th following
    # one with <tt>@scanner.lookahead(n)</tt>.
    #
    # Raises ArgumentError if the Array is empty or if an element is neither a
    # String nor a Symbol. Returns +self+.
    def token(tokens)
      if tokens.kind_of? Array
        raise ArgumentError, "at least one token is required" if tokens.empty?

        names = tokens.map { |tk| build_token_name(tk) }
        # The whole Array is the key under which the block has been registered.
        token_identifier = "[#{names.join(',')}]"
        name = names.first

        conditions = []
        names[1..-1].each_with_index do |lookahead, index|
          conditions << " and @scanner.lookahead(#{index + 1}) == #{lookahead}"
        end
        lookaheads = conditions.join
      else
        name = build_token_name(tokens)
        token_identifier = name
        lookaheads = ""
      end

      # @else is "" for the first branch and "els" afterwards, so that the
      # branches form a single if/elsif chain.
      @code << <<-CODE
        #{@else}if @scanner.current == #{name}#{lookaheads}
          result = productions[#{@name}].tokens[#{token_identifier}].call(result,@scanner,self)
      CODE
      @else = "els"
      self
    end

    # It closes the method definition. The if/elsif chain is closed only if at
    # least one token has been emitted since the last #start.
    def end(raise_on_error=true)
      build_else(raise_on_error) if @else != ""
      build_end
    end

    # It resets the compiler.
    def reset
      @code = ""
      @name = nil
      @name_str = nil
      @else = ""
    end

    # It takes a ProductionSpecification and then calls its compiling methods by
    # itself. It takes care to order all the productions the right way
    # (see sort_production).
    def compile(production)
      start(production.name)
      sort_production(production).each { |tk| token(tk) }
      self.end(production.raise_on_error)
    end

    # It returns the keys of <tt>production.tokens</tt> in the order in which
    # their branches must be emitted:
    #
    # 1. the tokens which aren't involved in any lookahead, in their original order;
    # 2. the lookahead Arrays, longest first, so that a longer sequence is tried
    #    before one of its prefixes;
    # 3. the single tokens which are also the first element of a lookahead Array;
    # 4. <tt>:everything</tt>, if present, always as the very last one.
    #
    # Elements which compare equal keep their original relative order.
    def sort_production(production) # :nodoc:
      tokens = production.tokens
      keys = tokens.keys

      lookahead_related = []
      keys.each do |tk|
        next unless tk.kind_of? Array
        lookahead_related << tk
        lookahead_related << tk[0] if tokens.has_key? tk[0]
      end
      lookahead_related.uniq!

      plain = keys - lookahead_related

      # The original position is the tie-breaker: it makes the ordering total
      # and therefore deterministic.
      indexed = lookahead_related.each_with_index.to_a
      indexed = indexed.sort_by { |tk, index| [tk.kind_of?(Array) ? -tk.size : 1, index] }
      ordered_lookaheads = indexed.map { |tk, _index| tk }

      sorted = plain + ordered_lookaheads
      sorted << :everything if sorted.delete(:everything)
      sorted
    end

    protected

    # :call-seq:
    #   str_to_sym(object) => ":\"#{object.to_s}\""
    #
    # It returns the source code of a Symbol literal. Quotes, backslashes and
    # interpolation sequences are escaped by String#inspect.
    def str_to_sym(string)
      ":#{string.to_s.inspect}"
    end

    # :call-seq:
    #   build_token_name(string) => "'#{string}'"
    #   build_token_name(symbol) => ":\"#{symbol.to_s}\""
    #
    # It returns the source code of the literal which represents the token.
    # Backslashes and single quotes inside a String are escaped.
    #
    # Raises ArgumentError for every other kind of object.
    def build_token_name(string)
      case string
      when String
        "'#{string.gsub(/[\\']/) { |c| "\\#{c}" }}'"
      when Symbol
        str_to_sym(string)
      else
        raise ArgumentError, "a token must be a String or a Symbol, got #{string.inspect}"
      end
    end

    # It builds the else clause in the method definition and closes the if.
    # It accepts a raise_on_error parameter to specify if the generated code
    # has to raise a NotAllowedTokenError when nothing matches.
    def build_else(raise_on_error=true)
      if raise_on_error
        @code << <<-CODE
          else
            raise NotAllowedTokenError.new(@scanner.current,#{@name})
        CODE
      end
      @code << "\nend\n"
      @else = ""
    end

    # It closes the method definition and sets the return value.
    def build_end
      @code << <<-CODE
          return result
        end
      CODE
    end

  end
end
@@ -0,0 +1,79 @@
1
+
2
module LLIP

  # A ProductionSpecification contains all that is needed to transform it into live code.
  # This transformation is done by ProductionCompiler or RecursiveProductionCompiler.
  #
  # The flow of the execution of a production is:
  # 1. The default block is called and its result is stored in a +result+ var.
  # 2. The current token is matched against every key of the ProductionSpecification#tokens
  #    hash, and if this match is positive the associated block is executed.
  #    The result is stored inside the +result+ var.
  #    If nothing matches and the ProductionSpecification#mode is :single and
  #    ProductionSpecification#raise_on_error is true an exception must be raised.
  #    If nothing matches and the ProductionSpecification#mode is :recursive
  #    the production must return the +result+ var.
  # 3. If the ProductionSpecification#mode is :single, the production must return
  #    the +result+ var. If the ProductionSpecification#mode is :recursive, the step
  #    2 is going to be executed until it recognizes a Token.
  class ProductionSpecification

    # The block used when none is given. It accepts any number of arguments,
    # because it is called with (scanner, parser) as a default block and with
    # (result, scanner, parser) as a token block, and it always returns nil.
    NIL_BLOCK = lambda { |*_args| nil }

    # The production name.
    attr_reader :name

    # It's a hash which has as keys the tokens to recognize and as values the blocks to be executed with them.
    # They are specified through ProductionSpecification#token.
    attr_reader :tokens

    # The mode of the production. It can be :single or :recursive.
    attr_accessor :mode

    # This attribute specifies if the production should raise an exception if the current token hasn't been recognized.
    # It's important only for :single productions.
    attr_accessor :raise_on_error

    def initialize(name)
      @name = name
      @tokens = {}
      @mode = :single
      @default = NIL_BLOCK
      @raise_on_error = true
    end

    # :call-seq:
    #   token(*token_name) { |result, scanner, parser| ... }
    #
    # The block specified through this method will be executed when the token with the specified name is matched.
    # If more than one name is given, the parser should automatically use lookahead and match all the tokens.
    # Instead of a block, an object which responds to +call+ can be given as the last argument.
    #
    # This name is going to be matched for equality with a Token.
    #
    # The arguments of the block will be filled by:
    # * The +result+ argument contains the result of a previously called block inside this production.
    # * The +scanner+ is an instance of a class descending from AbstractScanner. It's the scanner used by the parser.
    #   It's important to call +next+ on this scanner to make it build the next token.
    # * The +parser+ is an instance of a class descending from AbstractParser. It's the caller of the production.
    #   It's necessary to call other productions.
    #
    # Raises ArgumentError if no token name is given. Returns +self+.
    def token(*args,&block) # :yields: result,scanner,parser
      args.flatten!
      block = args.pop if args.last.respond_to? :call
      raise ArgumentError, "at least one token name is required" if args.empty?

      # A single name is stored as it is, several names as an Array.
      args = args.first if args.size == 1
      @tokens[args] = block || NIL_BLOCK
      self
    end

    # :call-seq:
    #   default() { |scanner, parser| ... }
    #
    # The specified block is going to be executed before any token is recognized.
    # The default is NIL_BLOCK.
    #
    # It always returns the current default block, so calling it without
    # arguments works as a reader.
    def default(block=nil,&b)
      block ||= b
      @default = block if block
      @default
    end

  end
end
@@ -0,0 +1,35 @@
1
+ require File.dirname(__FILE__) + '/production_compiler'
2
+
3
module LLIP

  # It modifies ProductionCompiler to add support for a recursive behaviour:
  # the if/elsif chain which matches the tokens is wrapped in a +while+ loop
  # that runs as long as <tt>@scanner.current.nil?</tt> is false and that is
  # left as soon as no token matches.
  class RecursiveProductionCompiler < ProductionCompiler

    # It emits the method header through ProductionCompiler#start and then
    # opens the loop. Like the overridden method, it returns +self+.
    def start(name)
      super
      @code << <<-CODE
        while not @scanner.current.nil?
      CODE
      self
    end

    protected

    # It builds the else clause, which leaves the loop, and closes the if.
    #
    # The +raise_on_error+ parameter is accepted for compatibility with
    # ProductionCompiler#build_else but it's ignored: without the +break+ the
    # generated loop would never terminate when the current token isn't
    # matched, because nothing would consume it.
    def build_else(raise_on_error=true)
      @code << <<-CODE
          else
            break
      CODE
      @code << "\nend\n"
      @else = ""
    end

    # It closes the loop and then lets ProductionCompiler#build_end close the
    # method definition.
    def build_end
      @code << <<-CODE
        end
      CODE

      super
    end
  end
end
@@ -0,0 +1,116 @@
1
+ require File.dirname(__FILE__) + '/regexp_specification'
2
+ require File.dirname(__FILE__) + '/abstract_scanner'
3
+ require File.dirname(__FILE__) + '/llip_error'
4
+
5
module LLIP

  # The RegexpAbstractScanner is the main abstract scanner of LLIP.
  # To have a real scanner, just subclass it and add some regular expressions.
  #
  # See ClassMethods to know how.
  class RegexpAbstractScanner < AbstractScanner

    # Every subclass is extended with ClassMethods, so that it owns its
    # scanning table.
    def self.inherited(other)
      other.extend(ClassMethods)
    end

    # It builds the scanning table of the class (see ClassMethods#build) the
    # first time a scanner is instantiated.
    def initialize(*args)
      super
      self.class.build unless self.class.built?
    end

    # It recognizes the next token, stores it as the current one and returns it.
    #
    # When there are no more chars, a Token with name :nil is returned.
    # NOTE(review): @next_char, @current_line, @current_char and read_next
    # aren't defined here; presumably they are provided by AbstractScanner.
    #
    # Raises LLIPError if no regular expression starts with the next char and
    # UnvalidTokenError if the scanning stops in a state which isn't final.
    def next
      return @current = Token.new(:nil,nil,@current_line,@current_char) unless @next_char

      # The position where the token starts.
      line = @current_line
      char = @current_char

      regexp = self.class.scanning_table[@next_char]
      unless regexp
        token = Token.new(:nil,@next_char,line,char)
        raise LLIPError.new(token,"there isn't a regular expression which starts with #{@next_char}")
      end

      # Follow the transitions until the input ends or a char isn't accepted.
      state = regexp.init
      string = ""
      while state[@next_char] != :error and @next_char
        state = state[@next_char]
        string << @next_char
        read_next
      end

      token = Token.new(state.regexp.name,string,line,char)
      if state.final?
        @current = token
      else
        raise UnvalidTokenError.new(token)
      end
    end

    module ClassMethods

      # It's where all the regular expressions are stored. The keys are the starting_chars of the RegexpSpecification.
      # While the table can be modified directly, it's recommended to use the add_regexp method.
      def scanning_table
        @scanning_table ||= Hash.new
      end

      # It allows to add a RegularExpression to the scanner and it makes sure that all the specified tokens don't collide:
      # if a starting char is already in the table, the entry is replaced by
      # the RegexpSpecification.mix of the new regexp and the existing one.
      #
      # If a RegexpSpecification has starting_chars == :everything, it's set as the default value of the scanning_table.
      #
      # Returns +self+.
      def add_regexp(regexp)
        starting_chars = regexp.starting_chars
        if starting_chars.kind_of? Symbol
          scanning_table.default = regexp
        else
          common_chars, new_chars = starting_chars.partition { |c| scanning_table.has_key? c }

          # The colliding chars are grouped by the regexp they currently point
          # to, so that every existing regexp is mixed only once and the result
          # is stored for all of its chars.
          collisions = common_chars.group_by { |c| scanning_table[c] }

          new_chars.each { |c| scanning_table[c] = regexp }
          collisions.each do |existing, chars|
            mixed = RegexpSpecification.mix(regexp,existing)
            chars.each { |c| scanning_table[c] = mixed }
          end
        end

        # The fix applied by build must take the new starting chars into account.
        build if @built

        self
      end

      # It fixes a problem with all the regexps that end with ".*" or ".+".
      # If such a regexp is given without calling this method,
      # all the successive chars are going to be included by that regexp.
      # This method adds :error in the last state of that regexp for all
      # starting chars in the scanner.
      #
      # This method is automatically called when a new scanner is instantiated.
      def build
        regexps = scanning_table.values.uniq
        regexps << scanning_table.default if scanning_table.default

        # The states to fix are the last ones which loop on themselves.
        fixable = []
        regexps.each do |regexp|
          regexp.last.each do |state|
            fixable << state if state.error == state
          end
        end

        starting_chars = scanning_table.keys
        fixable.each do |state|
          starting_chars.each do |char|
            state[char] = :error
          end
        end
        @built = true
        self
      end

      # It returns true if the build method has been called.
      def built?
        @built = false if @built.nil?
        @built
      end
    end
  end
end
@@ -0,0 +1,197 @@
1
+ require File.dirname(__FILE__) + '/abstract_parser'
2
+
3
module LLIP

  # It's a parser for regular expressions. It correctly builds a RegexpSpecification given a valid regular expression string.
  #
  # === Grammar
  #
  #  VN = { EXP , META , ELEMENT }
  #
  #  char = every character
  #
  #  symb = { ( , ) , . , * , + , \ , | }
  #
  #  VT = char U symb
  #
  # In every production "or" has been used instead of "|" to avoid confusion.
  #
  #  P = {
  #    EXP -> META EXP
  #    EXP -> META or EXP
  #    EXP -> META
  #    META -> ELEMENT*
  #    META -> ELEMENT+
  #    META -> ELEMENT
  #    ELEMENT -> char or . or \symb
  #    ELEMENT -> (EXP)
  #  }
  #
  # or in EBNF format
  #
  #  P' = {
  #    EXP ::= META{[|]EXP}
  #    META ::= ELEMENT[* or +]
  #    ELEMENT ::= char or . or \symb or (EXP)
  #  }
  #
  class LLIP::RegexpParser < LLIP::AbstractParser

    # It maps the char which follows a backslash to the special char it
    # stands for. Looking up an unknown char raises a RuntimeError.
    SPECIALS_TABLE = Hash.new { |_hash,key| raise "Unknown special #{key}" }
    SPECIALS_TABLE.update(
      "n" => "\n",
      "r" => "\r",
      "t" => "\t"
    )

    scope(:scope)

    # The starting production: it creates the RegexpSpecification, parses the
    # whole expression and marks as final the states reached at the end.
    production(:scope,:single) do |p|
      p.default do |scanner,parser|
        parser[:regexp] = RegexpSpecification.new
        # parser[:last] holds the states from which the next element continues.
        parser[:last] = [parser[:regexp].add_state]

        parser.parse_exp

        parser[:regexp].last.each { |s| s.final = true }
        parser[:last].each { |s| s.final = true }
        parser[:regexp]
      end
    end

    # EXP ::= META{[|]EXP}
    production(:exp,:recursive) do |p|

      p.default do |scanner,parser|
        parser.parse_meta.last
      end

      # An alternative restarts from the states stored in +result+.
      p.token("|") do |result,scanner,parser|
        scanner.next
        parser[:last] = result
        parser.parse_meta.last
      end

      # Every token which can start an ELEMENT just continues the sequence.
      [:char, ".", "(", "\\"].each do |element_start|
        p.token(element_start) do |result,scanner,parser|
          parser.parse_meta
          result
        end
      end
    end

    # META ::= ELEMENT[* or +]
    production(:meta,:single) do |p|
      # A META without "*" or "+" is valid, so nothing must be raised.
      p.raise_on_error = false

      # The states reached before the element are kept together with the
      # result of the element itself.
      p.default do |scanner,parser|
        MetaAccessor.new(parser[:last],parser.parse_element)
      end

      p.token("*") do |meta,scanner,parser|
        if meta.results == :everything
          parser[:last].last.error = parser[:last].last
        else
          if meta.results.kind_of? Array
            meta.results.each do |c|
              parser[:last].each { |s| s[c] = meta.last.last[c] }
            end
          else
            parser[:last].last[meta.results] = parser[:last].last
          end
          # Zero repetitions are allowed: the previous states stay reachable.
          parser[:last].concat(meta.last)
        end
        scanner.next
        meta
      end

      p.token("+") do |meta,scanner,parser|
        if meta.results == :everything
          parser[:last].last.error = parser[:last].last
          parser[:last] = [parser[:last].last]
        else
          if meta.results.kind_of? Array
            meta.results.each do |c|
              parser[:last].each { |s| s[c] = meta.last.last[c] }
            end
          else
            parser[:last].last[meta.results] = parser[:last].last
          end
        end
        scanner.next
        meta
      end
    end

    # ELEMENT ::= char or . or \symb or (EXP)
    production(:element,:single) do |p|

      p.token(:char) do |result, scanner, parser|
        parser.add_char(parser,scanner)
      end

      # "." matches everything: the new state is reached through the error
      # transition of the last state.
      p.token(".") do |result, scanner, parser|
        r = parser[:regexp].add_state
        parser[:last].last.error = r
        parser[:last] << r
        scanner.next
        :everything
      end

      # An escaped symbol is taken literally, every other escaped char is
      # translated through SPECIALS_TABLE.
      p.token("\\") do |result,scanner,parser|
        if scanner.next == :symbol
          parser.add_char(parser,scanner)
        else
          parser.add_char(parser,scanner,SPECIALS_TABLE[scanner.current.value])
        end
      end

      p.token("(") do |result,scanner,parser|
        scanner.next
        first_state = parser[:last].last
        parser.parse_exp

        unless scanner.current == ")"
          raise "Every '(' must be followed by a ')'"
        end

        scanner.next
        # NOTE(review): +last+ and +keys+ are called on a state defined
        # elsewhere; their exact meaning can't be confirmed from here.
        parser[:last] = first_state.last
        first_state.keys
      end
    end

    # It adds a new state to the regexp, links every state in
    # <tt>parser[:last]</tt> to it through +char+, makes it the only last state
    # and moves the scanner forward. It returns +char+.
    def add_char(parser, scanner, char=scanner.current.value)
      r = parser[:regexp].add_state
      parser[:last].each { |s| s[char] = r }
      parser[:regexp].add_state(r)
      parser[:last] = [r]
      scanner.next
      char
    end

    # It couples the result of an element with the states which were the last
    # ones before that element was parsed.
    class MetaAccessor

      attr_accessor :results
      attr_accessor :last

      def initialize(last,results)
        @results = results
        @last = last
      end

    end
  end
end