kanocc 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,36 +15,22 @@
15
15
  # You should have received a copy of the GNU General Public License,
16
16
  # version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
17
17
  #
18
+ require 'rubygems'
18
19
  module Kanocc
19
20
  class GrammarRule
20
- attr_reader :lhs, :rhs, :method, :operator_prec
21
- attr_accessor :prec
21
+ attr_reader :lhs, :rhs, :method
22
+ attr_accessor :precedence, :derives_right
22
23
 
23
24
  def initialize(lhs, rhs, method)
24
25
  @lhs = lhs
25
26
  @rhs = rhs
26
27
  @method = method
27
- if (operator =rhs.find {|s| s.is_a?(String) or s.is_a?(Token)})
28
- @operator_prec = Nonterminal.operator_precedence(operator)
29
- end
30
- @prec = 0
31
- @logger.debug("#{lhs} --> #{rhs.map {|gs| gs.is_a?(Symbol) ? gs.to_s : gs}.join}, #prec = #{@prec}, method = #{method}") unless not @logger
32
- end
33
-
34
- def operator_prec
35
- unless @operator_prec_calculated
36
- operator = rhs.find {|s| s.is_a?(String) or s.is_a?(Token)}
37
- if operator
38
- @operator_prec = lhs.operator_precedence(operator)
39
- end
40
- @operator_prec_calculated = true
41
- end
42
- @operator_prec
43
- end
44
-
28
+ @prededence = 0
29
+ @logger.debug("#{lhs} --> #{rhs.map {|gs| gs.is_a?(Symbol) ? gs.to_s : gs}.join}, #prec = #{@prec}, method = #{method}") if @logger
30
+ end
31
+
45
32
  def inspect
46
33
  return lhs.inspect + " ::= " + rhs.map{|gs| gs.inspect}.join(" ")
47
34
  end
48
-
49
35
  end
50
36
  end
@@ -1,4 +1,4 @@
1
- #
1
+ #
2
2
  # Copyright 2008 Christian Surlykke
3
3
  #
4
4
  # This file is part of Kanocc.
@@ -20,46 +20,12 @@ module Kanocc
20
20
  class Nonterminal
21
21
  @@rules = Hash.new
22
22
  @@last_rule = Hash.new
23
- @@derives_right = Hash.new
24
- @@operator_precedence = Hash.new
23
+ @@bind_right = Hash.new
25
24
  @@method_names = Hash.new
26
25
 
27
26
  Left = 1
28
27
  Right = 2
29
28
 
30
- def Nonterminal.derives_right
31
- @@derives_right[self] = true
32
- end
33
-
34
- def Nonterminal.derives_right?
35
- return @@derives_right[self]
36
- end
37
-
38
- def Nonterminal.set_operator_precedence(operator, precedence)
39
- raise "Precedence must be an integer" unless precedence.class == Fixnum
40
- @@operator_precedence[self] ||= Hash.new
41
- if is_an_operator?(operator)
42
- @@operator_precedence[self][operator] = precedence
43
- elsif is_an_array_of_operators(operator)
44
- operator.each {|o| @@operator_precedence[self][o] = precedence}
45
- else
46
- raise "Operator must be a string, a token or an array of those"
47
- end
48
- end
49
-
50
- def Nonterminal.operator_precedence(operator)
51
- (@@operator_precedence[self] and @@operator_precedence[self][operator]) or 0
52
- end
53
-
54
- def Nonterminal.is_an_array_of_operators(arr)
55
- arr.is_a?(Array) and
56
- arr.collect{|o| is_an_operator?(o)}.inject {|b1, b2| b1 and b2 }
57
- end
58
-
59
- def Nonterminal.is_an_operator?(operator)
60
- operator.is_a?(String) or operator.is_a?(Token)
61
- end
62
-
63
29
  def Nonterminal.rules
64
30
  rules = @@rules[self]
65
31
  return rules ? rules : []
@@ -97,36 +63,52 @@ module Kanocc
97
63
 
98
64
  def Nonterminal.zm(symbols, sep = nil)
99
65
  list_class = new_list_class
66
+ non_empty_list_class = new_list_class
100
67
  list_class.rule() {@elements = []}
101
- list_class.rule(om(symbols, sep)) {@elements = @rhs[0].elements}
68
+ list_class.rule(non_empty_list_class) {@elements = @rhs[0].elements}
69
+ non_empty_list_class.rule(*symbols) {@elements = @rhs}
70
+ if sep
71
+ non_empty_list_class.rule(non_empty_list_class, sep, *symbols) {@elements = @rhs[0].elements + @rhs[2..@rhs.length]}
72
+ else
73
+ non_empty_list_class.rule(non_empty_list_class, *symbols) {@elements = @rhs[0].elements + @rhs[1..@rhs.length]}
74
+ end
102
75
  return list_class
103
76
  end
104
77
 
105
78
  def Nonterminal.om(symbols, sep = nil)
106
79
  symbols = [symbols] unless symbols.is_a? Array
107
- list_class = new_list_class
108
- list_class.rule(*symbols) {@elements = @rhs}
80
+ non_empty_list_class = new_list_class
81
+ non_empty_list_class.rule(*symbols) {@elements = @rhs}
109
82
  if sep
110
- list_class.rule(list_class, sep, *symbols) {@elements = @rhs[0].elements + @rhs[2..@rhs.length]}
83
+ non_empty_list_class.rule(non_empty_list_class, sep, *symbols) {@elements = @rhs[0].elements + @rhs[2..@rhs.length]}
111
84
  else
112
- list_class.rule(list_class, *symbols) {@elements = @rhs[0].elements + @rhs[1..@rhs.length]}
85
+ non_empty_list_class.rule(non_empty_list_class, *symbols) {@elements = @rhs[0].elements + @rhs[1..@rhs.length]}
113
86
  end
114
- return list_class
87
+ return non_empty_list_class
115
88
  end
116
-
89
+
90
+ def Nonterminal.zo(symbols)
91
+ zero_or_one_class = new_list_class
92
+ zero_or_one_class.rule(*symbols) {@elements = @rhs}
93
+ zero_or_one_class.rule() {@elements = []}
94
+ end
95
+
117
96
  @@listClassNumber = 0
118
97
 
119
98
  def Nonterminal.new_list_class
120
- list_class = Class.new(List)
99
+ list_class = Class.new(AnonymousNonterminal)
121
100
  @@listClassNumber += 1
101
+
122
102
  def list_class.inspect
123
103
  return "anonList_#{@@listClassNumber}"
124
104
  end
105
+
125
106
  return list_class
126
107
  end
127
108
 
128
109
  def Nonterminal.generate_method_name(*args)
129
- method_name = self.name + " --> " + args.map {|a| a.inspect}.join(' ')
110
+ class_name = self.name || ""
111
+ method_name = class_name + " --> " + args.map {|a| a.inspect}.join(' ')
130
112
  @@method_names[self] ||= []
131
113
  i = 1
132
114
  while @@method_names[self].member?(method_name) do
@@ -135,10 +117,16 @@ module Kanocc
135
117
  @@method_names[self].push(method_name)
136
118
  return method_name
137
119
  end
138
-
139
- def Nonterminal.prec(p)
140
- raise "Call to prec not preceded by rule" unless @@last_rule[self]
141
- @@last_rule[self].prec = p
120
+
121
+ def Nonterminal.precedence(prec)
122
+ raise "Given rule precedence was not a Numeric" unless prec.is_a? Numeric
123
+ raise "Call to precedence must be preceded by a rule" unless @@last_rule[self]
124
+ @@last_rule[self].precedence = prec
125
+ end
126
+
127
+ def Nonterminal.derives_right
128
+ raise "Call to derives_right must be preceded by a rule" unless @@last_rule[self]
129
+ @@last_rule[self].derives_right = true
142
130
  end
143
131
 
144
132
  def Nonterminal.show_method_names
@@ -148,29 +136,43 @@ module Kanocc
148
136
  def inspect
149
137
  self.class.name
150
138
  end
139
+
140
+ def Nonterminal.show_rules
141
+ rules.each do |rule|
142
+ puts rule.inspect
143
+ end
144
+ end
145
+
146
+ def Nonterminal.show_all_rules
147
+ queue = [self]
148
+ done = {}
149
+ i = 0
150
+ while (i < queue.length)
151
+ queue[i].show_rules
152
+ done[queue[i]] = true
153
+ queue[i].rules.each do |rule|
154
+ rule.rhs.each do |gs|
155
+ if gs.respond_to?(:rules) and not done[gs]
156
+ queue.push(gs)
157
+ end
158
+ end
159
+ end
160
+ i += 1
161
+ end
162
+ end
151
163
  end
152
164
 
153
165
 
154
- class List < Nonterminal
166
+ class AnonymousNonterminal < Nonterminal
155
167
  attr_reader :elements
156
-
157
- protected
158
- # Assumes @rhs[0] is a Kanocc::List and that rhs.length > 1
159
- def collect(strip_separator = false)
160
- @elements = @rhs[0].elements
161
- if strip_separator
162
- @elements = @elements + @rhs[2..@rhs.length]
163
- else
164
- @elements = @elements + @rhs[1..@rhs.length]
165
- end
166
- end
167
168
  end
168
169
 
169
170
  class Error < Nonterminal
170
- attr_reader :text
171
- def initialize
172
- super
173
- @text = "FIXME"
171
+ def str
172
+ "hey" # FIXME
174
173
  end
175
174
  end
175
+
176
+ class StartSymbol < Nonterminal
177
+ end
176
178
  end
@@ -2,8 +2,7 @@
2
2
  # Copyright 2008 Christian Surlykke
3
3
  #
4
4
  # This file is part of Kanocc.
5
- #require 'logger'
6
-
5
+ #
7
6
  # Kanocc is free software: you can redistribute it and/or modify
8
7
  # it under the terms of the GNU General Public License, version 3
9
8
  # as published by the Free Software Foundation.
@@ -19,132 +18,216 @@
19
18
  require 'stringio'
20
19
  require 'strscan'
21
20
  require "logger"
21
+ require 'rubygems'
22
22
  module Kanocc
23
23
  class Scanner
24
- attr_accessor :logger
24
+ attr_accessor :logger, :current_match, :input
25
+
25
26
  def initialize(init = {})
26
- if init[:logger]
27
- @logger = init[:logger]
28
- else
27
+ @logger = init[:logger]
28
+ unless @logger
29
29
  @logger = Logger.new(STDOUT)
30
30
  @logger.level = Logger::WARN
31
31
  end
32
32
  @ws_regs = [/\s/]
33
- @recognizables = []
34
- @regexps = []
33
+ @terminals = []
34
+ @string_patterns = {}
35
+ @input = ""
36
+ @stringScanner = StringScanner.new(@input)
37
+ @current_match = nil
35
38
  end
36
39
 
37
40
  def set_whitespace(*ws_regs)
38
- @ws_regs = []
39
- ws_regs.each do |ws_reg|
40
- unless ws_reg.is_a?(Regexp)
41
- raise "set_whitespace must be given a list of Regexp's"
42
- end
43
- @ws_regs << ws_reg
44
- end
41
+ raise "set_whitespace must be given a list of Regexp's" \
42
+ if ws_regs.find {|ws_reg| not ws_reg.is_a?(RegExp)}
43
+
44
+ @ws_regs = ws_regs
45
45
  end
46
46
 
47
- def set_recognized(*rec)
47
+ def set_recognized(*recognizables)
48
48
  @recognizables = []
49
- rec.each do |r|
50
- if r.class == Class and r.ancestors.include?(Token)
51
- @recognizables = @recognizables + r.patterns
52
- elsif r.is_a? String
53
- @recognizables << {:literal => r,
54
- :regexp => Regexp.new(Regexp.escape(r))}
55
- else
56
- raise "set_recognized must be given a list of Tokens classes and or strings"
57
- end
49
+ @literals = []
50
+ @tokens = []
51
+ @string_patterns = {}
52
+ recognizables.each do |recognizable|
53
+ unless (recognizable.class == Class and recognizable.ancestors.include?(Token)) or
54
+ recognizable.is_a?(String)
55
+ raise "set_recognized must be given a list of Tokens classes" +
56
+ "and or strings, got #{recognizable.inspect}"
57
+ end
58
+ @recognizables << recognizable
59
+ if recognizable.is_a? String
60
+ @string_patterns[recognizable] = Regexp.new(Regexp.escape(recognizable))
61
+ @literals << recognizable
62
+ else
63
+ @tokens << recognizable
64
+ end
58
65
  end
59
66
  end
60
-
61
- def each_token(input)
62
- if input.is_a?(IO)
63
- @input = input.readlines.join("")
64
- elsif input.is_a?(String)
65
- @input = input
66
- else
67
- raise "Input must be a string or an IO object"
68
- end
67
+
68
+ def input=(input)
69
+ @input = input
69
70
  @stringScanner = StringScanner.new(@input)
70
- while match = do_match do
71
- if match[:matches]
72
- @logger.debug("Yielding #{match}")
73
- yield(match)
74
- end
75
- @stringScanner.pos += match[:length]
76
- end
71
+ @current_match = nil
77
72
  end
78
73
 
79
- private
80
-
81
- def do_match
82
- if @stringScanner.pos >= @stringScanner.string.length
83
- return nil;
84
- end
85
-
86
- token_match = match_token
87
- whitespace_match = match_whitespace
88
-
89
- if whitespace_match[:length] > token_match[:length]
90
- return whitespace_match
91
- elsif token_match[:length] > 0
92
- return token_match
74
+ def next_match!
75
+ do_match!
76
+ return @current_match
77
+ end
78
+
79
+ private
80
+
81
+ def do_match!
82
+ if @stringScanner.pos >= @input.length
83
+ @current_match = nil
84
+ elsif match_token
85
+ @stringScanner.pos += @current_match.length
86
+ elsif (whitespace_len = match_whitespace) > 0
87
+ @stringScanner.pos += whitespace_len
88
+ do_match!
93
89
  else
94
90
  # So we've not been able to match tokens nor whitespace.
95
91
  # We return the first character of the remaining input as a string
96
92
  # literal
97
- string = @stringScanner.string.slice(@stringScanner.pos, 1)
98
- matches = [{:literal => string,
99
- :regexp => Regexp.new(Regexp.escape(string))}]
100
- return {:matches => matches,
101
- :string => string,
102
- :start_pos => @stringScanner.pos,
103
- :length => 1}
93
+ str = @stringScanner.string.slice(@stringScanner.pos, 1)
94
+ regexp = Regexp.new(Regexp.escape(str))
95
+ @current_match = LexicalMatch.new([str], {str=>regexp}, @stringScanner.pos, 1)
96
+ @stringScanner.pos += 1
104
97
  end
105
98
  end
106
99
 
100
+
101
+ private
102
+
107
103
  def match_token
108
- matches = []
104
+ matching_terminals = []
105
+ regexps = {}
109
106
  max_length = 0
110
- @recognizables.each do |rec|
111
- if (len = @stringScanner.match?(rec[:regexp])) and len > 0
107
+ @recognizables.each do |recognizable|
108
+ len, regexp = match(recognizable)
109
+ if len > 0
112
110
  if len > max_length
113
111
  # Now, we have a match longer than whatever we had,
114
112
  # so we discharge what we had, and save the new one
115
- matches = [rec]
116
- max_length = len
113
+ matching_terminals = [recognizable]
114
+ regexps = {recognizable => regexp}
115
+ max_length = len
117
116
  elsif len == max_length
118
117
  # This regular expression matches a string of same length
119
- # as our previous match, so we prepare to return both
120
- matches << rec
118
+ # as our previous match(es), so we prepare to return both/all
119
+ matching_terminals << recognizable
120
+ regexps[recognizable] = regexp
121
121
  end
122
122
  end
123
123
  end
124
- start_pos = @stringScanner.pos
125
- string = @stringScanner.string.slice(start_pos, max_length)
126
- return {:matches => matches,
127
- :string => string,
128
- :start_pos => start_pos,
129
- :length => max_length}
124
+ if max_length == 0
125
+ return false
126
+ else
127
+ @current_match = LexicalMatch.new(matching_terminals, regexps, @stringScanner.pos, max_length)
128
+ return true
129
+ end
130
130
  end
131
-
131
+
132
+ def match(recognizable)
133
+ if recognizable.class == Class # It must be a token
134
+ return recognizable.match(@stringScanner)
135
+ elsif (len = @stringScanner.match?(@string_patterns[recognizable])) and len > 0
136
+ return len, @string_patterns[recognizable]
137
+ else
138
+ return 0, nil
139
+ end
140
+ end
141
+
132
142
  def match_whitespace
133
- max_length = 0
143
+ max_len = 0
134
144
  for i in 0..@ws_regs.size - 1 do
135
145
  len = @stringScanner.match?(@ws_regs[i]) || 0
136
- if len > max_length
137
- max_length = len
146
+ if len > max_len
147
+ max_len = len
138
148
  end
139
149
  end
140
- string = @stringScanner.string.slice(@stringScanner.pos, max_length)
141
- result = {:string => string,
142
- :start_pos => @stringScanner.pos,
143
- :length => max_length}
144
- return result
150
+ return max_len
151
+ end
152
+
153
+
154
+ def do_match2!
155
+ while @stringScanner.pos < @input.length do
156
+ look_for_token_match
157
+ look_for_whitespace_match
158
+
159
+ if @whitespace_match_length > @match_length
160
+ @stringScanner.pos += @whitespace_match_length
161
+ elsif @match_length > 0
162
+ @current_match = LexicalMatch.new(@matching_recognizables, @regexps, @stringScanner.pos, @match_length)
163
+ @stringScanner.pos += @match_length
164
+ break
165
+ else
166
+ str = @stringScanner.string.slice(@stringScanner.pos, 1)
167
+ regexp = Regexp.new(Regexp.escape(str))
168
+ @current_match = LexicalMatch.new([str], {str=>regexp}, @stringScanner.pos, 1)
169
+ @stringScanner.pos += 1
170
+ break
171
+ end
172
+ end
173
+ end
174
+
175
+ def look_for_token_match
176
+ @matching_recognizables = []
177
+ @regexps = {}
178
+ @match_length = 0
179
+ @tokens.each do |token|
180
+ new_match_length, regexp = token.match(@stringScanner)
181
+ if new_match_length > match_length
182
+ @matching_recognizables = [token]
183
+ @regexps = {token => regexp}
184
+ @match_length = new_match_length
185
+ elsif new_match_length > 0 and new_match_length == match_length
186
+ @matching_recognizables << token
187
+ @regexps[token] = regexp
188
+ end
189
+ end
190
+ @literals.each do |literal|
191
+ new_match_length = @stringScanner.match?(@string_patterns[literal])
192
+ if new_match_length > match_length
193
+ matching_recognizables = [literal]
194
+ regexps = {literal => @string_patterns[literal]}
195
+ match_length = new_match_length
196
+ elsif new_match_length > 0 and new_match_length == match_length
197
+ matching_recognizables << literal
198
+ regexps[literal] = @string_paterns[literal]
199
+ end
200
+ end
201
+ end
202
+
203
+ def look_for_whitespace_match
204
+ @whitespace_match_length = 0
205
+ for i in 0..@ws_regs.size - 1 do
206
+ len = @stringScanner.match?(@ws_regs[i]) || 0
207
+ if len > @whitespace_match_length
208
+ @whitespace_match_length = len
209
+ end
210
+ end
211
+ end
212
+
213
+
214
+
215
+ end
216
+
217
+ class LexicalMatch
218
+ attr_accessor :terminals, :start_pos, :length
219
+
220
+ def initialize(terminals, regexps, start_pos, length)
221
+ @terminals = terminals
222
+ @regexps = regexps
223
+ @start_pos = start_pos
224
+ @length = length
225
+ end
226
+
227
+ def regexp(terminal)
228
+ @regexps[terminal]
145
229
  end
146
230
  end
147
-
148
231
  end
149
232
 
150
233