kanocc 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/examples/bind.rb +26 -0
- data/examples/calculator.rb +19 -15
- data/examples/doc_calc.rb +42 -0
- data/examples/minipascalsyntax.html +371 -0
- data/examples/ruby_quiz_78.rb +12 -11
- data/lib/kanocc.rb +73 -102
- data/lib/kanocc/earley.rb +262 -217
- data/lib/kanocc/grammar_rule.rb +7 -21
- data/lib/kanocc/nonterminal.rb +67 -65
- data/lib/kanocc/scanner.rb +168 -85
- data/lib/kanocc/token.rb +24 -0
- data/lib/todo +2 -3
- metadata +13 -9
data/lib/kanocc/grammar_rule.rb
CHANGED
@@ -15,36 +15,22 @@
|
|
15
15
|
# You should have received a copy of the GNU General Public License,
|
16
16
|
# version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
|
17
17
|
#
|
18
|
+
require 'rubygems'
|
18
19
|
module Kanocc
|
19
20
|
class GrammarRule
|
20
|
-
attr_reader :lhs, :rhs, :method
|
21
|
-
attr_accessor :
|
21
|
+
attr_reader :lhs, :rhs, :method
|
22
|
+
attr_accessor :precedence, :derives_right
|
22
23
|
|
23
24
|
def initialize(lhs, rhs, method)
|
24
25
|
@lhs = lhs
|
25
26
|
@rhs = rhs
|
26
27
|
@method = method
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
@logger.debug("#{lhs} --> #{rhs.map {|gs| gs.is_a?(Symbol) ? gs.to_s : gs}.join}, #prec = #{@prec}, method = #{method}") unless not @logger
|
32
|
-
end
|
33
|
-
|
34
|
-
def operator_prec
|
35
|
-
unless @operator_prec_calculated
|
36
|
-
operator = rhs.find {|s| s.is_a?(String) or s.is_a?(Token)}
|
37
|
-
if operator
|
38
|
-
@operator_prec = lhs.operator_precedence(operator)
|
39
|
-
end
|
40
|
-
@operator_prec_calculated = true
|
41
|
-
end
|
42
|
-
@operator_prec
|
43
|
-
end
|
44
|
-
|
28
|
+
@precedence = 0 # default precedence, exposed through the :precedence accessor
|
29
|
+
@logger.debug("#{lhs} --> #{rhs.map {|gs| gs.is_a?(Symbol) ? gs.to_s : gs}.join}, #prec = #{@precedence}, method = #{method}") if @logger
|
30
|
+
end
|
31
|
+
|
45
32
|
def inspect
|
46
33
|
return lhs.inspect + " ::= " + rhs.map{|gs| gs.inspect}.join(" ")
|
47
34
|
end
|
48
|
-
|
49
35
|
end
|
50
36
|
end
|
data/lib/kanocc/nonterminal.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
#
|
2
2
|
# Copyright 2008 Christian Surlykke
|
3
3
|
#
|
4
4
|
# This file is part of Kanocc.
|
@@ -20,46 +20,12 @@ module Kanocc
|
|
20
20
|
class Nonterminal
|
21
21
|
@@rules = Hash.new
|
22
22
|
@@last_rule = Hash.new
|
23
|
-
@@
|
24
|
-
@@operator_precedence = Hash.new
|
23
|
+
@@bind_right = Hash.new
|
25
24
|
@@method_names = Hash.new
|
26
25
|
|
27
26
|
Left = 1
|
28
27
|
Right = 2
|
29
28
|
|
30
|
-
def Nonterminal.derives_right
|
31
|
-
@@derives_right[self] = true
|
32
|
-
end
|
33
|
-
|
34
|
-
def Nonterminal.derives_right?
|
35
|
-
return @@derives_right[self]
|
36
|
-
end
|
37
|
-
|
38
|
-
def Nonterminal.set_operator_precedence(operator, precedence)
|
39
|
-
raise "Precedence must be an integer" unless precedence.class == Fixnum
|
40
|
-
@@operator_precedence[self] ||= Hash.new
|
41
|
-
if is_an_operator?(operator)
|
42
|
-
@@operator_precedence[self][operator] = precedence
|
43
|
-
elsif is_an_array_of_operators(operator)
|
44
|
-
operator.each {|o| @@operator_precedence[self][o] = precedence}
|
45
|
-
else
|
46
|
-
raise "Operator must be a string, a token or an array of those"
|
47
|
-
end
|
48
|
-
end
|
49
|
-
|
50
|
-
def Nonterminal.operator_precedence(operator)
|
51
|
-
(@@operator_precedence[self] and @@operator_precedence[self][operator]) or 0
|
52
|
-
end
|
53
|
-
|
54
|
-
def Nonterminal.is_an_array_of_operators(arr)
|
55
|
-
arr.is_a?(Array) and
|
56
|
-
arr.collect{|o| is_an_operator?(o)}.inject {|b1, b2| b1 and b2 }
|
57
|
-
end
|
58
|
-
|
59
|
-
def Nonterminal.is_an_operator?(operator)
|
60
|
-
operator.is_a?(String) or operator.is_a?(Token)
|
61
|
-
end
|
62
|
-
|
63
29
|
def Nonterminal.rules
|
64
30
|
rules = @@rules[self]
|
65
31
|
return rules ? rules : []
|
@@ -97,36 +63,52 @@ module Kanocc
|
|
97
63
|
|
98
64
|
def Nonterminal.zm(symbols, sep = nil)
|
99
65
|
list_class = new_list_class
|
66
|
+
non_empty_list_class = new_list_class
|
100
67
|
list_class.rule() {@elements = []}
|
101
|
-
list_class.rule(
|
68
|
+
list_class.rule(non_empty_list_class) {@elements = @rhs[0].elements}
|
69
|
+
non_empty_list_class.rule(*symbols) {@elements = @rhs}
|
70
|
+
if sep
|
71
|
+
non_empty_list_class.rule(non_empty_list_class, sep, *symbols) {@elements = @rhs[0].elements + @rhs[2..@rhs.length]}
|
72
|
+
else
|
73
|
+
non_empty_list_class.rule(non_empty_list_class, *symbols) {@elements = @rhs[0].elements + @rhs[1..@rhs.length]}
|
74
|
+
end
|
102
75
|
return list_class
|
103
76
|
end
|
104
77
|
|
105
78
|
def Nonterminal.om(symbols, sep = nil)
|
106
79
|
symbols = [symbols] unless symbols.is_a? Array
|
107
|
-
|
108
|
-
|
80
|
+
non_empty_list_class = new_list_class
|
81
|
+
non_empty_list_class.rule(*symbols) {@elements = @rhs}
|
109
82
|
if sep
|
110
|
-
|
83
|
+
non_empty_list_class.rule(non_empty_list_class, sep, *symbols) {@elements = @rhs[0].elements + @rhs[2..@rhs.length]}
|
111
84
|
else
|
112
|
-
|
85
|
+
non_empty_list_class.rule(non_empty_list_class, *symbols) {@elements = @rhs[0].elements + @rhs[1..@rhs.length]}
|
113
86
|
end
|
114
|
-
return
|
87
|
+
return non_empty_list_class
|
115
88
|
end
|
116
|
-
|
89
|
+
|
90
|
+
def Nonterminal.zo(symbols)
|
91
|
+
zero_or_one_class = new_list_class
|
92
|
+
zero_or_one_class.rule(*symbols) {@elements = @rhs}
|
93
|
+
zero_or_one_class.rule() {@elements = []}
return zero_or_one_class # hand the generated class back, as zm and om do
|
94
|
+
end
|
95
|
+
|
117
96
|
@@listClassNumber = 0
|
118
97
|
|
119
98
|
def Nonterminal.new_list_class
|
120
|
-
list_class = Class.new(
|
99
|
+
list_class = Class.new(AnonymousNonterminal)
|
121
100
|
@@listClassNumber += 1
|
101
|
+
|
122
102
|
def list_class.inspect
|
123
103
|
return "anonList_#{@@listClassNumber}"
|
124
104
|
end
|
105
|
+
|
125
106
|
return list_class
|
126
107
|
end
|
127
108
|
|
128
109
|
def Nonterminal.generate_method_name(*args)
|
129
|
-
|
110
|
+
class_name = self.name || ""
|
111
|
+
method_name = class_name + " --> " + args.map {|a| a.inspect}.join(' ')
|
130
112
|
@@method_names[self] ||= []
|
131
113
|
i = 1
|
132
114
|
while @@method_names[self].member?(method_name) do
|
@@ -135,10 +117,16 @@ module Kanocc
|
|
135
117
|
@@method_names[self].push(method_name)
|
136
118
|
return method_name
|
137
119
|
end
|
138
|
-
|
139
|
-
def Nonterminal.prec
|
140
|
-
raise "
|
141
|
-
@@last_rule[self]
|
120
|
+
|
121
|
+
def Nonterminal.precedence(prec)
|
122
|
+
raise "Given rule precedence was not a Numeric" unless prec.is_a? Numeric
|
123
|
+
raise "Call to precedence must be preceded by a rule" unless @@last_rule[self]
|
124
|
+
@@last_rule[self].precedence = prec
|
125
|
+
end
|
126
|
+
|
127
|
+
def Nonterminal.derives_right
|
128
|
+
raise "Call to derives_right must be preceded by a rule" unless @@last_rule[self]
|
129
|
+
@@last_rule[self].derives_right = true
|
142
130
|
end
|
143
131
|
|
144
132
|
def Nonterminal.show_method_names
|
@@ -148,29 +136,43 @@ module Kanocc
|
|
148
136
|
def inspect
|
149
137
|
self.class.name
|
150
138
|
end
|
139
|
+
|
140
|
+
def Nonterminal.show_rules
|
141
|
+
rules.each do |rule|
|
142
|
+
puts rule.inspect
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
def Nonterminal.show_all_rules
|
147
|
+
queue = [self]
|
148
|
+
done = {}
|
149
|
+
i = 0
|
150
|
+
while (i < queue.length)
|
151
|
+
queue[i].show_rules
|
152
|
+
done[queue[i]] = true
|
153
|
+
queue[i].rules.each do |rule|
|
154
|
+
rule.rhs.each do |gs|
|
155
|
+
if gs.respond_to?(:rules) and not done[gs]
|
156
|
+
queue.push(gs)
|
157
|
+
end
|
158
|
+
end
|
159
|
+
end
|
160
|
+
i += 1
|
161
|
+
end
|
162
|
+
end
|
151
163
|
end
|
152
164
|
|
153
165
|
|
154
|
-
class
|
166
|
+
class AnonymousNonterminal < Nonterminal
|
155
167
|
attr_reader :elements
|
156
|
-
|
157
|
-
protected
|
158
|
-
# Assumes @rhs[0] is a Kanocc::List and that rhs.length > 1
|
159
|
-
def collect(strip_separator = false)
|
160
|
-
@elements = @rhs[0].elements
|
161
|
-
if strip_separator
|
162
|
-
@elements = @elements + @rhs[2..@rhs.length]
|
163
|
-
else
|
164
|
-
@elements = @elements + @rhs[1..@rhs.length]
|
165
|
-
end
|
166
|
-
end
|
167
168
|
end
|
168
169
|
|
169
170
|
class Error < Nonterminal
|
170
|
-
|
171
|
-
|
172
|
-
super
|
173
|
-
@text = "FIXME"
|
171
|
+
def str
|
172
|
+
"hey" # FIXME
|
174
173
|
end
|
175
174
|
end
|
175
|
+
|
176
|
+
class StartSymbol < Nonterminal
|
177
|
+
end
|
176
178
|
end
|
data/lib/kanocc/scanner.rb
CHANGED
@@ -2,8 +2,7 @@
|
|
2
2
|
# Copyright 2008 Christian Surlykke
|
3
3
|
#
|
4
4
|
# This file is part of Kanocc.
|
5
|
-
#
|
6
|
-
|
5
|
+
#
|
7
6
|
# Kanocc is free software: you can redistribute it and/or modify
|
8
7
|
# it under the terms of the GNU General Public License, version 3
|
9
8
|
# as published by the Free Software Foundation.
|
@@ -19,132 +18,216 @@
|
|
19
18
|
require 'stringio'
|
20
19
|
require 'strscan'
|
21
20
|
require "logger"
|
21
|
+
require 'rubygems'
|
22
22
|
module Kanocc
|
23
23
|
class Scanner
|
24
|
-
attr_accessor :logger
|
24
|
+
attr_accessor :logger, :current_match, :input
|
25
|
+
|
25
26
|
def initialize(init = {})
|
26
|
-
|
27
|
-
|
28
|
-
else
|
27
|
+
@logger = init[:logger]
|
28
|
+
unless @logger
|
29
29
|
@logger = Logger.new(STDOUT)
|
30
30
|
@logger.level = Logger::WARN
|
31
31
|
end
|
32
32
|
@ws_regs = [/\s/]
|
33
|
-
@
|
34
|
-
@
|
33
|
+
@terminals = []
|
34
|
+
@string_patterns = {}
|
35
|
+
@input = ""
|
36
|
+
@stringScanner = StringScanner.new(@input)
|
37
|
+
@current_match = nil
|
35
38
|
end
|
36
39
|
|
37
40
|
def set_whitespace(*ws_regs)
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
end
|
43
|
-
@ws_regs << ws_reg
|
44
|
-
end
|
41
|
+
raise "set_whitespace must be given a list of Regexp's" \
|
42
|
+
if ws_regs.find {|ws_reg| not ws_reg.is_a?(RegExp)}
|
43
|
+
|
44
|
+
@ws_regs = ws_regs
|
45
45
|
end
|
46
46
|
|
47
|
-
def set_recognized(*
|
47
|
+
def set_recognized(*recognizables)
|
48
48
|
@recognizables = []
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
49
|
+
@literals = []
|
50
|
+
@tokens = []
|
51
|
+
@string_patterns = {}
|
52
|
+
recognizables.each do |recognizable|
|
53
|
+
unless (recognizable.class == Class and recognizable.ancestors.include?(Token)) or
|
54
|
+
recognizable.is_a?(String)
|
55
|
+
raise "set_recognized must be given a list of Tokens classes" +
|
56
|
+
"and or strings, got #{recognizable.inspect}"
|
57
|
+
end
|
58
|
+
@recognizables << recognizable
|
59
|
+
if recognizable.is_a? String
|
60
|
+
@string_patterns[recognizable] = Regexp.new(Regexp.escape(recognizable))
|
61
|
+
@literals << recognizable
|
62
|
+
else
|
63
|
+
@tokens << recognizable
|
64
|
+
end
|
58
65
|
end
|
59
66
|
end
|
60
|
-
|
61
|
-
def
|
62
|
-
|
63
|
-
@input = input.readlines.join("")
|
64
|
-
elsif input.is_a?(String)
|
65
|
-
@input = input
|
66
|
-
else
|
67
|
-
raise "Input must be a string or an IO object"
|
68
|
-
end
|
67
|
+
|
68
|
+
def input=(input)
|
69
|
+
@input = input
|
69
70
|
@stringScanner = StringScanner.new(@input)
|
70
|
-
|
71
|
-
if match[:matches]
|
72
|
-
@logger.debug("Yielding #{match}")
|
73
|
-
yield(match)
|
74
|
-
end
|
75
|
-
@stringScanner.pos += match[:length]
|
76
|
-
end
|
71
|
+
@current_match = nil
|
77
72
|
end
|
78
73
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
elsif
|
92
|
-
|
74
|
+
def next_match!
|
75
|
+
do_match!
|
76
|
+
return @current_match
|
77
|
+
end
|
78
|
+
|
79
|
+
private
|
80
|
+
|
81
|
+
def do_match!
|
82
|
+
if @stringScanner.pos >= @input.length
|
83
|
+
@current_match = nil
|
84
|
+
elsif match_token
|
85
|
+
@stringScanner.pos += @current_match.length
|
86
|
+
elsif (whitespace_len = match_whitespace) > 0
|
87
|
+
@stringScanner.pos += whitespace_len
|
88
|
+
do_match!
|
93
89
|
else
|
94
90
|
# So we've not been able to match tokens nor whitespace.
|
95
91
|
# We return the first character of the remaining input as a string
|
96
92
|
# literal
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
:string => string,
|
102
|
-
:start_pos => @stringScanner.pos,
|
103
|
-
:length => 1}
|
93
|
+
str = @stringScanner.string.slice(@stringScanner.pos, 1)
|
94
|
+
regexp = Regexp.new(Regexp.escape(str))
|
95
|
+
@current_match = LexicalMatch.new([str], {str=>regexp}, @stringScanner.pos, 1)
|
96
|
+
@stringScanner.pos += 1
|
104
97
|
end
|
105
98
|
end
|
106
99
|
|
100
|
+
|
101
|
+
private
|
102
|
+
|
107
103
|
def match_token
|
108
|
-
|
104
|
+
matching_terminals = []
|
105
|
+
regexps = {}
|
109
106
|
max_length = 0
|
110
|
-
@recognizables.each do |
|
111
|
-
|
107
|
+
@recognizables.each do |recognizable|
|
108
|
+
len, regexp = match(recognizable)
|
109
|
+
if len > 0
|
112
110
|
if len > max_length
|
113
111
|
# Now, we have a match longer than whatever we had,
|
114
112
|
# so we discharge what we had, and save the new one
|
115
|
-
|
116
|
-
|
113
|
+
matching_terminals = [recognizable]
|
114
|
+
regexps = {recognizable => regexp}
|
115
|
+
max_length = len
|
117
116
|
elsif len == max_length
|
118
117
|
# This regular expression matches a string of same length
|
119
|
-
# as our previous match, so we prepare to return both
|
120
|
-
|
118
|
+
# as our previous match(es), so we prepare to return both/all
|
119
|
+
matching_terminals << recognizable
|
120
|
+
regexps[recognizable] = regexp
|
121
121
|
end
|
122
122
|
end
|
123
123
|
end
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
124
|
+
if max_length == 0
|
125
|
+
return false
|
126
|
+
else
|
127
|
+
@current_match = LexicalMatch.new(matching_terminals, regexps, @stringScanner.pos, max_length)
|
128
|
+
return true
|
129
|
+
end
|
130
130
|
end
|
131
|
-
|
131
|
+
|
132
|
+
def match(recognizable)
|
133
|
+
if recognizable.class == Class # It must be a token
|
134
|
+
return recognizable.match(@stringScanner)
|
135
|
+
elsif (len = @stringScanner.match?(@string_patterns[recognizable])) and len > 0
|
136
|
+
return len, @string_patterns[recognizable]
|
137
|
+
else
|
138
|
+
return 0, nil
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
132
142
|
def match_whitespace
|
133
|
-
|
143
|
+
max_len = 0
|
134
144
|
for i in 0..@ws_regs.size - 1 do
|
135
145
|
len = @stringScanner.match?(@ws_regs[i]) || 0
|
136
|
-
if len >
|
137
|
-
|
146
|
+
if len > max_len
|
147
|
+
max_len = len
|
138
148
|
end
|
139
149
|
end
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
150
|
+
return max_len
|
151
|
+
end
|
152
|
+
|
153
|
+
|
154
|
+
def do_match2!
|
155
|
+
while @stringScanner.pos < @input.length do
|
156
|
+
look_for_token_match
|
157
|
+
look_for_whitespace_match
|
158
|
+
|
159
|
+
if @whitespace_match_length > @match_length
|
160
|
+
@stringScanner.pos += @whitespace_match_length
|
161
|
+
elsif @match_length > 0
|
162
|
+
@current_match = LexicalMatch.new(@matching_recognizables, @regexps, @stringScanner.pos, @match_length)
|
163
|
+
@stringScanner.pos += @match_length
|
164
|
+
break
|
165
|
+
else
|
166
|
+
str = @stringScanner.string.slice(@stringScanner.pos, 1)
|
167
|
+
regexp = Regexp.new(Regexp.escape(str))
|
168
|
+
@current_match = LexicalMatch.new([str], {str=>regexp}, @stringScanner.pos, 1)
|
169
|
+
@stringScanner.pos += 1
|
170
|
+
break
|
171
|
+
end
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
175
|
+
def look_for_token_match
|
176
|
+
@matching_recognizables = []
|
177
|
+
@regexps = {}
|
178
|
+
@match_length = 0
|
179
|
+
@tokens.each do |token|
|
180
|
+
new_match_length, regexp = token.match(@stringScanner)
|
181
|
+
if new_match_length > match_length
|
182
|
+
@matching_recognizables = [token]
|
183
|
+
@regexps = {token => regexp}
|
184
|
+
@match_length = new_match_length
|
185
|
+
elsif new_match_length > 0 and new_match_length == @match_length
|
186
|
+
@matching_recognizables << token
|
187
|
+
@regexps[token] = regexp
|
188
|
+
end
|
189
|
+
end
|
190
|
+
@literals.each do |literal|
|
191
|
+
new_match_length = @stringScanner.match?(@string_patterns[literal]) || 0 # match? yields nil when nothing matches
|
192
|
+
if new_match_length > match_length
|
193
|
+
@matching_recognizables = [literal]
|
194
|
+
@regexps = {literal => @string_patterns[literal]}
|
195
|
+
@match_length = new_match_length
|
196
|
+
elsif new_match_length > 0 and new_match_length == @match_length
|
197
|
+
@matching_recognizables << literal
|
198
|
+
@regexps[literal] = @string_patterns[literal]
|
199
|
+
end
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
def look_for_whitespace_match
|
204
|
+
@whitespace_match_length = 0
|
205
|
+
for i in 0..@ws_regs.size - 1 do
|
206
|
+
len = @stringScanner.match?(@ws_regs[i]) || 0
|
207
|
+
if len > @whitespace_match_length
|
208
|
+
@whitespace_match_length = len
|
209
|
+
end
|
210
|
+
end
|
211
|
+
end
|
212
|
+
|
213
|
+
|
214
|
+
|
215
|
+
end
|
216
|
+
|
217
|
+
class LexicalMatch
|
218
|
+
attr_accessor :terminals, :start_pos, :length
|
219
|
+
|
220
|
+
def initialize(terminals, regexps, start_pos, length)
|
221
|
+
@terminals = terminals
|
222
|
+
@regexps = regexps
|
223
|
+
@start_pos = start_pos
|
224
|
+
@length = length
|
225
|
+
end
|
226
|
+
|
227
|
+
def regexp(terminal)
|
228
|
+
@regexps[terminal]
|
145
229
|
end
|
146
230
|
end
|
147
|
-
|
148
231
|
end
|
149
232
|
|
150
233
|
|