kanocc 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/COPYING +674 -0
- data/README +15 -0
- data/examples/calculator.rb +98 -0
- data/examples/ruby_quiz_78.rb +65 -0
- data/lib/kanocc.rb +303 -0
- data/lib/kanocc/earley.rb +322 -0
- data/lib/kanocc/grammar_rule.rb +50 -0
- data/lib/kanocc/nonterminal.rb +176 -0
- data/lib/kanocc/scanner.rb +165 -0
- data/lib/kanocc/token.rb +58 -0
- data/lib/todo +3 -0
- metadata +64 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
#
|
2
|
+
# Copyright 2008 Christian Surlykke
|
3
|
+
#
|
4
|
+
# This file is part of Kanocc.
|
5
|
+
#require 'logger'
|
6
|
+
|
7
|
+
# Kanocc is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU General Public License, version 3
|
9
|
+
# as published by the Free Software Foundation.
|
10
|
+
#
|
11
|
+
# Kanocc is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License, version 3 for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License,
|
17
|
+
# version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#
|
19
|
+
require 'stringio'
|
20
|
+
require 'strscan'
|
21
|
+
require "logger"
|
22
|
+
module Kanocc
|
23
|
+
# Regexp-driven tokenizer.
#
# A scanner is configured with a list of "recognizables" (Token classes
# and/or literal strings) and a list of whitespace regexps.  #each_token then
# walks the input from left to right and, at every position, picks the
# longest match.  Whitespace is skipped, everything else is yielded.
#
# Every yielded match is a Hash with these keys:
#   :matches   - Array of the recognizable entries that matched (all entries
#                that tie for the longest match are included)
#   :string    - the matched text
#   :start_pos - offset of the match in the input, in bytes (StringScanner#pos)
#   :length    - length of the match, in bytes
#
# Offsets and lengths are consistently measured in bytes because that is the
# unit StringScanner#pos and StringScanner#match? work in; for pure ASCII
# input this is identical to character offsets.
class Scanner
  attr_accessor :logger

  # init - options Hash.  init[:logger], when given, is used for logging;
  #        otherwise a Logger writing to STDOUT at WARN level is created.
  def initialize(init = {})
    @logger = init[:logger] || default_logger
    @ws_regs = [/\s/]
    @recognizables = []
    @regexps = [] # not used anywhere in this class; kept as in the original
  end

  # Replaces the whitespace definitions with the given regexps.
  #
  # Raises RuntimeError if any argument is not a Regexp.  Arguments are
  # validated before anything is assigned, so a rejected call leaves the
  # previous whitespace definitions in place.
  def set_whitespace(*ws_regs)
    unless ws_regs.all? { |ws_reg| ws_reg.is_a?(Regexp) }
      raise "set_whitespace must be given a list of Regexp's"
    end
    @ws_regs = ws_regs
  end

  # Replaces the list of things the scanner recognizes.
  #
  # rec - Token subclasses (their .patterns are used) and/or Strings (matched
  #       literally).
  #
  # Raises RuntimeError for any other kind of argument.  The new list is
  # built completely before it is assigned, so a rejected call leaves the
  # previous list in place.
  def set_recognized(*rec)
    @recognizables = rec.flat_map do |r|
      if r.is_a?(Class) && r.ancestors.include?(Token)
        r.patterns
      elsif r.is_a?(String)
        [{:literal => r, :regexp => Regexp.new(Regexp.escape(r))}]
      else
        raise "set_recognized must be given a list of Tokens classes and or strings"
      end
    end
  end

  # Scans input and yields one match Hash (see class comment) per token.
  #
  # input - a String, or any object responding to #read (IO, StringIO, ...),
  #         in which case its remaining content is read in one go.
  #
  # Raises RuntimeError for any other kind of input.
  def each_token(input)
    @input =
      if input.is_a?(String)
        input
      elsif input.respond_to?(:read)
        input.read
      else
        raise "Input must be a string or an IO object"
      end
    @stringScanner = StringScanner.new(@input)
    while (match = do_match)
      # Whitespace matches carry no :matches entry and are silently skipped.
      if match[:matches]
        @logger.debug("Yielding #{match}")
        yield(match)
      end
      @stringScanner.pos += match[:length]
    end
  end

  private

  # Builds the logger used when none was supplied to #initialize.
  def default_logger
    logger = Logger.new(STDOUT)
    logger.level = Logger::WARN
    logger
  end

  # Returns the match at the current scan position, or nil at end of input.
  # Whitespace wins only when it is strictly longer than the best token match.
  def do_match
    return nil if @stringScanner.eos?

    token_match = match_token
    whitespace_match = match_whitespace

    if whitespace_match[:length] > token_match[:length]
      whitespace_match
    elsif token_match[:length] > 0
      token_match
    else
      literal_match
    end
  end

  # Neither tokens nor whitespace matched: return the next character of the
  # remaining input as a string literal.
  def literal_match
    # /./m matches one whole (possibly multibyte) character; fall back to a
    # single byte so the scanner always advances.
    char = @stringScanner.check(/./m) || @stringScanner.peek(1)
    {:matches => [{:literal => char,
                   :regexp => Regexp.new(Regexp.escape(char))}],
     :string => char,
     :start_pos => @stringScanner.pos,
     :length => char.bytesize}
  end

  # Finds the longest match among the recognizables at the current position.
  # :length is 0 and :matches is empty when nothing matched.
  def match_token
    matches = []
    max_length = 0
    @recognizables.each do |rec|
      len = @stringScanner.match?(rec[:regexp])
      next unless len && len > 0

      if len > max_length
        # Longer than whatever we had: discard the old matches.
        matches = [rec]
        max_length = len
      elsif len == max_length
        # Same length as the current best: report both.
        matches << rec
      end
    end
    {:matches => matches,
     :string => @stringScanner.peek(max_length),
     :start_pos => @stringScanner.pos,
     :length => max_length}
  end

  # Finds the longest whitespace match at the current position.
  # The result has no :matches key, which is how #each_token tells
  # whitespace from tokens.
  def match_whitespace
    max_length = @ws_regs.map { |ws_reg| @stringScanner.match?(ws_reg) || 0 }.max || 0
    {:string => @stringScanner.peek(max_length),
     :start_pos => @stringScanner.pos,
     :length => max_length}
  end
end
|
147
|
+
|
148
|
+
end
|
149
|
+
|
150
|
+
|
151
|
+
############################################
|
152
|
+
# Testing
|
153
|
+
#require 'Token'
|
154
|
+
#
|
155
|
+
#class Number < Token
|
156
|
+
# set_pattern(/\d+/)
|
157
|
+
#end
|
158
|
+
#
|
159
|
+
#scanner = KanoccScanner.new
|
160
|
+
#scanner.set_recognized(Number, "Exit")
|
161
|
+
#scanner.set_whitespace(/[ \t]/)
|
162
|
+
#
|
163
|
+
#scanner.eachTokenDo{|token| print token.inspect, "\n"}
|
164
|
+
|
165
|
+
|
data/lib/kanocc/token.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#
|
2
|
+
# Copyright 2008 Christian Surlykke
|
3
|
+
#
|
4
|
+
# This file is part of Kanocc.
|
5
|
+
#
|
6
|
+
# Kanocc is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License, version 3
|
8
|
+
# as published by the Free Software Foundation.
|
9
|
+
#
|
10
|
+
# Kanocc is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License, version 3 for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License,
|
16
|
+
# version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
#
|
18
|
+
module Kanocc
|
19
|
+
# Base class for tokens.  Subclasses register the regexps they are recognized
# by through Token.pattern; the registrations are read back with
# Token.patterns.
class Token
  attr_accessor :m

  # Registry of pattern entries, keyed by the class that registered them.
  @@patterns = {}

  # True when klass is exactly the class of this token.
  def ===(klass)
    klass == self.class
  end

  # Registers reg as a pattern for the receiving class.
  #
  # reg   - the Regexp to register.
  # block - optional; when given it is installed as an instance method named
  #         "pattern <reg.inspect>" and that name is stored in the entry.
  #
  # Each entry is a Hash with the keys :token (the receiving class), :regexp
  # and :method_name (nil when no block was given).
  # Returns the Array of entries registered so far for the receiving class.
  # Raises RuntimeError when reg is not a Regexp.
  def self.pattern(reg, &block)
    unless reg.is_a?(Regexp)
      raise "pattern must be given a Regexp as it's first argument"
    end

    entries = (@@patterns[self] ||= [])

    handler = nil
    if block_given?
      handler = "pattern #{reg.inspect}".to_sym
      define_method(handler, &block)
    end

    entries << {:token => self, :regexp => reg, :method_name => handler}
  end

  # Returns the entries registered for the receiving class, or an empty
  # Array when there are none.
  def self.patterns
    registered = @@patterns[self]
    registered ? registered : []
  end

  # Always true for token instances.
  def is_a_kanocc_token?
    true
  end

  # Always true for Token and its subclasses.
  def self.is_a_kanocc_grammarsymbol?
    true
  end

  # A token inspects as the name of its class.
  def inspect
    self.class.name
  end
end
|
58
|
+
end
|
data/lib/todo
ADDED
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kanocc
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Christian Surlykke
|
8
|
+
autorequire: kanocc
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-05-19 00:00:00 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: ""
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- README
|
26
|
+
- COPYING
|
27
|
+
- lib/kanocc
|
28
|
+
- lib/kanocc.rb
|
29
|
+
- lib/todo
|
30
|
+
- lib/kanocc/earley.rb
|
31
|
+
- lib/kanocc/scanner.rb
|
32
|
+
- lib/kanocc/grammar_rule.rb
|
33
|
+
- lib/kanocc/nonterminal.rb
|
34
|
+
- lib/kanocc/token.rb
|
35
|
+
- examples/calculator.rb
|
36
|
+
- examples/ruby_quiz_78.rb
|
37
|
+
has_rdoc: false
|
38
|
+
homepage: ""
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options: []
|
41
|
+
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
requirements: []
|
57
|
+
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 0.9.5
|
60
|
+
signing_key:
|
61
|
+
specification_version: 2
|
62
|
+
summary: Kanocc - Kanocc ain't no compiler-compiler. A framework for syntax directed translation
|
63
|
+
test_files: []
|
64
|
+
|