kanocc 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/COPYING +674 -0
- data/README +15 -0
- data/examples/calculator.rb +98 -0
- data/examples/ruby_quiz_78.rb +65 -0
- data/lib/kanocc.rb +303 -0
- data/lib/kanocc/earley.rb +322 -0
- data/lib/kanocc/grammar_rule.rb +50 -0
- data/lib/kanocc/nonterminal.rb +176 -0
- data/lib/kanocc/scanner.rb +165 -0
- data/lib/kanocc/token.rb +58 -0
- data/lib/todo +3 -0
- metadata +64 -0
@@ -0,0 +1,165 @@
|
|
1
|
+
#
|
2
|
+
# Copyright 2008 Christian Surlykke
|
3
|
+
#
|
4
|
+
# This file is part of Kanocc.
|
5
|
+
#require 'logger'
|
6
|
+
|
7
|
+
# Kanocc is free software: you can redistribute it and/or modify
|
8
|
+
# it under the terms of the GNU General Public License, version 3
|
9
|
+
# as published by the Free Software Foundation.
|
10
|
+
#
|
11
|
+
# Kanocc is distributed in the hope that it will be useful,
|
12
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
13
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
14
|
+
# GNU General Public License, version 3 for more details.
|
15
|
+
#
|
16
|
+
# You should have received a copy of the GNU General Public License,
|
17
|
+
# version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
|
18
|
+
#
|
19
|
+
require 'stringio'
|
20
|
+
require 'strscan'
|
21
|
+
require "logger"
|
22
|
+
module Kanocc
  # Longest-match tokenizer.
  #
  # A scanner is configured with a list of "recognizables" (Token subclasses
  # and/or literal strings) and a list of whitespace regexps.  #each_token
  # then walks the input from left to right and, at every position, picks the
  # longest thing that matches there:
  #
  # * if whitespace matches strictly longer than any recognizable, the
  #   whitespace is skipped silently;
  # * otherwise, if at least one recognizable matches, every recognizable
  #   that matches with the maximal length is reported;
  # * otherwise the next single character is reported as a string literal.
  #
  # All positions and lengths handled here are *byte* offsets, because that
  # is what StringScanner#pos and StringScanner#match? work with.
  class Scanner
    # Matches exactly one character (newlines included) at the scan pointer.
    ANY_CHAR = /./m

    attr_accessor :logger

    # init:: options hash.  The only key read here is :logger; when it is
    #        absent a Logger writing to STDOUT at WARN level is created.
    def initialize(init = {})
      if init[:logger]
        @logger = init[:logger]
      else
        @logger = Logger.new(STDOUT)
        @logger.level = Logger::WARN
      end
      @ws_regs = [/\s/]
      @recognizables = []
      @regexps = [] # initialised for compatibility; not read anywhere in this class
    end

    # Replaces the list of whitespace patterns.
    #
    # ws_regs:: zero or more Regexp objects.
    #
    # Raises a RuntimeError if any argument is not a Regexp.  The arguments
    # are validated before anything is stored, so a rejected call leaves the
    # previous whitespace configuration intact.
    def set_whitespace(*ws_regs)
      ws_regs.each do |ws_reg|
        unless ws_reg.is_a?(Regexp)
          raise "set_whitespace must be given a list of Regexp's"
        end
      end
      @ws_regs = ws_regs.dup
    end

    # Replaces the list of things the scanner recognizes.
    #
    # rec:: Token subclasses (their +patterns+ are used) and/or Strings
    #       (matched literally).
    #
    # Raises a RuntimeError for any other kind of argument.  The new list is
    # built on the side and only stored once every argument was accepted, so
    # a rejected call leaves the previous configuration intact.
    def set_recognized(*rec)
      recognizables = []
      rec.each do |r|
        if r.is_a?(Class) && r.ancestors.include?(Token)
          recognizables.concat(r.patterns)
        elsif r.is_a?(String)
          recognizables << {:literal => r,
                            :regexp => Regexp.new(Regexp.escape(r))}
        else
          raise "set_recognized must be given a list of Tokens classes and or strings"
        end
      end
      @recognizables = recognizables
    end

    # Scans +input+ and yields one hash per token found.
    #
    # input:: a String, or any object responding to +read+ (IO, File,
    #         StringIO, ...), which is read to the end before scanning.
    #
    # Each yielded hash has the keys
    # :matches::   the recognizables that matched (hashes carrying :regexp
    #              plus either :literal or whatever the Token class put in
    #              its pattern entries),
    # :string::    the matched text,
    # :start_pos:: byte offset of the match in the input,
    # :length::    byte length of the match.
    #
    # Whitespace matches carry no :matches entry and are therefore consumed
    # without being yielded.
    #
    # Raises a RuntimeError if +input+ is neither a String nor readable.
    def each_token(input)
      if input.is_a?(String)
        @input = input
      elsif input.respond_to?(:read)
        # Duck-typed on purpose: StringIO is not an IO subclass.
        @input = input.read
      else
        raise "Input must be a string or an IO object"
      end
      @scanner = StringScanner.new(@input)
      while match = do_match do
        if match[:matches]
          @logger.debug("Yielding #{match}")
          yield(match)
        end
        @scanner.pos += match[:length]
      end
    end

    private

    # Returns the match hash for the current scan position, or nil once the
    # whole input has been consumed.  Does not move the scan pointer.
    def do_match
      # eos? compares byte positions; comparing pos with String#length would
      # stop early on multibyte input.
      return nil if @scanner.eos?

      token_match = match_token
      whitespace_match = match_whitespace

      if whitespace_match[:length] > token_match[:length]
        whitespace_match
      elsif token_match[:length] > 0
        token_match
      else
        # Neither tokens nor whitespace matched: hand out the next character
        # of the remaining input as a string literal.
        match_literal
      end
    end

    # Builds the fallback match consisting of exactly one character.
    def match_literal
      # Byte length of the next character, so that the scan pointer never
      # ends up in the middle of a multibyte character.
      length = @scanner.match?(ANY_CHAR) || 1
      string = @scanner.peek(length)
      {:matches => [{:literal => string,
                     :regexp => Regexp.new(Regexp.escape(string))}],
       :string => string,
       :start_pos => @scanner.pos,
       :length => length}
    end

    # Tries every recognizable at the current position and keeps those with
    # the longest non-empty match.  :length is 0 and :matches is empty when
    # nothing matched.
    def match_token
      matches = []
      max_length = 0
      @recognizables.each do |rec|
        len = @scanner.match?(rec[:regexp])
        next unless len && len > 0

        if len > max_length
          # Longer than whatever we had, so discard the earlier candidates
          # and keep only the new one.
          matches = [rec]
          max_length = len
        elsif len == max_length
          # Same length as the current best match: report both.
          matches << rec
        end
      end
      {:matches => matches,
       :string => @scanner.peek(max_length),
       :start_pos => @scanner.pos,
       :length => max_length}
    end

    # Returns the longest whitespace match at the current position.  The
    # result deliberately has no :matches key; that is how each_token tells
    # whitespace apart from tokens.
    def match_whitespace
      max_length = @ws_regs.map { |ws_reg| @scanner.match?(ws_reg) || 0 }.max || 0
      {:string => @scanner.peek(max_length),
       :start_pos => @scanner.pos,
       :length => max_length}
    end
  end
end
|
149
|
+
|
150
|
+
|
151
|
+
############################################
|
152
|
+
# Testing
|
153
|
+
#require 'Token'
|
154
|
+
#
|
155
|
+
#class Number < Token
|
156
|
+
#  pattern(/\d+/)
|
157
|
+
#end
|
158
|
+
#
|
159
|
+
#scanner = Kanocc::Scanner.new
|
160
|
+
#scanner.set_recognized(Number, "Exit")
|
161
|
+
#scanner.set_whitespace(/[ \t]/)
|
162
|
+
#
|
163
|
+
#scanner.each_token(STDIN){|token| print token.inspect, "\n"}
|
164
|
+
|
165
|
+
|
data/lib/kanocc/token.rb
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#
|
2
|
+
# Copyright 2008 Christian Surlykke
|
3
|
+
#
|
4
|
+
# This file is part of Kanocc.
|
5
|
+
#
|
6
|
+
# Kanocc is free software: you can redistribute it and/or modify
|
7
|
+
# it under the terms of the GNU General Public License, version 3
|
8
|
+
# as published by the Free Software Foundation.
|
9
|
+
#
|
10
|
+
# Kanocc is distributed in the hope that it will be useful,
|
11
|
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
12
|
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
13
|
+
# GNU General Public License, version 3 for more details.
|
14
|
+
#
|
15
|
+
# You should have received a copy of the GNU General Public License,
|
16
|
+
# version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
|
17
|
+
#
|
18
|
+
module Kanocc
  # Base class for the terminal symbols of a Kanocc grammar.
  #
  # Subclasses declare what they look like in the input with one or more
  # calls to Token.pattern; the registered patterns are read back with
  # Token.patterns.
  class Token
    attr_accessor :m

    # True when +klass+ is exactly the class of this token.
    def ===(klass)
      self.class == klass
    end

    # Registers +reg+ as a pattern for the receiving class.
    #
    # reg::   the Regexp to register.
    # block:: optional; when given it is installed as an instance method
    #         named "pattern <reg.inspect>", and that name is stored with
    #         the pattern under :method_name (nil when no block is given).
    #
    # Raises a RuntimeError unless +reg+ is a Regexp.
    def self.pattern(reg, &block)
      raise "pattern must be given a Regexp as it's first argument" unless reg.is_a?(Regexp)

      method_name = nil
      if block_given?
        method_name = ("pattern " + reg.inspect).to_sym
        define_method(method_name, &block)
      end

      # Patterns live in an instance variable of the receiving class, so each
      # Token subclass keeps its own list and nothing is shared across the
      # class hierarchy.
      @patterns ||= []
      @patterns << {:token => self,
                    :regexp => reg,
                    :method_name => method_name}
    end

    # Returns the patterns registered on the receiving class itself (patterns
    # of a superclass are not included), or an empty array when there are none.
    def self.patterns
      (instance_variable_defined?(:@patterns) && @patterns) || []
    end

    def is_a_kanocc_token?
      true
    end

    def self.is_a_kanocc_grammarsymbol?
      true
    end

    # Tokens are displayed by the name of their class.
    def inspect
      self.class.name
    end
  end
end
|
data/lib/todo
ADDED
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: kanocc
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Christian Surlykke
|
8
|
+
autorequire: kanocc
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-05-19 00:00:00 +02:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: ""
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- README
|
26
|
+
- COPYING
|
27
|
+
- lib/kanocc
|
28
|
+
- lib/kanocc.rb
|
29
|
+
- lib/todo
|
30
|
+
- lib/kanocc/earley.rb
|
31
|
+
- lib/kanocc/scanner.rb
|
32
|
+
- lib/kanocc/grammar_rule.rb
|
33
|
+
- lib/kanocc/nonterminal.rb
|
34
|
+
- lib/kanocc/token.rb
|
35
|
+
- examples/calculator.rb
|
36
|
+
- examples/ruby_quiz_78.rb
|
37
|
+
has_rdoc: false
|
38
|
+
homepage: ""
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options: []
|
41
|
+
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
requirements: []
|
57
|
+
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 0.9.5
|
60
|
+
signing_key:
|
61
|
+
specification_version: 2
|
62
|
+
summary: Kanocc - Kanocc ain't no compiler-compiler. A framework for syntax directed translation
|
63
|
+
test_files: []
|
64
|
+
|