kanocc 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,165 @@
1
+ #
2
+ # Copyright 2008 Christian Surlykke
3
+ #
4
+ # This file is part of Kanocc.
5
+ #
6
+
7
+ # Kanocc is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License, version 3
9
+ # as published by the Free Software Foundation.
10
+ #
11
+ # Kanocc is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License, version 3 for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License,
17
+ # version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
18
+ #
19
+ require 'stringio'
20
+ require 'strscan'
21
+ require "logger"
22
module Kanocc
  # Splits an input text into tokens, string literals and whitespace.
  #
  # At every position the scanner tries all recognizables (see
  # #set_recognized) and all whitespace expressions (see #set_whitespace)
  # and keeps the longest match.  Whitespace is skipped silently.  When
  # nothing matches, the next single character is reported as a string
  # literal.
  #
  # Every match handed to the block of #each_token is a Hash with the keys
  #   :matches   - Array of the recognizables that matched (all of the same,
  #                maximal, length)
  #   :string    - the matched text
  #   :start_pos - byte offset of the match in the input (StringScanner#pos)
  #   :length    - length of the match in bytes
  # For pure ASCII input byte offsets and character offsets coincide.
  class Scanner
    attr_accessor :logger

    # init - options Hash.  init[:logger] is used as logger when given,
    #        otherwise a Logger writing warnings and above to STDOUT is
    #        created.
    def initialize(init = {})
      @logger = init[:logger] || default_logger
      @ws_regs = [/\s/]
      @recognizables = []
      @regexps = []
    end

    # Replaces the list of regular expressions that are treated as
    # whitespace.
    #
    # Raises a RuntimeError if any argument is not a Regexp.  The arguments
    # are validated before anything is stored, so a failing call leaves the
    # previous whitespace definition untouched.
    def set_whitespace(*ws_regs)
      unless ws_regs.all? { |ws_reg| ws_reg.is_a?(Regexp) }
        raise "set_whitespace must be given a list of Regexp's"
      end
      @ws_regs = ws_regs
    end

    # Replaces the list of things the scanner recognizes.
    #
    # rec - Token subclasses (contributing whatever their +patterns+ method
    #       returns) and/or Strings (recognized literally).
    #
    # Raises a RuntimeError on any other kind of argument.  The new list is
    # built completely before it is stored, so a failing call leaves the
    # previous list untouched.
    def set_recognized(*rec)
      @recognizables = rec.flat_map do |r|
        if r.is_a?(Class) && r <= Token
          r.patterns
        elsif r.is_a?(String)
          [{:literal => r,
            :regexp => Regexp.new(Regexp.escape(r))}]
        else
          raise "set_recognized must be given a list of Tokens classes and or strings"
        end
      end
    end

    # Scans +input+ and yields a match Hash (see the class comment) for
    # every token or literal found.  Whitespace is consumed but not yielded.
    #
    # input - a String, or any object responding to +read+ (IO, StringIO,
    #         ...), which is read to its end before scanning starts.
    #
    # Returns an Enumerator over the matches when called without a block.
    # Raises a RuntimeError for any other kind of input.
    def each_token(input)
      return enum_for(:each_token, input) unless block_given?

      @input = read_input(input)
      @string_scanner = StringScanner.new(@input)
      while (match = do_match)
        if match[:matches]
          # Block form: the message is only built when debug logging is on.
          @logger.debug { "Yielding #{match}" }
          yield(match)
        end
        @string_scanner.pos += match[:length]
      end
    end

    private

    # Logger used when the caller supplies none.
    def default_logger
      logger = Logger.new(STDOUT)
      logger.level = Logger::WARN
      logger
    end

    # Turns the argument of #each_token into the String to scan.
    def read_input(input)
      if input.is_a?(String)
        input
      elsif input.respond_to?(:read)
        input.read.to_s
      else
        raise "Input must be a string or an IO object"
      end
    end

    # Finds the match at the current scan position without advancing.
    # Returns nil at the end of the input.  A whitespace match carries no
    # :matches key, which is how #each_token tells it from a token.
    def do_match
      # StringScanner positions are byte offsets, so the end of the input
      # must be detected with eos? and not by comparing with String#length.
      return nil if @string_scanner.eos?

      token_match = match_token
      whitespace_match = match_whitespace

      if whitespace_match[:length] > token_match[:length]
        whitespace_match
      elsif token_match[:length] > 0
        token_match
      else
        single_character_match
      end
    end

    # Neither tokens nor whitespace matched: report the next character of
    # the remaining input as a string literal.
    def single_character_match
      # /./m matches one whole (possibly multibyte) character.  The peek is
      # only a safety net so that the result is never nil.
      string = @string_scanner.check(/./m) || @string_scanner.peek(1)
      {:matches => [{:literal => string,
                     :regexp => Regexp.new(Regexp.escape(string))}],
       :string => string,
       :start_pos => @string_scanner.pos,
       :length => string.bytesize}
    end

    # Collects the recognizables giving the longest match at the current
    # position.  Several recognizables matching the same, maximal, length
    # are all returned.  :length is 0 and :matches empty if nothing matched.
    def match_token
      matches = []
      max_length = 0
      @recognizables.each do |rec|
        len = @string_scanner.match?(rec[:regexp])
        next unless len && len > 0

        if len > max_length
          # Longer than whatever we had: discard that and keep the new one.
          matches = [rec]
          max_length = len
        elsif len == max_length
          matches << rec
        end
      end
      {:matches => matches,
       :string => text_at_pos(max_length),
       :start_pos => @string_scanner.pos,
       :length => max_length}
    end

    # Finds the longest whitespace match at the current position.  :length
    # is 0 if no whitespace expression matches (or none is defined).
    def match_whitespace
      max_length = @ws_regs.map { |ws_reg| @string_scanner.match?(ws_reg) || 0 }.max || 0
      {:string => text_at_pos(max_length),
       :start_pos => @string_scanner.pos,
       :length => max_length}
    end

    # The +length+ bytes of input starting at the current scan position.
    # StringScanner#pos and #match? count bytes, hence byteslice.
    def text_at_pos(length)
      @string_scanner.string.byteslice(@string_scanner.pos, length)
    end
  end
end
149
+
150
+
151
+ ############################################
152
+ # Testing
153
+ #require 'kanocc/token'
154
+ #
155
+ #class Number < Kanocc::Token
156
+ # pattern(/\d+/)
157
+ #end
158
+ #
159
+ #scanner = Kanocc::Scanner.new
160
+ #scanner.set_recognized(Number, "Exit")
161
+ #scanner.set_whitespace(/[ \t]/)
162
+ #
163
+ #scanner.each_token(STDIN){|token| print token.inspect, "\n"}
164
+
165
+
@@ -0,0 +1,58 @@
1
+ #
2
+ # Copyright 2008 Christian Surlykke
3
+ #
4
+ # This file is part of Kanocc.
5
+ #
6
+ # Kanocc is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License, version 3
8
+ # as published by the Free Software Foundation.
9
+ #
10
+ # Kanocc is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License, version 3 for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License,
16
+ # version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
17
+ #
18
module Kanocc
  # Base class for tokens.  A subclass declares, through Token.pattern, the
  # regular expressions it is made of; Token.patterns hands the collected
  # declarations back.
  class Token
    attr_accessor :m

    # Registry of declared patterns: token class => Array of pattern Hashes.
    @@patterns = {}

    # True when +klass+ is exactly the class of this token.
    def ===(klass)
      self.class == klass
    end

    # Declares +reg+ as a pattern of the receiving token class.
    #
    # When a block is given it is installed as an instance method named
    # "pattern <reg.inspect>" and that name is stored with the pattern;
    # otherwise the stored method name is nil.
    #
    # Raises a RuntimeError unless +reg+ is a Regexp.
    def self.pattern(reg, &block)
      unless reg.is_a?(Regexp)
        raise "pattern must be given a Regexp as it's first argument"
      end

      handler = nil
      if block
        handler = ("pattern " + reg.inspect).to_sym
        define_method(handler, &block)
      end

      declared = (@@patterns[self] ||= [])
      declared << {:token => self,
                   :regexp => reg,
                   :method_name => handler}
    end

    # The patterns declared on the receiving class; an empty Array when
    # none have been declared.
    def self.patterns
      @@patterns[self] || []
    end

    # Marker method: instances of Token answer true.
    def is_a_kanocc_token?
      true
    end

    # Marker method: Token classes answer true.
    def self.is_a_kanocc_grammarsymbol?
      true
    end

    # A token is displayed as the name of its class.
    def inspect
      self.class.name
    end
  end
end
data/lib/todo ADDED
@@ -0,0 +1,3 @@
1
+ Better handling of blocks
2
+ LR Parsers
3
+ Scanner.eachToken method
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kanocc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Christian Surlykke
8
+ autorequire: kanocc
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-05-19 00:00:00 +02:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: ""
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README
26
+ - COPYING
27
+ - lib/kanocc
28
+ - lib/kanocc.rb
29
+ - lib/todo
30
+ - lib/kanocc/earley.rb
31
+ - lib/kanocc/scanner.rb
32
+ - lib/kanocc/grammar_rule.rb
33
+ - lib/kanocc/nonterminal.rb
34
+ - lib/kanocc/token.rb
35
+ - examples/calculator.rb
36
+ - examples/ruby_quiz_78.rb
37
+ has_rdoc: false
38
+ homepage: ""
39
+ post_install_message:
40
+ rdoc_options: []
41
+
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project:
59
+ rubygems_version: 0.9.5
60
+ signing_key:
61
+ specification_version: 2
62
+ summary: Kanocc - Kanocc ain't no compiler-compiler. A framework for syntax directed translation
63
+ test_files: []
64
+