kanocc 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,165 @@
1
+ #
2
+ # Copyright 2008 Christian Surlykke
3
+ #
4
+ # This file is part of Kanocc.
5
+ #require 'logger'
6
+
7
+ # Kanocc is free software: you can redistribute it and/or modify
8
+ # it under the terms of the GNU General Public License, version 3
9
+ # as published by the Free Software Foundation.
10
+ #
11
+ # Kanocc is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14
+ # GNU General Public License, version 3 for more details.
15
+ #
16
+ # You should have received a copy of the GNU General Public License,
17
+ # version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
18
+ #
19
+ require 'stringio'
20
+ require 'strscan'
21
+ require "logger"
22
module Kanocc
  # Splits an input text into tokens.
  #
  # The scanner is configured with a list of whitespace regexps
  # (set_whitespace) and a list of recognizables (set_recognized): Token
  # classes, which contribute their patterns, and literal strings.
  #
  # At every position the longest token match and the longest whitespace
  # match are compared. Whitespace is skipped only when it is strictly longer
  # than the token match; otherwise the token match is reported. When several
  # recognizables match with the same (longest) length, all of them are
  # reported. When nothing matches, the next character is reported as a
  # string literal.
  #
  # Each reported match is a Hash with the keys
  #   :matches   - Array of the recognizables that matched
  #   :string    - the matched text
  #   :start_pos - position of the match (a StringScanner position, i.e. bytes)
  #   :length    - length of the match in bytes
  class Scanner
    attr_accessor :logger

    # init - options Hash. init[:logger], when given, is used for logging;
    #        otherwise a Logger writing warnings and above to STDOUT is created.
    def initialize(init = {})
      if init[:logger]
        @logger = init[:logger]
      else
        @logger = Logger.new(STDOUT)
        @logger.level = Logger::WARN
      end
      @ws_regs = [/\s/]
      @recognizables = []
      @regexps = []
    end

    # Replaces the list of regexps that are considered whitespace.
    #
    # Raises RuntimeError if any argument is not a Regexp; in that case the
    # previously configured whitespace is left untouched.
    def set_whitespace(*ws_regs)
      unless ws_regs.all? { |ws_reg| ws_reg.is_a?(Regexp) }
        raise "set_whitespace must be given a list of Regexp's"
      end
      @ws_regs = ws_regs
    end

    # Replaces the list of things the scanner recognizes.
    #
    # rec - Token classes (their patterns are used) and/or Strings (matched
    #       literally).
    #
    # Raises RuntimeError on any other kind of argument; in that case the
    # previously configured recognizables are left untouched.
    def set_recognized(*rec)
      recognizables = []
      rec.each do |r|
        if r.is_a?(Class) && r.ancestors.include?(Token)
          recognizables.concat(r.patterns)
        elsif r.is_a?(String)
          recognizables << {:literal => r,
                            :regexp => Regexp.new(Regexp.escape(r))}
        else
          raise "set_recognized must be given a list of Tokens classes and or strings"
        end
      end
      # Only commit once every argument has been validated.
      @recognizables = recognizables
      rec
    end

    # Scans input and yields a match Hash (see class comment) for every token
    # found. Whitespace is skipped silently.
    #
    # input - a String, or any object responding to #read (IO, StringIO, ...).
    #
    # Returns an Enumerator over the matches when called without a block.
    # Raises RuntimeError if input is neither a String nor readable.
    def each_token(input)
      if input.is_a?(String)
        @input = input
      elsif input.respond_to?(:read)
        @input = input.read
      else
        raise "Input must be a string or an IO object"
      end
      # Hand the already-read text to the enumerator so a stream is not
      # consumed twice.
      return enum_for(:each_token, @input) unless block_given?

      @stringScanner = StringScanner.new(@input)
      while (match = do_match)
        if match[:matches]
          @logger.debug("Yielding #{match}")
          yield(match)
        end
        @stringScanner.pos += match[:length]
      end
    end

    private

    # Returns the match (token, whitespace or single-character literal) at the
    # current scan position, or nil at end of input. Does not advance.
    def do_match
      # eos? works on the scanner's own (byte) position, unlike comparing
      # pos with String#length, which counts characters.
      return nil if @stringScanner.eos?

      token_match = match_token
      whitespace_match = match_whitespace

      if whitespace_match[:length] > token_match[:length]
        whitespace_match
      elsif token_match[:length] > 0
        token_match
      else
        unmatched_character
      end
    end

    # Neither tokens nor whitespace matched: report the next character of the
    # remaining input as a string literal.
    def unmatched_character
      # Take one whole character so a multibyte character is never split;
      # fall back to a single byte if the regexp cannot match here.
      string = @stringScanner.check(/./m) || @stringScanner.peek(1)
      {:matches => [{:literal => string,
                     :regexp => Regexp.new(Regexp.escape(string))}],
       :string => string,
       :start_pos => @stringScanner.pos,
       :length => string.bytesize}
    end

    # Finds the recognizables giving the longest match at the current
    # position. :length is 0 and :matches is empty when nothing matches.
    def match_token
      matches = []
      max_length = 0
      @recognizables.each do |rec|
        len = @stringScanner.match?(rec[:regexp])
        next unless len && len > 0

        if len > max_length
          # Longer than whatever we had: discard the old matches.
          matches = [rec]
          max_length = len
        elsif len == max_length
          # Same length as the best so far: report both.
          matches << rec
        end
      end
      # peek takes a byte count, matching what match? returned.
      {:matches => matches,
       :string => @stringScanner.peek(max_length),
       :start_pos => @stringScanner.pos,
       :length => max_length}
    end

    # Finds the longest whitespace match at the current position. The result
    # has no :matches key, which is how each_token knows to skip it.
    def match_whitespace
      lengths = @ws_regs.map { |ws_reg| @stringScanner.match?(ws_reg) || 0 }
      max_length = lengths.max || 0
      {:string => @stringScanner.peek(max_length),
       :start_pos => @stringScanner.pos,
       :length => max_length}
    end
  end
end
149
+
150
+
151
+ ############################################
152
+ # Testing
153
+ #require 'Token'
154
+ #
155
+ #class Number < Token
156
+ # set_pattern(/\d+/)
157
+ #end
158
+ #
159
+ #scanner = KanoccScanner.new
160
+ #scanner.set_recognized(Number, "Exit")
161
+ #scanner.set_whitespace(/[ \t]/)
162
+ #
163
+ #scanner.eachTokenDo{|token| print token.inspect, "\n"}
164
+
165
+
@@ -0,0 +1,58 @@
1
+ #
2
+ # Copyright 2008 Christian Surlykke
3
+ #
4
+ # This file is part of Kanocc.
5
+ #
6
+ # Kanocc is free software: you can redistribute it and/or modify
7
+ # it under the terms of the GNU General Public License, version 3
8
+ # as published by the Free Software Foundation.
9
+ #
10
+ # Kanocc is distributed in the hope that it will be useful,
11
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
12
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13
+ # GNU General Public License, version 3 for more details.
14
+ #
15
+ # You should have received a copy of the GNU General Public License,
16
+ # version 3 along with Kanocc. If not, see <http://www.gnu.org/licenses/>.
17
+ #
18
module Kanocc
  # Base class for tokens. Subclasses declare the regular expressions they
  # are recognized by with Token.pattern; the declarations are kept per
  # class and handed out by Token.patterns.
  class Token
    attr_accessor :m

    # Registered patterns, keyed by the class that declared them.
    @@patterns = Hash.new

    # True when klass is exactly the class of this token.
    def ===(klass)
      self.class == klass
    end

    # Registers reg as a pattern of the calling class. When a block is
    # given it is installed as an instance method named "pattern <regexp>"
    # and that name is stored with the pattern; otherwise the stored
    # method name is nil.
    #
    # Raises RuntimeError unless reg is a Regexp.
    def self.pattern(reg, &block)
      raise "pattern must be given a Regexp as it's first argument" unless reg.is_a?(Regexp)

      @@patterns[self] ||= []

      method_name = nil
      if block_given?
        method_name = "pattern #{reg.inspect}".to_sym
        define_method(method_name, &block)
      end

      @@patterns[self] << {:token => self,
                           :regexp => reg,
                           :method_name => method_name}
    end

    # The patterns declared by the calling class; an empty Array when it
    # has declared none.
    def self.patterns
      @@patterns[self] || []
    end

    # Marker: instances are Kanocc tokens.
    def is_a_kanocc_token?
      true
    end

    # Marker: the class is a Kanocc grammar symbol.
    def self.is_a_kanocc_grammarsymbol?
      true
    end

    # Tokens are displayed by their class name.
    def inspect
      self.class.name
    end
  end
end
data/lib/todo ADDED
@@ -0,0 +1,3 @@
1
+ Better handling of blocks
2
+ LR Parsers
3
+ Scanner.eachToken method
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: kanocc
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Christian Surlykke
8
+ autorequire: kanocc
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-05-19 00:00:00 +02:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description:
17
+ email: ""
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - README
26
+ - COPYING
27
+ - lib/kanocc
28
+ - lib/kanocc.rb
29
+ - lib/todo
30
+ - lib/kanocc/earley.rb
31
+ - lib/kanocc/scanner.rb
32
+ - lib/kanocc/grammar_rule.rb
33
+ - lib/kanocc/nonterminal.rb
34
+ - lib/kanocc/token.rb
35
+ - examples/calculator.rb
36
+ - examples/ruby_quiz_78.rb
37
+ has_rdoc: false
38
+ homepage: ""
39
+ post_install_message:
40
+ rdoc_options: []
41
+
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project:
59
+ rubygems_version: 0.9.5
60
+ signing_key:
61
+ specification_version: 2
62
+ summary: Kanocc - Kanocc ain't no compiler-compiler. A framework for syntax directed translation
63
+ test_files: []
64
+