oedipus_lex 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,72 @@
1
+ # -*- ruby -*-
2
+
3
+ require "rubygems"
4
+ require "hoe"
5
+
6
+ Hoe.plugin :debugging
7
+ Hoe.plugin :git
8
+ Hoe.plugin :isolate
9
+ Hoe.plugin :seattlerb
10
+
11
+ Hoe.spec "oedipus_lex" do
12
+ developer "Ryan Davis", "ryand-ruby@zenspider.com"
13
+ license "MIT"
14
+
15
+ self.readme_file = "README.rdoc"
16
+ self.history_file = "History.rdoc"
17
+ end
18
+
19
# Rebuild the self-hosted lexer: generate a fresh copy of
# lib/oedipus_lex.rex.rb from lib/oedipus_lex.rex, show how it differs from
# the checked-in copy, install it, then run rake again with no arguments.
task :bootstrap do
  current = "lib/oedipus_lex.rex.rb"
  fresh   = "#{current}.new"

  ruby "-Ilib lib/oedipus_lex.rb lib/oedipus_lex.rex > #{fresh}"

  # Informational only: plain system is used, so the result of diff is ignored.
  system "diff -uw #{current} #{fresh}"

  sh "mv #{fresh} #{current}"
  ruby "-S rake"
end
25
+
26
+ $: << "lib"
27
+ Rake.application.rake_require "oedipus_lex"
28
+ $rex_option[:stub] = true
29
+
30
# Every sample/*.rex has a generated sample/*.rex.rb that :demo depends on.
task :demo => Dir["sample/*.rex"].map { |rex_path| "#{rex_path}.rb" }.sort

# Run each generated sample lexer against its sample input(s).
task :demo => :isolate do
  Dir.chdir "sample" do
    [
      "sample.rex.rb sample.html",
      "sample.rex.rb sample.xhtml",
      "sample1.rex.rb sample1.c",
      "sample2.rex.rb sample2.bas",
      "xhtmlparser.rex.rb xhtmlparser.html",
      "xhtmlparser.rex.rb xhtmlparser.xhtml",
    ].each do |args|
      ruby args
    end

    # NOTE(review): the error samples are echoed and run through plain
    # system, whose result is ignored here -- presumably because they are
    # expected to fail; confirm. Both use error1.txt as input.
    ["error1.rex.rb", "error2.rex.rb"].each do |lexer|
      cmd = "#{Gem.ruby} #{lexer} error1.txt"
      warn cmd
      system cmd
    end
  end
end
53
+
54
# Regenerate sample/calc3.rex.rb with the :stub and :do_parse options turned
# off, run racc on sample/calc3.racc, then feed "1 + 2 + 3" to
# sample/calc3.tab.rb.
task :raccdemo => :isolate do
  $rex_option.update :stub => false, :do_parse => false

  # Delete and re-enable the file task so it is rebuilt with the options
  # set above even if it already ran during this rake invocation.
  calc_lexer = "sample/calc3.rex.rb"
  rm_f calc_lexer
  Rake.application[calc_lexer].tap(&:reenable).invoke

  ruby "-S racc sample/calc3.racc"

  sh "echo 1 + 2 + 3 | #{Gem.ruby} -Isample sample/calc3.tab.rb"
end
67
+
68
# Remove every .rb file found directly under sample/.
task :clean do
  sample_ruby_files = Dir["sample/*.rb"]
  rm sample_ruby_files
end
71
+
72
+ # vim: syntax=ruby
@@ -0,0 +1,16 @@
1
+ # -*- ruby -*-
2
+
3
+ $: << "lib"
4
+ require "oedipus_lex"
5
+
6
+ $rex_option = {}
7
+
8
# Rake rule: build "<name>.rex.rb" from "<name>.rex" (the target name with
# its trailing ".rb" removed) using OedipusLex and the global $rex_option
# hash.
rule ".rex.rb" => proc { |path| path.sub(/\.rb$/, "") } do |t|
  warn "Generating #{t.name} from #{t.source}"
  rex = OedipusLex.new $rex_option
  rex.parse_file t.source

  # Render before opening the target: opening with "w" truncates the file,
  # so a failure inside generate would otherwise leave an empty output file
  # behind that rake considers up to date.
  generated = rex.generate

  File.open t.name, "w" do |f|
    f.write generated
  end
end
@@ -0,0 +1,274 @@
1
+ require "stringio"
2
+ require 'strscan'
3
+ require "erb"
4
+ require "oedipus_lex.rex"
5
+
6
##
# Generator half of Oedipus Lex. It collects the pieces of a .rex lexer
# specification through the lex_* callbacks and renders them with TEMPLATE
# into the Ruby source of a StringScanner-based lexer class.
#
# The parsing half (parse, parse_file, do_parse, filename, ...) is not
# defined in this class body. It comes from the bootstrapped lexer loaded by
# <tt>require "oedipus_lex.rex"</tt>, whose do_parse sends every token to
# the matching lex_<type> method below.

class OedipusLex
  VERSION = "2.0.0"

  # Name of the lexer class to generate (set by lex_class).
  attr_accessor :class_name
  # Lines emitted before the generated class.
  attr_accessor :header
  # Lines emitted after the generated class.
  attr_accessor :ends
  # Lines emitted inside the generated class, after next_token.
  attr_accessor :inners
  # Array of [name, value] pairs emitted as constants.
  attr_accessor :macros
  # Option hash; see DEFAULTS.
  attr_accessor :option
  # Array of [start_state, regexp, action] triples.
  attr_accessor :rules
  # Lines emitted at the top of the generated next_token.
  attr_accessor :starts

  ##
  # Default options:
  # :debug::    emit <tt>p [state, token]</tt> in next_token.
  # :do_parse:: emit a do_parse method in the generated class.
  # :stub::     append a <tt>__FILE__ == $0</tt> driver to the output.

  DEFAULTS = {
    :debug    => false,
    :do_parse => true,
    :stub     => false,
  }

  ##
  # Create a generator whose options are DEFAULTS overridden by +opts+.

  def initialize opts = {}
    self.option     = DEFAULTS.merge opts
    self.class_name = nil

    self.header = []
    self.ends   = []
    self.inners = []
    self.macros = []
    self.rules  = []
    self.starts = []
  end

  ##
  # Record the text preceding the class line (+prefix+, split into
  # header lines) and the lexer class +name+.

  def lex_class prefix, name
    header.concat prefix.split(/\n/)
    self.class_name = name
  end

  ##
  # Comments in the spec are ignored.

  def lex_comment line
    # do nothing
  end

  ##
  # Record a footer +line+.

  def lex_end line
    ends << line
  end

  ##
  # Record a +line+ to be copied into the generated class body.

  def lex_inner line
    inners << line
  end

  ##
  # Record a (stripped) +line+ to be run at the start of next_token.

  def lex_start line
    starts << line.strip
  end

  ##
  # Record a macro +name+ and its +value+.

  def lex_macro name, value
    macros << [name, value]
  end

  ##
  # Turn on +option+. The spec matches option names case-insensitively
  # (/stub/i, /debug/i) while the template looks up lowercase keys, so
  # the name is downcased before it is stored.

  def lex_option option
    self.option[option.downcase.to_sym] = true
  end

  ##
  # Record a rule: optional +start_state+, the +regexp+ source, and an
  # optional +action+.

  def lex_rule start_state, regexp, action = nil
    rules << [start_state, regexp, action]
  end

  ##
  # NOTE(review): looks like a placeholder -- it raises with the
  # inspected arguments whenever it is called; confirm whether anything
  # still dispatches a :rule2 token.

  def lex_rule2(*vals)
    raise vals.inspect
  end

  ##
  # State tokens need no handling here.

  def lex_state new_state
    # do nothing -- lexer switches state for us
  end

  ##
  # Render TEMPLATE with everything collected so far and return the
  # generated Ruby source as a String.

  def generate
    states = rules.map(&:first).compact.uniq
    exclusives, inclusives = states.partition { |s| s =~ /^:[A-Z]/ }

    # NOTE: doubling up assignment to remove unused var warnings in
    # ERB binding.

    all_states =
      all_states = [[nil,                        # non-state # eg [[nil,
                     *inclusives],               # incls     #      :a, :b],
                    *exclusives.map { |s| [s] }] # [excls]   #     [:A], [:B]]

    # Positional safe_level/trim_mode arguments to ERB.new are deprecated
    # in newer Rubies; use the keyword form wherever ERB accepts it and
    # keep the positional call for older versions.
    erb = if ERB.instance_method(:initialize).parameters.include? [:key, :trim_mode] then
            ERB.new TEMPLATE, :trim_mode => "%"
          else
            ERB.new TEMPLATE, nil, "%"
          end

    erb.result binding
  end

  ##
  # ERB template ("%" trim mode: lines starting with % are Ruby) for the
  # generated lexer. Text lines are indented six spaces, which the gsub
  # strips again.

  TEMPLATE = <<-'REX'.gsub(/^ {6}/, '\1')
      #--
      # This file is automatically generated. Do not modify it.
      # Generated by: oedipus_lex version <%= VERSION %>.
% if filename then
      # Source: <%= filename %>
% end
      #++

% unless header.empty? then
%   header.each do |s|
      <%= s %>
%   end

% end
      class <%= class_name %>
        require 'strscan'

% unless macros.empty? then
%   max = macros.map { |(k,_)| k.size }.max
%   macros.each do |(k,v)|
        <%= "%-#{max}s = %s" % [k, v] %>
%   end

% end
        class ScanError < StandardError ; end

        attr_accessor :lineno
        attr_accessor :filename
        attr_accessor :ss
        attr_accessor :state

        alias :match :ss

        def matches
          m = (1..9).map { |i| ss[i] }
          m.pop until m[-1] or m.empty?
          m
        end

        def action
          yield
        end

% if option[:do_parse] then
        def do_parse
          while token = next_token do
            type, *vals = token

            send "lex_#{type}", *vals
          end
        end

% end
        def scanner_class
          StringScanner
        end unless instance_methods(false).map(&:to_s).include?("scanner_class")

        def parse str
          self.ss     = scanner_class.new str
          self.lineno = 1
          self.state  ||= nil

          do_parse
        end

        def parse_file path
          self.filename = path
          File.open path do |f|
            parse f.read
          end
        end

        def next_token
% starts.each do |s|
          <%= s %>
% end
          self.lineno += 1 if ss.peek(1) == "\n"

          token = nil

          until ss.eos? or token do
            token =
              case state
% all_states.each do |the_states|
%   exclusive = the_states.first != nil
%   group, predicates = the_states.partition { |s| s.nil? or s.start_with? ":" }
              when <%= group.map { |s| s || "nil" }.join ", " %> then
                case
%   group.each do |state|
%     rules.each do |rule|
%       start_state, rule_expr, rule_action = *rule
%       if start_state == state or (state.nil? and predicates.include? start_state) then
%         if start_state and not exclusive then
%           if start_state =~ /^:/ then
                when (state == <%= start_state %>) && (text = ss.scan(<%= rule_expr %>)) then
%           else
                when <%= start_state %> && (text = ss.scan(<%= rule_expr %>)) then
%           end
%         else
                when text = ss.scan(<%= rule_expr %>) then
%         end
%         if rule_action then
%           case rule_action
%           when /^\{/ then
                  action <%= rule_action %>
%           when /^:/, "nil" then
                  [:state, <%= rule_action %>]
%           else
                  <%= rule_action %> text
%           end
%         else
                  # do nothing
%         end
%       end # start_state == state
%     end # rules.each
%   end # group.each
                else
                  text = ss.string[ss.pos .. -1]
                  raise ScanError, "can not match (#{state.inspect}): '#{text}'"
                end
% end # all_states
              else
                raise ScanError, "undefined state: '#{state}'"
              end # token = case state

            next unless token # allow functions to trigger redo w/ nil
          end # while

          raise "bad lexical result: #{token.inspect}" unless
            token.nil? || (Array === token && token.size >= 2)

          # auto-switch state
          self.state = token.last if token && token.first == :state

% if option[:debug] then
          p [state, token]
% end
          token
        end # def next_token
% inners.each do |s|
        <%= s %>
% end
      end # class
% unless ends.empty? then

%   ends.each do |s|
      <%= s %>
%   end
% end
% if option[:stub] then

      if __FILE__ == $0
        ARGV.each do |path|
          rex = <%= class_name %>.new

          def rex.do_parse
            while token = self.next_token
              p token
            end
          end

          begin
            rex.parse_file path
          rescue
            $stderr.printf "%s:%d:%s\n", rex.filename, rex.lineno, $!.message
            exit 1
          end
        end
      end
% end
  REX
end
266
+
267
# Command-line entry point: for every path given on the command line, parse
# it as a .rex specification and print the generated lexer source.
if __FILE__ == $0
  ARGV.each do |rex_path|
    generator = OedipusLex.new
    generator.parse_file rex_path

    puts generator.generate
  end
end
@@ -0,0 +1,51 @@
1
+ # [Header Part]
2
+ # "class" Foo
3
+ # ["option"
4
+ # [options] ]
5
+ # ["inner"
6
+ # [methods] ]
7
+ # ["macro"
8
+ # [macro-name /pattern/[flags]] ]
9
+ # "rule"
10
+ # [:state | method_name] /pattern/[flags] [{ code } | method_name | :state]
11
+ # "end"
12
+ # [Footer Part]
13
+
14
+ class OedipusLex
15
+
16
+ macro
17
+ ST /(?:(:\S+|\w+\??))/
18
+ RE /(\/(?:\\.|[^\/])+\/[ion]?)/
19
+ ACT /(\{.*|:?\w+)/
20
+
21
+ rule
22
+ # [state] /pattern/[flags] [actions]
23
+ # nil state applies to all states, so we use this to switch lexing modes
24
+
25
+ /options?.*/ :option
26
+ /inner.*/ :inner
27
+ /macros?.*/ :macro
28
+ /rules?.*/ :rule
29
+ /start.*/ :start
30
+ /end/ :END
31
+
32
+ /\A((?:.|\n)*)class ([\w:]+.*)/ { [:class, *matches] }
33
+
34
+ /\n+/ # do nothing
35
+ /\s*(\#.*)/ { [:comment, text] }
36
+
37
+ :option /\s+/ # do nothing
38
+ :option /stub/i { [:option, text] }
39
+ :option /debug/i { [:option, text] }
40
+
41
+ :inner /.*/ { [:inner, text] }
42
+
43
+ :start /.*/ { [:start, text] }
44
+
45
+ :macro /\s+(\w+)\s+#{RE}/o { [:macro, *matches] }
46
+
47
+ :rule /\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o { [:rule, *matches] }
48
+
49
+ :END /\n+/ # do nothing
50
+ :END /.*/ { [:end, text] }
51
+ end
@@ -0,0 +1,144 @@
1
+ #--
2
+ # This file is automatically generated. Do not modify it.
3
+ # Generated by: oedipus_lex version 2.0.0.
4
+ # Source: lib/oedipus_lex.rex
5
+ #++
6
+
7
+ # [Header Part]
8
+ # "class" Foo
9
+ # ["option"
10
+ # [options] ]
11
+ # ["inner"
12
+ # [methods] ]
13
+ # ["macro"
14
+ # [macro-name /pattern/[flags]] ]
15
+ # "rule"
16
+ # [:state | method_name] /pattern/[flags] [{ code } | method_name | :state]
17
+ # "end"
18
+ # [Footer Part]
19
+
20
# Bootstrapped lexer for .rex specification files. It tokenizes the spec
# with a StringScanner and, through do_parse, sends each token to a
# lex_<type> method that is expected to be defined elsewhere on this class.
class OedipusLex
  require 'strscan'

  ST  = /(?:(:\S+|\w+\??))/
  RE  = /(\/(?:\\.|[^\/])+\/[ion]?)/
  ACT = /(\{.*|:?\w+)/

  class ScanError < StandardError ; end

  attr_accessor :lineno
  attr_accessor :filename
  attr_accessor :ss
  attr_accessor :state

  alias :match :ss

  # Capture groups 1..9 of the last scan, with trailing nils removed.
  def matches
    m = (1..9).map { |i| ss[i] }
    m.pop until m[-1] or m.empty?
    m
  end

  # Run a rule's action block and return its value.
  def action
    yield
  end

  # Pull tokens until next_token returns nil, dispatching each one to
  # the lex_<type> method named by its first element.
  def do_parse
    while token = next_token do
      type, *vals = token

      send "lex_#{type}", *vals
    end
  end

  # Scanner class used by parse; only defined when the class does not
  # already provide its own scanner_class.
  def scanner_class
    StringScanner
  end unless instance_methods(false).map(&:to_s).include?("scanner_class")

  # Lex +str+: set up the scanner and line counter, keep any existing
  # state, and run do_parse.
  def parse str
    self.ss     = scanner_class.new str
    self.lineno = 1
    self.state  ||= nil

    do_parse
  end

  # Lex the file at +path+. File.open is used instead of Kernel#open so
  # that +path+ is always treated as a file name and never as a "|command"
  # pipe.
  def parse_file path
    self.filename = path
    File.open path do |f|
      parse f.read
    end
  end

  # Return the next token (an Array of at least two elements) or nil at
  # the end of input. Tokens of the form [:state, x] also switch the
  # lexer state to x. Raises ScanError when no rule matches or the
  # current state is unknown.
  def next_token
    self.lineno += 1 if ss.peek(1) == "\n"

    token = nil

    until ss.eos? or token do
      token =
        case state
        when nil, :option, :inner, :start, :macro, :rule then
          case
          when text = ss.scan(/options?.*/) then
            [:state, :option]
          when text = ss.scan(/inner.*/) then
            [:state, :inner]
          when text = ss.scan(/macros?.*/) then
            [:state, :macro]
          when text = ss.scan(/rules?.*/) then
            [:state, :rule]
          when text = ss.scan(/start.*/) then
            [:state, :start]
          when text = ss.scan(/end/) then
            [:state, :END]
          when text = ss.scan(/\A((?:.|\n)*)class ([\w:]+.*)/) then
            action { [:class, *matches] }
          when text = ss.scan(/\n+/) then
            # do nothing
          when text = ss.scan(/\s*(\#.*)/) then
            action { [:comment, text] }
          when (state == :option) && (text = ss.scan(/\s+/)) then
            # do nothing
          when (state == :option) && (text = ss.scan(/stub/i)) then
            action { [:option, text] }
          when (state == :option) && (text = ss.scan(/debug/i)) then
            action { [:option, text] }
          when (state == :inner) && (text = ss.scan(/.*/)) then
            action { [:inner, text] }
          when (state == :start) && (text = ss.scan(/.*/)) then
            action { [:start, text] }
          when (state == :macro) && (text = ss.scan(/\s+(\w+)\s+#{RE}/o)) then
            action { [:macro, *matches] }
          when (state == :rule) && (text = ss.scan(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
            action { [:rule, *matches] }
          else
            text = ss.string[ss.pos .. -1]
            raise ScanError, "can not match (#{state.inspect}): '#{text}'"
          end
        when :END then
          case
          when text = ss.scan(/\n+/) then
            # do nothing
          when text = ss.scan(/.*/) then
            action { [:end, text] }
          else
            text = ss.string[ss.pos .. -1]
            raise ScanError, "can not match (#{state.inspect}): '#{text}'"
          end
        else
          raise ScanError, "undefined state: '#{state}'"
        end # token = case state

      next unless token # allow functions to trigger redo w/ nil
    end # while

    raise "bad lexical result: #{token.inspect}" unless
      token.nil? || (Array === token && token.size >= 2)

    # auto-switch state
    self.state = token.last if token && token.first == :state

    token
  end # def next_token
end # class