oedipus_lex 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,72 @@
1
+ # -*- ruby -*-
2
+
3
+ require "rubygems"
4
+ require "hoe"
5
+
6
# Activate the Hoe plugins used by this project, in the original order.
[:debugging, :git, :isolate, :seattlerb].each do |plugin_name|
  Hoe.plugin plugin_name
end

# Gem specification for oedipus_lex.
Hoe.spec "oedipus_lex" do
  developer "Ryan Davis", "ryand-ruby@zenspider.com"
  license "MIT"

  # This project keeps its docs in RDoc files.
  self.readme_file  = "README.rdoc"
  self.history_file = "History.rdoc"
end
18
+
19
# Regenerate lib/oedipus_lex.rex.rb from lib/oedipus_lex.rex using the code
# currently in lib/, show what changed, install the new file, then re-run
# the default rake task against it.
task :bootstrap do
  generated = "lib/oedipus_lex.rex.rb"
  candidate = "#{generated}.new"

  ruby "-Ilib lib/oedipus_lex.rb lib/oedipus_lex.rex > #{candidate}"

  # Plain +system+: the result of diff is not checked here.
  system "diff -uw #{generated} #{candidate}"

  sh "mv #{candidate} #{generated}"
  ruby "-S rake"
end
25
+
26
# Load lib/oedipus_lex.rake (it defines $rex_option and the .rex.rb rule),
# then ask for the command-line stub in every lexer generated from here.
$LOAD_PATH << "lib"
Rake.application.rake_require "oedipus_lex"
$rex_option.store :stub, true
29
+
30
# Every sample/*.rex must have its generated sample/*.rex.rb before the demo.
task :demo => Dir["sample/*.rex"].map { |rex_path| "#{rex_path}.rb" }.sort

# Run each generated sample lexer against its sample input.
task :demo => :isolate do
  Dir.chdir "sample" do
    [
      "sample.rex.rb sample.html",
      "sample.rex.rb sample.xhtml",
      "sample1.rex.rb sample1.c",
      "sample2.rex.rb sample2.bas",
      "xhtmlparser.rex.rb xhtmlparser.html",
      "xhtmlparser.rex.rb xhtmlparser.xhtml",
    ].each do |args|
      ruby args
    end

    # The error samples go through +system+, whose result is ignored here.
    # NOTE(review): both runs read error1.txt -- confirm that is intended.
    ["error1.rex.rb error1.txt", "error2.rex.rb error1.txt"].each do |args|
      cmd = "#{Gem.ruby} #{args}"
      warn cmd
      system cmd
    end
  end
end
53
+
54
# Rebuild the calc3 sample lexer without the stub and without do_parse,
# build the racc parser next to it, and feed it a small expression.
task :raccdemo => :isolate do
  $rex_option.update :stub => false, :do_parse => false

  lexer_path = "sample/calc3.rex.rb"

  # Force regeneration with the options set above.
  rm_f lexer_path
  lexer_task = Rake.application[lexer_path]
  lexer_task.reenable
  lexer_task.invoke

  ruby "-S racc sample/calc3.racc"

  sh "echo 1 + 2 + 3 | #{Gem.ruby} -Isample sample/calc3.tab.rb"
end
67
+
68
# Remove every Ruby file directly under sample/.
task(:clean) { rm Dir["sample/*.rb"] }
71
+
72
+ # vim: syntax=ruby
@@ -0,0 +1,16 @@
1
+ # -*- ruby -*-
2
+
3
+ $: << "lib"
4
+ require "oedipus_lex"
5
+
6
# Options passed to every OedipusLex built by the rule below. Rakefiles that
# load this file may add entries before the rule fires.
$rex_option = {}

# foo.rex.rb is generated from foo.rex (the target name minus its ".rb").
rule ".rex.rb" => proc { |target| target.sub(/\.rb$/, "") } do |t|
  warn "Generating #{t.name} from #{t.source}"

  lexer_spec = OedipusLex.new $rex_option
  lexer_spec.parse_file t.source

  # The target is opened before generating, as in the original ordering.
  File.open(t.name, "w") { |out| out.write lexer_spec.generate }
end
@@ -0,0 +1,274 @@
1
+ require "stringio"
2
+ require 'strscan'
3
+ require "erb"
4
+ require "oedipus_lex.rex"
5
+
6
##
# Generator half of OedipusLex. It collects the parts of a lexer
# specification (class name, header, macros, rules, ...) through the lex_*
# callbacks and renders them into the Ruby source of a StringScanner-based
# lexer via TEMPLATE.
#
# The callbacks are named after the token types produced by the lexer in
# oedipus_lex.rex: its do_parse sends "lex_#{type}" for every token.

class OedipusLex
  VERSION = "2.0.0"

  # Name of the lexer class to generate; set by lex_class.
  attr_accessor :class_name
  # Lines written verbatim before the generated class.
  attr_accessor :header
  # Lines written verbatim after the generated class.
  attr_accessor :ends
  # Lines written verbatim inside the generated class, after next_token.
  attr_accessor :inners
  # [name, value] pairs written as constants at the top of the class.
  attr_accessor :macros
  # Generation options; see DEFAULTS.
  attr_accessor :option
  # [start_state, regexp, action] triples in declaration order.
  attr_accessor :rules
  # Lines written at the top of the generated next_token.
  attr_accessor :starts

  # :debug    -- generated next_token prints [state, token] before returning
  # :do_parse -- generate the default do_parse method
  # :stub     -- append an "if __FILE__ == $0" command-line driver
  DEFAULTS = {
    :debug    => false,
    :do_parse => true,
    :stub     => false,
  }

  # Creates an empty specification. +opts+ is merged over DEFAULTS.
  def initialize opts = {}
    self.option     = DEFAULTS.merge opts
    self.class_name = nil

    self.header = []
    self.ends   = []
    self.inners = []
    self.macros = []
    self.rules  = []
    self.starts = []
  end

  # Records the text preceding the class line (one header entry per line)
  # and the name of the class to generate.
  def lex_class prefix, name
    header.concat prefix.split(/\n/)
    self.class_name = name
  end

  # Comments in the specification are dropped.
  def lex_comment line
    # do nothing
  end

  # Records one line of the footer.
  def lex_end line
    ends << line
  end

  # Records one line of the "inner" section.
  def lex_inner line
    inners << line
  end

  # Records one line of the "start" section, stripped of surrounding
  # whitespace (the template supplies the indentation).
  def lex_start line
    starts << line.strip
  end

  # Records a macro definition.
  def lex_macro name, value
    macros << [name, value]
  end

  # Turns on the named option. The grammar matches option names
  # case-insensitively (/stub/i, /debug/i) while the template reads the
  # lowercase keys, so the name is normalized before it is stored.
  def lex_option option
    self.option[option.downcase.to_sym] = true
  end

  # Records a rule. +start_state+ and +action+ may be nil.
  def lex_rule start_state, regexp, action = nil
    rules << [start_state, regexp, action]
  end

  # Not supported: raises a RuntimeError describing the arguments.
  def lex_rule2(*vals)
    raise vals.inspect
  end

  # State tokens need no bookkeeping here.
  def lex_state new_state
    # do nothing -- lexer switches state for us
  end

  # Renders TEMPLATE with the collected specification and returns the
  # generated Ruby source as a String.
  #
  # States whose name starts with an uppercase letter (":FOO") are exclusive
  # and get their own "when" group; all other states share the group of the
  # nil state.
  def generate
    states = rules.map(&:first).compact.uniq
    exclusives, inclusives = states.partition { |s| s =~ /^:[A-Z]/ }

    # NOTE: doubling up assignment to remove unused var warnings in
    # ERB binding.

    all_states =
      all_states = [[nil, *inclusives],            # eg [[nil, :a, :b],
                    *exclusives.map { |s| [s] }]   #     [:A], [:B]]

    # Prefer the trim_mode keyword wherever ERB offers it; the positional
    # form is deprecated there. Older ERBs only know the positional form.
    keyword_api = ERB.instance_method(:initialize).parameters.
      include?([:key, :trim_mode])

    template = if keyword_api then
                 ERB.new(TEMPLATE, trim_mode: "%")
               else
                 ERB.new(TEMPLATE, nil, "%")
               end

    template.result binding
  end

  # ERB source of the generated lexer. Lines starting with "%" are Ruby code
  # (trim mode "%"); the six-space indent used for readability here is
  # stripped so that those "%" end up in column 0.
  TEMPLATE = <<-'REX'.gsub(/^ {6}/, "")
      #--
      # This file is automatically generated. Do not modify it.
      # Generated by: oedipus_lex version <%= VERSION %>.
      % if filename then
      # Source: <%= filename %>
      % end
      #++

      % unless header.empty? then
      %   header.each do |s|
      <%= s %>
      %   end

      % end
      class <%= class_name %>
        require 'strscan'

      % unless macros.empty? then
      %   max = macros.map { |(k,_)| k.size }.max
      %   macros.each do |(k,v)|
        <%= "%-#{max}s = %s" % [k, v] %>
      %   end

      % end
        # Raised when no rule matches the input or the lexer is in an unknown state.
        class ScanError < StandardError ; end

        # Line counter; next_token bumps it when called with a newline next in the input.
        attr_accessor :lineno
        # Path most recently handed to parse_file.
        attr_accessor :filename
        # Scanner over the text being lexed (built by parse).
        attr_accessor :ss
        # Current lexer state; nil is the default state.
        attr_accessor :state

        alias :match :ss

        # Capture groups 1-9 of the last match, minus trailing nils.
        def matches
          m = (1..9).map { |i| ss[i] }
          m.pop until m[-1] or m.empty?
          m
        end

        # Evaluates the block of a { ... } rule action and returns its value.
        def action
          yield
        end

      % if option[:do_parse] then
        # Reads tokens until next_token returns nil, sending each one to the
        # matching lex_TYPE method.
        def do_parse
          while token = next_token do
            type, *vals = token

            send "lex_#{type}", *vals
          end
        end

      % end
        # Scanner used by parse; skipped when the class already defines its own.
        def scanner_class
          StringScanner
        end unless instance_methods(false).map(&:to_s).include?("scanner_class")

        # Lexes +str+, starting at line 1, and returns the result of do_parse.
        def parse str
          self.ss     = scanner_class.new str
          self.lineno = 1
          self.state  ||= nil

          do_parse
        end

        # Reads the file at +path+ and lexes its contents. File.open is used so
        # that +path+ is only ever treated as a file name.
        def parse_file path
          self.filename = path
          File.open path do |f|
            parse f.read
          end
        end

        # Returns the next token, or nil once the input is exhausted. A
        # [:state, new_state] token also switches the lexer to new_state.
        def next_token
      % starts.each do |s|
          <%= s %>
      % end
          self.lineno += 1 if ss.peek(1) == "\n"

          token = nil

          until ss.eos? or token do
            token =
              case state
      % all_states.each do |the_states|
      %   exclusive = !the_states.first.nil?
      %   state_names, predicates = the_states.partition { |s| s.nil? or s.start_with? ":" }
              when <%= state_names.map { |s| s || "nil" }.join ", " %> then
                case
      %   state_names.each do |state|
      %     rules.each do |rule|
      %       start_state, rule_expr, rule_action = *rule
      %       if start_state == state or (state.nil? and predicates.include? start_state) then
      %         if start_state and not exclusive then
      %           if start_state =~ /^:/ then
                when (state == <%= start_state %>) && (text = ss.scan(<%= rule_expr %>)) then
      %           else
                when <%= start_state %> && (text = ss.scan(<%= rule_expr %>)) then
      %           end
      %         else
                when text = ss.scan(<%= rule_expr %>) then
      %         end
      %         if rule_action then
      %           case rule_action
      %           when /^\{/ then
                  action <%= rule_action %>
      %           when /^:/, "nil" then
                  [:state, <%= rule_action %>]
      %           else
                  <%= rule_action %> text
      %           end
      %         else
                  # do nothing
      %         end
      %       end # start_state == state
      %     end # rules.each
      %   end # state_names.each
                else
                  text = ss.string[ss.pos .. -1]
                  raise ScanError, "can not match (#{state.inspect}): '#{text}'"
                end
      % end # all_states.each
              else
                raise ScanError, "undefined state: '#{state}'"
              end # token = case state

            next unless token # allow functions to trigger redo w/ nil
          end # while

          raise "bad lexical result: #{token.inspect}" unless
            token.nil? || (Array === token && token.size >= 2)

          # auto-switch state
          self.state = token.last if token && token.first == :state

      % if option[:debug] then
          p [state, token]
      % end
          token
        end # def _next_token
      % inners.each do |s|
      <%= s %>
      % end
      end # class
      % unless ends.empty? then

      %   ends.each do |s|
      <%= s %>
      %   end
      % end
      % if option[:stub] then

      if __FILE__ == $0
        ARGV.each do |path|
          rex = <%= class_name %>.new

          def rex.do_parse
            while token = self.next_token
              p token
            end
          end

          begin
            rex.parse_file path
          rescue
            $stderr.printf "%s:%d:%s\n", rex.filename, rex.lineno, $!.message
            exit 1
          end
        end
      end
      % end
  REX
end
266
+
267
# Command-line entry point: for each path given, read it as a lexer
# specification and print the generated lexer source to stdout.
if __FILE__ == $0 then
  ARGV.each do |spec_path|
    generator = OedipusLex.new
    generator.parse_file spec_path

    puts generator.generate
  end
end
@@ -0,0 +1,51 @@
1
+ # [Header Part]
2
+ # "class" Foo
3
+ # ["option"
4
+ # [options] ]
5
+ # ["inner"
6
+ # [methods] ]
7
+ # ["macro"
8
+ # [macro-name /pattern/[flags]] ]
9
+ # "rule"
10
+ # [:state | method_name] /pattern/[flags] [{ code } | method_name | :state]
11
+ # "end"
12
+ # [Footer Part]
13
+
14
class OedipusLex

macro
  # Each macro becomes a constant in the generated lexer; the :macro and
  # :rule patterns below interpolate them.
  # A macro line must start with whitespace (see the :macro rule).
  ST  /(?:(:\S+|\w+\??))/
  RE  /(\/(?:\\.|[^\/])+\/[ion]?)/
  ACT /(\{.*|:?\w+)/

rule
# [state]       /pattern/[flags]                [actions]
# nil state applies to all states, so we use this to switch lexing modes

# Section keywords: a bare :state action switches the lexer to that state.
                /options?.*/                    :option
                /inner.*/                       :inner
                /macros?.*/                     :macro
                /rules?.*/                      :rule
                /start.*/                       :start
                /end/                           :END

# Everything before the lexer name is captured as the header (group 1).
                /\A((?:.|\n)*)class ([\w:]+.*)/ { [:class, *matches] }

                /\n+/                           # do nothing
                /\s*(\#.*)/                     { [:comment, text] }

# Lowercase states are inclusive: the nil-state rules above are tried first.
  :option       /\s+/                           # do nothing
  :option       /stub/i                         { [:option, text] }
  :option       /debug/i                        { [:option, text] }

  :inner        /.*/                            { [:inner, text] }

  :start        /.*/                            { [:start, text] }

  :macro        /\s+(\w+)\s+#{RE}/o             { [:macro, *matches] }

  :rule         /\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o { [:rule, *matches] }

# Uppercase states are exclusive: only their own rules apply, so everything
# after the keyword handled by the /end/ rule is passed through as footer.
  :END          /\n+/                           # do nothing
  :END          /.*/                            { [:end, text] }
end
@@ -0,0 +1,144 @@
1
+ #--
2
+ # This file is automatically generated. Do not modify it.
3
+ # Generated by: oedipus_lex version 2.0.0.
4
+ # Source: lib/oedipus_lex.rex
5
+ #++
6
+
7
+ # [Header Part]
8
+ # "class" Foo
9
+ # ["option"
10
+ # [options] ]
11
+ # ["inner"
12
+ # [methods] ]
13
+ # ["macro"
14
+ # [macro-name /pattern/[flags]] ]
15
+ # "rule"
16
+ # [:state | method_name] /pattern/[flags] [{ code } | method_name | :state]
17
+ # "end"
18
+ # [Footer Part]
19
+
20
class OedipusLex
  require 'strscan'

  ST  = /(?:(:\S+|\w+\??))/
  RE  = /(\/(?:\\.|[^\/])+\/[ion]?)/
  ACT = /(\{.*|:?\w+)/

  # Raised when no rule matches the input or the lexer is in an unknown state.
  class ScanError < StandardError ; end

  # Line counter; next_token bumps it when called with a newline next in the input.
  attr_accessor :lineno
  # Path most recently handed to parse_file.
  attr_accessor :filename
  # Scanner over the text being lexed (built by parse).
  attr_accessor :ss
  # Current lexer state; nil is the default state.
  attr_accessor :state

  alias :match :ss

  # Capture groups 1-9 of the last match, minus trailing nils.
  def matches
    m = (1..9).map { |i| ss[i] }
    m.pop until m[-1] or m.empty?
    m
  end

  # Evaluates the block of a { ... } rule action and returns its value.
  def action
    yield
  end

  # Reads tokens until next_token returns nil, sending each one to the
  # matching lex_TYPE method.
  def do_parse
    while token = next_token do
      type, *vals = token

      send "lex_#{type}", *vals
    end
  end

  # Scanner used by parse; skipped when the class already defines its own.
  def scanner_class
    StringScanner
  end unless instance_methods(false).map(&:to_s).include?("scanner_class")

  # Lexes +str+, starting at line 1, and returns the result of do_parse.
  def parse str
    self.ss     = scanner_class.new str
    self.lineno = 1
    self.state  ||= nil

    do_parse
  end

  # Reads the file at +path+ and lexes its contents. File.open is used so
  # that +path+ is only ever treated as a file name.
  def parse_file path
    self.filename = path
    File.open path do |f|
      parse f.read
    end
  end

  # Returns the next token, or nil once the input is exhausted. A
  # [:state, new_state] token also switches the lexer to new_state.
  def next_token
    self.lineno += 1 if ss.peek(1) == "\n"

    token = nil

    until ss.eos? or token do
      token =
        case state
        when nil, :option, :inner, :start, :macro, :rule then
          case
          when text = ss.scan(/options?.*/) then
            [:state, :option]
          when text = ss.scan(/inner.*/) then
            [:state, :inner]
          when text = ss.scan(/macros?.*/) then
            [:state, :macro]
          when text = ss.scan(/rules?.*/) then
            [:state, :rule]
          when text = ss.scan(/start.*/) then
            [:state, :start]
          when text = ss.scan(/end/) then
            [:state, :END]
          when text = ss.scan(/\A((?:.|\n)*)class ([\w:]+.*)/) then
            action { [:class, *matches] }
          when text = ss.scan(/\n+/) then
            # do nothing
          when text = ss.scan(/\s*(\#.*)/) then
            action { [:comment, text] }
          when (state == :option) && (text = ss.scan(/\s+/)) then
            # do nothing
          when (state == :option) && (text = ss.scan(/stub/i)) then
            action { [:option, text] }
          when (state == :option) && (text = ss.scan(/debug/i)) then
            action { [:option, text] }
          when (state == :inner) && (text = ss.scan(/.*/)) then
            action { [:inner, text] }
          when (state == :start) && (text = ss.scan(/.*/)) then
            action { [:start, text] }
          when (state == :macro) && (text = ss.scan(/\s+(\w+)\s+#{RE}/o)) then
            action { [:macro, *matches] }
          when (state == :rule) && (text = ss.scan(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
            action { [:rule, *matches] }
          else
            text = ss.string[ss.pos .. -1]
            raise ScanError, "can not match (#{state.inspect}): '#{text}'"
          end
        when :END then
          case
          when text = ss.scan(/\n+/) then
            # do nothing
          when text = ss.scan(/.*/) then
            action { [:end, text] }
          else
            text = ss.string[ss.pos .. -1]
            raise ScanError, "can not match (#{state.inspect}): '#{text}'"
          end
        else
          raise ScanError, "undefined state: '#{state}'"
        end # token = case state

      next unless token # allow functions to trigger redo w/ nil
    end # while

    raise "bad lexical result: #{token.inspect}" unless
      token.nil? || (Array === token && token.size >= 2)

    # auto-switch state
    self.state = token.last if token && token.first == :state

    token
  end # def _next_token
end # class