genmachine 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.1
data/bin/genmachine ADDED
@@ -0,0 +1,66 @@
1
#!/usr/bin/env ruby
# Command-line front end for GenMachine.
#
# Parses the options below, fills in defaults derived from the first state
# table file name, picks the generator registered for the requested language
# and asks it to generate the class, the executable and to run the test.
require 'optparse'
require 'genmachine'

BANNER = "Usage: genmachine [options] STATE_TABLE_FILES"

generators = GenMachine.generators
languages  = generators.keys

options = {}
parser = OptionParser.new do |o|
  o.version = '0.0.1'
  o.banner  = BANNER
  o.on('-c', '--classname NAME',
       "Class/module/function name for generated library code " +
       "(default STATE_TABLE_FILE)") do |v|
    options[:classname] = v
  end
  o.on('-l', '--language LANGUAGE',
       "Language to generate code for- currently one of [#{languages.join(',')}] " +
       "(default #{languages.first})") do |v|
    # GenMachine.generators is keyed by each generator's GENMACHINE_TARGET,
    # which is a String; converting to a Symbol here made every explicit
    # --language value fail the membership check below.
    options[:language] = v.to_underscored
  end
  o.on('-t', '--test-with FILE',
       "Try parsing the specified file after generating the parser " +
       "(default STATE_TABLE_FILE.gmtest if it exists)") do |v|
    options[:test_file] = v
  end
  o.on('-e', '--[no-]executable',
       "Generate an executable parser (default true)") do |v|
    options[:executable] = v
  end
  o.on('-o', '--output-dir DIR',
       "Output directory for generated file(s) (default ./)") do |v|
    options[:output_dir] = v
  end
end

begin
  files = parser.parse(ARGV)
rescue OptionParser::ParseError => e
  # Report bad flags the same way as a missing file instead of a backtrace.
  $stderr.puts e.message
  $stderr.puts BANNER
  exit 1
end

if files.size < 1
  $stderr.puts BANNER
  exit 1
end

file_base = files[0].chomp(File.extname(files[0]))
name_base = File.basename(file_base)

# `||= true` would turn an explicit --no-executable (false) back into true.
options[:executable] = true if options[:executable].nil?
options[:language]   ||= languages.first
options[:classname]  ||= name_base.capitalize + 'Parser'
options[:output_dir] ||= './'

# The help text promises the default test file is only used "if it exists".
default_test = files[0] + '.gmtest'
options[:test_file] ||= default_test if File.exist?(default_test)

options[:class_fname] = options[:classname].to_underscored + '.rb'
options[:exe_fname]   = name_base.to_underscored

unless languages.include? options[:language]
  $stderr.puts "I don't know how to generate a parser in '#{options[:language]}' - try one of [#{languages.join(',')}]"
  exit 2
end

#spec_parser = GenMachine::SpecParser.new(files)
#options[:spec_ast] = spec_parser.build
options[:spec_ast] = []
gen = generators[options[:language]].new(options)
gen.generate_class
gen.generate_executable
gen.run_test
@@ -0,0 +1,68 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{genmachine}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Joseph Wecker"]
12
+ s.date = %q{2011-08-11}
13
+ s.default_executable = %q{genmachine}
14
+ s.description = %q{Takes a state table where the following are defined: state, input+conditions, accumulate-action, pre-transition-actions, and transition-to. It takes that state table and generates very fast parsers. Similar to Ragel. Currently only outputs pure Ruby.}
15
+ s.email = %q{joseph.wecker@gmail.com}
16
+ s.executables = ["genmachine"]
17
+ s.extra_rdoc_files = [
18
+ "LICENSE.txt",
19
+ "README.rdoc"
20
+ ]
21
+ s.files = [
22
+ ".document",
23
+ "Gemfile",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "bin/genmachine",
29
+ "genmachine.gemspec",
30
+ "lib/genmachine.rb",
31
+ "lib/genmachine/char_set.rb",
32
+ "lib/genmachine/generator.rb",
33
+ "lib/genmachine/generators/helpers/general.rb",
34
+ "lib/genmachine/generators/helpers/ruby.rb",
35
+ "lib/genmachine/generators/ruby.rb",
36
+ "lib/genmachine/generators/templates/ruby/lib.erb.rb",
37
+ "lib/genmachine/spec_parser.rb",
38
+ "test/helper.rb",
39
+ "test/test_genmachine.rb"
40
+ ]
41
+ s.homepage = %q{http://github.com/josephwecker/genmachine}
42
+ s.licenses = ["MIT"]
43
+ s.require_paths = ["lib"]
44
+ s.rubygems_version = %q{1.6.2}
45
+ s.summary = %q{Generates parsers based on a fancy state-table}
46
+
47
+ if s.respond_to? :specification_version then
48
+ s.specification_version = 3
49
+
50
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
51
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
52
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
53
+ s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
54
+ s.add_development_dependency(%q<rcov>, [">= 0"])
55
+ else
56
+ s.add_dependency(%q<shoulda>, [">= 0"])
57
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
58
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
59
+ s.add_dependency(%q<rcov>, [">= 0"])
60
+ end
61
+ else
62
+ s.add_dependency(%q<shoulda>, [">= 0"])
63
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
64
+ s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
65
+ s.add_dependency(%q<rcov>, [">= 0"])
66
+ end
67
+ end
68
+
@@ -0,0 +1,59 @@
1
module GenMachine
  # A set of characters stored as sorted, merged, inclusive intervals of
  # integer codepoints, plus a flag meaning "any character".
  #
  # +kind+ is carried through untouched into the conditional description
  # returned by #for_conditional (callers in this gem use :include and
  # :exclude).
  class CharSet
    attr_accessor :kind

    def initialize(kind = :include)
      @kind = kind
      @include_intervals = []
      @include_any = false
    end

    def inspect
      for_conditional.inspect
    end

    # Hash-style access to the conditional description (:kind or :ranges).
    def [](k)
      for_conditional[k]
    end

    # Returns {:kind => kind, :ranges => [...]}. Each range is either a single
    # codepoint (when the interval has equal endpoints) or a [from, to] pair;
    # when :any has been added the ranges are just [:any].
    def for_conditional
      return { :kind => @kind, :ranges => [:any] } if @include_any

      ranges = @include_intervals.map { |lo, hi| lo == hi ? lo : [lo, hi] }
      { :kind => @kind, :ranges => ranges }
    end

    # Same as #<<.
    def +(val)
      self << val
    end

    # Adds +val+ to the set and returns the receiver so calls can be chained.
    #
    # Accepted values:
    # * a String containing "x-y"  -> the range from x to y
    # * a Range                    -> its first..last
    # * :any                       -> match any character
    # * anything else              -> a single character / codepoint
    def <<(val)
      if val.is_a?(String) && val =~ /([^-])-([^-])/
        include_range($1, $2)
      elsif val.is_a?(Range)
        include_range(val.first, val.last)
      elsif val == :any
        @include_any = true
      else
        include_char(val)
      end
      self
    end

    def include_char(char)
      include_range(char, char)
    end

    # Adds the inclusive range from..to (Strings are reduced to the codepoint
    # of their first character) and re-merges overlapping or adjacent
    # intervals.
    def include_range(from, to)
      @include_intervals << [codepoint(from), codepoint(to)].sort
      @include_intervals = merge_intervals(@include_intervals)
    end

    private

    # First codepoint of a String; non-String values pass through unchanged.
    # (The original called String#utf8_chars, which is not defined anywhere,
    # so every String added to a set raised NoMethodError.)
    def codepoint(value)
      return value unless value.is_a?(String)

      cp = value.unpack('U*').first
      raise ArgumentError, "cannot add an empty string to a CharSet" if cp.nil?
      cp
    end

    # Sorts the intervals and collapses any that overlap or touch
    # (next.from <= current.to + 1).
    def merge_intervals(intervals)
      merged = []
      intervals.sort.each do |lo, hi|
        if merged.empty? || lo > merged.last[1] + 1
          merged << [lo, hi]
        else
          merged.last[1] = [merged.last[1], hi].max
        end
      end
      merged
    end
  end
end
@@ -0,0 +1,38 @@
1
module GenMachine
  module Generators
    # Shared constructor and generation steps for language generators.
    # Meant to be mixed into a generator class (RubyGenerator includes it).
    # The generate_* steps currently only print what they would do; the file
    # writing code is still commented out.
    module Generator
      # opts keys read here:
      #   :spec_ast    - table specification data (required)
      #   :executable  - whether generate_executable does anything (default false)
      #   :classname   - default 'MiscParser'
      #   :test_file   - run_test is a no-op when nil
      #   :output_dir  - default './'
      #   :class_fname - default: underscored classname
      #   :exe_fname   - default: underscored classname minus a trailing "parser"
      #
      # Raises ArgumentError when :spec_ast is missing.
      #
      # The options hash is only read. The previous implementation used
      # Hash#delete, which silently emptied the caller's hash.
      def initialize(opts={})
        opts ||= {}
        @spec_ast = opts[:spec_ast]
        raise ArgumentError, "Must include the table specification data (:spec_ast)" if @spec_ast.nil?

        @gen_executable = opts[:executable]  || false
        @classname      = opts[:classname]   || 'MiscParser'
        @test_file      = opts[:test_file]
        @output_dir     = opts[:output_dir]  || './'
        @class_fname    = opts[:class_fname] || @classname.to_underscored
        @exe_fname      = opts[:exe_fname]   || @classname.sub(/parser$/i,'').to_underscored
      end

      def generate_class
        puts "Generate class (#{@classname}): #{@class_fname}"
        #f = File.new(@class_fname, 'w+')
        #f.write(@libraries[language].result(binding))
        #f.close
      end

      # Does nothing unless the generator was built with :executable truthy.
      def generate_executable
        return unless @gen_executable
        puts "Generate executable: #{@exe_fname}"
        #f = File.new(@exe_fname, 'w+')
        #f.write(@executables[language].result(binding))
        #f.chmod(0755)
        #f.close
      end

      # Does nothing when no test file was configured.
      def run_test
        return if @test_file.nil?
        puts "Run test"
      end
    end
  end
end
@@ -0,0 +1,62 @@
1
class String
  # Converts a CamelCase / namespaced / punctuated name to lower snake_case:
  # "::" becomes "/", word boundaries get "_", every character outside
  # [a-zA-Z0-9_] becomes "_", and the result is downcased.
  def to_underscored
    gsub(/::/, '/').
      gsub(/([A-Z]+)([A-Z][a-z])/, '\1_\2').
      gsub(/([a-z\d])([A-Z])/, '\1_\2').
      gsub(/[^a-zA-Z\d_]/, '_').
      downcase
  end

  # The string's UTF-8 codepoints as an Array of Integers.
  def to_utf8_char_array
    unpack("U*")
  end

  # GenMachine::CharSet#include_range calls this name; without the alias
  # that call raises NoMethodError.
  alias_method :utf8_chars, :to_utf8_char_array
end
14
+
15
module GenMachine
  module Helpers
    # Language-independent queries over a function's state table.
    #
    # +states+ is iterated as (name, clauses) pairs; each clause is indexed
    # with :exprs (list of expression strings), :acc (accumulator phrase,
    # may be nil) and :cond (list of condition strings).
    module General
      # True when any expression appends the node variable `s` to something
      # ("x<<s", "x << s", "x << <s"). Whitespace and the optional "<" are
      # tolerated because the accumulator grammar in #accumulators accepts
      # them too.
      def accumulates?(states)
        states.any? do |_name, clauses|
          clauses.any? do |clause|
            clause[:exprs].any? { |expr| expr =~ /<<\s*<?s($|[^a-zA-Z0-9_])/ }
          end
        end
      end

      # Names of every variable used on either side of an accumulate
      # statement ("a << b", "a <<", "<< b"), excluding `p` and `s`.
      # A clause without an accumulator phrase (nil :acc) is simply skipped.
      def accumulators(states)
        found = {}
        states.each do |_name, clauses|
          clauses.each do |clause|
            phrases = clause[:exprs] + [clause[:acc]]
            phrases.compact.each do |phrase|
              next unless phrase =~ /^([a-zA-Z_][a-zA-Z0-9_]*)?\s*<<\s*(<?)([a-zA-Z_][a-zA-Z0-9_]*)?$/
              [$1, $3].each do |var|
                next if var.nil? || var == '' || var == 'p' || var == 's'
                found[var] = true
              end
            end
          end
        end
        found.keys
      end

      def makes_calls?(states)
        # TODO: implement
        false
      end

      # True when one of the states is named '{eof}'.
      def eof_state?(states)
        states.any? { |name, _clauses| name == '{eof}' }
      end

      # True when any clause lists the condition 'eof'.
      def eof_clause?(clauses)
        clauses.any? { |clause| clause[:cond].include?('eof') }
      end
    end
  end
end
@@ -0,0 +1,121 @@
1
module GenMachine
  module Helpers
    # Helpers that turn state-table clauses into fragments of Ruby source
    # for the generated parser. #rb_conditional additionally relies on
    # eof_state? / eof_clause? being available on the including object.
    module Ruby
      # Ruby condition text for one clause: the clause's own conditions
      # OR-ed together, AND-ed with the character-set test when both exist.
      # With no conditions the character-set alternatives are joined with
      # ',' (as used after `when`).
      #
      # Only the user-written conditions go through rb_vars. The generated
      # character test may contain the literal `c!=:eof`, which rb_vars
      # would rewrite into a comparison against the string ':eof'.
      def rb_conditional(clause,states,clauses)
        has_eof_state = eof_state?(states) || eof_clause?(clauses)
        if clause[:cond].size > 0
          out = rb_vars('(' + clause[:cond].join(' || ') + ')')
          out += "&& (#{rb_charset_cond(clause[:input],has_eof_state)})" unless clause[:input].nil?
          out
        else
          rb_charset_cond(clause[:input],has_eof_state,',')
        end
      end

      # Ruby expression testing the current character `c` against +input+
      # (anything answering [:kind] and [:ranges]); 'true' when input is nil.
      # Ranges are [from, to] pairs or single values (codepoint or :any).
      # For :exclude sets the alternatives are always joined with '||' and
      # the whole expression is negated. Previously no alternatives were
      # generated at all for :exclude, which produced `!()`.
      def rb_charset_cond(input,has_eof_state,sep='||')
        return 'true' if input.nil?
        excluded = input[:kind] == :exclude
        sep = '||' if excluded
        outs = []
        input[:ranges].each do |range|
          if range.is_a? Array
            outs << "nl?" if (range[0] <= 0x0a) && (range[1] >= 0x0a)
            outs << "space?" if (range[0] <= 0x20) && (range[1] >= 0x20)
            outs << "(c>#{range[0]-1}&&c<#{range[1]+1})"
          else
            outs << case range
                    when 0x0a then 'nl?'
                    when 0x20 then 'space?'
                    when :any then has_eof_state ? 'true' : 'c!=:eof'
                    else "c==#{range}"
                    end
          end
        end
        joined = outs.join(sep)
        excluded ? '!(' + joined + ')' : joined
      end

      # All statements for a clause, in order (accumulator, expressions,
      # transition), joined with '; '.
      def rb_commands(clause,currstate)
        cmds = rb_simple_acc_commands(clause[:acc])
        clause[:exprs].each do |expr|
          if expr.include? '<<'
            cmds += rb_acc_commands(expr)
          else
            cmds << rb_vars(expr.strip)
          end
        end
        cmds += rb_transition_commands(clause[:next],currstate)
        cmds.join('; ')
      end

      # Statements for the ';'-separated transition column +st+:
      #   name[rename](args) -> call another group, then `next`
      #   <done>             -> return(s)
      #   <expr>             -> return(expr)
      #   :state             -> set state (omitted when already there), then `next`
      #   anything else      -> emitted verbatim
      # A nil column yields no statements.
      def rb_transition_commands(st,currstate)
        out = []
        add_next = false
        st.to_s.strip.split(';').map { |part| part.strip }.each do |s|
          case
          when s =~ /^([^\(\[]+)(?:\[([^\]]*)\])?\(([^\)]*)\)$/ # Call another group
            funname = $1
            rename  = $2
            params  = $3.split(',').map { |param| rb_vars(param) }
            params << 's'
            params << "'#{rename}'" unless (rename.nil? or rename.strip=='')
            out << "state=#{funname}(#{params.join(',')})"
            add_next = true
          when s =~ /^<done>$/
            out << "return(s)"
            add_next = false
          when s =~ /^<([^>]+)>$/
            out << "return(#{rb_vars($1)})"
            add_next = false
          when s =~ /^(:[a-zA-Z0-9_:-]+)$/
            out << "state='#{$1}'" unless currstate == $1
            add_next = true
          else
            out << s
          end
        end
        out << 'next' if add_next
        out
      end

      # Rewrites table syntax into Ruby: `$name` becomes `@name` and every
      # `:token` is wrapped in single quotes.
      def rb_vars(str)
        str.tr('$','@').gsub(/(:[a-zA-Z0-9_:-]+)/, '\'\1\'')
      end

      # Statements for the accumulator column: nil/'' means "re-read this
      # character" (@fwd=true), a bare '<<' means nothing, and 'name <<'
      # appends the current character to name. Anything else raises.
      def rb_simple_acc_commands(acc_phrase)
        return ['@fwd=true'] if acc_phrase.nil? or acc_phrase == ''
        phrase = acc_phrase.strip
        return [] if phrase == '<<'
        if phrase =~ /^([a-zA-Z_][a-zA-Z0-9_]*)\s*<<\s*$/
          return ["#{rb_vars(phrase)}c"]
        end
        raise("Can't figure out your accumulator statement: #{acc_phrase}")
      end

      # Statements for an accumulate expression 'into << value' or
      # 'into << <value'; the second form only appends when value is
      # non-empty and then clears value. nil/'' and a bare '<<' behave as in
      # rb_simple_acc_commands. Anything else raises.
      def rb_acc_commands(acc_phrase)
        return ['@fwd=true'] if acc_phrase.nil? or acc_phrase == ''
        phrase = acc_phrase.strip
        return [] if phrase == '<<'
        unless phrase =~ /^([a-zA-Z_][a-zA-Z0-9_]*)\s*<<\s*(<?)([a-zA-Z_][a-zA-Z0-9_]*)$/
          raise("Can't figure out your accumulator statement: #{acc_phrase}")
        end
        into, clear_it, value = $1, ($2 == '<'), $3
        into  = rb_vars(into)
        value = rb_vars(value)
        if clear_it
          ["(#{into}<<#{value} if #{value}.size>0)", "#{value}=''"]
        else
          ["#{into}<<#{value}"]
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
module GenMachine
  module Generators
    # Generator for the 'ruby' target. All behaviour comes from the
    # Generator mixin; this class only supplies the target name.
    class RubyGenerator
      include Generator

      # Target language name. GenMachine.generators uses this constant as
      # the registry key for the class, so it is frozen to keep it stable.
      GENMACHINE_TARGET = 'ruby'.freeze
    end
  end
end
@@ -0,0 +1,175 @@
1
+ require 'strscan'
2
+ $KCODE="U"
3
+
4
+ module <%= classname %>
5
+ def self.parse(str) Parser.new(str).parse end
6
+ def self.parse_file(fname) Parser.new(IO.read(fname)).parse end
7
+
8
+ class Node
9
+ attr_accessor :name, :children, :start_line, :start_pos, :end_line, :end_pos
10
+ def initialize(name='node',line=:unknown,pos=:unknown)
11
+ @name = name
12
+ @children = []
13
+ @start_line = line
14
+ @start_pos = pos
15
+ @end_line = :unknown
16
+ @end_pos = :unknown
17
+ end
18
+ def <<(val) @children<<val end
19
+ end
20
+
21
+ class Parser < StringScanner
22
+ def init(source, opts={})
23
+ opts ||= {}
24
+ super ensure_encoding(source)
25
+ @global = {}
26
+ end
27
+
28
+ def ensure_encoding(source)
29
+ if defined?(::Encoding)
30
+ if source.encoding == ::Encoding::ASCII_8BIT
31
+ b = source[0, 4].bytes.to_a
32
+ source =
33
+ case
34
+ when b.size>=4 && b[0]==0 && b[1]==0 && b[2]==0
35
+ source.dup.force_encoding(::Encoding::UTF_32BE).encode!(::Encoding::UTF_8)
36
+ when b.size>=4 && b[0]==0 && b[2]==0
37
+ source.dup.force_encoding(::Encoding::UTF_16BE).encode!(::Encoding::UTF_8)
38
+ when b.size>=4 && b[1]==0 && b[2]==0 && b[3]==0
39
+ source.dup.force_encoding(::Encoding::UTF_32LE).encode!(::Encoding::UTF_8)
40
+ when b.size>=4 && b[1]==0 && b[3]==0
41
+ source.dup.force_encoding(::Encoding::UTF_16LE).encode!(::Encoding::UTF_8)
42
+ else source.dup end
43
+ else source = source.encode(::Encoding::UTF_8) end
44
+ source.force_encoding(::Encoding::ASCII_8BIT)
45
+ else
46
+ b = source
47
+ source =
48
+ case
49
+ when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0; JSON.iconv('utf-8', 'utf-32be', b)
50
+ when b.size >= 4 && b[0] == 0 && b[2] == 0; JSON.iconv('utf-8', 'utf-16be', b)
51
+ when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0; JSON.iconv('utf-8', 'utf-32le', b)
52
+ when b.size >= 4 && b[1] == 0 && b[3] == 0; JSON.iconv('utf-8', 'utf-16le', b)
53
+ else b end
54
+ end
55
+ return source
56
+ end
57
+
58
+ def parse
59
+ reset
60
+ @line = 1
61
+ @pos = 1
62
+ @leading = true
63
+ @indent = 0
64
+ @ast = <%= @table[0][0] %>
65
+ return @ast
66
+ end
67
+
68
+ private
69
+
70
+ def error(msg)
71
+ $stderr.puts "#{msg} | line: #{@line} | char: #{@pos}"
72
+ end
73
+
74
+ def global_state(c)
75
+ # Unicode newline characters & combinations
76
+ # Plus leading space for indents.
77
+ # Also tracks line and position for the AST
78
+ @last_is_newline = @last_is_space = false
79
+ case c
80
+ when 0x0b, 0x0c, 0x85, 0x2028, 0x2029
81
+ @last_is_newline = true; @line += 1; @pos = 1
82
+ @leading = true; @indent = 0
83
+ when 0x0a
84
+ nc = peek(1).unpack('U')[0]
85
+ if nc == 0x0d then getch; c = 0x0a0d end
86
+ @last_is_newline = true; @line += 1; @pos = 1
87
+ @leading = true; @indent = 0
88
+ when 0x0d
89
+ nc = peek(1).unpack('U')[0]
90
+ if nc == 0x0a then getch; c = 0x0d0a end
91
+ @last_is_newline = true; @line += 1; @pos = 1
92
+ @leading = true; @indent = 0
93
+ when 0x20
94
+ @indent += 1 if @leading
95
+ @last_is_space = true; @pos += 1
96
+ else @leading = false; @pos += 1 end
97
+ return @last_c = c
98
+ end
99
+
100
+ def nl?() return @last_is_newline end
101
+ def space?() return @last_is_space end
102
+
103
+ def nextchar
104
+ if @fwd then @fwd = false; return @last_c
105
+ else
106
+ c = getch
107
+ if c.nil?
108
+ c = :eof
109
+ @last_is_space = @last_is_newline = false
110
+ return @last_c = c
111
+ end
112
+ return global_state(c.unpack('U')[0])
113
+ end
114
+ end
115
+
116
+ def eof?() return @last_c == :eof end
117
+
118
+ <%- @table.each do |name, args, cmds, first_state, states| -%>
119
+ <%- args << "p=nil" -%>
120
+ <%- args << "name='#{name}'" -%>
121
+ def <%= name %>(<%= args.join(',') %>)
122
+ <%- cmds.each do |c| -%>
123
+ <%= rb_vars(c) %>
124
+ <%- end -%>
125
+ state='<%= first_state %>'
126
+ <%- if states.size > 1 or accumulates?(states) or makes_calls?(states) -%>
127
+ s = Node.new(name,@line,@pos)
128
+ <%- end -%>
129
+ <%- accumulators(states).each do |_acc| -%>
130
+ <%= _acc %> ||= ''
131
+ <%- end -%>
132
+ loop do
133
+ c = nextchar
134
+ <%- has_fallthru = false -%>
135
+ <%- if eof_state?(states) -%>
136
+ state = '{eof}' if c==:eof
137
+ <%- end -%>
138
+ case state
139
+ <%- states.each do |st_name, clauses| -%>
140
+ when '<%= st_name %>'
141
+ <%- if clauses.size > 1 -%>
142
+ case
143
+ <%- clauses.each_with_index do |clause,i| -%>
144
+ <%- cond = rb_conditional(clause,states,clauses) -%>
145
+ <%- if cond == 'true' -%>
146
+ else <%= rb_commands(clause,st_name) %>
147
+ <%- break -%>
148
+ <%- else -%>
149
+ when <%= cond %>; <%= rb_commands(clause,st_name) %>
150
+ <%- has_fallthru = true if i == clauses.size-1 -%>
151
+ <%- end -%>
152
+ <%- end -%>
153
+ end
154
+ <%- else -%>
155
+ <%- cond = rb_conditional(clauses[0],states,clauses) -%>
156
+ <%- if cond == 'true' -%>
157
+ <%= rb_commands(clauses[0],st_name) %>
158
+ <%- else -%>
159
+ if <%= cond %>
160
+ <%= rb_commands(clauses[0],st_name) %>
161
+ end
162
+ <%- end -%>
163
+ <%- end -%>
164
+ <%- end -%>
165
+ end
166
+ <%- if has_fallthru -%>
167
+ error("Unexpected #{c}")
168
+ @fwd = true
169
+ return
170
+ <%- end -%>
171
+ end
172
+ end
173
+ <%- end -%>
174
+ end
175
+ end
@@ -0,0 +1,130 @@
1
require 'erb'

module GenMachine
  # This is a quick and dirty parser used for bootstrapping. Which means
  # it'll eventually be replaced when the real parser is written as a
  # genmachine table.
  #
  # Rows start with '|' (new row, columns separated by '| ') or ':'
  # (continuation of the previous row, columns separated by ': '). The
  # columns are: name, input, accumulator, expressions, next. A '|' row
  # whose first column contains '(' starts a new function.
  #
  # Relies on String#to_underscored and GenMachine::CharSet being loaded.
  class SpecParser
    ESCAPES = {'\t' => "\t", '\n' => "\n",
               '\r' => "\r", '\f' => "\f",
               '\b' => "\b", '\a' => "\a",
               '\e' => "\e", '\s' => " ",
               '\[' => '[',  '\]' => ']'}.freeze

    # files: list of state-table file paths, read by #build.
    def initialize(files)
      @table       = []
      @files       = files
      @libraries   = {}
      @executables = {}
      load_templates
    end

    # Parses every file and returns the accumulated table: one
    # [name, args, cmds, first_state, states] entry per function, where
    # states is the Hash produced by #process_states.
    def build
      @files.each { |fname| parse_file(fname) }
      @table
    end

    # consolidate same-name states and (eventually) combine / optimize where
    # appropriate.
    #
    # Returns a Hash of state name => list of clause hashes (without :name).
    # The input hashes are left untouched.
    def process_states(instates)
      outstates = {}
      instates.each do |inst|
        clause = inst.dup
        name = clause.delete(:name)
        (outstates[name] ||= []) << clause
      end
      outstates
    end

    # Parses an input column into [conditions, inputs].
    #
    # Recognised pieces, tried in this order until nothing is left:
    # runs of dashes (ignored), {condition}, [^excluded chars],
    # [included chars] and a leading '.' (any character). +inputs+ is an
    # existing CharSet to extend (used for continuation rows). Text that
    # matches none of the patterns ends parsing and is ignored.
    def parse_input(val,inputs=nil)
      conds = []
      rest = val.to_s.dup
      until rest.strip.empty?
        if (m = /--+/um.match(rest))
          # separator dashes carry no information
        elsif (m = /\s*\{([^\}]+)\}\s*/um.match(rest))
          conds << m[1]
        elsif (m = /\s*\[\^([^\]]+)\]\s*/um.match(rest))
          inputs ||= CharSet.new(:exclude)
          parse_combine_ranges(m[1], inputs)
        elsif (m = /\s*\[([^\]]+)\]\s*/um.match(rest))
          inputs ||= CharSet.new(:include)
          parse_combine_ranges(m[1], inputs)
        elsif (m = /^\s*\./um.match(rest))
          inputs ||= CharSet.new(:include)
          inputs << :any
        else
          break # nothing recognisable left; avoid spinning on it
        end
        rest = m.pre_match + m.post_match
      end
      return conds, inputs
    end

    # Feeds the contents of a [...] character class into +input+: escape
    # sequences from ESCAPES are expanded, then every "x-y" triple is added
    # as a range and every other character individually, wherever they
    # appear in the class.
    def parse_combine_ranges(raw, input)
      expanded = raw.gsub(/\\[tnrfbaes\[\]]/) { |m| ESCAPES[m] }
      expanded.scan(/.-.|./um) { |token| input << token }
      input
    end

    private

    # Compiles whichever templates exist under generators/templates/<lang>/
    # next to this file, keyed by the directory name.
    def load_templates
      root = File.join(File.expand_path(File.dirname(__FILE__)), 'generators', 'templates')
      Dir[File.join(root, '*')].sort.each do |dir|
        next unless File.directory?(dir)
        lang = File.basename(dir)
        library = ['library.erb.rb', 'lib.erb.rb'].map { |f| File.join(dir, f) }.find { |p| File.file?(p) }
        executable = File.join(dir, 'executable.erb')
        @libraries[lang]   = compile_template(File.read(library)) if library
        @executables[lang] = compile_template(File.read(executable)) if File.file?(executable)
      end
    end

    # ERB with '-' trim mode, using the keyword form where ERB offers it and
    # the positional form otherwise.
    def compile_template(source)
      if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
        ERB.new(source, :trim_mode => '-')
      else
        ERB.new(source, nil, '-')
      end
    end

    # Parses one file and appends its functions to @table. Parser state is
    # per file, so a function is flushed exactly once.
    def parse_file(fname)
      current   = nil   # function currently being assembled
      in_header = false # true while ':' rows still extend the function header
      File.foreach(fname).with_index(1) do |raw_line, line_no|
        line = raw_line.strip
        det  = line[0..0]
        next unless det == '|' or det == ':'
        cols = split_columns(line, det)
        if det == '|' && cols[0].to_s.include?('(')
          flush(current)
          current   = start_function(cols)
          in_header = true
          # TODO: error if cols[1] or cols[2] have anything
        elsif det == ':' && in_header
          extend_function(current, cols)
        elsif det == '|'
          if current.nil?
            raise ArgumentError, "#{fname}:#{line_no}: state row appears before any function header"
          end
          in_header = false
          current[:states] << new_state(cols)
        elsif !current.nil? && current[:states].size > 0
          extend_state(current[:states][-1], cols)
        end
      end
      flush(current)
    end

    # Splits a row on its delimiter (followed by a space or end of line),
    # drops the empty piece before the first delimiter and strips the rest.
    def split_columns(line, det)
      delimiter = (det == '|' ? '\|' : det) + '(?: |$)'
      line.split(/#{delimiter}/, -1).drop(1).map { |c| c.strip }
    end

    def start_function(cols)
      name, *rest = cols[0].split('(')
      {:name        => name.to_s.to_underscored,
       :args        => rest.join('(').sub(/\)$/,'').split(','),
       :cmds        => cols[3].to_s.split(';'),
       :first_state => cols[4],
       :states      => []}
    end

    def extend_function(fn, cols)
      fn[:args]       += cols[0].to_s.sub(/\)$/,'').split(',')
      fn[:cmds]       += cols[3].to_s.split(';')
      fn[:first_state] = fn[:first_state].to_s + cols[4].to_s
    end

    def new_state(cols)
      conditionals, inputs = parse_input(cols[1])
      {:name  => cols[0],
       :input => inputs,
       :cond  => conditionals,
       :acc   => cols[2],
       :exprs => cols[3].to_s.split(';'),
       :next  => cols[4]}
    end

    def extend_state(state, cols)
      conditionals, inputs = parse_input(cols[1], state[:input])
      state[:name]   = state[:name].to_s + cols[0].to_s
      state[:input]  = inputs
      state[:cond]  += conditionals
      state[:acc]    = state[:acc].to_s + cols[2].to_s
      state[:exprs] += cols[3].to_s.split(';')
      state[:next]   = state[:next].to_s + cols[4].to_s
    end

    def flush(fn)
      return if fn.nil?
      @table << [fn[:name], fn[:args], fn[:cmds], fn[:first_state], process_states(fn[:states])]
    end
  end
end
data/lib/genmachine.rb CHANGED
@@ -1,8 +1,22 @@
1
# char_set is required explicitly: SpecParser#parse_input builds CharSet
# objects, and nothing else loads that file.
require 'genmachine/char_set'
require 'genmachine/spec_parser'
require 'genmachine/generator'
require 'genmachine/generators/helpers/general'

# Load every generator shipped in genmachine/generators. Sorted so the load
# order does not depend on the platform's directory listing order.
Dir[File.join(File.dirname(__FILE__), 'genmachine', 'generators', '*.rb')].sort.each do |fname|
  require "genmachine/generators/#{File.basename(fname)}"
end

module GenMachine
  class << self
    # Registry of available generators: a Hash mapping each generator's
    # GENMACHINE_TARGET value to the class (or module) under
    # GenMachine::Generators that defines it. Constants that are not
    # modules, or that do not define GENMACHINE_TARGET, are skipped.
    def generators
      Generators.constants.each_with_object({}) do |const, langs|
        klass = Generators.const_get(const)
        next unless klass.is_a?(Module) && klass.const_defined?('GENMACHINE_TARGET')
        langs[klass::GENMACHINE_TARGET] = klass
      end
    end
  end
end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: genmachine
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 29
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 0
10
- version: 0.0.0
9
+ - 1
10
+ version: 0.0.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Joseph Wecker
@@ -15,8 +15,8 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-08-10 00:00:00 -07:00
19
- default_executable:
18
+ date: 2011-08-11 00:00:00 -07:00
19
+ default_executable: genmachine
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
22
22
  requirement: &id001 !ruby/object:Gem::Requirement
@@ -80,8 +80,8 @@ dependencies:
80
80
  type: :development
81
81
  description: "Takes a state table where the following are defined: state, input+conditions, accumulate-action, pre-transition-actions, and transition-to. It takes that state table and generates very fast parsers. Similar to Ragel. Currently only outputs pure Ruby."
82
82
  email: joseph.wecker@gmail.com
83
- executables: []
84
-
83
+ executables:
84
+ - genmachine
85
85
  extensions: []
86
86
 
87
87
  extra_rdoc_files:
@@ -94,7 +94,16 @@ files:
94
94
  - README.rdoc
95
95
  - Rakefile
96
96
  - VERSION
97
+ - bin/genmachine
98
+ - genmachine.gemspec
97
99
  - lib/genmachine.rb
100
+ - lib/genmachine/char_set.rb
101
+ - lib/genmachine/generator.rb
102
+ - lib/genmachine/generators/helpers/general.rb
103
+ - lib/genmachine/generators/helpers/ruby.rb
104
+ - lib/genmachine/generators/ruby.rb
105
+ - lib/genmachine/generators/templates/ruby/lib.erb.rb
106
+ - lib/genmachine/spec_parser.rb
98
107
  - test/helper.rb
99
108
  - test/test_genmachine.rb
100
109
  has_rdoc: true