genmachine 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/VERSION +1 -1
- data/bin/genmachine +66 -0
- data/genmachine.gemspec +68 -0
- data/lib/genmachine/char_set.rb +59 -0
- data/lib/genmachine/generator.rb +38 -0
- data/lib/genmachine/generators/helpers/general.rb +62 -0
- data/lib/genmachine/generators/helpers/ruby.rb +121 -0
- data/lib/genmachine/generators/ruby.rb +8 -0
- data/lib/genmachine/generators/templates/ruby/lib.erb.rb +175 -0
- data/lib/genmachine/spec_parser.rb +130 -0
- data/lib/genmachine.rb +18 -4
- metadata +16 -7
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.1
|
data/bin/genmachine
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line entry point for genmachine.
#
# Parses the command-line options, selects the generator registered for the
# requested target language and asks it to emit the parser class, the
# executable wrapper, and to run the optional test.
#
# Exit codes: 1 when no state-table file is given, 2 when the requested
# language has no registered generator.
require 'optparse'
require 'genmachine'

BANNER = "Usage: genmachine [options] STATE_TABLE_FILES"

generators = GenMachine.generators
languages = generators.keys

options = {}
option_parser = OptionParser.new do |o|
  o.version = '0.0.1'
  o.banner = BANNER
  o.on('-c', '--classname NAME',
       "Class/module/function name for generated library code "+
       "(default STATE_TABLE_FILE)") do |v|
    options[:classname] = v
  end
  o.on('-l', '--language LANGUAGE',
       "Language to generate code for- currently one of [#{languages.join(',')}] " +
       "(default #{languages.first})") do |v|
    options[:language] = v.to_underscored.to_sym
  end
  o.on('-t', '--test-with FILE',
       "Try parsing the specified file after generating the parser "+
       "(default STATE_TABLE_FILE.gmtest if it exists)") do |v|
    options[:test_file] = v
  end
  o.on('-e', '--[no-]executable',
       "Generate an executable parser (default true)") do |v|
    options[:executable] = v
  end
  o.on('-o', '--output-dir DIR',
       "Output directory for generated file(s) (default ./)") do |v|
    options[:output_dir] = v
  end
end

files = option_parser.parse(ARGV)
if files.empty?
  $stderr.puts BANNER
  exit 1
end

file_base = files[0].chomp(File.extname(files[0]))
name_base = File.basename(file_base)

# `--no-executable` stores false, which `||=` would silently turn back into
# true; only apply the default when the flag was not given at all.
options[:executable] = true unless options.key?(:executable)
options[:language] ||= languages.first
options[:classname] ||= name_base.capitalize + 'Parser'

# The help text promises the .gmtest default only "if it exists".
default_test_file = files[0] + '.gmtest'
options[:test_file] ||= default_test_file if File.exist?(default_test_file)

options[:output_dir] ||= './'
options[:class_fname] = options[:classname].to_underscored + '.rb'
options[:exe_fname] = name_base.to_underscored

unless languages.include? options[:language]
  $stderr.puts "I don't know how to generate a parser in '#{options[:language]}' - try one of [#{languages.join(',')}]"
  exit 2
end

# The spec parser is not wired in yet; an empty AST is passed instead.
#spec_parser = GenMachine::SpecParser.new(files)
#options[:spec_ast] = spec_parser.build
options[:spec_ast] = []
gen = generators[options[:language]].new(options)
gen.generate_class
gen.generate_executable
gen.run_test
|
data/genmachine.gemspec
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{genmachine}
|
8
|
+
s.version = "0.0.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Joseph Wecker"]
|
12
|
+
s.date = %q{2011-08-11}
|
13
|
+
s.default_executable = %q{genmachine}
|
14
|
+
s.description = %q{Takes a state table where the following are defined: state, input+conditions, accumulate-action, pre-transition-actions, and transition-to. It takes that state table and generates very fast parsers. Similar to Ragel. Currently only outputs pure Ruby.}
|
15
|
+
s.email = %q{joseph.wecker@gmail.com}
|
16
|
+
s.executables = ["genmachine"]
|
17
|
+
s.extra_rdoc_files = [
|
18
|
+
"LICENSE.txt",
|
19
|
+
"README.rdoc"
|
20
|
+
]
|
21
|
+
s.files = [
|
22
|
+
".document",
|
23
|
+
"Gemfile",
|
24
|
+
"LICENSE.txt",
|
25
|
+
"README.rdoc",
|
26
|
+
"Rakefile",
|
27
|
+
"VERSION",
|
28
|
+
"bin/genmachine",
|
29
|
+
"genmachine.gemspec",
|
30
|
+
"lib/genmachine.rb",
|
31
|
+
"lib/genmachine/char_set.rb",
|
32
|
+
"lib/genmachine/generator.rb",
|
33
|
+
"lib/genmachine/generators/helpers/general.rb",
|
34
|
+
"lib/genmachine/generators/helpers/ruby.rb",
|
35
|
+
"lib/genmachine/generators/ruby.rb",
|
36
|
+
"lib/genmachine/generators/templates/ruby/lib.erb.rb",
|
37
|
+
"lib/genmachine/spec_parser.rb",
|
38
|
+
"test/helper.rb",
|
39
|
+
"test/test_genmachine.rb"
|
40
|
+
]
|
41
|
+
s.homepage = %q{http://github.com/josephwecker/genmachine}
|
42
|
+
s.licenses = ["MIT"]
|
43
|
+
s.require_paths = ["lib"]
|
44
|
+
s.rubygems_version = %q{1.6.2}
|
45
|
+
s.summary = %q{Generates parsers based on a fancy state-table}
|
46
|
+
|
47
|
+
if s.respond_to? :specification_version then
|
48
|
+
s.specification_version = 3
|
49
|
+
|
50
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
51
|
+
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
52
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
53
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
54
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
55
|
+
else
|
56
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
57
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
58
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
59
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
60
|
+
end
|
61
|
+
else
|
62
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
63
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
64
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
65
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module GenMachine
  # A set of characters stored as sorted, non-overlapping intervals of
  # integer codepoints, plus an "any character" flag.
  #
  # +kind+ (:include by default) is carried along untouched and reported by
  # #for_conditional; this class does not interpret it.
  class CharSet
    attr_accessor :kind

    # kind:: tag reported back by #for_conditional (defaults to :include).
    def initialize(kind = :include)
      @kind = kind
      @include_intervals = []
      @include_any = false
    end

    # Shows the same hash that #for_conditional returns.
    def inspect
      for_conditional.inspect
    end

    # Hash-style access to the #for_conditional view, e.g. set[:ranges].
    def [](k)
      for_conditional[k]
    end

    # Returns {:kind => kind, :ranges => [...]}.
    #
    # :ranges is [:any] once :any has been added. Otherwise it lists the
    # merged intervals, a one-character interval being collapsed to the bare
    # codepoint and a wider one given as [first, last].
    def for_conditional
      return {:kind=>@kind, :ranges=>[:any]} if @include_any

      ranges = @include_intervals.map { |first, last| first == last ? first : [first, last] }
      {:kind=>@kind, :ranges=>ranges}
    end

    # Alias-like wrapper around #<<.
    def +(val)
      self << val
    end

    # Adds +val+ to the set and returns self so calls can be chained.
    #
    # val may be:
    # * a String containing "x-y" (a range between the two characters),
    # * a Range (its first and last elements are used as bounds),
    # * :any (marks the set as matching anything),
    # * anything else, which is added as a single character.
    def <<(val)
      if val.is_a?(String) && val =~ /([^-])-([^-])/
        include_range($1, $2)
      elsif val.is_a?(Range)
        include_range(val.first, val.last)
      elsif val == :any
        @include_any = true
      else
        include_char(val)
      end
      self
    end

    # Adds a single character (String or Integer codepoint). Returns self.
    def include_char(char)
      include_range(char, char)
    end

    # Adds every codepoint between +from+ and +to+ (inclusive, in either
    # order) and re-merges touching or overlapping intervals. Returns self.
    #
    # String bounds are converted to the codepoint of their first character.
    # The original code called String#utf8_chars, which is not defined
    # anywhere; String#unpack('U*') is used directly instead.
    #
    # Raises ArgumentError when a String bound is empty.
    def include_range(from, to)
      low, high = [codepoint_of(from), codepoint_of(to)].sort
      @include_intervals = merge_intervals(@include_intervals + [[low, high]])
      self
    end

    private

    # Integer codepoint for a bound given as String or Integer.
    def codepoint_of(bound)
      return bound unless bound.is_a?(String)

      code = bound.unpack('U*')[0]
      raise ArgumentError, "cannot add an empty string to a CharSet" if code.nil?
      code
    end

    # Sorts the intervals and fuses those that overlap or are adjacent
    # (next start <= previous end + 1).
    def merge_intervals(intervals)
      merged = []
      intervals.sort.each do |first, last|
        previous = merged.last
        if previous && first <= previous[1] + 1
          previous[1] = last if last > previous[1]
        else
          merged << [first, last]
        end
      end
      merged
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module GenMachine
  module Generators
    # Mixin holding the behaviour shared by the per-language generators:
    # option handling plus the (currently stubbed) generation steps.
    module Generator
      # Consumes the recognised keys from +opts+ (they are deleted from the
      # hash that was passed in) and stores them as instance state.
      #
      # Recognised keys: :spec_ast (required), :executable, :classname,
      # :test_file, :output_dir, :class_fname, :exe_fname.
      #
      # Raises ArgumentError when :spec_ast is missing.
      def initialize(opts={})
        settings = opts || {}

        @spec_ast = settings.delete(:spec_ast)

        executable = settings.delete(:executable)
        @gen_executable = executable ? executable : false

        classname = settings.delete(:classname)
        @classname = classname ? classname : 'MiscParser'

        @test_file = settings.delete(:test_file)

        output_dir = settings.delete(:output_dir)
        @output_dir = output_dir ? output_dir : './'

        # File names fall back to names derived from the class name.
        class_fname = settings.delete(:class_fname)
        @class_fname = class_fname ? class_fname : @classname.to_underscored

        exe_fname = settings.delete(:exe_fname)
        @exe_fname = exe_fname ? exe_fname : @classname.sub(/parser$/i,'').to_underscored

        if @spec_ast.nil?
          raise ArgumentError, "Must include the table specification data (:spec_ast)"
        end
      end

      # Announces the class that would be generated. Writing the file is not
      # implemented yet (the original writing code is commented out).
      def generate_class
        puts "Generate class (#{@classname}): #{@class_fname}"
      end

      # Announces the executable that would be generated, unless executable
      # generation was switched off. Writing the file is not implemented yet.
      def generate_executable
        puts "Generate executable: #{@exe_fname}" if @gen_executable
      end

      # Announces the test run when a test file was configured.
      def run_test
        puts "Run test" unless @test_file.nil?
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Core-class extensions used by the generators.
class String
  # Returns a lower-case, underscore-separated version of the string:
  # "::" and every character other than letters, digits and "_" become "_",
  # and an underscore is inserted at lower/upper and acronym/word boundaries
  # (e.g. "HTTPServer" -> "http_server").
  def to_underscored
    text = gsub(/::/, '/')
    text = text.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
    text = text.gsub(/([a-z\d])([A-Z])/,'\1_\2')
    text = text.gsub(/[^a-zA-Z\d_]/,'_')
    text.downcase
  end

  # Returns the string unpacked with the "U*" directive, i.e. an array of
  # integer UTF-8 codepoints.
  def to_utf8_char_array
    unpack("U*")
  end
end
|
14
|
+
|
15
|
+
module GenMachine
  module Helpers
    # Language-independent queries over a state table.
    #
    # +states+ is iterated as (name, clauses) pairs; each clause is a hash
    # that is read here through the keys :exprs (array of expression
    # strings), :acc (accumulator phrase, may be nil) and :cond (array of
    # condition strings).
    module General
      # True when any expression of any clause contains "<<s" followed by the
      # end of the string or by a non-identifier character.
      def accumulates?(states)
        states.any? do |_name, clauses|
          clauses.any? do |clause|
            clause[:exprs].any? { |expr| expr =~ /<<s($|[^a-zA-Z0-9_])/ }
          end
        end
      end

      # Returns the names of the variables that take part in "a << b" /
      # "a << <b" style phrases, looking at every clause's :exprs and :acc.
      # The names 'p' and 's' are never reported.
      #
      # Phrases are stripped before matching, because expressions obtained
      # by splitting a cell on ';' usually carry surrounding whitespace
      # (rb_acc_commands strips them the same way). A nil :acc is skipped.
      def accumulators(states)
        reserved = ['', 'p', 's']
        found = {}
        states.each do |_name, clauses|
          clauses.each do |clause|
            phrases = clause[:exprs] + [clause[:acc]]
            phrases.each do |phrase|
              next if phrase.nil?
              next unless phrase.strip =~ /^([a-zA-Z_][a-zA-Z0-9_]*)?\s*<<\s*(<?)([a-zA-Z_][a-zA-Z0-9_]*)?$/

              [$1, $3].each do |var|
                found[var] = true unless var.nil? || reserved.include?(var)
              end
            end
          end
        end
        found.keys
      end

      # Whether any state calls another group. Not implemented yet: always
      # false.
      def makes_calls?(states)
        # TODO: implement
        false
      end

      # True when one of the states is named '{eof}'.
      def eof_state?(states)
        states.any? { |name, _clauses| name == '{eof}' }
      end

      # True when one of the clauses lists 'eof' among its conditions.
      def eof_clause?(clauses)
        clauses.any? { |clause| clause[:cond].include?('eof') }
      end
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module GenMachine
  module Helpers
    # Helpers that turn state-table clauses into fragments of Ruby source
    # for the generated parser. In the emitted code +c+ is the current
    # character, +s+ the current node and +state+ the current state name.
    #
    # rb_conditional relies on eof_state? and eof_clause?, which are not
    # defined in this module and must be mixed in alongside it.
    module Ruby
      # Builds the Ruby condition for one clause.
      #
      # With explicit conditions they are OR-ed together and, when the clause
      # also has an input set, AND-ed with the character test. Without
      # conditions only the character test is emitted, its alternatives
      # separated by ',' (as used in a `when` list).
      def rb_conditional(clause,states,clauses)
        has_eof_state = eof_state?(states) || eof_clause?(clauses)
        out = ''
        if clause[:cond].size > 0
          out += '('+clause[:cond].join(' || ')+')'
          out += "&& (#{rb_charset_cond(clause[:input],has_eof_state)})" unless clause[:input].nil?
        else
          out += rb_charset_cond(clause[:input],has_eof_state,',')
        end
        rb_vars(out)
      end

      # Builds the character test for an input set.
      #
      # input:: nil (always 'true') or an object answering [:kind] and
      #         [:ranges]; each range is an Integer codepoint, a
      #         [first, last] pair, or :any.
      # has_eof_state:: when false, :any is emitted as 'c!=:eof'.
      # sep:: separator between alternatives; forced to '||' for :exclude
      #       sets because they are wrapped in '!( ... )'.
      #
      # The original only emitted alternatives for :include sets, so an
      # :exclude set produced '!()' (always true); alternatives are now
      # emitted for both kinds.
      def rb_charset_cond(input,has_eof_state,sep='||')
        return 'true' if input.nil?

        excluded = input[:kind] == :exclude
        sep = '||' if excluded
        outs = []
        input[:ranges].each do |range|
          if range.is_a? Array
            # Intervals covering LF / space also match through the helper
            # predicates of the generated parser.
            outs << "nl?" if (range[0] <= 0x0a) && (range[1] >= 0x0a)
            outs << "space?" if (range[0] <= 0x20) && (range[1] >= 0x20)
            outs << "(c>#{range[0]-1}&&c<#{range[1]+1})"
          else
            outs << case range
                    when 0x0a then 'nl?'
                    when 0x20 then 'space?'
                    when :any then has_eof_state ? 'true' : 'c!=:eof'
                    else "c==#{range}"
                    end
          end
        end
        out = outs.join(sep)
        excluded ? '!('+out+')' : out
      end

      # Joins, with '; ', everything a clause does when it matches: the
      # accumulator action, each expression, then the transition.
      def rb_commands(clause,currstate)
        cmds = []
        cmds += rb_simple_acc_commands(clause[:acc])
        clause[:exprs].each do |expr|
          if expr.include? '<<'
            cmds += rb_acc_commands(expr)
          else
            cmds << rb_vars(expr.strip)
          end
        end
        cmds += rb_transition_commands(clause[:next],currstate)
        cmds.join('; ')
      end

      # Translates the ';'-separated transition cell into Ruby statements.
      #
      # * "fun[rename](args)" calls another group and stores the result in
      #   +state+;
      # * "<done>" returns the current node, "<expr>" returns expr;
      # * ":name" switches state (omitted when it equals +currstate+);
      # * anything else is passed through verbatim.
      #
      # 'next' is appended when the last call or state switch was not
      # followed by a return. A nil cell yields no statements.
      def rb_transition_commands(st,currstate)
        steps = st.to_s.strip.split(';').map{|s|s.strip}
        out = []
        add_next = false
        steps.each do |s|
          case
          when s =~ /^([^\(\[]+)(?:\[([^\]]*)\])?\(([^\)]*)\)$/ # Call another group
            funname = $1
            rename = $2
            params = $3.split(',').map{|p|rb_vars(p)}
            params << 's'
            params << "'#{rename}'" unless (rename.nil? or rename.strip=='')
            out << "state=#{funname}(#{params.join(',')})"
            add_next = true
          when s =~ /^<done>$/
            out << "return(s)"
            add_next = false
          when s =~ /^<([^>]+)>$/
            out << "return(#{rb_vars($1)})"
            add_next = false
          when s =~ /^(:[a-zA-Z0-9_:-]+)$/
            out << "state='#{$1}'" unless currstate == $1
            add_next = true
          else
            out << s
          end
        end
        out << 'next' if add_next
        out
      end

      # Rewrites table syntax into Ruby: every '$' becomes '@' and every
      # ":name" token is wrapped in single quotes.
      def rb_vars(str)
        str.tr('$','@').gsub(/(:[a-zA-Z0-9_:-]+)/, '\'\1\'')
      end

      # Commands for the accumulate column of a clause.
      #
      # nil/'' -> ['@fwd=true']; '<<' -> []; 'name <<' -> the phrase with 'c'
      # appended. Anything else raises RuntimeError.
      def rb_simple_acc_commands(acc_phrase)
        case
        when (acc_phrase.nil? or acc_phrase == '') then return ['@fwd=true']
        when acc_phrase.strip == '<<' then return []
        when acc_phrase.strip =~ /^([a-zA-Z_][a-zA-Z0-9_]*)\s*<<\s*$/
          return ["#{rb_vars(acc_phrase.strip)}c"]
        else raise("Can't figure out your accumulator statement: #{acc_phrase}")
        end
      end

      # Commands for an "into << value" expression. With "into << <value" the
      # value is appended only when non-empty and is then reset to ''.
      # nil/'' and a bare '<<' behave as in rb_simple_acc_commands; anything
      # unrecognised raises RuntimeError.
      def rb_acc_commands(acc_phrase)
        case
        when (acc_phrase.nil? or acc_phrase == '') then return ['@fwd=true']
        when acc_phrase.strip == '<<' then return []
        when acc_phrase.strip =~ /^([a-zA-Z_][a-zA-Z0-9_]*)\s*<<\s*(<?)([a-zA-Z_][a-zA-Z0-9_]*)$/
          into = $1
          value = $3
          clear_it = $2 == '<'
          into = rb_vars(into)
          value = rb_vars(value)
          if clear_it
            out = ["(#{into}<<#{value} if #{value}.size>0)"]
            out << "#{value}=''"
          else
            out = ["#{into}<<#{value}"]
          end
          return out
        else raise("Can't figure out your accumulator statement: #{acc_phrase}")
        end
      end
    end
  end
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
# ERB template for the generated parser library. It is rendered with a
# binding that supplies `classname`, `@table` (rows of name, args, cmds,
# first_state, states) and the rb_* / accumulates? / accumulators /
# makes_calls? / eof_state? helpers used in the tags below.
require 'strscan'
$KCODE="U"

module <%= classname %>
  # Convenience entry points: parse a string, or the contents of a file.
  def self.parse(str) Parser.new(str).parse end
  def self.parse_file(fname) Parser.new(IO.read(fname)).parse end

  # AST node: a name, a list of children and start/end positions
  # (:unknown until set).
  class Node
    attr_accessor :name, :children, :start_line, :start_pos, :end_line, :end_pos
    def initialize(name='node',line=:unknown,pos=:unknown)
      @name = name
      @children = []
      @start_line = line
      @start_pos = pos
      @end_line = :unknown
      @end_pos = :unknown
    end
    # Appends a child.
    def <<(val) @children<<val end
  end

  class Parser < StringScanner
    # NOTE(review): this method is named `init`, not `initialize`, and nothing
    # in this template calls it, so `Parser.new(str)` appears to bypass
    # ensure_encoding and never set @global -- confirm whether that is intended.
    def init(source, opts={})
      opts ||= {}
      super ensure_encoding(source)
      @global = {}
    end

    # Returns a copy of +source+ prepared for scanning.
    # When ::Encoding is defined: a binary (ASCII-8BIT) string is inspected
    # for NUL bytes among its first four bytes to guess UTF-32/UTF-16
    # (big/little endian) and transcoded to UTF-8; any other string is
    # encoded to UTF-8; the result is then tagged ASCII-8BIT.
    # Otherwise the same NUL-byte patterns select a JSON.iconv conversion.
    # NOTE(review): JSON is not required anywhere in this template.
    def ensure_encoding(source)
      if defined?(::Encoding)
        if source.encoding == ::Encoding::ASCII_8BIT
          b = source[0, 4].bytes.to_a
          source =
            case
            when b.size>=4 && b[0]==0 && b[1]==0 && b[2]==0
              source.dup.force_encoding(::Encoding::UTF_32BE).encode!(::Encoding::UTF_8)
            when b.size>=4 && b[0]==0 && b[2]==0
              source.dup.force_encoding(::Encoding::UTF_16BE).encode!(::Encoding::UTF_8)
            when b.size>=4 && b[1]==0 && b[2]==0 && b[3]==0
              source.dup.force_encoding(::Encoding::UTF_32LE).encode!(::Encoding::UTF_8)
            when b.size>=4 && b[1]==0 && b[3]==0
              source.dup.force_encoding(::Encoding::UTF_16LE).encode!(::Encoding::UTF_8)
            else source.dup end
        else source = source.encode(::Encoding::UTF_8) end
        source.force_encoding(::Encoding::ASCII_8BIT)
      else
        b = source
        source =
          case
          when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0; JSON.iconv('utf-8', 'utf-32be', b)
          when b.size >= 4 && b[0] == 0 && b[2] == 0; JSON.iconv('utf-8', 'utf-16be', b)
          when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0; JSON.iconv('utf-8', 'utf-32le', b)
          when b.size >= 4 && b[1] == 0 && b[3] == 0; JSON.iconv('utf-8', 'utf-16le', b)
          else b end
      end
      return source
    end

    # Rewinds the scanner, resets the line/position/indent bookkeeping and
    # runs the function generated for the first table row; its result is
    # stored in @ast and returned.
    def parse
      reset
      @line = 1
      @pos = 1
      @leading = true
      @indent = 0
      @ast = <%= @table[0][0] %>
      return @ast
    end

    private

    # Prints +msg+ with the current line and character position to stderr.
    def error(msg)
      $stderr.puts "#{msg} | line: #{@line} | char: #{@pos}"
    end

    # Updates the per-character bookkeeping for codepoint +c+ and returns the
    # value remembered as @last_c. For LF followed by CR, or CR followed by
    # LF, the second character is consumed as well and the pair is reported
    # as a single combined value (0x0a0d / 0x0d0a).
    def global_state(c)
      # Unicode newline characters & combinations
      # Plus leading space for indents.
      # Also tracks line and position for the AST
      @last_is_newline = @last_is_space = false
      case c
      when 0x0b, 0x0c, 0x85, 0x2028, 0x2029
        @last_is_newline = true; @line += 1; @pos = 1
        @leading = true; @indent = 0
      when 0x0a
        nc = peek(1).unpack('U')[0]
        if nc == 0x0d then getch; c = 0x0a0d end
        @last_is_newline = true; @line += 1; @pos = 1
        @leading = true; @indent = 0
      when 0x0d
        nc = peek(1).unpack('U')[0]
        if nc == 0x0a then getch; c = 0x0d0a end
        @last_is_newline = true; @line += 1; @pos = 1
        @leading = true; @indent = 0
      when 0x20
        @indent += 1 if @leading
        @last_is_space = true; @pos += 1
      else @leading = false; @pos += 1 end
      return @last_c = c
    end

    # Whether the character last processed by global_state was a newline / a
    # space.
    def nl?() return @last_is_newline end
    def space?() return @last_is_space end

    # Returns the next character as a codepoint, or :eof at end of input.
    # When @fwd is set the previous character is handed out again instead
    # (and @fwd is cleared).
    def nextchar
      if @fwd then @fwd = false; return @last_c
      else
        c = getch
        if c.nil?
          c = :eof
          @last_is_space = @last_is_newline = false
          return @last_c = c
        end
        return global_state(c.unpack('U')[0])
      end
    end

    # True once nextchar has reported :eof.
    def eof?() return @last_c == :eof end

    # One method is generated per table row; "p=nil" and "name='<row name>'"
    # are appended to the row's own argument list. NOTE(review): the `args <<`
    # tags below modify the table's arrays in place on every rendering.
    <%- @table.each do |name, args, cmds, first_state, states| -%>
    <%- args << "p=nil" -%>
    <%- args << "name='#{name}'" -%>
    def <%= name %>(<%= args.join(',') %>)
      <%- cmds.each do |c| -%>
      <%= rb_vars(c) %>
      <%- end -%>
      state='<%= first_state %>'
      <%- if states.size > 1 or accumulates?(states) or makes_calls?(states) -%>
      s = Node.new(name,@line,@pos)
      <%- end -%>
      <%- accumulators(states).each do |_acc| -%>
      <%= _acc %> ||= ''
      <%- end -%>
      # Main loop: read a character, then dispatch on the current state.
      loop do
        c = nextchar
        <%- has_fallthru = false -%>
        <%- if eof_state?(states) -%>
        state = '{eof}' if c==:eof
        <%- end -%>
        case state
        <%- states.each do |st_name, clauses| -%>
        when '<%= st_name %>'
          <%- if clauses.size > 1 -%>
          case
          <%- clauses.each_with_index do |clause,i| -%>
          <%- cond = rb_conditional(clause,states,clauses) -%>
          <%- if cond == 'true' -%>
          else <%= rb_commands(clause,st_name) %>
          <%- break -%>
          <%- else -%>
          when <%= cond %>; <%= rb_commands(clause,st_name) %>
          <%- has_fallthru = true if i == clauses.size-1 -%>
          <%- end -%>
          <%- end -%>
          end
          <%- else -%>
          <%- cond = rb_conditional(clauses[0],states,clauses) -%>
          <%- if cond == 'true' -%>
          <%= rb_commands(clauses[0],st_name) %>
          <%- else -%>
          if <%= cond %>
            <%= rb_commands(clauses[0],st_name) %>
          end
          <%- end -%>
          <%- end -%>
        <%- end -%>
        end
        <%- if has_fallthru -%>
        error("Unexpected #{c}")
        @fwd = true
        return
        <%- end -%>
      end
    end
    <%- end -%>
  end
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
require 'erb'
require 'pp'

module GenMachine
  # This is a quick and dirty parser used for bootstrapping. Which means
  # it'll eventually be replaced when the real parser is written as a
  # genmachine table.
  #
  # It reads state-table files made of rows that start with '|' (a new row)
  # or ':' (continuation of the previous row), with columns separated by the
  # same character followed by a space or the end of the line.
  #
  # NOTE(review): this class uses CharSet and String#to_underscored but the
  # file requiring them is not visible here -- confirm they are loaded first.
  class SpecParser
    # files:: paths of the state-table files to read in #build.
    #
    # Also loads the library/executable ERB templates for every entry of
    # LANGUAGES. NOTE(review): LANGUAGES is not defined in this file and the
    # template names used here ('library.erb.rb', 'executable.erb') should be
    # checked against the templates that actually ship -- left unchanged.
    def initialize(files)
      @table = []
      @files = files
      @libraries = {}
      @executables = {}
      template_base = File.expand_path(File.dirname(__FILE__))+'/templates/'
      LANGUAGES.each do |lang|
        tbase = template_base + lang.to_s + '/'
        @libraries[lang] = ERB.new(IO.read(tbase+'library.erb.rb'),nil,'-')
        @executables[lang] = ERB.new(IO.read(tbase+'executable.erb'),nil,'-')
      end
    end

    # Parses every file and returns the table: one row
    # [name, args, cmds, first_state, states] per function, where +states+
    # is the hash built by #process_states. The table is also pretty-printed
    # to stdout (kept from the original).
    #
    # Differences from the original: files are closed after reading, the
    # function name is normalised with String#to_underscored (the original
    # called the undefined String#underscore), a function finished at the
    # end of one file is no longer added a second time when the next file
    # starts, and empty continuation columns no longer raise.
    #
    # Raises RuntimeError when a row appears before any function header.
    def build
      c_name = c_args = c_cmds = c_first_state = c_states = nil
      new_fun = false

      # Stores the function collected so far (if any) and forgets it.
      flush = lambda do
        unless c_name.nil?
          @table << [c_name, c_args, c_cmds, c_first_state, process_states(c_states)]
        end
        c_name = c_args = c_cmds = c_first_state = c_states = nil
        new_fun = false
      end

      @files.each do |fname|
        File.open(fname,'r') do |file|
          file.each_with_index do |line, line_no|
            line = line.strip
            det = line[0..0]
            next unless det == '|' || det == ':'

            re = (det=='|' ? '\|' : det) + '(?: |$)'
            cols = line.split(/#{re}/,-1)[1..-1].map{|c| c.strip}

            if det=='|' && cols[0].include?('(')
              # Function header: "name(args" in the first column.
              flush.call
              new_fun = true
              parts = cols[0].split('(')
              c_name = parts.shift.to_underscored
              c_args = parts.join('(').sub(/\)$/,'').split(',')
              c_states = []
              c_cmds = (cols[3]||'').split(';')
              c_first_state = cols[4]
              # TODO: error if cols[1] or cols[2] have anything
            elsif c_states.nil?
              raise "#{fname}:#{line_no + 1}: table row found before any function header"
            elsif det == ':' && new_fun
              # Continuation of the function header.
              c_args += cols[0].to_s.sub(/\)$/,'').split(',')
              c_cmds += (cols[3]||'').split(';')
              c_first_state = c_first_state.to_s + cols[4].to_s
            elsif det == '|'
              # New state clause.
              new_fun = false
              conditionals, inputs = parse_input(cols[1])
              c_states << {:name => cols[0],
                           :input => inputs,
                           :cond => conditionals,
                           :acc => cols[2],
                           :exprs => (cols[3]||'').split(';'),
                           :next => cols[4]}
            elsif det == ':' && (c_states.size > 0)
              # Continuation of the previous state clause.
              last = c_states[-1]
              conditionals, inputs = parse_input(cols[1],last[:input])
              last[:name] = last[:name].to_s + cols[0].to_s
              last[:input] = inputs
              last[:cond] += conditionals
              last[:acc] = last[:acc].to_s + cols[2].to_s
              last[:exprs] += (cols[3]||'').split(';')
              last[:next] = last[:next].to_s + cols[4].to_s
            end
          end
        end
        flush.call
      end
      pp @table
      @table
    end

    # consolidate same-name states and (eventually) combine / optimize where
    # appropriate.
    #
    # Returns {state_name => [clause, ...]} where each clause is the input
    # hash without its :name key. The input hashes are left untouched.
    def process_states(instates)
      outstates = {}
      instates.each do |inst|
        clause = inst.reject { |key, _value| key == :name }
        (outstates[inst[:name]] ||= []) << clause
      end
      outstates
    end

    # Parses the input column of a row.
    #
    # Recognises, in any order: runs of '-' (ignored), "{cond}" conditions,
    # "[^...]" excluded and "[...]" included character classes, and a
    # leading '.' (any character). Returns [conditions, inputs] where
    # +inputs+ is the CharSet passed in or created on demand (nil when the
    # column names no characters).
    #
    # +val+ may be nil and is not modified. Parsing stops at the first
    # remainder none of the patterns recognises (at most 100 steps).
    def parse_input(val,inputs=nil)
      rest = val.to_s.dup
      conds = []
      iters = 0
      while rest.strip.length > 0 && iters < 100
        case
        when rest =~ /--+/um
          rest.sub!($&,'')
        when rest =~ /\s*\{([^\}]+)\}\s*/um
          conds << $1
          rest.sub!($&, '')
        when rest =~ /\s*\[\^([^\]]+)\]\s*/um
          inputs ||= CharSet.new(:exclude)
          parse_combine_ranges($1, inputs)
          rest.sub!($&, '')
        when rest =~ /\s*\[([^\]]+)\]\s*/um
          inputs ||= CharSet.new(:include)
          parse_combine_ranges($1, inputs)
          rest.sub!($&, '')
        when rest =~ /^\s*\./um
          inputs ||= CharSet.new(:include)
          inputs << :any
          rest.sub!($&, '')
        else
          # Nothing recognisable is left; further iterations would not
          # change anything.
          break
        end
        iters += 1
      end
      return conds, inputs
    end

    # Backslash escapes understood inside character classes.
    ESCAPES = {'\t' => "\t", '\n' => "\n",
               '\r' => "\r", '\f' => "\f",
               '\b' => "\b", '\a' => "\a",
               '\e' => "\e", '\s' => " ",
               '\[' => '[',  '\]' => ']'}.freeze

    # Feeds the body of a character class into +input+ (anything answering
    # <<): first every leading "x-y" triple, then each remaining character,
    # after replacing the escapes listed in ESCAPES. +raw+ is not modified.
    def parse_combine_ranges(raw, input)
      text = raw.gsub(/\\[tnrfbaes\[\]]/){|m| ESCAPES[m]}
      return unless text =~ /((?:.-.)*)((?:.)*)/um

      ranges = $1
      singles = $2
      # +ranges+ is a concatenation of three-character "x-y" groups.
      ranges.scan(/.-./um).each{|r| input << r}
      singles.scan(/./um).each{|s| input << s}
    end
  end
end
|
data/lib/genmachine.rb
CHANGED
@@ -1,8 +1,22 @@
|
|
1
|
+
require 'genmachine/spec_parser'
|
2
|
+
require 'genmachine/generator'
|
3
|
+
require 'genmachine/generators/helpers/general'
|
4
|
+
|
5
|
+
Dir[File.join(File.dirname(__FILE__),'genmachine','generators','*.rb')].each do |fname|
|
6
|
+
name = File.basename(fname)
|
7
|
+
require "genmachine/generators/#{name}"
|
8
|
+
end
|
9
|
+
|
1
10
|
module GenMachine
  class << self
    # Returns a hash mapping each target language to its generator.
    #
    # Every constant found under GenMachine::Generators that itself defines
    # GENMACHINE_TARGET is registered under that constant's value; the other
    # constants are skipped.
    def generators
      registry = {}
      Generators.constants.each do |const_name|
        candidate = Generators.const_get(const_name)
        next unless candidate.const_defined?('GENMACHINE_TARGET')

        registry[candidate::GENMACHINE_TARGET] = candidate
      end
      registry
    end
  end
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: genmachine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Joseph Wecker
|
@@ -15,8 +15,8 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-08-
|
19
|
-
default_executable:
|
18
|
+
date: 2011-08-11 00:00:00 -07:00
|
19
|
+
default_executable: genmachine
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
requirement: &id001 !ruby/object:Gem::Requirement
|
@@ -80,8 +80,8 @@ dependencies:
|
|
80
80
|
type: :development
|
81
81
|
description: "Takes a state table where the following are defined: state, input+conditions, accumulate-action, pre-transition-actions, and transition-to. It takes that state table and generates very fast parsers. Similar to Ragel. Currently only outputs pure Ruby."
|
82
82
|
email: joseph.wecker@gmail.com
|
83
|
-
executables:
|
84
|
-
|
83
|
+
executables:
|
84
|
+
- genmachine
|
85
85
|
extensions: []
|
86
86
|
|
87
87
|
extra_rdoc_files:
|
@@ -94,7 +94,16 @@ files:
|
|
94
94
|
- README.rdoc
|
95
95
|
- Rakefile
|
96
96
|
- VERSION
|
97
|
+
- bin/genmachine
|
98
|
+
- genmachine.gemspec
|
97
99
|
- lib/genmachine.rb
|
100
|
+
- lib/genmachine/char_set.rb
|
101
|
+
- lib/genmachine/generator.rb
|
102
|
+
- lib/genmachine/generators/helpers/general.rb
|
103
|
+
- lib/genmachine/generators/helpers/ruby.rb
|
104
|
+
- lib/genmachine/generators/ruby.rb
|
105
|
+
- lib/genmachine/generators/templates/ruby/lib.erb.rb
|
106
|
+
- lib/genmachine/spec_parser.rb
|
98
107
|
- test/helper.rb
|
99
108
|
- test/test_genmachine.rb
|
100
109
|
has_rdoc: true
|