genmachine 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/bin/genmachine +66 -0
- data/genmachine.gemspec +68 -0
- data/lib/genmachine/char_set.rb +59 -0
- data/lib/genmachine/generator.rb +38 -0
- data/lib/genmachine/generators/helpers/general.rb +62 -0
- data/lib/genmachine/generators/helpers/ruby.rb +121 -0
- data/lib/genmachine/generators/ruby.rb +8 -0
- data/lib/genmachine/generators/templates/ruby/lib.erb.rb +175 -0
- data/lib/genmachine/spec_parser.rb +130 -0
- data/lib/genmachine.rb +18 -4
- metadata +16 -7
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.1
|
data/bin/genmachine
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
#!/usr/bin/env ruby
# Command-line front end for GenMachine.
#
# Parses the command-line options, picks the generator registered for the
# requested target language and asks it to emit the parser class, the
# optional executable and to run the optional test.
#
# Exit codes: 1 when no state table file is given, 2 when the requested
# language has no registered generator.
require 'optparse'
require 'genmachine'

BANNER = "Usage: genmachine [options] STATE_TABLE_FILES"

generators = GenMachine.generators
languages  = generators.keys

options = {}
opts = OptionParser.new do |o|
  o.version = '0.0.1'
  o.banner  = BANNER
  o.on('-c', '--classname NAME',
       "Class/module/function name for generated library code "+
       "(default STATE_TABLE_FILE)") do |v|
    options[:classname] = v
  end
  o.on('-l', '--language LANGUAGE',
       "Language to generate code for- currently one of [#{languages.join(',')}] " +
       "(default #{languages.first})") do |v|
    options[:language] = v.to_underscored.to_sym
  end
  o.on('-t', '--test-with FILE',
       "Try parsing the specified file after generating the parser "+
       "(default STATE_TABLE_FILE.gmtest if it exists)") do |v|
    options[:test_file] = v
  end
  o.on('-e', '--[no-]executable',
       "Generate an executable parser (default true)") do |v|
    options[:executable] = v
  end
  o.on('-o', '--output-dir DIR',
       "Output directory for generated file(s) (default ./)") do |v|
    options[:output_dir] = v
  end
end

files = opts.parse(ARGV)
if files.size < 1
  $stderr.puts BANNER
  exit 1
end

file_base = files[0].chomp(File.extname(files[0]))
name_base = File.basename(file_base)

# `||=` would turn an explicit --no-executable (false) back into true, so
# only fill in the default when the flag was not given at all.
options[:executable] = true if options[:executable].nil?
options[:language]  ||= languages.first
options[:classname] ||= name_base.capitalize + 'Parser'

# The help text promises the .gmtest default only "if it exists".
default_test = files[0] + '.gmtest'
options[:test_file] ||= default_test if File.exist?(default_test)

# Was misspelled :output_dif, so the default never reached the generator.
options[:output_dir] ||= './'
options[:class_fname] = options[:classname].to_underscored + '.rb'
options[:exe_fname]   = name_base.to_underscored

unless languages.include? options[:language]
  $stderr.puts "I don't know how to generate a parser in '#{options[:language]}' - try one of [#{languages.join(',')}]"
  exit 2
end

#spec_parser = GenMachine::SpecParser.new(files)
#options[:spec_ast] = spec_parser.build
options[:spec_ast] = []
gen = generators[options[:language]].new(options)
gen.generate_class
gen.generate_executable
gen.run_test
|
data/genmachine.gemspec
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{genmachine}
|
8
|
+
s.version = "0.0.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Joseph Wecker"]
|
12
|
+
s.date = %q{2011-08-11}
|
13
|
+
s.default_executable = %q{genmachine}
|
14
|
+
s.description = %q{Takes a state table where the following are defined: state, input+conditions, accumulate-action, pre-transition-actions, and transition-to. It takes that state table and generates very fast parsers. Similar to Ragel. Currently only outputs pure Ruby.}
|
15
|
+
s.email = %q{joseph.wecker@gmail.com}
|
16
|
+
s.executables = ["genmachine"]
|
17
|
+
s.extra_rdoc_files = [
|
18
|
+
"LICENSE.txt",
|
19
|
+
"README.rdoc"
|
20
|
+
]
|
21
|
+
s.files = [
|
22
|
+
".document",
|
23
|
+
"Gemfile",
|
24
|
+
"LICENSE.txt",
|
25
|
+
"README.rdoc",
|
26
|
+
"Rakefile",
|
27
|
+
"VERSION",
|
28
|
+
"bin/genmachine",
|
29
|
+
"genmachine.gemspec",
|
30
|
+
"lib/genmachine.rb",
|
31
|
+
"lib/genmachine/char_set.rb",
|
32
|
+
"lib/genmachine/generator.rb",
|
33
|
+
"lib/genmachine/generators/helpers/general.rb",
|
34
|
+
"lib/genmachine/generators/helpers/ruby.rb",
|
35
|
+
"lib/genmachine/generators/ruby.rb",
|
36
|
+
"lib/genmachine/generators/templates/ruby/lib.erb.rb",
|
37
|
+
"lib/genmachine/spec_parser.rb",
|
38
|
+
"test/helper.rb",
|
39
|
+
"test/test_genmachine.rb"
|
40
|
+
]
|
41
|
+
s.homepage = %q{http://github.com/josephwecker/genmachine}
|
42
|
+
s.licenses = ["MIT"]
|
43
|
+
s.require_paths = ["lib"]
|
44
|
+
s.rubygems_version = %q{1.6.2}
|
45
|
+
s.summary = %q{Generates parsers based on a fancy state-table}
|
46
|
+
|
47
|
+
if s.respond_to? :specification_version then
|
48
|
+
s.specification_version = 3
|
49
|
+
|
50
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
51
|
+
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
52
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
53
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.4"])
|
54
|
+
s.add_development_dependency(%q<rcov>, [">= 0"])
|
55
|
+
else
|
56
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
57
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
58
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
59
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
60
|
+
end
|
61
|
+
else
|
62
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
63
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
64
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.4"])
|
65
|
+
s.add_dependency(%q<rcov>, [">= 0"])
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module GenMachine
  # A set of characters stored as sorted, merged, inclusive codepoint
  # intervals, tagged with a +kind+ (:include by default; the spec parser
  # also creates :exclude sets).
  class CharSet
    attr_accessor :kind

    # @param kind [Symbol] tag reported back by #for_conditional
    def initialize(kind = :include)
      @kind = kind
      @include_intervals = []
      @include_any = false
    end

    def inspect() for_conditional.inspect end

    # Hash-style access to the #for_conditional view (:kind / :ranges).
    def [](k) for_conditional[k] end

    # @return [Hash] {:kind=>kind, :ranges=>[...]} where each range is a
    #   single codepoint, a [from, to] pair, or :any when the wildcard
    #   was added
    def for_conditional
      return {:kind=>@kind, :ranges=>[:any]} if @include_any
      ivals = @include_intervals.map { |a,b| a == b ? a : [a,b] }
      {:kind=>@kind, :ranges=>ivals}
    end

    # Alias of #<<; note that it modifies the receiver.
    def +(val) self << val end

    # Adds +val+ to the set.
    #
    # @param val [String, Range, Symbol, Integer] an "a-z" style string
    #   (range), a Range, :any (wildcard), or a single character/codepoint
    # @return [CharSet] self, so additions can be chained
    def <<(val)
      if val.is_a?(String) && val =~ /([^-])-([^-])/
        include_range($1,$2)
      elsif val.is_a?(Range)
        include_range(val.first, val.last)
      elsif val == :any
        @include_any = true
      else
        include_char(val)
      end
      self
    end

    def include_char(char) include_range(char,char) end

    # Adds the inclusive interval from..to and re-merges the stored
    # intervals. String endpoints are reduced to the codepoint of their
    # first character.
    def include_range(from, to)
      # The original called String#utf8_chars, which is not defined in the
      # visible source; unpack('U*') is what the gem's own
      # String#to_utf8_char_array does.
      from = from.unpack('U*').first if from.is_a?(String)
      to   = to.unpack('U*').first   if to.is_a?(String)
      @include_intervals << [from,to].sort
      @include_intervals = merge_intervals(@include_intervals)
    end

    private

    # Sorts the intervals and fuses those that overlap or are adjacent.
    def merge_intervals(intervals)
      merged = []
      intervals.sort.each do |lo, hi|
        last = merged.last
        if last && lo <= last[1] + 1
          last[1] = hi if hi > last[1]
        else
          merged << [lo, hi]
        end
      end
      merged
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module GenMachine
  module Generators
    # Mixin holding the behaviour shared by language generators: option
    # handling plus the (currently stubbed) generate/test steps, which only
    # print what they would do.
    module Generator
      # @param opts [Hash, nil] recognised keys: :spec_ast (required),
      #   :executable, :classname, :test_file, :output_dir, :class_fname,
      #   :exe_fname
      # @raise [ArgumentError] when :spec_ast is missing
      def initialize(opts={})
        # Read the options instead of deleting them, so the caller's hash
        # is left as it was.
        opts ||= {}
        @spec_ast = opts[:spec_ast]
        # Check first so a missing spec is reported as such rather than as
        # a failure while computing the defaults below.
        raise ArgumentError, "Must include the table specification data (:spec_ast)" if @spec_ast.nil?
        @gen_executable = opts[:executable]  || false
        @classname      = opts[:classname]   || 'MiscParser'
        @test_file      = opts[:test_file]
        @output_dir     = opts[:output_dir]  || './'
        @class_fname    = opts[:class_fname] || @classname.to_underscored
        @exe_fname      = opts[:exe_fname]   || @classname.sub(/parser$/i,'').to_underscored
      end

      # Announces the class file that would be generated (writing is still
      # commented out).
      def generate_class
        puts "Generate class (#{@classname}): #{@class_fname}"
        #f = File.new(@class_fname, 'w+')
        #f.write(@libraries[language].result(binding))
        #f.close
      end

      # Announces the executable that would be generated; does nothing when
      # executables were not requested.
      def generate_executable
        return unless @gen_executable
        puts "Generate executable: #{@exe_fname}"
        #f = File.new(@exe_fname, 'w+')
        #f.write(@executables[language].result(binding))
        #f.chmod(0755)
        #f.close
      end

      # Announces the test run; skipped when no test file is set.
      def run_test
        return if @test_file.nil?
        puts "Run test"
      end
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# Core extensions used by the generators.
class String
  # Snake-case version of the receiver: namespace separators and any other
  # character outside [a-zA-Z0-9_] become underscores, CamelCase word
  # boundaries get an underscore, and the result is downcased.
  def to_underscored
    text = gsub(/::/, '/')
    text = text.gsub(/([A-Z]+)([A-Z][a-z])/,'\1_\2')
    text = text.gsub(/([a-z\d])([A-Z])/,'\1_\2')
    text.gsub(/[^a-zA-Z\d_]/,'_').downcase
  end

  # The receiver's characters as an array of UTF-8 codepoints.
  def to_utf8_char_array
    unpack("U*")
  end
end
|
14
|
+
|
15
|
+
module GenMachine
  module Helpers
    # Language-independent queries over a parsed state table.
    #
    # +states+ is iterated as name => clauses pairs; each clause is a hash
    # read here through its :exprs, :acc and :cond entries.
    module General
      # @return [Boolean] true when any clause expression contains `<<s`
      #   not followed by a further identifier character
      def accumulates?(states)
        states.any? do |_name, clauses|
          clauses.any? do |c|
            c[:exprs].any? { |e| e =~ /<<s($|[^a-zA-Z0-9_])/ }
          end
        end
      end

      # Collects the identifiers on either side of `<<` in each clause's
      # expressions and :acc phrase, skipping the reserved names `p` and
      # `s`.
      #
      # @return [Array<String>] names in order of first appearance
      def accumulators(states)
        accs = {}
        states.each do |_name, clauses|
          clauses.each do |c|
            (c[:exprs] + [c[:acc]]).each do |e|
              # :acc may be absent for a clause.
              next if e.nil?
              # Expressions come from splitting on ';' and can carry
              # surrounding blanks; strip them as rb_acc_commands does.
              if e.strip =~ /^([a-zA-Z_][a-zA-Z0-9_]*)?\s*<<\s*(<?)([a-zA-Z_][a-zA-Z0-9_]*)?$/
                [$1, $3].each do |ident|
                  next if ident.nil? || ident == '' || ident == 'p' || ident == 's'
                  accs[ident] = true
                end
              end
            end
          end
        end
        accs.keys
      end

      def makes_calls?(states)
        # TODO: implement
        false
      end

      # @return [Boolean] true when a state is named '{eof}'
      def eof_state?(states)
        states.any? { |name, _clauses| name == '{eof}' }
      end

      # @return [Boolean] true when any clause lists 'eof' as a condition
      def eof_clause?(clauses)
        clauses.any? { |c| c[:cond].include?('eof') }
      end
    end
  end
end
|
@@ -0,0 +1,121 @@
|
|
1
|
+
module GenMachine
  module Helpers
    # Turns parsed state-table clauses into fragments of Ruby source for
    # the generated parser. rb_conditional relies on eof_state? and
    # eof_clause? being available on the including object.
    module Ruby
      # Builds the Ruby condition text for one clause.
      #
      # With explicit conditions they are OR-ed together and AND-ed with
      # the character test; otherwise only the character test is emitted,
      # with ',' between alternatives.
      def rb_conditional(clause,states,clauses)
        has_eof_state = eof_state?(states) || eof_clause?(clauses)
        out = ''
        if clause[:cond].size > 0
          out += '('+clause[:cond].join(' || ')+')'
          out += "&& (#{rb_charset_cond(clause[:input],has_eof_state)})" unless clause[:input].nil?
        else
          out += rb_charset_cond(clause[:input],has_eof_state,',')
        end
        rb_vars(out)
      end

      # Builds the test on the current character `c` for a character set.
      #
      # @param input [#[], nil] responds to [:kind] and [:ranges]; nil
      #   means "always true"
      # @param has_eof_state [Boolean] when true the :any wildcard also
      #   accepts :eof
      # @param sep [String] joiner for alternatives (forced to '||' for
      #   :exclude sets, whose alternatives are wrapped in `!( )`)
      def rb_charset_cond(input,has_eof_state,sep='||')
        return 'true' if input.nil?
        excluded = input[:kind] == :exclude
        sep = '||' if excluded
        outs = []
        # The per-range tests used to be built only for :include sets, so
        # an :exclude set collapsed to `!()`, which is always true.
        input[:ranges].each do |range|
          if range.is_a? Array
            outs << "nl?" if (range[0] <= 0x0a) && (range[1] >= 0x0a)
            outs << "space?" if (range[0] <= 0x20) && (range[1] >= 0x20)
            outs << "(c>#{range[0]-1}&&c<#{range[1]+1})"
          else
            outs << case range
                    when 0x0a; 'nl?'
                    when 0x20; 'space?'
                    when :any; has_eof_state ? 'true' : 'c!=:eof'
                    else "c==#{range}" end
          end
        end
        out = outs.join(sep)
        excluded ? '!('+out+')' : out
      end

      # Joins everything a clause does (accumulate, expressions,
      # transition) into one '; '-separated line of Ruby.
      def rb_commands(clause,currstate)
        cmds = []
        cmds += rb_simple_acc_commands(clause[:acc])
        clause[:exprs].each do |expr|
          if expr.include? '<<'
            cmds += rb_acc_commands(expr)
          else
            cmds << rb_vars(expr.strip)
          end
        end
        cmds += rb_transition_commands(clause[:next],currstate)
        cmds.join('; ')
      end

      # Translates a ';'-separated transition column into Ruby statements.
      #
      # Recognised forms: `name[rename](args)` (call another group),
      # `<done>` (return s), `<expr>` (return expr) and `:state` (switch
      # state, omitted when it equals +currstate+). Anything else is
      # passed through unchanged.
      def rb_transition_commands(st,currstate)
        # A clause may have no transition column at all.
        steps = st.to_s.strip.split(';').map{|s|s.strip}
        out = []
        add_next = false
        steps.each do |s|
          case
          when s =~ /^([^\(\[]+)(?:\[([^\]]*)\])?\(([^\)]*)\)$/ # Call another group
            funname = $1
            rename  = $2
            params  = $3.split(',').map{|p|rb_vars(p)}
            params << 's'
            params << "'#{rename}'" unless (rename.nil? || rename.strip=='')
            out << "state=#{funname}(#{params.join(',')})"
            add_next = true
          when s =~ /^<done>$/
            out << "return(s)"
            add_next = false
          when s =~ /^<([^>]+)>$/
            out << "return(#{rb_vars($1)})"
            add_next = false
          when s =~ /^(:[a-zA-Z0-9_:-]+)$/
            out << "state='#{$1}'" unless currstate == $1
            add_next = true
          else
            out << s
          end
        end
        out << 'next' if add_next
        out
      end

      # Rewrites table syntax to Ruby: `$var` becomes `@var` and `:name`
      # tokens are wrapped in single quotes.
      def rb_vars(str)
        str.tr('$','@').gsub(/(:[a-zA-Z0-9_:-]+)/, '\'\1\'')
      end

      # Commands for the accumulate column: nothing given re-reads the
      # current character (@fwd), a bare `<<` does nothing, `name <<`
      # appends the current character `c` to name.
      def rb_simple_acc_commands(acc_phrase)
        case
        when (acc_phrase.nil? || acc_phrase == ''); return ['@fwd=true']
        when acc_phrase.strip == '<<'; return []
        when acc_phrase.strip =~ /^([a-zA-Z_][a-zA-Z0-9_]*)\s*<<\s*$/
          return ["#{rb_vars(acc_phrase.strip)}c"]
        else raise("Can't figure out your accumulator statement: #{acc_phrase}")
        end
      end

      # Commands for an `into << value` expression; `into <<< value` also
      # clears value afterwards and only appends when it is non-empty.
      def rb_acc_commands(acc_phrase)
        case
        when (acc_phrase.nil? || acc_phrase == ''); return ['@fwd=true']
        when acc_phrase.strip == '<<'; return []
        when acc_phrase.strip =~ /^([a-zA-Z_][a-zA-Z0-9_]*)\s*<<\s*(<?)([a-zA-Z_][a-zA-Z0-9_]*)$/
          into     = $1
          value    = $3
          clear_it = $2 == '<'
          into  = rb_vars(into)
          value = rb_vars(value)
          if clear_it
            out = ["(#{into}<<#{value} if #{value}.size>0)"]
            out << "#{value}=''"
          else
            out = ["#{into}<<#{value}"]
          end
          return out
        else raise("Can't figure out your accumulator statement: #{acc_phrase}")
        end
      end
    end
  end
end
|
@@ -0,0 +1,175 @@
|
|
1
|
+
require 'strscan'
|
2
|
+
$KCODE="U"
|
3
|
+
|
4
|
+
module <%= classname %>
|
5
|
+
def self.parse(str) Parser.new(str).parse end
|
6
|
+
def self.parse_file(fname) Parser.new(IO.read(fname)).parse end
|
7
|
+
|
8
|
+
class Node
|
9
|
+
attr_accessor :name, :children, :start_line, :start_pos, :end_line, :end_pos
|
10
|
+
def initialize(name='node',line=:unknown,pos=:unknown)
|
11
|
+
@name = name
|
12
|
+
@children = []
|
13
|
+
@start_line = line
|
14
|
+
@start_pos = pos
|
15
|
+
@end_line = :unknown
|
16
|
+
@end_pos = :unknown
|
17
|
+
end
|
18
|
+
def <<(val) @children<<val end
|
19
|
+
end
|
20
|
+
|
21
|
+
class Parser < StringScanner
|
22
|
+
def init(source, opts={})
|
23
|
+
opts ||= {}
|
24
|
+
super ensure_encoding(source)
|
25
|
+
@global = {}
|
26
|
+
end
|
27
|
+
|
28
|
+
def ensure_encoding(source)
|
29
|
+
if defined?(::Encoding)
|
30
|
+
if source.encoding == ::Encoding::ASCII_8BIT
|
31
|
+
b = source[0, 4].bytes.to_a
|
32
|
+
source =
|
33
|
+
case
|
34
|
+
when b.size>=4 && b[0]==0 && b[1]==0 && b[2]==0
|
35
|
+
source.dup.force_encoding(::Encoding::UTF_32BE).encode!(::Encoding::UTF_8)
|
36
|
+
when b.size>=4 && b[0]==0 && b[2]==0
|
37
|
+
source.dup.force_encoding(::Encoding::UTF_16BE).encode!(::Encoding::UTF_8)
|
38
|
+
when b.size>=4 && b[1]==0 && b[2]==0 && b[3]==0
|
39
|
+
source.dup.force_encoding(::Encoding::UTF_32LE).encode!(::Encoding::UTF_8)
|
40
|
+
when b.size>=4 && b[1]==0 && b[3]==0
|
41
|
+
source.dup.force_encoding(::Encoding::UTF_16LE).encode!(::Encoding::UTF_8)
|
42
|
+
else source.dup end
|
43
|
+
else source = source.encode(::Encoding::UTF_8) end
|
44
|
+
source.force_encoding(::Encoding::ASCII_8BIT)
|
45
|
+
else
|
46
|
+
b = source
|
47
|
+
source =
|
48
|
+
case
|
49
|
+
when b.size >= 4 && b[0] == 0 && b[1] == 0 && b[2] == 0; JSON.iconv('utf-8', 'utf-32be', b)
|
50
|
+
when b.size >= 4 && b[0] == 0 && b[2] == 0; JSON.iconv('utf-8', 'utf-16be', b)
|
51
|
+
when b.size >= 4 && b[1] == 0 && b[2] == 0 && b[3] == 0; JSON.iconv('utf-8', 'utf-32le', b)
|
52
|
+
when b.size >= 4 && b[1] == 0 && b[3] == 0; JSON.iconv('utf-8', 'utf-16le', b)
|
53
|
+
else b end
|
54
|
+
end
|
55
|
+
return source
|
56
|
+
end
|
57
|
+
|
58
|
+
def parse
|
59
|
+
reset
|
60
|
+
@line = 1
|
61
|
+
@pos = 1
|
62
|
+
@leading = true
|
63
|
+
@indent = 0
|
64
|
+
@ast = <%= @table[0][0] %>
|
65
|
+
return @ast
|
66
|
+
end
|
67
|
+
|
68
|
+
private
|
69
|
+
|
70
|
+
def error(msg)
|
71
|
+
$stderr.puts "#{msg} | line: #{@line} | char: #{@pos}"
|
72
|
+
end
|
73
|
+
|
74
|
+
def global_state(c)
|
75
|
+
# Unicode newline characters & combinations
|
76
|
+
# Plus leading space for indents.
|
77
|
+
# Also tracks line and position for the AST
|
78
|
+
@last_is_newline = @last_is_space = false
|
79
|
+
case c
|
80
|
+
when 0x0b, 0x0c, 0x85, 0x2028, 0x2029
|
81
|
+
@last_is_newline = true; @line += 1; @pos = 1
|
82
|
+
@leading = true; @indent = 0
|
83
|
+
when 0x0a
|
84
|
+
nc = peek(1).unpack('U')[0]
|
85
|
+
if nc == 0x0d then getch; c = 0x0a0d end
|
86
|
+
@last_is_newline = true; @line += 1; @pos = 1
|
87
|
+
@leading = true; @indent = 0
|
88
|
+
when 0x0d
|
89
|
+
nc = peek(1).unpack('U')[0]
|
90
|
+
if nc == 0x0a then getch; c = 0x0d0a end
|
91
|
+
@last_is_newline = true; @line += 1; @pos = 1
|
92
|
+
@leading = true; @indent = 0
|
93
|
+
when 0x20
|
94
|
+
@indent += 1 if @leading
|
95
|
+
@last_is_space = true; @pos += 1
|
96
|
+
else @leading = false; @pos += 1 end
|
97
|
+
return @last_c = c
|
98
|
+
end
|
99
|
+
|
100
|
+
def nl?() return @last_is_newline end
|
101
|
+
def space?() return @last_is_space end
|
102
|
+
|
103
|
+
def nextchar
|
104
|
+
if @fwd then @fwd = false; return @last_c
|
105
|
+
else
|
106
|
+
c = getch
|
107
|
+
if c.nil?
|
108
|
+
c = :eof
|
109
|
+
@last_is_space = @last_is_newline = false
|
110
|
+
return @last_c = c
|
111
|
+
end
|
112
|
+
return global_state(c.unpack('U')[0])
|
113
|
+
end
|
114
|
+
end
|
115
|
+
|
116
|
+
def eof?() return @last_c == :eof end
|
117
|
+
|
118
|
+
<%- @table.each do |name, args, cmds, first_state, states| -%>
|
119
|
+
<%- args << "p=nil" -%>
|
120
|
+
<%- args << "name='#{name}'" -%>
|
121
|
+
def <%= name %>(<%= args.join(',') %>)
|
122
|
+
<%- cmds.each do |c| -%>
|
123
|
+
<%= rb_vars(c) %>
|
124
|
+
<%- end -%>
|
125
|
+
state='<%= first_state %>'
|
126
|
+
<%- if states.size > 1 or accumulates?(states) or makes_calls?(states) -%>
|
127
|
+
s = Node.new(name,@line,@pos)
|
128
|
+
<%- end -%>
|
129
|
+
<%- accumulators(states).each do |_acc| -%>
|
130
|
+
<%= _acc %> ||= ''
|
131
|
+
<%- end -%>
|
132
|
+
loop do
|
133
|
+
c = nextchar
|
134
|
+
<%- has_fallthru = false -%>
|
135
|
+
<%- if eof_state?(states) -%>
|
136
|
+
state = '{eof}' if c==:eof
|
137
|
+
<%- end -%>
|
138
|
+
case state
|
139
|
+
<%- states.each do |st_name, clauses| -%>
|
140
|
+
when '<%= st_name %>'
|
141
|
+
<%- if clauses.size > 1 -%>
|
142
|
+
case
|
143
|
+
<%- clauses.each_with_index do |clause,i| -%>
|
144
|
+
<%- cond = rb_conditional(clause,states,clauses) -%>
|
145
|
+
<%- if cond == 'true' -%>
|
146
|
+
else <%= rb_commands(clause,st_name) %>
|
147
|
+
<%- break -%>
|
148
|
+
<%- else -%>
|
149
|
+
when <%= cond %>; <%= rb_commands(clause,st_name) %>
|
150
|
+
<%- has_fallthru = true if i == clauses.size-1 -%>
|
151
|
+
<%- end -%>
|
152
|
+
<%- end -%>
|
153
|
+
end
|
154
|
+
<%- else -%>
|
155
|
+
<%- cond = rb_conditional(clauses[0],states,clauses) -%>
|
156
|
+
<%- if cond == 'true' -%>
|
157
|
+
<%= rb_commands(clauses[0],st_name) %>
|
158
|
+
<%- else -%>
|
159
|
+
if <%= cond %>
|
160
|
+
<%= rb_commands(clauses[0],st_name) %>
|
161
|
+
end
|
162
|
+
<%- end -%>
|
163
|
+
<%- end -%>
|
164
|
+
<%- end -%>
|
165
|
+
end
|
166
|
+
<%- if has_fallthru -%>
|
167
|
+
error("Unexpected #{c}")
|
168
|
+
@fwd = true
|
169
|
+
return
|
170
|
+
<%- end -%>
|
171
|
+
end
|
172
|
+
end
|
173
|
+
<%- end -%>
|
174
|
+
end
|
175
|
+
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module GenMachine
  # This is a quick and dirty parser used for bootstrapping. Which means
  # it'll eventually be replaced when the real parser is written as a
  # genmachine table.
  #
  # Table rows start with '|' (new row) or ':' (continuation of the
  # previous row); columns are read as name, input, accumulator,
  # expressions and next state. A '|' row whose first column contains '('
  # starts a new function.
  class SpecParser
    # Escape sequences accepted inside [...] character classes.
    ESCAPES = {'\t' => "\t", '\n' => "\n",
               '\r' => "\r", '\f' => "\f",
               '\b' => "\b", '\a' => "\a",
               '\e' => "\e", '\s' => " ",
               '\[' => '[',  '\]' => ']'}

    # @param files [Array<String>] paths of the state table files
    def initialize(files)
      @table = []
      @files = files
      @libraries = {}
      @executables = {}
      template_base = File.expand_path(File.dirname(__FILE__))+'/templates/'
      # NOTE(review): LANGUAGES and ERB are neither defined nor required in
      # the visible source, and the template names read here differ from
      # the one the gem ships (lib.erb.rb) - confirm before relying on
      # this constructor.
      LANGUAGES.each do |lang|
        tbase = template_base + lang.to_s + '/'
        @libraries[lang] = ERB.new(IO.read(tbase+'library.erb.rb'),nil,'-')
        @executables[lang] = ERB.new(IO.read(tbase+'executable.erb'),nil,'-')
      end
    end

    # Reads every file and appends one
    # [name, args, cmds, first_state, states] entry per function to the
    # table.
    #
    # @return [Array] the accumulated table
    def build
      c_name = c_args = c_cmds = c_first_state = c_states = nil
      new_fun = false
      @files.each do |fname|
        # Block form closes the file handle when done.
        File.open(fname,'r') do |file|
          file.each_line do |line|
            line = line.strip
            det = line[0..0]
            next unless det == '|' || det == ':'
            re = (det=='|' ? '\|' : det) + '(?: |$)'
            cols = line.split(/#{re}/,-1)[1..-1].map{|c| c.strip}
            next if cols.empty?
            if det=='|' && cols[0].include?('(')
              new_fun = true
              unless c_name.nil?
                @table << [c_name, c_args, c_cmds, c_first_state, process_states(c_states)]
              end
              parts = cols[0].split('(')
              # String#underscore is not defined by this gem;
              # to_underscored is its own helper.
              c_name = parts.shift.to_underscored
              c_args = parts.join('(').sub(/\)$/,'').split(',')
              c_states = []
              c_cmds = (cols[3]||'').split(';')
              c_first_state = cols[4]
              # TODO: error if cols[1] or cols[2] have anything
            elsif det == ':' && new_fun
              c_args += cols[0].sub(/\)$/,'').split(',')
              c_cmds += (cols[3]||'').split(';')
              c_first_state = c_first_state.to_s + cols[4].to_s
            elsif det == '|'
              new_fun = false
              conditionals, inputs = parse_input(cols[1])
              c_states << {:name  => cols[0],
                           :input => inputs,
                           :cond  => conditionals,
                           :acc   => cols[2],
                           :exprs => (cols[3]||'').split(';'),
                           :next  => cols[4]}
            elsif det == ':' && c_states && (c_states.size > 0)
              last = c_states[-1]
              conditionals, inputs = parse_input(cols[1],last[:input])
              last[:name]  = last[:name].to_s + cols[0].to_s
              last[:input] = inputs
              last[:cond] += conditionals
              last[:acc]   = last[:acc].to_s + cols[2].to_s
              last[:exprs]+= (cols[3]||'').split(';')
              last[:next]  = last[:next].to_s + cols[4].to_s
            end
          end
        end
        unless c_name.nil?
          @table << [c_name, c_args, c_cmds, c_first_state, process_states(c_states)]
        end
        # Reset so the next file's first header does not push this
        # function a second time.
        c_name = c_args = c_cmds = c_first_state = c_states = nil
        new_fun = false
      end
      @table
    end

    # consolidate same-name states and (eventually) combine / optimize where
    # appropriate.
    #
    # @param instates [Array<Hash>] clause hashes carrying a :name key
    # @return [Hash] name => array of clauses (without :name); the input
    #   hashes are left untouched
    def process_states(instates)
      outstates = {}
      instates.each do |inst|
        clause = inst.dup
        name = clause.delete(:name)
        (outstates[name] ||= []) << clause
      end
      outstates
    end

    # Parses an input column into conditions ({...}) and a character set
    # ([...], [^...] or '.').
    #
    # @param val [String, nil] raw column text
    # @param inputs [Object, nil] character set to extend (used for
    #   continuation rows)
    # @return [Array] [conditions, inputs]
    def parse_input(val,inputs=nil)
      val = val.to_s.dup  # work on a copy; a missing column counts as empty
      iters = 0
      conds = []
      # The iteration cap stops text that matches no pattern from looping
      # forever.
      while val.strip.length > 0 && iters < 100
        case
        when val =~ /--+/um
          val.sub!($&,'')
        when val =~ /\s*\{([^\}]+)\}\s*/um
          conds << $1
          val.sub!($&, '')
        when val =~ /\s*\[\^([^\]]+)\]\s*/um
          inputs ||= CharSet.new(:exclude)
          parse_combine_ranges($1, inputs)
          val.sub!($&, '')
        when val =~ /\s*\[([^\]]+)\]\s*/um
          inputs ||= CharSet.new(:include)
          parse_combine_ranges($1, inputs)
          val.sub!($&, '')
        when val =~ /^\s*\./um
          inputs ||= CharSet.new(:include)
          inputs << :any
          val.sub!($&, '')
        end
        iters += 1
      end
      return conds, inputs
    end

    # Feeds the contents of a [...] class into +input+ via <<: first the
    # leading "x-y" ranges, then each remaining single character.
    def parse_combine_ranges(raw, input)
      raw = raw.gsub(/\\[tnrfbaes\[\]]/){|m| ESCAPES[m]}
      if raw =~ /((?:.-.)*)((?:.)*)/um
        ranges  = $1
        singles = $2
        while ranges.length > 0
          _, range, ranges = ranges.partition(/.-./um)
          input << range
        end
        singles.scan(/./um).each{|s| input << s}
      end
    end
  end
end
|
data/lib/genmachine.rb
CHANGED
@@ -1,8 +1,22 @@
|
|
1
|
+
require 'genmachine/spec_parser'
|
2
|
+
require 'genmachine/generator'
|
3
|
+
require 'genmachine/generators/helpers/general'
|
4
|
+
|
5
|
+
Dir[File.join(File.dirname(__FILE__),'genmachine','generators','*.rb')].each do |fname|
|
6
|
+
name = File.basename(fname)
|
7
|
+
require "genmachine/generators/#{name}"
|
8
|
+
end
|
9
|
+
|
1
10
|
module GenMachine
|
2
|
-
class
|
3
|
-
def
|
4
|
-
|
5
|
-
|
11
|
+
class << self
|
12
|
+
def generators
|
13
|
+
Generators.constants.reduce({}) do |langs,const|
|
14
|
+
klass = Generators.const_get(const)
|
15
|
+
if klass.const_defined?('GENMACHINE_TARGET')
|
16
|
+
langs[klass::GENMACHINE_TARGET] = klass
|
17
|
+
end
|
18
|
+
langs
|
19
|
+
end
|
6
20
|
end
|
7
21
|
end
|
8
22
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: genmachine
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 29
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Joseph Wecker
|
@@ -15,8 +15,8 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-08-
|
19
|
-
default_executable:
|
18
|
+
date: 2011-08-11 00:00:00 -07:00
|
19
|
+
default_executable: genmachine
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
22
22
|
requirement: &id001 !ruby/object:Gem::Requirement
|
@@ -80,8 +80,8 @@ dependencies:
|
|
80
80
|
type: :development
|
81
81
|
description: "Takes a state table where the following are defined: state, input+conditions, accumulate-action, pre-transition-actions, and transition-to. It takes that state table and generates very fast parsers. Similar to Ragel. Currently only outputs pure Ruby."
|
82
82
|
email: joseph.wecker@gmail.com
|
83
|
-
executables:
|
84
|
-
|
83
|
+
executables:
|
84
|
+
- genmachine
|
85
85
|
extensions: []
|
86
86
|
|
87
87
|
extra_rdoc_files:
|
@@ -94,7 +94,16 @@ files:
|
|
94
94
|
- README.rdoc
|
95
95
|
- Rakefile
|
96
96
|
- VERSION
|
97
|
+
- bin/genmachine
|
98
|
+
- genmachine.gemspec
|
97
99
|
- lib/genmachine.rb
|
100
|
+
- lib/genmachine/char_set.rb
|
101
|
+
- lib/genmachine/generator.rb
|
102
|
+
- lib/genmachine/generators/helpers/general.rb
|
103
|
+
- lib/genmachine/generators/helpers/ruby.rb
|
104
|
+
- lib/genmachine/generators/ruby.rb
|
105
|
+
- lib/genmachine/generators/templates/ruby/lib.erb.rb
|
106
|
+
- lib/genmachine/spec_parser.rb
|
98
107
|
- test/helper.rb
|
99
108
|
- test/test_genmachine.rb
|
100
109
|
has_rdoc: true
|