aurum 0.1.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +29 -0
- data/examples/dangling_else/grammar.rb +23 -0
- data/examples/expression/grammar.rb +28 -0
- data/examples/smalltalk/grammar.rb +151 -0
- data/examples/smalltalk/interpreter.rb +70 -0
- data/examples/yacc/grammar.rb +72 -0
- data/lib/aurum.rb +1 -9
- data/lib/aurum/engine.rb +39 -175
- data/lib/aurum/engine/parsing_facility.rb +107 -0
- data/lib/aurum/engine/tokenization_facility.rb +86 -0
- data/lib/aurum/grammar.rb +52 -219
- data/lib/aurum/grammar/automata.rb +194 -0
- data/lib/aurum/grammar/builder/augmented_grammar.rb +83 -0
- data/lib/aurum/grammar/builder/dot_logger.rb +66 -0
- data/lib/aurum/grammar/builder/lexical_table_builder.rb +55 -0
- data/lib/aurum/grammar/builder/parsing_table_builder.rb +238 -0
- data/lib/aurum/grammar/builder/set_of_items.rb +190 -0
- data/lib/aurum/grammar/compiled_tables.rb +20 -0
- data/lib/aurum/grammar/dsl/lexical_definition.rb +94 -0
- data/lib/aurum/grammar/dsl/syntax_definition.rb +79 -0
- data/lib/aurum/grammar/lexical_rules.rb +224 -0
- data/lib/aurum/grammar/metalang/grammar.rb +47 -0
- data/lib/aurum/grammar/syntax_rules.rb +95 -0
- data/spec/builder/dsl_definition/aurum_grammar_spec.rb +33 -0
- data/spec/engine/lexer_spec.rb +59 -0
- data/spec/engine/parser_spec.rb +90 -0
- data/spec/examples/dangling_else_example.rb +30 -0
- data/spec/examples/expression_example.rb +48 -0
- data/spec/examples/smalltalk_example.rb +50 -0
- data/spec/examples/yacc_spec.rb +30 -0
- data/spec/grammar/builder/lexical_table/automata_spec.rb +55 -0
- data/spec/grammar/builder/lexical_table/builder_spec.rb +78 -0
- data/spec/grammar/builder/lexical_table/character_set_spec.rb +100 -0
- data/spec/grammar/builder/lexical_table/pattern_spec.rb +11 -0
- data/spec/grammar/builder/lexical_table/regular_expression.rb +40 -0
- data/spec/grammar/builder/parsing_table/augmented_grammar_spec.rb +36 -0
- data/spec/grammar/builder/parsing_table/builder_spec.rb +152 -0
- data/spec/grammar/builder/parsing_table/digraph_traverser_spec.rb +42 -0
- data/spec/grammar/builder/parsing_table/item_spec.rb +51 -0
- data/spec/grammar/builder/parsing_table/sources_spec.rb +66 -0
- data/spec/grammar/builder/parsing_table/state_spec.rb +82 -0
- data/spec/grammar/dsl/character_classes_builder_spec.rb +50 -0
- data/spec/grammar/dsl/lexical_rules_builder_spec.rb +181 -0
- data/spec/grammar/dsl/precedence_builder_spec.rb +64 -0
- data/spec/grammar/dsl/productions_builder_spec.rb +78 -0
- data/spec/grammar/metalang/metalang_spec.rb +0 -0
- data/spec/grammar/precedence_spec.rb +42 -0
- data/spec/grammar/syntax_rules_spec.rb +31 -0
- data/spec/parser_matcher.rb +69 -0
- data/spec/pattern_matcher.rb +123 -0
- data/spec/spec_helper.rb +133 -0
- metadata +70 -36
- data/example/expression/expression.rb +0 -35
- data/example/expression/lisp.rb +0 -26
- data/lib/aurum/lexical_table_generator.rb +0 -429
- data/lib/aurum/parsing_table_generator.rb +0 -464
- data/test/engine/lexer_test.rb +0 -59
- data/test/engine/semantic_attributes_test.rb +0 -15
- data/test/grammar_definition/character_class_definition_test.rb +0 -28
- data/test/grammar_definition/grammar_definition_test.rb +0 -55
- data/test/grammar_definition/lexical_definition_test.rb +0 -56
- data/test/grammar_definition/operator_precedence_definition_test.rb +0 -35
- data/test/grammar_definition/production_definition_test.rb +0 -60
- data/test/lexical_table_generator/automata_test.rb +0 -74
- data/test/lexical_table_generator/character_set_test.rb +0 -73
- data/test/lexical_table_generator/interval_test.rb +0 -36
- data/test/lexical_table_generator/pattern_test.rb +0 -115
- data/test/lexical_table_generator/subset_determinizer_test.rb +0 -19
- data/test/lexical_table_generator/table_generator_test.rb +0 -126
- data/test/parsing_table_generator/augmented_grammar_test.rb +0 -45
- data/test/parsing_table_generator/lalr_n_computation_test.rb +0 -92
- data/test/parsing_table_generator/lr_0_automata_test.rb +0 -94
- data/test/parsing_table_generator/lr_item_test.rb +0 -27
- data/test/parsing_table_generator/parsing_table_state_test.rb +0 -39
- data/test/parsing_table_generator/precedence_table_test.rb +0 -28
- data/test/parsing_table_generator/production_test.rb +0 -9
- data/test/test_helper.rb +0 -103
data/Rakefile
ADDED
@@ -0,0 +1,29 @@
|
|
1
|
+
require 'rake'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/gempackagetask'
|
4
|
+
require 'spec/rake/spectask'
|
5
|
+
|
6
|
+
spec = Gem::Specification.new do |s|
|
7
|
+
s.name = 'aurum'
|
8
|
+
s.version = '0.2.0'
|
9
|
+
s.author = 'Vincent Xu'
|
10
|
+
s.email = 'x at bjug dot org'
|
11
|
+
s.homepage = 'http://rubyforge.org/projects/aurum'
|
12
|
+
s.platform = Gem::Platform::RUBY
|
13
|
+
s.summary = 'Aurum is a LALR(n) parser generator written in Ruby.'
|
14
|
+
s.files = FileList['{examples,lib,spec}/**/*', 'Rakefile'].to_a
|
15
|
+
s.require_path = 'lib'
|
16
|
+
s.autorequire = 'aurum'
|
17
|
+
s.has_rdoc = false
|
18
|
+
end
|
19
|
+
|
20
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
21
|
+
pkg.need_tar = true
|
22
|
+
end
|
23
|
+
|
24
|
+
Spec::Rake::SpecTask.new('spec') do |t|
|
25
|
+
t.spec_files = FileList['spec/**/*_spec.rb', 'spec/**/*_example.rb']
|
26
|
+
t.spec_opts = ['--format', 'html:result.html', '--format', 'specdoc']
|
27
|
+
end
|
28
|
+
|
29
|
+
task :default => [:spec]
|
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'aurum'
|
2
|
+
module Aurum
  module Examples
    # Example grammar for if/else statements that uses two statement
    # nonterminals:
    #
    # * +statement+            - an if with or without an else branch, or a
    #                            simple statement.
    # * +restricted_statement+ - an if that always carries an else branch, or
    #                            a simple statement.
    #
    # Only a +restricted_statement+ may appear between the condition and an
    # 'else'. Each if-production's semantic action stores an s-expression of
    # the form [:if, condition, then_branch, else_branch_or_nil] in the
    # +s_exp+ attribute of the production's left-hand side.
    #
    # Inside an action, the bare name refers to the left-hand side (or to the
    # symbol when it occurs only on the right), and numbered names
    # (statement1, restricted_statement2, ...) refer to right-hand
    # occurrences of a symbol that is also the left-hand side.
    class DanglingElse < Aurum::Grammar
      tokens do
        ignore enum(" \r\n").one_or_more
      end

      productions do
        statement 'if', '(', expression, ')', restricted_statement,
                  'else', statement {statement.s_exp = [:if, expression.s_exp, restricted_statement.s_exp, statement1.s_exp]}
        statement 'if', '(', expression, ')',
                  statement {statement.s_exp = [:if, expression.s_exp, statement1.s_exp, nil]}
        statement simple_statement
        # The result must be stored on restricted_statement (the left-hand
        # side of this production), not on statement, otherwise the enclosing
        # production finds nothing in restricted_statement.s_exp.
        restricted_statement 'if', '(', expression, ')', restricted_statement,
                             'else', restricted_statement {restricted_statement.s_exp = [:if, expression.s_exp, restricted_statement1.s_exp, restricted_statement2.s_exp]}
        restricted_statement simple_statement
        simple_statement 'other' do simple_statement.s_exp = [:other] end
        expression 'expr' do expression.s_exp = [:expr] end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'aurum'
|
2
|
+
module Aurum
|
3
|
+
module Examples
|
4
|
+
class ExpressionGrammar < Aurum::Grammar
|
5
|
+
tokens do
|
6
|
+
ignore string(' ').one_or_more
|
7
|
+
_number range(?0, ?9).one_or_more
|
8
|
+
end
|
9
|
+
|
10
|
+
precedences do
|
11
|
+
left '*', '/'
|
12
|
+
left '+', '-'
|
13
|
+
end
|
14
|
+
|
15
|
+
productions do
|
16
|
+
expression expression, '+', expression {expression.value = expression1.value + expression2.value}
|
17
|
+
expression expression, '-', expression {expression.value = expression1.value - expression2.value}
|
18
|
+
expression expression, '*', expression {expression.value = expression1.value * expression2.value}
|
19
|
+
expression expression, '/', expression {expression.value = expression1.value / expression2.value}
|
20
|
+
expression '(', expression, ')' do expression.value = expression1.value end
|
21
|
+
expression _number {expression.value = _number.value.to_i}
|
22
|
+
expression '+', _number {expression.value = _number.value.to_i}
|
23
|
+
expression '-', _number {expression.value = -_number.value.to_i}
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
@@ -0,0 +1,151 @@
|
|
1
|
+
require 'aurum'
|
2
|
+
=begin
|
3
|
+
module Smalltalk
|
4
|
+
class UnaryMessageSending < Aurum::Grammar
|
5
|
+
productions do
|
6
|
+
unary_expression higher_precedence, unary_messages do
|
7
|
+
unary_expression.s_exp = [:unary, higher_precedence.s_exp, unary_messages.s_exp]
|
8
|
+
end
|
9
|
+
unary_messages unary_messages, _message do
|
10
|
+
unary_messages.s_exp = unary_messages1.s_exp + [_message.value]
|
11
|
+
end
|
12
|
+
unary_messages _message do
|
13
|
+
unary_messages.s_exp = [_message.value]
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
class BinaryMessageSending < Aurum::Grammar
|
19
|
+
productions do
|
20
|
+
binary_expression higher_precedence, binary_messages do
|
21
|
+
binary_expression.s_exp = [:binary, higher_precedence.s_exp, binary_messages.s_exp]
|
22
|
+
end
|
23
|
+
binary_messages binary_messages, binary_message do
|
24
|
+
binary_messages.s_exp = binary_messages1.s_exp + binary_message.s_exp
|
25
|
+
end
|
26
|
+
binary_messages binary_message
|
27
|
+
binary_message _binary_message, higher_precedence do
|
28
|
+
binary_message.s_exp = [[_binary_message.value, higher_precedence.s_exp]]
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
class KeywordMessageSending < Aurum::Grammar
|
34
|
+
productions do
|
35
|
+
keyword_expression higher_precedence, keyword_messages do
|
36
|
+
keyword_expression.s_exp = [:keyword, higher_precedence.s_exp, keyword_messages.s_exp]
|
37
|
+
end
|
38
|
+
keyword_messages keyword_messages, keyword_message do
|
39
|
+
keyword_messages.s_exp = keyword_messages1.s_exp + keyword_message.s_exp
|
40
|
+
end
|
41
|
+
keyword_messages keyword_message
|
42
|
+
keyword_message _keyword, higher_precedence do
|
43
|
+
keyword_message.s_exp = [[_keyword.value, higher_precedence.s_exp]]
|
44
|
+
end
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
48
|
+
class Block < Aurum::Grammar
|
49
|
+
productions do
|
50
|
+
block '[', block_parameters, '|', sentences, ']' do
|
51
|
+
block.s_exp = [:block, block_parameters.s_exp, sentences.s_exp]
|
52
|
+
end
|
53
|
+
block '[', sentences, ']' do
|
54
|
+
block.s_exp = [:block, [:params], sentences.s_exp]
|
55
|
+
end
|
56
|
+
block_parameters block_parameters, ':', _identifier do
|
57
|
+
block_parameters.s_exp = block_parameters1.s_exp + [_identifier.value]
|
58
|
+
end
|
59
|
+
block_parameters ':', _identifier do
|
60
|
+
block_parameters.s_exp = [:params, _identifier.value]
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class Grammar < Aurum::Grammar
|
66
|
+
character_classes do
|
67
|
+
letter range(?a, ?z) + range(?A, ?Z)
|
68
|
+
identifier_tail letter + range(?0, ?9) + enum('_')
|
69
|
+
end
|
70
|
+
|
71
|
+
tokens do
|
72
|
+
comment double_quote, ~double_quote
|
73
|
+
whitespaces enum(" \r\n").one_or_more
|
74
|
+
|
75
|
+
identifier letter, identifier_tail.zero_or_more
|
76
|
+
|
77
|
+
_identifier identifier
|
78
|
+
_binary_selector enum('!%&*+/<=>?@~,\\').one_or_more
|
79
|
+
_keyword identifier, ':'
|
80
|
+
ignore comment
|
81
|
+
ignore whitespaces
|
82
|
+
end
|
83
|
+
|
84
|
+
include_grammar UnaryMessageSending, :syntax => {:higher_precedence => :primary, :_message => :_identifier}
|
85
|
+
include_grammar BinaryMessageSending, :syntax => {:higher_precedence => :higher_precedence_binary, :_binary_message => :_binary_selector}
|
86
|
+
include_grammar KeywordMessageSending,:syntax => {:higher_precedence => :higher_precedence_keyword}
|
87
|
+
include_grammar Block
|
88
|
+
|
89
|
+
productions do
|
90
|
+
program variables, sentences do
|
91
|
+
program.s_exp = [:program, variables.s_exp, sentences.s_exp]
|
92
|
+
end
|
93
|
+
|
94
|
+
variables '|', identifiers, '|' do
|
95
|
+
variables.s_exp = [] + identifiers.s_exp
|
96
|
+
end
|
97
|
+
variables _ do
|
98
|
+
variables.s_exp = []
|
99
|
+
end
|
100
|
+
|
101
|
+
identifiers identifiers, _identifier do
|
102
|
+
identifiers.s_exp = identifiers1.s_exp + [_identifier.value]
|
103
|
+
end
|
104
|
+
identifiers _ do
|
105
|
+
identifiers.s_exp = []
|
106
|
+
end
|
107
|
+
|
108
|
+
sentences sentences, expression, '.' do
|
109
|
+
sentences.s_exp = sentences1.s_exp + [expression.s_exp]
|
110
|
+
end
|
111
|
+
sentences _ do
|
112
|
+
sentences.s_exp = []
|
113
|
+
end
|
114
|
+
sentence expression, '.' do
|
115
|
+
sentence.s_exp = expression.s_exp
|
116
|
+
end
|
117
|
+
|
118
|
+
expression _identifier, ':=', message_expression do
|
119
|
+
expression.s_exp = [:assign, _identifier.value, message_expression.s_exp]
|
120
|
+
end
|
121
|
+
expression _identifier, ':=', primary do
|
122
|
+
expression.s_exp = [:assign, _identifier.value, primary.s_exp]
|
123
|
+
end
|
124
|
+
expression message_expression
|
125
|
+
expression primary
|
126
|
+
|
127
|
+
primary _identifier do
|
128
|
+
primary.s_exp = [:var, _identifier.value]
|
129
|
+
end
|
130
|
+
primary block
|
131
|
+
primary '(', message_expression, ')' do
|
132
|
+
primary.s_exp = message_expression.s_exp
|
133
|
+
end
|
134
|
+
primary '(', primary, ')' do
|
135
|
+
primary.s_exp = primary1.s_exp
|
136
|
+
end
|
137
|
+
|
138
|
+
message_expression unary_expression
|
139
|
+
message_expression binary_expression
|
140
|
+
message_expression keyword_expression
|
141
|
+
|
142
|
+
higher_precedence_binary primary
|
143
|
+
higher_precedence_binary unary_expression
|
144
|
+
|
145
|
+
higher_precedence_keyword primary
|
146
|
+
higher_precedence_keyword unary_expression
|
147
|
+
higher_precedence_keyword binary_expression
|
148
|
+
end
|
149
|
+
end
|
150
|
+
end
|
151
|
+
=end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), 'grammar')
|
2
|
+
require 'stringio'
|
3
|
+
=begin
|
4
|
+
class Smalltalk::Interpreter
|
5
|
+
@@parsing_table = Smalltalk::Grammar.parsing_table :program
|
6
|
+
@@lexical_table = Smalltalk::Grammar.lexical_table
|
7
|
+
|
8
|
+
def initialize context = {}
|
9
|
+
@parser = Aurum::Engine::Parser.new(@@parsing_table)
|
10
|
+
@context = context
|
11
|
+
end
|
12
|
+
|
13
|
+
def run source
|
14
|
+
@lexer = Aurum::Engine::Lexer.new(@@lexical_table, source)
|
15
|
+
eval_s_exp(@parser.parse(@lexer).s_exp)
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
def program variables, sentences
|
20
|
+
variables.each {|variable| @context[variable] = nil}
|
21
|
+
sentences.each {|sentence| eval_s_exp(sentence)}
|
22
|
+
end
|
23
|
+
|
24
|
+
def var name
|
25
|
+
@context[name]
|
26
|
+
end
|
27
|
+
|
28
|
+
def unary object_s_exp, unary_messages
|
29
|
+
unary_messages.inject((eval_s_exp(object_s_exp))) {|obj, message| obj = obj.send(message)}
|
30
|
+
end
|
31
|
+
|
32
|
+
def binary object_s_exp, binary_messages
|
33
|
+
binary_messages.inject((eval_s_exp(object_s_exp))) {|obj, message| obj = obj.send(message.first, eval_s_exp(message.last))}
|
34
|
+
end
|
35
|
+
|
36
|
+
def keyword object_s_exp, keyword_messages
|
37
|
+
obj = eval_s_exp(object_s_exp)
|
38
|
+
messages, values = '', []
|
39
|
+
for keyword_message in keyword_messages
|
40
|
+
messages << keyword_message.first
|
41
|
+
values << eval_s_exp(keyword_message.last)
|
42
|
+
end
|
43
|
+
obj.send(messages.gsub(':', '_'), values)
|
44
|
+
end
|
45
|
+
|
46
|
+
def eval_s_exp s_exp
|
47
|
+
self.send(s_exp.first, *s_exp[1..-1])
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
class Transcript
|
52
|
+
def self.show_ message
|
53
|
+
puts message
|
54
|
+
end
|
55
|
+
|
56
|
+
def self.hello
|
57
|
+
show_ 'Hello'
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
interpreter = Smalltalk::Interpreter.new 'Transcript' => Transcript,
|
62
|
+
'helloMessage' => 'Hello World'
|
63
|
+
|
64
|
+
interpreter.run(<<-EOF)
|
65
|
+
"keyword message sending"
|
66
|
+
Transcript show: helloMessage.
|
67
|
+
"unary message sending"
|
68
|
+
Transcript hello.
|
69
|
+
EOF
|
70
|
+
=end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
require 'aurum'
|
2
|
+
module Aurum
|
3
|
+
module Examples
|
4
|
+
class YaccGrammar < Aurum::Grammar
|
5
|
+
character_classes do
|
6
|
+
letter range(?a, ?z) + range(?A, ?Z)
|
7
|
+
id_tail range(?a, ?z) + range(?A, ?Z) + range(?0, ?9) + string('_')
|
8
|
+
comment_char any - enum('*')
|
9
|
+
end
|
10
|
+
|
11
|
+
tokens do
|
12
|
+
multi_lines_comment '/*', ~ string('*/')
|
13
|
+
single_line_comment '//', ~ (string("\r") | string("\n"))
|
14
|
+
|
15
|
+
_identifier letter, id_tail.zero_or_more
|
16
|
+
_literal "'", ~ string("'")
|
17
|
+
_declaration_code '%{', ~ string('%}')
|
18
|
+
_union_name '<', ~ string('>')
|
19
|
+
_source_code '{', ~ string('}')
|
20
|
+
|
21
|
+
|
22
|
+
ignore enum(" \r\n\t").one_or_more
|
23
|
+
ignore multi_lines_comment
|
24
|
+
ignore single_line_comment
|
25
|
+
end
|
26
|
+
|
27
|
+
productions do
|
28
|
+
grammar tokens, '%%', rules, end_marker
|
29
|
+
|
30
|
+
tokens tokens, token
|
31
|
+
tokens _
|
32
|
+
|
33
|
+
token '%start', _identifier
|
34
|
+
token '%union', _source_code
|
35
|
+
token '%token', union_name, symbols
|
36
|
+
token '%left', union_name, symbols
|
37
|
+
token '%right', union_name, symbols
|
38
|
+
token '%nonassoc', union_name, symbols
|
39
|
+
token '%type', union_name, symbols
|
40
|
+
token _declaration_code
|
41
|
+
|
42
|
+
symbols symbols, symbol
|
43
|
+
symbols symbol
|
44
|
+
|
45
|
+
symbol _identifier
|
46
|
+
symbol _literal
|
47
|
+
|
48
|
+
end_marker '%%'
|
49
|
+
end_marker _
|
50
|
+
|
51
|
+
union_name _union_name
|
52
|
+
union_name _
|
53
|
+
|
54
|
+
rules rules, rule
|
55
|
+
rules _
|
56
|
+
|
57
|
+
rule _identifier, ':', rule_body, ';'
|
58
|
+
rule_body rule_body, '|', rule_handle
|
59
|
+
rule_body rule_handle
|
60
|
+
|
61
|
+
rule_handle symbols, prec, source_code
|
62
|
+
rule_handle source_code
|
63
|
+
|
64
|
+
prec '%prec', symbol
|
65
|
+
prec _
|
66
|
+
|
67
|
+
source_code _source_code
|
68
|
+
source_code _
|
69
|
+
end
|
70
|
+
end
|
71
|
+
end
|
72
|
+
end
|
data/lib/aurum.rb
CHANGED
@@ -1,10 +1,2 @@
|
|
1
|
-
require 'aurum/lexical_table_generator'
|
2
|
-
require 'aurum/parsing_table_generator'
|
3
|
-
require 'aurum/engine'
|
4
1
|
require 'aurum/grammar'
|
5
|
-
|
6
|
-
Enumerable.class_eval do
|
7
|
-
def grep_each condition
|
8
|
-
condition.kind_of?(Proc) ? each {|x| yield x if condition.call x} : each {|x| yield x if eval(condition.to_s)}
|
9
|
-
end
|
10
|
-
end
|
2
|
+
require 'aurum/engine'
|
data/lib/aurum/engine.rb
CHANGED
@@ -1,175 +1,39 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
until @recognized
|
41
|
-
next_state, char = @start_state, nil
|
42
|
-
while next_state
|
43
|
-
lexeme << char if char
|
44
|
-
state, char = next_state, @input.get_char
|
45
|
-
next_state = goto state, char
|
46
|
-
end
|
47
|
-
@input.pushback char
|
48
|
-
return Unknown unless actions = @accepts[state]
|
49
|
-
if actions.first == IgnoreAction
|
50
|
-
lexeme = ''
|
51
|
-
else
|
52
|
-
actions.first.execute self, lexeme
|
53
|
-
end
|
54
|
-
end
|
55
|
-
@recognized.value = lexeme unless @recognized.value
|
56
|
-
@recognized
|
57
|
-
end
|
58
|
-
|
59
|
-
def pushback symbol
|
60
|
-
@pushback_symbol = symbol
|
61
|
-
end
|
62
|
-
|
63
|
-
def goto state, input
|
64
|
-
return nil unless input
|
65
|
-
next_state = @table[state].find {|tran| tran.symbols.include?(input)}
|
66
|
-
next_state ? next_state.destination : nil
|
67
|
-
end
|
68
|
-
|
69
|
-
def shift_to state
|
70
|
-
@start_state = goto 0, -@lexical_states.index(state)-1
|
71
|
-
end
|
72
|
-
|
73
|
-
def recognize token
|
74
|
-
@recognized = Aurum::Symbol.new token, true
|
75
|
-
end
|
76
|
-
end
|
77
|
-
|
78
|
-
RecognizeTokenAction.class_eval do
|
79
|
-
def execute lexer, lexeme
|
80
|
-
recognized = lexer.recognize token
|
81
|
-
action.call recognized, lexeme if action
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
ChangeStateAction.class_eval do
|
86
|
-
def execute lexer, lexeme
|
87
|
-
lexer.shift_to state
|
88
|
-
end
|
89
|
-
end
|
90
|
-
|
91
|
-
RecognizeTokenAndChangeStateAction.class_eval do
|
92
|
-
def execute lexer, lexeme
|
93
|
-
lexer.recognize token
|
94
|
-
lexer.shift_to state
|
95
|
-
end
|
96
|
-
end
|
97
|
-
|
98
|
-
class Parser
|
99
|
-
def initialize productions, parsing_table
|
100
|
-
@productions, @parsing_table = productions, parsing_table
|
101
|
-
end
|
102
|
-
|
103
|
-
def parse lexer
|
104
|
-
lookahead, state_stack, symbol_stack, value_stack = lexer.next_symbol, [0], [], []
|
105
|
-
lookahead_shift = 0
|
106
|
-
while (true)
|
107
|
-
state = @parsing_table[state_stack.last]
|
108
|
-
action = state[lookahead]
|
109
|
-
if action.kind_of? ShiftAction
|
110
|
-
state_stack.push action.state
|
111
|
-
symbol_stack.push lookahead unless action.is_lookahead_shift
|
112
|
-
action.is_lookahead_shift ? lookahead_shift += 1 : lookahead = lexer.next_symbol
|
113
|
-
elsif action.kind_of? ReduceAction
|
114
|
-
handle = @productions[action.handle]
|
115
|
-
lookahead_shift.times { state_stack.pop }
|
116
|
-
lookahead_shift = 0
|
117
|
-
if action.is_read_reduce
|
118
|
-
state_stack.push state
|
119
|
-
symbol_stack.push lookahead
|
120
|
-
lookahead = lexer.next_symbol
|
121
|
-
end
|
122
|
-
state_stack.slice! -handle.symbols.length..-1
|
123
|
-
symbols = symbol_stack.slice! -handle.symbols.length..-1
|
124
|
-
handle.nonterminal == Aurum::START and return value_stack.pop
|
125
|
-
if handle.action
|
126
|
-
context = {handle.nonterminal.name => [SemanticAttributes.new]}
|
127
|
-
handle.symbols.reverse.each_with_index do |symbol, index|
|
128
|
-
context[symbol.name] = [] unless context.has_key? symbol.name
|
129
|
-
context[symbol.name] << (symbol.is_terminal ? symbols[-index-1] : value_stack.pop)
|
130
|
-
end
|
131
|
-
SemanticActionContext.new(context).instance_eval &handle.action
|
132
|
-
value_stack.push context[handle.nonterminal.name][0] if context[handle.nonterminal.name]
|
133
|
-
end
|
134
|
-
goto = @parsing_table[state_stack.last][handle.nonterminal]
|
135
|
-
if goto.kind_of? ShiftAction
|
136
|
-
state_stack.push goto.state
|
137
|
-
symbol_stack.push nil
|
138
|
-
else
|
139
|
-
lexer.pushback lookahead
|
140
|
-
lookahead = handle.nonterminal
|
141
|
-
end
|
142
|
-
else
|
143
|
-
error_recover
|
144
|
-
end
|
145
|
-
end
|
146
|
-
end
|
147
|
-
|
148
|
-
class SemanticActionContext
|
149
|
-
instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new' || m=='instance_eval'}
|
150
|
-
def initialize hash
|
151
|
-
@hash = hash
|
152
|
-
end
|
153
|
-
|
154
|
-
def method_missing name, *args
|
155
|
-
name_string = name.to_s
|
156
|
-
index = name_string =~ /\d+/ ? name_string.slice!(/\d+/).to_i : 0
|
157
|
-
@hash[name_string][-index] and return @hash[name_string][-index]
|
158
|
-
SemanticAttributes.new
|
159
|
-
end
|
160
|
-
end
|
161
|
-
|
162
|
-
class SemanticAttributes
|
163
|
-
instance_methods.each { |m| undef_method m unless m =~ /^__/ || m == 'new'}
|
164
|
-
def initialize
|
165
|
-
@hash = {}
|
166
|
-
end
|
167
|
-
|
168
|
-
def method_missing name, *args
|
169
|
-
name_string = name.to_s
|
170
|
-
return @hash[name_string] unless name_string[-1] == 61
|
171
|
-
@hash[name_string.slice(0..-2)] = args.first
|
172
|
-
end
|
173
|
-
end
|
174
|
-
end
|
175
|
-
end
|
1
|
+
require File.join(File.dirname(__FILE__), 'engine/parsing_facility')
|
2
|
+
require File.join(File.dirname(__FILE__), 'engine/tokenization_facility.rb')
|
3
|
+
|
4
|
+
module Aurum
  # Factory for grammar-specific parser classes.
  #
  # Parser.new does not return a Parser instance: it returns a fresh anonymous
  # class that mixes in the parsing engine modules and exposes the tables
  # obtained from +grammar.start_from(start_symbol)+ through the private
  # readers +parsing_table+ and +semantic_actions+.
  class Parser
    # grammar      - object responding to +start_from(start_symbol)+, which
    #                must return a [parsing_table, semantic_actions] pair.
    # start_symbol - passed through to +grammar.start_from+ unchanged.
    #
    # Returns a new anonymous Class.
    def Parser.new grammar, start_symbol
      parsing_table, semantic_actions = grammar.start_from(start_symbol)
      Class.new do
        include Aurum::Engine::BasicParsingCapability
        include Aurum::Engine::SemanticActionExecutable
        # The tables are captured in closures rather than stored in @@class
        # variables: a class variable written inside a Class.new block
        # belongs to the lexically enclosing class (Aurum::Parser), so every
        # generated parser class would share - and overwrite - one table.
        define_method(:parsing_table) { parsing_table }
        define_method(:semantic_actions) { semantic_actions }
        private :parsing_table, :semantic_actions
      end
    end
  end

  # Factory for grammar-specific lexer classes.
  #
  # Lexer.new returns a fresh anonymous class that mixes in the tokenization
  # engine module and exposes +grammar.lexical_table+ through the private
  # reader +lexical_table+. Instances of the returned class are created with
  # the input to tokenize.
  class Lexer
    # grammar - object responding to +lexical_table+.
    #
    # Returns a new anonymous Class whose constructor takes the input.
    def Lexer.new grammar
      lexical_table = grammar.lexical_table
      Class.new do
        include Aurum::Engine::BasicTokenizationCapability
        # input - wrapped in an Aurum::Engine::PushbackString.
        def initialize input
          @input = Aurum::Engine::PushbackString.new(input)
          @states, @line, @column, @push_back = [], 0, 0, []
          shift_to('initial')
        end
        # Closure instead of a @@class variable, for the same reason as in
        # Parser.new: each generated lexer class keeps its own table.
        define_method(:lexical_table) { lexical_table }
        private :lexical_table
      end
    end
  end
end
|