antelope 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +3 -0
  4. data/.yardopts +4 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +29 -0
  8. data/Rakefile +2 -0
  9. data/antelope.gemspec +30 -0
  10. data/bin/antelope +24 -0
  11. data/examples/deterministic.ace +27 -0
  12. data/examples/deterministic.output +229 -0
  13. data/examples/example.ace +45 -0
  14. data/examples/example.output +610 -0
  15. data/examples/simple.ace +26 -0
  16. data/examples/simple.output +194 -0
  17. data/lib/antelope/ace/compiler.rb +290 -0
  18. data/lib/antelope/ace/errors.rb +27 -0
  19. data/lib/antelope/ace/grammar/generation.rb +47 -0
  20. data/lib/antelope/ace/grammar/loading.rb +51 -0
  21. data/lib/antelope/ace/grammar/presidence.rb +59 -0
  22. data/lib/antelope/ace/grammar/production.rb +47 -0
  23. data/lib/antelope/ace/grammar/productions.rb +119 -0
  24. data/lib/antelope/ace/grammar/terminals.rb +41 -0
  25. data/lib/antelope/ace/grammar.rb +59 -0
  26. data/lib/antelope/ace/presidence.rb +51 -0
  27. data/lib/antelope/ace/scanner/first.rb +61 -0
  28. data/lib/antelope/ace/scanner/second.rb +160 -0
  29. data/lib/antelope/ace/scanner/third.rb +25 -0
  30. data/lib/antelope/ace/scanner.rb +110 -0
  31. data/lib/antelope/ace/token/epsilon.rb +22 -0
  32. data/lib/antelope/ace/token/error.rb +24 -0
  33. data/lib/antelope/ace/token/nonterminal.rb +15 -0
  34. data/lib/antelope/ace/token/terminal.rb +15 -0
  35. data/lib/antelope/ace/token.rb +171 -0
  36. data/lib/antelope/ace.rb +50 -0
  37. data/lib/antelope/automaton.rb +36 -0
  38. data/lib/antelope/generation/conflictor/conflict.rb +7 -0
  39. data/lib/antelope/generation/conflictor.rb +45 -0
  40. data/lib/antelope/generation/constructor/first.rb +52 -0
  41. data/lib/antelope/generation/constructor/follow.rb +46 -0
  42. data/lib/antelope/generation/constructor/lookahead.rb +42 -0
  43. data/lib/antelope/generation/constructor/nullable.rb +40 -0
  44. data/lib/antelope/generation/constructor.rb +81 -0
  45. data/lib/antelope/generation/recognizer/rule.rb +93 -0
  46. data/lib/antelope/generation/recognizer/state.rb +56 -0
  47. data/lib/antelope/generation/recognizer.rb +152 -0
  48. data/lib/antelope/generation/tableizer.rb +80 -0
  49. data/lib/antelope/generation.rb +12 -0
  50. data/lib/antelope/generator/output.rb +30 -0
  51. data/lib/antelope/generator/ruby.rb +57 -0
  52. data/lib/antelope/generator/templates/output.erb +49 -0
  53. data/lib/antelope/generator/templates/ruby.erb +62 -0
  54. data/lib/antelope/generator.rb +84 -0
  55. data/lib/antelope/version.rb +4 -0
  56. data/lib/antelope.rb +9 -0
  57. data/spec/antelope/ace/compiler_spec.rb +50 -0
  58. data/spec/antelope/ace/scanner_spec.rb +27 -0
  59. data/spec/antelope/automaton_spec.rb +29 -0
  60. data/spec/spec_helper.rb +38 -0
  61. data/spec/support/benchmark_helper.rb +5 -0
  62. metadata +223 -0
@@ -0,0 +1,24 @@
1
module Antelope
  module Ace
    class Token

      # The error token. The parser may use it internally when it
      # enters panic mode; the tokens that follow it in a rule are the
      # synchronisation tokens. For the purposes of rule definitions
      # it counts as a terminal.
      class Error < Terminal

        # Builds the error token. Any arguments given are accepted and
        # ignored; the name is always `:$error`.
        def initialize(*)
          super(:$error)
        end

        # (see Token#error?)
        #
        # @return [true]
        def error?
          true
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Antelope
  module Ace
    class Token

      # A nonterminal token: a token whose {#nonterminal?} predicate
      # answers true.
      class Nonterminal < Token

        # (see Token#nonterminal?)
        #
        # @return [true]
        def nonterminal?
          true
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Antelope
  module Ace
    class Token

      # A terminal token: a token whose {#terminal?} predicate answers
      # true.
      class Terminal < Token

        # (see Token#terminal?)
        #
        # @return [true]
        def terminal?
          true
        end
      end
    end
  end
end
@@ -0,0 +1,171 @@
1
+ require "antelope/ace/token/nonterminal"
2
+ require "antelope/ace/token/terminal"
3
+ require "antelope/ace/token/epsilon"
4
+ require "antelope/ace/token/error"
5
+
6
+
7
module Antelope
  module Ace

    # Defines a token type for productions/rules.
    #
    # @abstract This class should be inherited to define a real token.
    #   A base class does not match any token; however, any token can
    #   match the base class.
    class Token

      include Comparable

      # The name of the token.
      #
      # @return [Symbol]
      attr_reader :name

      # The from state that this token is transitioned from. This is
      # the _source_. This is used in the constructor in order to
      # handle lookahead sets.
      #
      # @return [Recognizer::State]
      attr_accessor :from

      # The to state that this token is transitioned to. This is the
      # _destination_. This is used in the constructor in order to
      # handle lookahead sets.
      #
      # @return [Recognizer::State]
      attr_accessor :to

      # Initialize.
      #
      # @param name [Symbol] the name of the token.
      # @param value [String?] the value of the token. This is only
      #   used in output representation to the developer.
      def initialize(name, value = nil)
        @name  = name
        @value = value
        @from  = nil
        @to    = nil
      end

      # Whether or not the token is a terminal.
      #
      # @abstract
      # @return [Boolean]
      def terminal?
        false
      end

      # Whether or not the token is a nonterminal.
      #
      # @abstract
      # @return [Boolean]
      def nonterminal?
        false
      end

      # Whether or not the token is an epsilon token.
      #
      # @abstract
      # @return [Boolean]
      def epsilon?
        false
      end

      # Whether or not the token is an error token.
      #
      # @abstract
      # @return [Boolean]
      def error?
        false
      end

      # Gives a string representation of the token. The output is
      # formatted like so: `<data>["(" [<from_id>][:<to_id>] ")"]`,
      # where `<data>` is either the inspected value (if it's non-nil)
      # or the name, `<from_id>` is the from state id, and `<to_id>`
      # is the to state id. The parenthesised part is omitted when
      # both the from state and the to state are nil.
      #
      # A fresh string is returned on every call. The result is never
      # built by appending to `@name.to_s`, because for a String name
      # that is the name object itself and appending would corrupt the
      # token (or raise if the name is frozen).
      #
      # @return [String] the string representation.
      # @see #from
      # @see #to
      # @see #name
      def to_s
        label = @value ? @value.inspect : @name.to_s
        return label.dup unless from || to

        source = from ? from.id.to_s : ""
        target = to ? ":#{to.id}" : ""
        "#{label}(#{source}#{target})"
      end

      # Compares this class to any other object. If the other object
      # is a token, it converts both this class and the other object
      # to an array and compares the array. Otherwise, it delegates
      # the comparison.
      #
      # @param other [Object] the other object to compare.
      # @return [Numeric]
      def <=>(other)
        if other.is_a? Token
          to_a <=> other.to_a
        else
          super
        end
      end

      # Compares this class and another object, fuzzily. If the other
      # object is a token, it removes the transitions (to and from)
      # on both objects and compares them like that. Otherwise, it
      # delegates the comparison.
      #
      # @param other [Object] the other object to compare.
      # @return [Boolean] if they are equal.
      def ===(other)
        if other.is_a? Token
          without_transitions == other.without_transitions
        else
          super
        end
      end

      # Creates a new token of the same class, with the same name and
      # value, but without to or from states.
      #
      # @return [Token]
      def without_transitions
        self.class.new(name, @value)
      end

      # Generates a hash for this class.
      #
      # @note This is not intended for use. It is only defined to be
      #   compatible with Hashes (and by extension, Sets).
      # @private
      # @return [Object]
      def hash
        to_a.hash
      end

      # `==` here is the one supplied by Comparable, so `eql?` follows
      # the same array-based comparison as `<=>`.
      alias_method :eql?, :==

      # Creates an array representation of this class.
      #
      # @note This is not intended for use. It is only defined to
      #   make equality checking easier, and to create a hash.
      # @private
      # @return [Array<(Recognizer::State, Recognizer::State, Class, Symbol, String?)>]
      def to_a
        [to, from, self.class, name, @value]
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require "antelope/ace/errors"
2
+ require "antelope/ace/scanner"
3
+ require "antelope/ace/compiler"
4
+ require "antelope/ace/token"
5
+ require "antelope/ace/presidence"
6
+ require "antelope/ace/grammar"
7
+
8
+ module Antelope
9
+
10
+ # Defines the Ace file. The Ace file format works similarly to
11
+ # bison's y file format. The Ace file is separated into three
12
+ # parts:
13
+ #
14
+ # <first>
15
+ # %%
16
+ # <second>
17
+ # %%
18
+ # <third>
19
+ #
20
+ # All parts may be empty; thus, the minimal file that Ace will
21
+ # accept would be
22
+ #
23
+ # %%
24
+ # %%
25
+ #
26
+ # The first part consists of _directives_ and _blocks_; directives
27
+ # look something like `"%" <directive>[ <argument>]*\n`, with
28
+ # `<directive>` being any alphanumerical character, including
29
+ # underscores and dashes, and `<argument>` being any word character
30
+ # or a quote-delimited string. Blocks consist of
31
+ # `"%{" <content> "\n" "\s"* "%}"`, with `<content>` being any
32
+ # characters. The content is copied directly into the body of the
33
+ # output.
34
+ #
35
+ # The second part consists of rules. Rules look something like
36
+ # this:
37
+ #
38
+ # <nonterminal>: (<nonterminal> | <terminal>)* ["{" <content> "}"] ["|" (<nonterminal> | <terminal>)* ["{" <content> "}"]]* [;]
39
+ #
40
+ # Where `<nonterminal>` is any lowercase alphabetical character,
41
+ # `<terminal>` is any uppercase alphabetical character, and
42
+ # `<content>` is code to be used in the output file upon matching
43
+ # the specific rule.
44
+ #
45
+ # The third part consists of a body, which is copied directly into
46
+ # the output.
47
+ module Ace
48
+
49
+ end
50
+ end
@@ -0,0 +1,36 @@
1
module Antelope

  # A small table-driven automaton. It holds a set of states, an
  # alphabet, a start state, a list of accepting states and a
  # transition table, and can be run over an input sequence.
  class Automaton
    attr_accessor :states
    attr_accessor :alphabet
    attr_accessor :start
    attr_accessor :accept
    attr_accessor :transitions
    attr_accessor :stack

    # @param states [Array] the states of the automaton.
    # @param alphabet [Array] the input symbols.
    # @param start [Object, nil] the state a run begins in.
    # @param accept [Array] the accepting states.
    # @param transitions [Hash] the transition table; it is read as
    #   `transitions[state][part]` by {#default_transition}.
    def initialize(states = [], alphabet = [],
                   start = nil, accept = [], transitions = {})
      @states      = states
      @alphabet    = alphabet
      @start       = start
      @accept      = accept
      @transitions = transitions
      @stack       = []
    end

    # Runs the automaton over the input. The stack is reset to the
    # start state; for every part of the input the next state is
    # computed from the state on top of the stack and pushed.
    #
    # @param input [#each] the sequence of input parts.
    # @yield [state, part] optional replacement for
    #   {#default_transition}; its result is pushed as the next state.
    # @return [Boolean] whether the last state on the stack is one of
    #   the accepting states.
    def run(input, &block)
      step = block || method(:default_transition)

      @stack = [@start]

      input.each do |part|
        @stack.push(step.call(@stack.last, part))
      end

      @accept.include? @stack.last
    end

    # Looks up the next state in the transition table.
    #
    # @param state [Object] the current state.
    # @param part [Object] the current input part.
    # @return [Object, nil] the next state, or nil when the table has
    #   no row for the state or no entry for the part. Returning nil
    #   (instead of failing on a missing row) lets {#run} reject
    #   input that walks off the table.
    def default_transition(state, part)
      row = @transitions[state]
      row[part] if row
    end
  end
end
@@ -0,0 +1,7 @@
1
module Antelope
  module Generation
    class Conflictor

      # A plain record describing one conflict. Members, in order:
      # the state the conflict was found in, the conflict type, the
      # rules involved, and the token data recorded for the conflict.
      Conflict = Struct.new(
        :state,
        :type,
        :rules,
        :token
      )
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require "antelope/generation/conflictor/conflict"
2
+
3
module Antelope
  module Generation

    # Walks the states of a parser and records the shift/reduce and
    # reduce/reduce conflicts it finds.
    class Conflictor

      # The parser whose states are inspected.
      attr_accessor :parser

      # The conflicts found by the last {#recognize_conflicts} run.
      #
      # @return [Array<Conflict>, nil] nil until the first run.
      attr_accessor :conflicts

      # @param parser [#states] the parser to inspect.
      def initialize(parser)
        @parser = parser
      end

      # Runs the conflict recognition.
      #
      # @see #recognize_conflicts
      def call
        recognize_conflicts
      end

      # Resets {#conflicts} and fills it by checking every state of
      # the parser for shift/reduce and reduce/reduce conflicts.
      #
      # @return [Object] whatever `parser.states.each` returns.
      def recognize_conflicts
        @conflicts = []

        parser.states.each do |state|
          shift_reduce_conflicts(state)
          reduce_reduce_conflicts(state)
        end
      end

      private

      # Records a :shift_reduce conflict for every rule of the state
      # whose lookahead contains a token the state also has a
      # transition for. The conflict carries exactly those tokens.
      def shift_reduce_conflicts(state)
        state.rules.each do |rule|
          # Subtracting the unaffected tokens keeps the container type
          # of the lookahead (a Set stays a Set).
          unshifted = rule.lookahead.reject { |tok| state.transitions.key?(tok.name) }
          shifted   = rule.lookahead - unshifted
          next if shifted.empty?

          @conflicts << Conflict.new(state, :shift_reduce, [rule], shifted)
        end
      end

      # Records a :reduce_reduce conflict for every pair of final
      # rules of the state whose lookaheads overlap. Every pair is
      # checked, not only neighbouring rules.
      def reduce_reduce_conflicts(state)
        state.rules.select(&:final?).to_a.combination(2) do |r1, r2|
          next unless r1.lookahead.intersect? r2.lookahead

          @conflicts << Conflict.new(state,
                                     :reduce_reduce,
                                     [r1, r2],
                                     r1.lookahead.intersection(r2.lookahead))
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module Antelope
  module Generation
    class Constructor

      # Computes FIRST sets. Expects the including class to provide
      # `parser` (responding to `productions`), `nullable?` and
      # `incorrect_argument!`.
      module First

        # Sets up the list of nonterminals whose FIRST set is
        # currently being computed (the recursion guard).
        def initialize
          @firstifying = []
          super
        end

        # Computes the FIRST set of a token or of a sequence of
        # tokens.
        #
        # @param token [Ace::Token, Array<Ace::Token>]
        # @return [Set<Ace::Token::Terminal>] empty for an epsilon
        #   token; the token itself for a terminal; the union over all
        #   productions for a nonterminal.
        # @raise [ArgumentError] (via `incorrect_argument!`) for any
        #   other kind of argument.
        def first(token)
          case token
          when Ace::Token::Nonterminal
            firstifying(token) do
              parser.productions[token.name].
                map { |prod| first(prod[:items]) }.
                inject(Set.new, :+)
            end
          when Array
            first_array(token)
          when Ace::Token::Epsilon
            Set.new
          when Ace::Token::Terminal
            Set.new([token])
          else
            incorrect_argument! token, Ace::Token, Array
          end
        end

        private

        # FIRST of a sequence: the union of the FIRST sets of its
        # leading symbols, where a symbol counts as leading while the
        # symbol directly before it is nullable.
        #
        # The nullable check walks the sequence as given, so the
        # positions always refer to the right symbol. Nonterminals
        # whose FIRST set is already being computed further up the
        # call stack are skipped only when recursing (they add
        # nothing new); they still block later symbols unless they
        # are nullable.
        def first_array(token)
          reachable = token.each_with_index.take_while do |_, i|
            i.zero? || nullable?(token[i - 1])
          end

          reachable.map(&:first).
            reject { |tok| @firstifying.include?(tok) }.
            map { |tok| first(tok) }.
            inject(Set.new, :+)
        end

        # Marks the token as in progress for the duration of the
        # block and returns the block's value. The mark is removed
        # even when the block raises.
        def firstifying(tok)
          @firstifying << tok
          yield
        ensure
          @firstifying.delete tok
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
module Antelope
  module Generation
    class Constructor

      # Computes FOLLOW sets. Expects the including class to provide
      # `parser` (responding to `productions`), `first`, `nullable?`
      # and `incorrect_argument!`.
      module Follow

        # Sets up the cache of FOLLOW sets, keyed by nonterminal name.
        def initialize
          @follows = {}
          super
        end

        # Computes (and caches) the FOLLOW set of a nonterminal.
        #
        # @param token [Ace::Token::Nonterminal, Symbol] the
        #   nonterminal, or just its name.
        # @return [Set] the tokens that may follow the nonterminal.
        # @raise [ArgumentError] (via `incorrect_argument!`) when the
        #   argument is neither a Symbol nor a nonterminal token.
        def follow(token)
          # The Symbol check has to come first: a Symbol does not
          # respond to `nonterminal?`.
          unless token.is_a? Symbol
            unless token.respond_to?(:nonterminal?) && token.nonterminal?
              incorrect_argument! token, Ace::Token::Nonterminal, Symbol
            end

            token = token.name
          end

          @follows.fetch(token) do
            # Placeholder so that a recursive request for the same
            # name terminates instead of looping forever.
            # NOTE(review): a set computed for another name while this
            # placeholder is in use is cached without this name's
            # final contents - confirm whether that is acceptable.
            @follows[token] = Set.new
            set = Set.new

            parser.productions.each do |key, value|
              value.each do |production|
                items = production[:items]

                # Index directly after every occurrence of the name.
                positions = items.each_index.
                  select { |i| items[i].name == token }.
                  map(&:succ)

                # Whatever can start the rest of the production
                # follows the name.
                positions.each { |pos| set.merge first(items[pos..-1]) }

                # When the rest can vanish, whatever follows the
                # production's own left-hand side follows as well.
                positions.each do |pos|
                  if pos == items.size || nullable?(items[pos..-1])
                    set.merge follow(Ace::Token::Nonterminal.new(key))
                  end
                end
              end
            end

            @follows[token] = set
          end
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
module Antelope
  module Generation
    class Constructor

      # Computes lookahead sets on top of `nullable?`, `first` and
      # `follow`, which the including class has to provide together
      # with `parser` (responding to `productions`).
      module Lookahead

        # Sets up the cache of lookahead sets, keyed by the
        # `[left, right]` pair they were requested for.
        def initialize
          @lookaheads = {}
          super
        end

        # Returns the (cached) lookahead set. With a right-hand side
        # it is FIRST(right), extended by FOLLOW(left) when the
        # right-hand side is nullable. Without one it is the union of
        # the lookahead sets of all productions of `left`.
        #
        # @param left [Object] the left-hand side; used as the key
        #   into `parser.productions` and passed to `follow`.
        # @param right [Array, nil] the right-hand side items.
        # @return [Set]
        def lookahead(left, right = nil)
          cache_key = [left, right]
          return @lookaheads[cache_key] if @lookaheads.key?(cache_key)

          result =
            if right
              vanishes = nullable?(right)
              tokens = first(right)
              tokens += follow(left) if vanishes
              Set.new.merge(tokens)
            else
              lookahead_nonterminal(left)
            end

          @lookaheads[cache_key] = result
        end

        private

        # Union of the lookahead sets of every production of `left`.
        def lookahead_nonterminal(left)
          parser.productions[left].inject(Set.new) do |acc, production|
            acc + lookahead(left, production[:items])
          end
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Antelope
  module Generation
    class Constructor

      # Decides whether tokens can derive the empty string. Expects
      # the including class to provide `parser` (responding to
      # `productions`) and `incorrect_argument!`.
      module Nullable

        # Sets up the list of nonterminals currently being examined
        # (the recursion guard) and continues the initializer chain,
        # like the sibling modules do.
        def initialize
          @nullifying = []
          super
        end

        # Whether the token, or every token of the sequence, can
        # derive the empty string.
        #
        # @param token [Ace::Token, Array<Ace::Token>]
        # @return [Boolean] true for an epsilon token; false for a
        #   terminal; for a nonterminal, true when at least one of
        #   its productions is nullable; for a sequence, true when
        #   all of its members are (so true for an empty sequence).
        # @raise [ArgumentError] (via `incorrect_argument!`) for any
        #   other kind of argument.
        def nullable?(token)
          case token
          when Ace::Token::Nonterminal
            nullifying(token) do
              parser.productions[token.name].
                any? { |prod| nullable?(prod[:items]) }
            end
          when Array
            # A nonterminal that is still being examined cannot help
            # to prove itself nullable, so it counts as non-nullable
            # here. Dropping it from the sequence would make rules
            # such as `a: a a | X` look nullable.
            token.all? do |tok|
              !@nullifying.include?(tok) && nullable?(tok)
            end
          when Ace::Token::Epsilon
            true
          when Ace::Token::Terminal
            false
          else
            incorrect_argument! token, Ace::Token, Array
          end
        end

        private

        # Marks the token as in progress for the duration of the
        # block and returns the block's value. The mark is removed
        # even when the block raises.
        def nullifying(tok)
          @nullifying << tok
          yield
        ensure
          @nullifying.delete tok
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require "set"
2
+ require "antelope/generation/constructor/nullable"
3
+ require "antelope/generation/constructor/first"
4
+ require "antelope/generation/constructor/follow"
5
+ require "antelope/generation/constructor/lookahead"
6
+
7
module Antelope
  module Generation

    # Annotates the states and rules of a parser: it attaches the
    # from/to states to the tokens of every rule and assigns the
    # lookahead sets of the completed rules.
    class Constructor

      include Nullable
      include First
      include Follow
      include Lookahead

      # The parser whose states are annotated.
      attr_reader :parser

      # The rules collected while augmenting the states.
      #
      # @return [Array]
      attr_reader :productions

      # @param parser [#states] the parser to work on.
      def initialize(parser)
        @parser      = parser
        @productions = []
        # Explicit empty argument list: the included modules define
        # parameterless initializers.
        super()
      end

      # Augments every state first, then assigns the lookahead sets
      # for every state.
      #
      # @return [Array] the collected productions.
      def call
        states = parser.states

        states.each { |state| augment_state(state) }
        states.each { |state| augment_rules(state) }

        @productions
      end

      # For every rule of the state that is at position zero, sets
      # the from/to states on the rule's left side and on each
      # nonterminal of its right side, following the transitions
      # state by state, and records the rule in {#productions} once.
      #
      # @param state [Recognizer::State]
      def augment_state(state)
        initial_rules(state).each do |rule|
          rule.left.from = state
          rule.left.to   = state.transitions[rule.left.name]

          current_state = state

          rule.right.each do |part|
            transition = current_state.transitions[part.name]

            if part.nonterminal?
              part.from = current_state
              part.to   = transition
            end

            current_state = transition
          end

          productions << rule unless productions.include?(rule)
        end
      end

      # For every rule of the state that is at position zero,
      # follows the transitions over the rule's right side to the
      # state it ends in, and sets the lookahead of the matching rule
      # there to the FOLLOW set of the rule's left side.
      #
      # @param state [Recognizer::State]
      def augment_rules(state)
        initial_rules(state).each do |rule|
          last_state = rule.right.inject(state) do |current, part|
            current.transitions[part.name]
          end

          final = last_state.rule_for(rule)
          final.lookahead = follow(rule.left)
        end
      end

      private

      # The rules of the state whose position is zero.
      def initial_rules(state)
        state.rules.select { |x| x.position.zero? }
      end

      # Raises the error used by the included modules to reject an
      # argument of the wrong kind.
      #
      # @raise [ArgumentError] always.
      def incorrect_argument!(arg, *types)
        raise ArgumentError, "Expected one of #{types.join(", ")}, got #{arg.class}"
      end
    end
  end
end
+ end