antelope 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +3 -0
  4. data/.yardopts +4 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +29 -0
  8. data/Rakefile +2 -0
  9. data/antelope.gemspec +30 -0
  10. data/bin/antelope +24 -0
  11. data/examples/deterministic.ace +27 -0
  12. data/examples/deterministic.output +229 -0
  13. data/examples/example.ace +45 -0
  14. data/examples/example.output +610 -0
  15. data/examples/simple.ace +26 -0
  16. data/examples/simple.output +194 -0
  17. data/lib/antelope/ace/compiler.rb +290 -0
  18. data/lib/antelope/ace/errors.rb +27 -0
  19. data/lib/antelope/ace/grammar/generation.rb +47 -0
  20. data/lib/antelope/ace/grammar/loading.rb +51 -0
  21. data/lib/antelope/ace/grammar/presidence.rb +59 -0
  22. data/lib/antelope/ace/grammar/production.rb +47 -0
  23. data/lib/antelope/ace/grammar/productions.rb +119 -0
  24. data/lib/antelope/ace/grammar/terminals.rb +41 -0
  25. data/lib/antelope/ace/grammar.rb +59 -0
  26. data/lib/antelope/ace/presidence.rb +51 -0
  27. data/lib/antelope/ace/scanner/first.rb +61 -0
  28. data/lib/antelope/ace/scanner/second.rb +160 -0
  29. data/lib/antelope/ace/scanner/third.rb +25 -0
  30. data/lib/antelope/ace/scanner.rb +110 -0
  31. data/lib/antelope/ace/token/epsilon.rb +22 -0
  32. data/lib/antelope/ace/token/error.rb +24 -0
  33. data/lib/antelope/ace/token/nonterminal.rb +15 -0
  34. data/lib/antelope/ace/token/terminal.rb +15 -0
  35. data/lib/antelope/ace/token.rb +171 -0
  36. data/lib/antelope/ace.rb +50 -0
  37. data/lib/antelope/automaton.rb +36 -0
  38. data/lib/antelope/generation/conflictor/conflict.rb +7 -0
  39. data/lib/antelope/generation/conflictor.rb +45 -0
  40. data/lib/antelope/generation/constructor/first.rb +52 -0
  41. data/lib/antelope/generation/constructor/follow.rb +46 -0
  42. data/lib/antelope/generation/constructor/lookahead.rb +42 -0
  43. data/lib/antelope/generation/constructor/nullable.rb +40 -0
  44. data/lib/antelope/generation/constructor.rb +81 -0
  45. data/lib/antelope/generation/recognizer/rule.rb +93 -0
  46. data/lib/antelope/generation/recognizer/state.rb +56 -0
  47. data/lib/antelope/generation/recognizer.rb +152 -0
  48. data/lib/antelope/generation/tableizer.rb +80 -0
  49. data/lib/antelope/generation.rb +12 -0
  50. data/lib/antelope/generator/output.rb +30 -0
  51. data/lib/antelope/generator/ruby.rb +57 -0
  52. data/lib/antelope/generator/templates/output.erb +49 -0
  53. data/lib/antelope/generator/templates/ruby.erb +62 -0
  54. data/lib/antelope/generator.rb +84 -0
  55. data/lib/antelope/version.rb +4 -0
  56. data/lib/antelope.rb +9 -0
  57. data/spec/antelope/ace/compiler_spec.rb +50 -0
  58. data/spec/antelope/ace/scanner_spec.rb +27 -0
  59. data/spec/antelope/automaton_spec.rb +29 -0
  60. data/spec/spec_helper.rb +38 -0
  61. data/spec/support/benchmark_helper.rb +5 -0
  62. metadata +223 -0
@@ -0,0 +1,24 @@
1
module Antelope
  module Ace
    class Token

      # The error token.  The parser may use it internally once it has
      # entered panic mode; whatever tokens follow it serve as the
      # synchronisation tokens.  Rule definitions treat it as a
      # terminal.
      class Error < Terminal

        # Builds the error token.  Any arguments given are accepted and
        # discarded; the name is always `:$error`.
        def initialize(*_ignored)
          super(:$error)
        end

        # (see Token#error?)
        def error?
          true
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Antelope
  module Ace
    class Token

      # A token that stands for a nonterminal symbol.
      class Nonterminal < Token

        # Always true for this subclass.
        #
        # (see Token#nonterminal?)
        def nonterminal?
          true
        end
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
module Antelope
  module Ace
    class Token

      # A token that stands for a terminal symbol.
      class Terminal < Token

        # Always true for this subclass.
        #
        # (see Token#terminal?)
        def terminal?
          true
        end
      end
    end
  end
end
@@ -0,0 +1,171 @@
1
+ require "antelope/ace/token/nonterminal"
2
+ require "antelope/ace/token/terminal"
3
+ require "antelope/ace/token/epsilon"
4
+ require "antelope/ace/token/error"
5
+
6
+
7
module Antelope
  module Ace

    # Defines a token type for productions/rules.
    #
    # @abstract This class should be inherited to define a real token.
    #   A base class does not match any token; however, any token can
    #   match the base class.
    class Token

      include Comparable

      # The name of the token.
      #
      # @return [Symbol]
      attr_reader :name

      # The from state that this token is transitioned from.  This is
      # the _source_.  This is used in the constructor in order to
      # handle lookahead sets.
      #
      # @return [Recognizer::State]
      attr_accessor :from

      # The to state that this token is transitioned to.  This is the
      # _destination_.  This is used in the constructor in order to
      # handle lookahead sets.
      #
      # @return [Recognizer::State]
      attr_accessor :to

      # Initialize.  Both transition states start out as nil.
      #
      # @param name [Symbol] the name of the token.
      # @param value [String?] the value of the token.  This is only
      #   used in output representation to the developer.
      def initialize(name, value = nil)
        @name  = name
        @value = value
        @from  = nil
        @to    = nil
      end

      # Whether or not the token is a terminal.
      #
      # @abstract
      # @return [Boolean]
      def terminal?
        false
      end

      # Whether or not the token is a nonterminal.
      #
      # @abstract
      # @return [Boolean]
      def nonterminal?
        false
      end

      # Whether or not the token is an epsilon token.
      #
      # @abstract
      # @return [Boolean]
      def epsilon?
        false
      end

      # Whether or not the token is an error token.
      #
      # @abstract
      # @return [Boolean]
      def error?
        false
      end

      # Gives a string representation of the token.  The output is
      # formatted like so: `<data>["(" [<from_id>][:<to_id>] ")"]`,
      # where `<data>` is either the inspected value (if it's non-nil)
      # or the name, `<from_id>` is the from state id, and `<to_id>` is
      # the to state id.  The parenthesised part is omitted when both
      # the from state and the to state are nil.
      #
      # A new string is built on every call; the name and the value are
      # never modified.
      #
      # @return [String] the string representation.
      # @see #from
      # @see #to
      # @see #name
      def to_s
        label  = @value ? @value.inspect : @name.to_s
        suffix = ""

        if from || to
          origin = from ? from.id.to_s : ""
          target = to ? ":#{to.id}" : ""
          suffix = "(#{origin}#{target})"
        end

        # Interpolating (instead of appending to `label`) keeps a
        # String name from being modified in place.
        "#{label}#{suffix}"
      end

      # Compares this class to any other object.  If the other object
      # is a token, it converts both this class and the other object
      # to an array and compares the arrays.  Otherwise, it delegates
      # the comparison.
      #
      # @param other [Object] the other object to compare.
      # @return [Integer, nil] the result of the underlying `<=>`,
      #   which may be nil when the operands cannot be ordered.
      def <=>(other)
        return super unless other.is_a?(Token)

        to_a <=> other.to_a
      end

      # Compares this class and another object, fuzzily.  If the other
      # object is a token, it removes the transitions (to and from)
      # on both objects and compares them like that.  Otherwise, it
      # delegates the comparison.
      #
      # @param other [Object] the other object to compare.
      # @return [Boolean] if they are equal.
      def ===(other)
        return super unless other.is_a?(Token)

        without_transitions == other.without_transitions
      end

      # Creates a new token without to or from states.
      #
      # @return [Token]
      def without_transitions
        self.class.new(name, @value)
      end

      # Generates a hash for this class.
      #
      # @note This is not intended for use.  It is only defined to be
      #   compatible with Hashes (and by extension, Sets).
      # @private
      # @return [Object]
      def hash
        to_a.hash
      end

      # Hash lookups use the same equality as `==`.
      alias_method :eql?, :==

      # Creates an array representation of this class.
      #
      # @note This is not intended for use.  It is only defined to
      #   make equality checking easier, and to create a hash.
      # @private
      # @return [Array<(Recognizer::State, Recognizer::State, Class, Symbol, String?)>]
      def to_a
        [to, from, self.class, name, @value]
      end
    end
  end
end
@@ -0,0 +1,50 @@
1
+ require "antelope/ace/errors"
2
+ require "antelope/ace/scanner"
3
+ require "antelope/ace/compiler"
4
+ require "antelope/ace/token"
5
+ require "antelope/ace/presidence"
6
+ require "antelope/ace/grammar"
7
+
8
+ module Antelope
9
+
10
+ # Defines the Ace file. The Ace file format works similarly to
11
+ # bison's y file format. The Ace file is separated into three
12
+ # parts:
13
+ #
14
+ # <first>
15
+ # %%
16
+ # <second>
17
+ # %%
18
+ # <third>
19
+ #
20
+ # All parts may be empty; thus, the minimal file that Ace will
21
+ # accept would be
22
+ #
23
+ # %%
24
+ # %%
25
+ #
26
+ # The first part consists of _directives_ and _blocks_; directives
27
+ # look something like `"%" <directive>[ <argument>]*\n`, with
28
+ # `<directive>` being any alphanumerical character, including
29
+ # underscores and dashes, and `<argument>` being any word character
30
+ # or a quote-delimited string. Blocks consist of
31
+ # `"%{" <content> "\n" "\s"* "%}"`, with `<content>` being any
32
+ # characters. The content is copied directly into the body of the
33
+ # output.
34
+ #
35
+ # The second part consists of rules. Rules look something like
36
+ # this:
37
+ #
38
+ # <nonterminal>: (<nonterminal> | <terminal>)* ["{" <content> "}"] ["|" (<nonterminal> | <terminal>)* ["{" <content> "}"]]* [;]
39
+ #
40
+ # Where `<nonterminal>` is any lowercase alphabetical character,
41
+ # `<terminal>` is any uppercase alphabetical character, and
42
+ # `<content>` is code to be used in the output file upon matching
43
+ # the specific rule.
44
+ #
45
+ # The third part consists of a body, which is copied directly into
46
+ # the output.
47
+ module Ace
48
+
49
+ end
50
+ end
@@ -0,0 +1,36 @@
1
module Antelope

  # A table-driven automaton.  It holds a start state, a collection of
  # accepting states and a nested transition table, and records every
  # state it passes through on a stack.  `states` and `alphabet` are
  # stored for callers; nothing in this class reads them.
  class Automaton
    attr_accessor :states
    attr_accessor :alphabet
    attr_accessor :start
    attr_accessor :accept
    attr_accessor :transitions
    attr_accessor :stack

    # @param states [Array] the states of the automaton.
    # @param alphabet [Array] the input symbols.
    # @param start [Object, nil] the state every run begins in.
    # @param accept [Array] the accepting states.
    # @param transitions [Hash] a table indexed first by state and
    #   then by input part, giving the next state.
    def initialize(states = [], alphabet = [],
                   start = nil, accept = [], transitions = {})
      @states      = states
      @alphabet    = alphabet
      @start       = start
      @accept      = accept
      @transitions = transitions
      @stack       = []
    end

    # Feeds every part of the input through the automaton.  The stack
    # is reset to the start state, and the state produced for each
    # part is pushed onto it.
    #
    # @param input [#each] the parts to consume.
    # @yield [state, part] optional replacement for
    #   {#default_transition}; must return the next state.
    # @return [Boolean] whether the last state on the stack is one of
    #   the accepting states.
    def run(input, &block)
      block ||= method(:default_transition)

      @stack = [@start]
      input.each { |part| @stack.push(block.call(@stack.last, part)) }

      @accept.include?(@stack.last)
    end

    # Looks the next state up in the transition table.
    #
    # @param state [Object] the current state.
    # @param part [Object] the input part being consumed.
    # @return [Object, nil] the next state, or nil when the table has
    #   no row for the state or no entry for the part.  Returning nil
    #   (instead of raising on a missing row) lets {#run} keep
    #   consuming input and finally reject it.
    def default_transition(state, part)
      row = @transitions[state]
      row && row[part]
    end
  end
end
@@ -0,0 +1,7 @@
1
module Antelope
  module Generation
    class Conflictor

      # Plain record with four positional members, in this order:
      # `state`, `type`, `rules` and `token`.
      Conflict = Struct.new(*%i[state type rules token])
    end
  end
end
@@ -0,0 +1,45 @@
1
+ require "antelope/generation/conflictor/conflict"
2
+
3
module Antelope
  module Generation

    # Inspects every state of a parser and records the shift/reduce
    # and reduce/reduce conflicts it finds as {Conflict} entries.
    class Conflictor

      # The parser whose states are inspected.
      attr_accessor :parser

      # The conflicts found by the last {#recognize_conflicts} run;
      # nil until it has run once.
      #
      # @return [Array<Conflict>, nil]
      attr_accessor :conflicts

      # @param parser [#states] the parser to inspect.
      def initialize(parser)
        @parser = parser
      end

      # Runs the conflict detection.
      #
      # @see #recognize_conflicts
      def call
        recognize_conflicts
      end

      # Rebuilds {#conflicts} from scratch.  For each state the
      # shift/reduce conflicts are recorded first, followed by the
      # reduce/reduce conflicts.
      #
      # @return [Object] whatever `parser.states.each` returns.
      def recognize_conflicts
        @conflicts = []

        parser.states.each do |state|
          record_shift_reduce(state)
          record_reduce_reduce(state)
        end
      end

      private

      # Records one `:shift_reduce` conflict for each rule whose
      # lookahead contains a token that the state also has a
      # transition for.  The conflict's token set holds exactly those
      # tokens (transitions are keyed by token name, so names are
      # compared rather than the tokens themselves).
      def record_shift_reduce(state)
        state.rules.each do |rule|
          clashing = rule.lookahead.select do |tok|
            state.transitions.key?(tok.name)
          end
          next if clashing.empty?

          @conflicts << Conflict.new(state, :shift_reduce, [rule],
                                     rule.lookahead & clashing)
        end
      end

      # Records one `:reduce_reduce` conflict for each pair of final
      # rules whose lookahead sets overlap.  Every pair is examined,
      # not only rules that happen to be adjacent.
      def record_reduce_reduce(state)
        final_rules = state.rules.select(&:final?).to_a

        final_rules.combination(2) do |r1, r2|
          next unless r1.lookahead.intersect?(r2.lookahead)

          @conflicts << Conflict.new(state, :reduce_reduce, [r1, r2],
                                     r1.lookahead.intersection(r2.lookahead))
        end
      end
    end
  end
end
@@ -0,0 +1,52 @@
1
module Antelope
  module Generation
    class Constructor

      # Mixin that computes the set of terminals a token, or a
      # sequence of tokens, can begin with.  The including class must
      # provide `parser`, `nullable?` and `incorrect_argument!`.
      module First

        # Sets up the list of nonterminals currently being expanded
        # and passes initialization on up the ancestor chain.
        def initialize
          @firstifying = []
          super
        end

        # Computes the first set of the argument.
        #
        # - A nonterminal gives the union of the first sets of the
        #   item lists of its productions.
        # - An array is handled as a sequence of tokens.
        # - An epsilon token gives an empty set.
        # - A terminal gives a set containing just that terminal.
        #
        # Anything else is reported through `incorrect_argument!`.
        #
        # @param token [Ace::Token, Array<Ace::Token>]
        # @return [Set<Ace::Token>]
        def first(token)
          case token
          when Ace::Token::Nonterminal
            firstifying(token) do
              parser.productions[token.name].
                map { |prod| first(prod[:items]) }.
                inject(Set.new, :+)
            end
          when Array
            first_array(token)
          when Ace::Token::Epsilon
            Set.new
          when Ace::Token::Terminal
            Set.new([token])
          else
            incorrect_argument! token, Ace::Token, Array
          end
        end

        private

        # First set of a token sequence.  Tokens that are currently
        # being expanded are left out so that recursion terminates.
        # Of the remaining tokens, the leading one always contributes;
        # each later one contributes only while the token directly
        # before it in the remaining sequence is nullable.
        def first_array(token)
          pending = token.reject { |tok| @firstifying.include?(tok) }

          reachable = pending.each_with_index.take_while do |_, i|
            # The index refers to `pending`, so the predecessor must
            # be read from `pending` as well.
            i.zero? || nullable?(pending[i - 1])
          end

          reachable.map { |tok, _| first(tok) }.inject(Set.new, :+)
        end

        # Marks the token as in progress for the duration of the
        # block and returns the block's value.  The mark is removed
        # again even when the block raises.
        def firstifying(tok)
          @firstifying << tok
          yield
        ensure
          @firstifying.delete tok
        end
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
module Antelope
  module Generation
    class Constructor

      # Mixin that computes, for a nonterminal, the set of tokens that
      # may come directly after it.  Results are memoized per
      # nonterminal name.  The including class must provide `parser`,
      # `first`, `nullable?` and `incorrect_argument!`.
      module Follow

        # Sets up the memo table and passes initialization on up the
        # ancestor chain.
        def initialize
          @follows = {}
          super
        end

        # Computes the follow set of a nonterminal.
        #
        # Every production of the parser is scanned for items with the
        # requested name.  For each occurrence, the first set of the
        # items after it is added; when the occurrence is the last
        # item, or the items after it are nullable, the follow set of
        # the production's own nonterminal is added as well.
        #
        # @param token [Ace::Token::Nonterminal, Symbol] the
        #   nonterminal, or just its name.
        # @return [Set<Ace::Token>]
        def follow(token)
          # The Symbol check has to come first: a Symbol does not
          # respond to `nonterminal?`.
          if token.is_a?(Symbol)
            # already a name
          elsif token.respond_to?(:nonterminal?) && token.nonterminal?
            token = token.name
          else
            incorrect_argument! token, Ace::Token::Nonterminal, Symbol
          end

          @follows.fetch(token) do
            # Seed the memo with an empty set so that a recursive
            # request for the same name returns instead of looping.
            @follows[token] = Set.new
            set = Set.new

            parser.productions.each do |key, value|
              value.each do |production|
                items = production[:items]
                positions = items.each_index.
                  select { |i| items[i].name == token }.
                  map(&:succ)

                positions.each { |pos| set.merge first(items[pos..-1]) }

                positions.each do |pos|
                  if pos == items.size || nullable?(items[pos..-1])
                    set.merge follow(Ace::Token::Nonterminal.new(key))
                  end
                end
              end
            end

            @follows[token] = set
          end
        end
      end
    end
  end
end
@@ -0,0 +1,42 @@
1
module Antelope
  module Generation
    class Constructor

      # Mixin that computes lookahead sets and memoizes them per
      # `[left, right]` pair.  The including class supplies `parser`,
      # `nullable?`, `first` and `follow`.
      module Lookahead

        # Creates the memo table, then continues up the ancestor
        # chain.
        def initialize
          @lookaheads = {}
          super
        end

        # Lookahead set for a production, or for a whole nonterminal.
        #
        # With a right-hand side, the result is the first set of that
        # side, extended by the follow set of the left-hand side when
        # the right-hand side is nullable.  Without one, the result is
        # the union of the lookaheads of every production of `left`.
        #
        # @param left [Object] the left-hand side; used as the key
        #   into `parser.productions`.
        # @param right [Array, nil] the items of one production.
        # @return [Set]
        def lookahead(left, right = nil)
          key = [left, right]

          @lookaheads.fetch(key) do
            @lookaheads[key] =
              if right
                tokens = if nullable?(right)
                  first(right) + follow(left)
                else
                  first(right)
                end

                Set.new + tokens
              else
                lookahead_nonterminal(left)
              end
          end
        end

        private

        # Union of the lookahead sets of all productions of `left`.
        def lookahead_nonterminal(left)
          parser.productions[left].inject(Set.new) do |combined, production|
            combined + lookahead(left, production[:items])
          end
        end
      end
    end
  end
end
@@ -0,0 +1,40 @@
1
module Antelope
  module Generation
    class Constructor

      # Mixin that decides whether a token, or a sequence of tokens,
      # can derive nothing at all.  The including class must provide
      # `parser` and `incorrect_argument!`.
      module Nullable

        # Sets up the list of nonterminals currently being evaluated
        # and passes initialization on up the ancestor chain, like the
        # other constructor mixins do.
        def initialize
          @nullifying = []
          super
        end

        # Whether the argument is nullable.
        #
        # - A nonterminal is nullable when the item list of at least
        #   one of its productions is nullable.
        # - An array is nullable when every token in it is nullable;
        #   an empty array therefore is.
        # - An epsilon token is nullable; a terminal is not.
        #
        # Anything else is reported through `incorrect_argument!`.
        #
        # @param token [Ace::Token, Array<Ace::Token>]
        # @return [Boolean]
        def nullable?(token)
          case token
          when Ace::Token::Nonterminal
            nullifying(token) do
              parser.productions[token.name].
                any? { |prod| nullable?(prod[:items]) }
            end
          when Array
            # A token that is still being evaluated further up the
            # call chain counts as not nullable here.  This ends the
            # recursion, and a production that only leads back to
            # itself (A -> B, B -> A) proves nothing about A.
            token.all? do |tok|
              !@nullifying.include?(tok) && nullable?(tok)
            end
          when Ace::Token::Epsilon
            true
          when Ace::Token::Terminal
            false
          else
            incorrect_argument! token, Ace::Token, Array
          end
        end

        private

        # Marks the token as in progress for the duration of the
        # block and returns the block's value.  The mark is removed
        # again even when the block raises.
        def nullifying(tok)
          @nullifying << tok
          yield
        ensure
          @nullifying.delete tok
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require "set"
2
+ require "antelope/generation/constructor/nullable"
3
+ require "antelope/generation/constructor/first"
4
+ require "antelope/generation/constructor/follow"
5
+ require "antelope/generation/constructor/lookahead"
6
+
7
module Antelope
  module Generation

    # Annotates the rules found in a parser's states.  A first pass
    # records, on the tokens of every rule that is at position zero,
    # the states they lead from and to; a second pass assigns a
    # lookahead set to the rule reached after following the whole
    # right-hand side.
    class Constructor

      include Nullable
      include First
      include Follow
      include Lookahead

      # The parser being worked on.
      attr_reader :parser

      # The rules collected by {#augment_state}, without duplicates.
      #
      # @return [Array]
      attr_reader :productions

      # @param parser [Object] must respond to `states`; the mixins
      #   additionally read `productions` from it.
      def initialize(parser)
        @parser      = parser
        @productions = []
        super()
      end

      # Runs {#augment_state} over every state, then {#augment_rules}
      # over every state.  The first pass is finished for all states
      # before the second one starts.
      #
      # @return [Array] the collected productions.
      def call
        states = parser.states

        states.each { |state| augment_state(state) }
        states.each { |state| augment_rules(state) }

        @productions
      end

      # Records transition information on the rules of the state that
      # are at position zero.  The left-hand token gets the state as
      # its source and the state's transition on the token's name as
      # its destination.  The right-hand side is then followed one
      # transition at a time, and every nonterminal on the way gets
      # the state it was reached in and the state it leads to.
      # Finally the rule is added to {#productions} unless it is
      # already there.
      #
      # @param state [Recognizer::State]
      # @return [void]
      def augment_state(state)
        initial_rules(state).each do |rule|
          rule.left.from = state
          rule.left.to   = state.transitions[rule.left.name]

          current_state = state

          rule.right.each do |part|
            transition = current_state.transitions[part.name]

            if part.nonterminal?
              part.from = current_state
              part.to   = transition
            end

            current_state = transition
          end

          productions << rule unless productions.include?(rule)
        end
      end

      # For each rule of the state that is at position zero, follows
      # the right-hand side through the transitions, asks the state
      # reached for its counterpart of the rule (`rule_for`) and sets
      # that rule's lookahead to the follow set of the left-hand side.
      #
      # @param state [Recognizer::State]
      # @return [void]
      def augment_rules(state)
        initial_rules(state).each do |rule|
          last_state = rule.right.inject(state) do |current, part|
            current.transitions[part.name]
          end

          final = last_state.rule_for(rule)
          final.lookahead = follow(rule.left)
        end
      end

      private

      # The rules of the state whose position is zero.
      def initial_rules(state)
        state.rules.select { |rule| rule.position.zero? }
      end

      # Raises the error used by the mixins for unsupported arguments.
      #
      # @raise [ArgumentError] always.
      def incorrect_argument!(arg, *types)
        raise ArgumentError, "Expected one of #{types.join(", ")}, got #{arg.class}"
      end
    end
  end
end