antelope 0.0.1 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +1 -0
  3. data/antelope.gemspec +0 -2
  4. data/bin/antelope +3 -20
  5. data/examples/deterministic.output +97 -103
  6. data/examples/example.ace +0 -1
  7. data/examples/example.output +259 -333
  8. data/examples/simple.output +85 -87
  9. data/lib/antelope/ace/compiler.rb +14 -16
  10. data/lib/antelope/ace/errors.rb +9 -3
  11. data/lib/antelope/ace/grammar/generation.rb +38 -7
  12. data/lib/antelope/ace/grammar/precedences.rb +59 -0
  13. data/lib/antelope/ace/grammar/production.rb +24 -25
  14. data/lib/antelope/ace/grammar/productions.rb +8 -8
  15. data/lib/antelope/ace/grammar.rb +3 -3
  16. data/lib/antelope/ace/{presidence.rb → precedence.rb} +11 -11
  17. data/lib/antelope/ace/scanner/second.rb +2 -2
  18. data/lib/antelope/ace/token.rb +1 -1
  19. data/lib/antelope/ace.rb +2 -2
  20. data/lib/antelope/cli.rb +33 -0
  21. data/lib/antelope/errors.rb +6 -0
  22. data/lib/antelope/generation/constructor/first.rb +40 -6
  23. data/lib/antelope/generation/constructor/follow.rb +83 -25
  24. data/lib/antelope/generation/constructor/nullable.rb +24 -2
  25. data/lib/antelope/generation/constructor.rb +39 -13
  26. data/lib/antelope/generation/errors.rb +15 -0
  27. data/lib/antelope/generation/recognizer/rule.rb +111 -11
  28. data/lib/antelope/generation/recognizer/state.rb +53 -5
  29. data/lib/antelope/generation/recognizer.rb +31 -1
  30. data/lib/antelope/generation/tableizer.rb +42 -10
  31. data/lib/antelope/generation.rb +1 -1
  32. data/lib/antelope/generator/templates/output.erb +19 -18
  33. data/lib/antelope/version.rb +1 -1
  34. data/lib/antelope.rb +3 -2
  35. metadata +7 -36
  36. data/lib/antelope/ace/grammar/presidence.rb +0 -59
  37. data/lib/antelope/automaton.rb +0 -36
  38. data/lib/antelope/generation/conflictor/conflict.rb +0 -7
  39. data/lib/antelope/generation/conflictor.rb +0 -45
  40. data/lib/antelope/generation/constructor/lookahead.rb +0 -42
@@ -4,10 +4,26 @@ require "securerandom"
4
4
  module Antelope
5
5
  module Generation
6
6
  class Recognizer
7
+
8
+ # A state within the parser. A state has a set of rules, as
9
+ # well as transitions on those rules.
7
10
  class State
8
11
 
12
+ # All of the rules in this state.
13
+ #
14
+ # @return [Set<Rule>]
9
15
  attr_reader :rules
16
+
17
+ # All of the transitions that can be made on this state.
18
+ #
19
+ # @return [Hash<(Symbol, State)>]
10
20
  attr_reader :transitions
21
+
22
+ # The id of this state. This starts off as a string of
23
+ # hexadecimal characters, but after all of the states are
24
+ # finalized, this becomes a numeric.
25
+ #
26
+ # @return [String, Numeric]
11
27
  attr_accessor :id
12
28
 
13
29
  include Enumerable
@@ -15,19 +31,31 @@ module Antelope
15
31
 
16
32
  def_delegator :@rules, :each
17
33
 
34
+ # Initialize the state.
18
35
  def initialize
19
36
  @rules = Set.new
20
- @transitions = {} #Hash.new { |hash, key| hash[key] = State.new }
37
+ @transitions = {}
21
38
  @id = SecureRandom.hex
22
39
  end
23
40
 
41
+ # Gives a nice string representation of the state.
42
+ #
43
+ # @return [String]
24
44
  def inspect
25
- "#<#{self.class} id=#{id} transitions=[#{transitions.keys.join(", ")}] rules=[{#{rules.to_a.join("} {")}}]>"
45
+ "#<#{self.class} id=#{id} " \
46
+ "transitions=[#{transitions.keys.join(", ")}] " \
47
+ "rules=[{#{rules.to_a.join("} {")}}]>"
26
48
  end
27
49
 
50
+ # Merges another state with this state. It copies all of the
51
+ # rules into this state, and then merges the transitions on
52
+ # the given state to this state. It then returns self.
53
+ #
54
+ # @raise [ArgumentError] if the given argument is not a state.
55
+ # @param other [State] the state to merge.
56
+ # @return [self]
28
57
  def merge!(other)
29
- return if other == :_ignore
30
- raise ArgumentError, "Expected #{self.class}, " +
58
+ raise ArgumentError, "Expected #{self.class}, " \
31
59
  "got #{other.class}" unless other.is_a? State
32
60
 
33
61
  self << other
@@ -36,16 +64,36 @@ module Antelope
36
64
  self
37
65
  end
38
66
 
67
+ # Finds the rule that matches the given production. It
68
+ # uses fuzzy equality checking. It returns the first rule
69
+ # that matches.
70
+ #
71
+ # @param production [Rule] the rule to compare.
72
+ # @return [Rule?]
39
73
  def rule_for(production)
40
74
  rules.find { |rule| production === rule }
41
75
  end
42
76
 
77
+ # Appends the given object to this state. The given object
78
+ # must be a state or a rule. If it's a state, it appends all
79
+ # of the rules in the state to this state. If it's a rule, it
80
+ # adds the rule to our rules.
81
+ #
82
+ # @raise [ArgumentError] if the argument isn't a {State} or a
83
+ # {Rule}.
84
+ # @param rule [State, Rule] the object to append.
85
+ # @return [self]
43
86
  def <<(rule)
44
87
  if rule.is_a? State
45
88
  rule.rules.each { |r| self << r }
46
- else
89
+ elsif rule.is_a? Rule
47
90
  rules << rule unless rules.include? rule
91
+ else
92
+ raise ArgumentError, "Expected #{State} or #{Rule}, " \
93
+ "got #{rule.class}"
48
94
  end
95
+
96
+ self
49
97
  end
50
98
 
51
99
  alias_method :push, :<<
@@ -80,7 +80,10 @@ module Antelope
80
80
  # Computes all states. Uses a fix point iteration to determine
81
81
  # when no states have been added. Loops through every state and
82
82
  # every rule, looking for rules that have an active nonterminal
83
- # and computing
83
+ # and computing the closure for said rule.
84
+ #
85
+ # @return [void]
86
+ # @see #compute_closure
84
87
  def compute_states
85
88
  fixed_point(states) do
86
89
  states.dup.each do |state|
@@ -104,6 +107,11 @@ module Antelope
104
107
  end
105
108
  end
106
109
 
110
+ # Given a state, it does a fixed point iteration on the rules of
111
+ # the state that have an active nonterminal, and adds the
112
+ # corresponding production rules to the state.
113
+ #
114
+ # @return [void]
107
115
  def compute_closure(state)
108
116
  fixed_point(state.rules) do
109
117
  state.rules.select { |_| _.active.nonterminal? }.each do |rule|
@@ -116,16 +124,29 @@ module Antelope
116
124
 
117
125
  private
118
126
 
127
+ # Finds a state that includes a specific rule, or yields the rule.
128
+ #
129
+ # @param rule [Rule]
130
+ # @yield [rule]
131
+ # @return [State]
119
132
  def find_state_for(rule)
120
133
  states.find { |state| state.include?(rule) } or yield(rule)
121
134
  end
122
135
 
136
+ # Changes the IDs of the states into a more friendly format.
137
+ #
138
+ # @return [void]
123
139
  def redefine_state_ids
124
140
  states.each_with_index do |state, i|
125
141
  state.id = i
126
142
  end
127
143
  end
128
144
 
145
+ # Redefines all of the rule ids to make them more friendly.
146
+ # Every rule in every state is given a unique ID, regardless of
147
+ # whether the rules are equivalent.
148
+ #
149
+ # @return [void]
129
150
  def redefine_rule_ids
130
151
  start = 0
131
152
 
@@ -137,6 +158,15 @@ module Antelope
137
158
  end
138
159
  end
139
160
 
161
+ # Begins a fixed point iteration on the given enumerable. It
162
+ # initializes the added elements to one; then, while the number
163
+ # of added elements is not zero, it yields and checks for added
164
+ # elements.
165
+ #
166
+ # @param enum [Enumerable]
167
+ # @yield for every iteration. Guaranteed to do so at least
168
+ # once.
169
+ # @return [void]
140
170
  def fixed_point(enum)
141
171
  added = 1
142
172
 
@@ -1,30 +1,55 @@
1
1
  module Antelope
2
2
  module Generation
3
3
 
4
- class UnresolvableConflictError < StandardError; end
5
-
4
+ # Constructs the table required for the parser.
6
5
  class Tableizer
7
6
 
8
- attr_accessor :parser
7
+ # The grammar that the table is based off of.
8
+ #
9
+ # @return [Ace::Grammar]
10
+ attr_accessor :grammar
11
+
12
+ # The table itself.
13
+ #
14
+ # @return [Array<Hash<(Symbol, Array<(Symbol, Numeric)>)>>]
9
15
  attr_accessor :table
16
+
17
+ # All rules in the grammar.
18
+ #
19
+ # @return [Hash<(Numeric, Recognizer::Rule)>]
10
20
  attr_accessor :rules
11
21
 
12
- def initialize(parser)
13
- @parser = parser
22
+ # Initialize.
23
+ #
24
+ # @param grammar [Ace::Grammar]
25
+ def initialize(grammar)
26
+ @grammar = grammar
14
27
  end
15
28
 
29
+ # Construct the table, and then check the table for conflicts.
30
+ #
31
+ # @return [void]
32
+ # @see #tablize
33
+ # @see #conflictize
16
34
  def call
17
35
  tablize
18
36
  conflictize
19
37
  end
20
38
 
39
+ # Construct a table based on the grammar. The table itself is
40
+ # an array whose elements are hashes; the index of the array
41
+ # corresponds to the state ID, and the keys of the hashes
42
+ # correspond to acceptable tokens. The values of the hashes
43
+ # should be an array of arrays (at this point).
44
+ #
45
+ # @return [void]
21
46
  def tablize
22
- @table = Array.new(parser.states.size) do
47
+ @table = Array.new(grammar.states.size) do
23
48
  Hash.new { |h, k| h[k] = [] }
24
49
  end
25
50
  @rules = []
26
51
 
27
- parser.states.each do |state|
52
+ grammar.states.each do |state|
28
53
  state.transitions.each do |on, to|
29
54
  table[state.id][on] << [:state, to.id]
30
55
  end
@@ -47,6 +72,13 @@ module Antelope
47
72
  table
48
73
  end
49
74
 
75
+ # Resolve any conflicts through precedence, if we can. If we
76
+ # can't, let the user know. This makes sure that every value
77
+ # of the hashes is a single array.
78
+ #
79
+ # @raise [UnresolvableConflictError] if a conflict could not be
80
+ # resolved using precedence rules.
81
+ # @return [void]
50
82
  def conflictize
51
83
  @table.each_with_index do |v, state|
52
84
  v.each do |on, data|
@@ -55,16 +87,16 @@ module Antelope
55
87
  next
56
88
  end
57
89
 
58
- terminal = parser.presidence_for(on)
90
+ terminal = grammar.precedence_for(on)
59
91
 
60
92
  state_part = data.select { |(t, d)| t == :state }.first
61
93
  rule_part = data.select { |(t, d)| t == :reduce}.first
62
94
 
63
- result = @rules[rule_part[1]].presidence <=> terminal
95
+ result = @rules[rule_part[1]].precedence <=> terminal
64
96
 
65
97
  case result
66
98
  when 0
67
- p v, terminal, @rules[rule_part[1]].presidence
99
+ p v, terminal, @rules[rule_part[1]].precedence
68
100
  raise UnresolvableConflictError,
69
101
  "Could not determine move for #{on} in state #{state}"
70
102
  when 1
@@ -1,4 +1,4 @@
1
- require "antelope/generation/conflictor"
1
+ require "antelope/generation/errors"
2
2
  require "antelope/generation/constructor"
3
3
  require "antelope/generation/recognizer"
4
4
  require "antelope/generation/tableizer"
@@ -1,36 +1,37 @@
1
1
  Productions:
2
- % constructor.productions.each do |production|
3
- <%= production.to_s(false) %>
4
- % end
5
-
6
- Original Productions:
7
2
  % grammar.productions.each do |k, v|
8
3
  % v.each do |prod|
9
- <%= k %> → <%= prod[:items].join(" ") %>
10
- <%= prod[:block] %>
4
+ <%= k %> → <%= prod[:items].join(" ") %> <%= prod[:block] %>
11
5
  % end
12
6
  % end
13
7
 
14
- Conflicts:
15
- % conflictor.conflicts.each do |conflict|
16
- State <%= conflict.state.id %>:
17
- rules : <%= conflict.rules.map(&:id).join(", ") %>
18
- type : <%= conflict.type %>
19
- tokens: {<%= conflict.token.to_a.join(", ") %>}
8
+ Productions, Again:
9
+ % constructor.productions.each do |production|
10
+ <%= production.to_s(false) %>
20
11
  % end
21
12
 
22
- Presidence:
13
+ FOLLOW:
14
+ % constructor.instance_variable_get(:@follows).each do |k, v|
15
+ <%= k %>: {<%= v.map(&:to_s).join(", ") %>}
16
+ % end
17
+
18
+ Precedence:
23
19
  --- highest
24
- % grammar.presidence.each do |pr|
20
+ % grammar.precedence.each do |pr|
25
21
  <%= "%-8s" % pr.type %> <%= pr.level %>:
26
22
  {<%= pr.tokens.to_a.join(", ") %>}
27
23
  % end
28
24
  --- lowest
29
25
 
30
26
  Table:
31
- % PP.pp(Hash[tableizer.table.each_with_index.to_a.map(&:reverse)], _erbout)
32
-
33
- % PP.pp(tableizer.rules, _erbout)
27
+ % len = tableizer.table.flatten.map(&:keys).flatten.map(&:size).max
28
+ % tableizer.table.each_with_index do |v, i|
29
+ State <%= i %>:
30
+ % v.each do |token, action|
31
+ <%= "%-#{len}s" % token %>: <%= action[0] %> (<%= action[1] %>)
32
+ % end
33
+ % end
34
+ <%# PP.pp(Hash[tableizer.table.each_with_index.to_a.map(&:reverse)], _erbout) %>
34
35
 
35
36
  % grammar.states.each do |state|
36
37
  State <%= state.id %>:
@@ -1,4 +1,4 @@
1
1
  module Antelope
2
2
  # The current running version of antelope.
3
- VERSION = "0.0.1".freeze
3
+ VERSION = "0.1.0".freeze
4
4
  end
data/lib/antelope.rb CHANGED
@@ -1,9 +1,10 @@
1
- require "antelope/automaton"
1
+ require "antelope/errors"
2
2
  require "antelope/generation"
3
3
  require "antelope/generator"
4
4
  require "antelope/version"
5
5
  require "antelope/ace"
6
6
 
7
+ # Antelope, the compiler compiler.
7
8
  module Antelope
8
- # Your code goes here...
9
+
9
10
  end
metadata CHANGED
@@ -1,43 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: antelope
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Rodi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-16 00:00:00.000000000 Z
11
+ date: 2014-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: liquid
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2.6'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '2.6'
27
- - !ruby/object:Gem::Dependency
28
- name: sourcify
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '0.5'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '0.5'
41
13
  - !ruby/object:Gem::Dependency
42
14
  name: hashie
43
15
  requirement: !ruby/object:Gem::Requirement
@@ -152,11 +124,11 @@ files:
152
124
  - lib/antelope/ace/grammar.rb
153
125
  - lib/antelope/ace/grammar/generation.rb
154
126
  - lib/antelope/ace/grammar/loading.rb
155
- - lib/antelope/ace/grammar/presidence.rb
127
+ - lib/antelope/ace/grammar/precedences.rb
156
128
  - lib/antelope/ace/grammar/production.rb
157
129
  - lib/antelope/ace/grammar/productions.rb
158
130
  - lib/antelope/ace/grammar/terminals.rb
159
- - lib/antelope/ace/presidence.rb
131
+ - lib/antelope/ace/precedence.rb
160
132
  - lib/antelope/ace/scanner.rb
161
133
  - lib/antelope/ace/scanner/first.rb
162
134
  - lib/antelope/ace/scanner/second.rb
@@ -166,15 +138,14 @@ files:
166
138
  - lib/antelope/ace/token/error.rb
167
139
  - lib/antelope/ace/token/nonterminal.rb
168
140
  - lib/antelope/ace/token/terminal.rb
169
- - lib/antelope/automaton.rb
141
+ - lib/antelope/cli.rb
142
+ - lib/antelope/errors.rb
170
143
  - lib/antelope/generation.rb
171
- - lib/antelope/generation/conflictor.rb
172
- - lib/antelope/generation/conflictor/conflict.rb
173
144
  - lib/antelope/generation/constructor.rb
174
145
  - lib/antelope/generation/constructor/first.rb
175
146
  - lib/antelope/generation/constructor/follow.rb
176
- - lib/antelope/generation/constructor/lookahead.rb
177
147
  - lib/antelope/generation/constructor/nullable.rb
148
+ - lib/antelope/generation/errors.rb
178
149
  - lib/antelope/generation/recognizer.rb
179
150
  - lib/antelope/generation/recognizer/rule.rb
180
151
  - lib/antelope/generation/recognizer/state.rb
@@ -1,59 +0,0 @@
1
- require "set"
2
-
3
- module Antelope
4
- module Ace
5
- class Grammar
6
-
7
- # Manages presidence for tokens.
8
- module Presidence
9
-
10
- # Accesses the generated presidence list. Lazily generates
11
- # the presidence rules on the go, and then caches it.
12
- #
13
- # @return [Array<Ace::Presidence>]
14
- def presidence
15
- @_presidence ||= generate_presidence
16
- end
17
-
18
- # Finds a presidence rule for a given token. If no direct
19
- # rule is defined for that token, it will check for a rule
20
- # defined for the special symbol, `:_`. By default, there
21
- # is always a rule defined for `:_`.
22
- #
23
- # @param token [Ace::Token, Symbol]
24
- # @return [Ace::Presidence]
25
- def presidence_for(token)
26
- token = token.name if token.is_a?(Token)
27
-
28
- set = Set.new([token, :_])
29
-
30
- presidence.
31
- select { |pr| set.intersect?(pr.tokens) }.
32
- first
33
- end
34
-
35
- private
36
-
37
- # Generates the presidence rules. Loops through the compiler
38
- # given presidence settings, and then adds two default
39
- # presidence rules; one for `:$` (level 0, nonassoc), and one
40
- # for `:_` (level 1, nonassoc).
41
- #
42
- # @return [Array<Ace::Presidence>]
43
- def generate_presidence
44
- size = @compiler.options[:prec].size + 1
45
- presidence = @compiler.options[:prec].
46
- each_with_index.map do |prec, i|
47
- Ace::Presidence.new(prec[0], prec[1..-1].to_set, size - i)
48
- end
49
-
50
- presidence <<
51
- Ace::Presidence.new(:nonassoc, [:"$"].to_set, 0) <<
52
- Ace::Presidence.new(:nonassoc, [:_].to_set, 1)
53
- presidence.sort_by { |_| _.level }.reverse
54
- end
55
-
56
- end
57
- end
58
- end
59
- end
@@ -1,36 +0,0 @@
1
- module Antelope
2
- class Automaton
3
- attr_accessor :states
4
- attr_accessor :alphabet
5
- attr_accessor :start
6
- attr_accessor :accept
7
- attr_accessor :transitions
8
- attr_accessor :stack
9
-
10
- def initialize(states = [], alphabet = [],
11
- start = nil, accept = [], transitions = {})
12
- @states = states
13
- @alphabet = alphabet
14
- @start = start
15
- @accept = accept
16
- @transitions = transitions
17
- @stack = []
18
- end
19
-
20
- def run(input, &block)
21
- block = block || method(:default_transition)
22
-
23
- @stack = [@start]
24
-
25
- input.each do |part|
26
- @stack.push(block.call(@stack.last, part))
27
- end
28
-
29
- @accept.include? @stack.last
30
- end
31
-
32
- def default_transition(state, part)
33
- @transitions[state][part]
34
- end
35
- end
36
- end
@@ -1,7 +0,0 @@
1
- module Antelope
2
- module Generation
3
- class Conflictor
4
- Conflict = Struct.new(:state, :type, :rules, :token)
5
- end
6
- end
7
- end
@@ -1,45 +0,0 @@
1
- require "antelope/generation/conflictor/conflict"
2
-
3
- module Antelope
4
- module Generation
5
- class Conflictor
6
-
7
- attr_accessor :parser
8
- attr_accessor :conflicts
9
-
10
- def initialize(parser)
11
- @parser = parser
12
- end
13
-
14
- def call
15
- recognize_conflicts
16
- end
17
-
18
- def recognize_conflicts
19
-
20
- @conflicts = []
21
-
22
- parser.states.each do |state|
23
- state.rules.each do |rule|
24
- if rule.lookahead.
25
- any? { |tok| state.transitions.key?(tok.name) }
26
- @conflicts << Conflict.new(state, :shift_reduce, [rule],
27
- rule.lookahead - state.transitions.keys)
28
- end
29
- end
30
-
31
- final_rules = state.rules.select(&:final?)
32
-
33
- final_rules.each_cons(2) do |r1, r2|
34
- if r1.lookahead.intersect? r2.lookahead
35
- @conflicts << Conflict.new(state,
36
- :reduce_reduce,
37
- [r1, r2],
38
- r1.lookahead.intersection(r2.lookahead))
39
- end
40
- end
41
- end
42
- end
43
- end
44
- end
45
- end
@@ -1,42 +0,0 @@
1
- module Antelope
2
- module Generation
3
- class Constructor
4
- module Lookahead
5
-
6
- def initialize
7
- @lookaheads = {}
8
- super
9
- end
10
-
11
- def lookahead(left, right = nil)
12
- @lookaheads.fetch([left, right]) do
13
- if right
14
- set = Set.new
15
-
16
- set += if nullable?(right)
17
- first(right) + follow(left)
18
- else
19
- first(right)
20
- end
21
- else
22
- set = lookahead_nonterminal(left)
23
- end
24
-
25
- @lookaheads[[left, right]] = set
26
- end
27
- end
28
-
29
- private
30
-
31
- def lookahead_nonterminal(left)
32
- set = Set.new
33
- parser.productions[left].each do |production|
34
- set += lookahead(left, production[:items])
35
- end
36
-
37
- set
38
- end
39
- end
40
- end
41
- end
42
- end