antelope 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.yardopts +1 -0
  3. data/antelope.gemspec +0 -2
  4. data/bin/antelope +3 -20
  5. data/examples/deterministic.output +97 -103
  6. data/examples/example.ace +0 -1
  7. data/examples/example.output +259 -333
  8. data/examples/simple.output +85 -87
  9. data/lib/antelope/ace/compiler.rb +14 -16
  10. data/lib/antelope/ace/errors.rb +9 -3
  11. data/lib/antelope/ace/grammar/generation.rb +38 -7
  12. data/lib/antelope/ace/grammar/precedences.rb +59 -0
  13. data/lib/antelope/ace/grammar/production.rb +24 -25
  14. data/lib/antelope/ace/grammar/productions.rb +8 -8
  15. data/lib/antelope/ace/grammar.rb +3 -3
  16. data/lib/antelope/ace/{presidence.rb → precedence.rb} +11 -11
  17. data/lib/antelope/ace/scanner/second.rb +2 -2
  18. data/lib/antelope/ace/token.rb +1 -1
  19. data/lib/antelope/ace.rb +2 -2
  20. data/lib/antelope/cli.rb +33 -0
  21. data/lib/antelope/errors.rb +6 -0
  22. data/lib/antelope/generation/constructor/first.rb +40 -6
  23. data/lib/antelope/generation/constructor/follow.rb +83 -25
  24. data/lib/antelope/generation/constructor/nullable.rb +24 -2
  25. data/lib/antelope/generation/constructor.rb +39 -13
  26. data/lib/antelope/generation/errors.rb +15 -0
  27. data/lib/antelope/generation/recognizer/rule.rb +111 -11
  28. data/lib/antelope/generation/recognizer/state.rb +53 -5
  29. data/lib/antelope/generation/recognizer.rb +31 -1
  30. data/lib/antelope/generation/tableizer.rb +42 -10
  31. data/lib/antelope/generation.rb +1 -1
  32. data/lib/antelope/generator/templates/output.erb +19 -18
  33. data/lib/antelope/version.rb +1 -1
  34. data/lib/antelope.rb +3 -2
  35. metadata +7 -36
  36. data/lib/antelope/ace/grammar/presidence.rb +0 -59
  37. data/lib/antelope/automaton.rb +0 -36
  38. data/lib/antelope/generation/conflictor/conflict.rb +0 -7
  39. data/lib/antelope/generation/conflictor.rb +0 -45
  40. data/lib/antelope/generation/constructor/lookahead.rb +0 -42
@@ -4,10 +4,26 @@ require "securerandom"
4
4
  module Antelope
5
5
  module Generation
6
6
  class Recognizer
7
+
8
+ # A state within the parser. A state has a set of rules, as
9
+ # well as transitions on those rules.
7
10
  class State
8
11
 
12
+ # All of the rules in this state.
13
+ #
14
+ # @return [Set<Rule>]
9
15
  attr_reader :rules
16
+
17
+ # All of the transitions that can be made on this state.
18
+ #
19
+ # @return [Hash<(Symbol, State)>]
10
20
  attr_reader :transitions
21
+
22
+ # The id of this state. This starts off as a string of
23
+ # hexadecimal characters, but after all of the states are
24
+ # finalized, this becomes a numeric.
25
+ #
26
+ # @return [String, Numeric]
11
27
  attr_accessor :id
12
28
 
13
29
  include Enumerable
@@ -15,19 +31,31 @@ module Antelope
15
31
 
16
32
  def_delegator :@rules, :each
17
33
 
34
+ # Initialize the state.
18
35
  def initialize
19
36
  @rules = Set.new
20
- @transitions = {} #Hash.new { |hash, key| hash[key] = State.new }
37
+ @transitions = {}
21
38
  @id = SecureRandom.hex
22
39
  end
23
40
 
41
+ # Gives a nice string representation of the state.
42
+ #
43
+ # @return [String]
24
44
  def inspect
25
- "#<#{self.class} id=#{id} transitions=[#{transitions.keys.join(", ")}] rules=[{#{rules.to_a.join("} {")}}]>"
45
+ "#<#{self.class} id=#{id} " \
46
+ "transitions=[#{transitions.keys.join(", ")}] " \
47
+ "rules=[{#{rules.to_a.join("} {")}}]>"
26
48
  end
27
49
 
50
+ # Merges another state with this state. It copies all of the
51
+ # rules into this state, and then merges the transitions on
52
+ # the given state to this state. It then returns self.
53
+ #
54
+ # @raise [ArgumentError] if the given argument is not a state.
55
+ # @param other [State] the state to merge.
56
+ # @return [self]
28
57
  def merge!(other)
29
- return if other == :_ignore
30
- raise ArgumentError, "Expected #{self.class}, " +
58
+ raise ArgumentError, "Expected #{self.class}, " \
31
59
  "got #{other.class}" unless other.is_a? State
32
60
 
33
61
  self << other
@@ -36,16 +64,36 @@ module Antelope
36
64
  self
37
65
  end
38
66
 
67
+ # Finds the rule that matches the given production. It
68
+ # uses fuzzy equality checking. It returns the first rule
69
+ # that matches.
70
+ #
71
+ # @param production [Rule] the rule to compare.
72
+ # @return [Rule?]
39
73
  def rule_for(production)
40
74
  rules.find { |rule| production === rule }
41
75
  end
42
76
 
77
+ # Appends the given object to this state. The given object
78
+ # must be a state or a rule. If it's a state, it appends all
79
+ # of the rules in the state to this state. If it's a rule, it
80
+ # adds the rule to our rules.
81
+ #
82
+ # @raise [ArgumentError] if the argument isn't a {State} or a
83
+ # {Rule}.
84
+ # @param rule [State, Rule] the object to append.
85
+ # @return [self]
43
86
  def <<(rule)
44
87
  if rule.is_a? State
45
88
  rule.rules.each { |r| self << r }
46
- else
89
+ elsif rule.is_a? Rule
47
90
  rules << rule unless rules.include? rule
91
+ else
92
+ raise ArgumentError, "Expected #{State} or #{Rule}, " \
93
+ "got #{rule.class}"
48
94
  end
95
+
96
+ self
49
97
  end
50
98
 
51
99
  alias_method :push, :<<
@@ -80,7 +80,10 @@ module Antelope
80
80
  # Computes all states. Uses a fix point iteration to determine
81
81
  # when no states have been added. Loops through every state and
82
82
  # every rule, looking for rules that have an active nonterminal
83
- # and computing
83
+ # and computing the closure for said rule.
84
+ #
85
+ # @return [void]
86
+ # @see #compute_closure
84
87
  def compute_states
85
88
  fixed_point(states) do
86
89
  states.dup.each do |state|
@@ -104,6 +107,11 @@ module Antelope
104
107
  end
105
108
  end
106
109
 
110
+ # Given a state, it does a fixed point iteration on the rules of
111
+ # the state that have an active nonterminal, and adds the
112
+ # corresponding production rules to the state.
113
+ #
114
+ # @return [void]
107
115
  def compute_closure(state)
108
116
  fixed_point(state.rules) do
109
117
  state.rules.select { |_| _.active.nonterminal? }.each do |rule|
@@ -116,16 +124,29 @@ module Antelope
116
124
 
117
125
  private
118
126
 
127
+ # Finds a state that includes a specific rule, or yields the rule.
128
+ #
129
+ # @param rule [Rule]
130
+ # @yield [rule]
131
+ # @return [State]
119
132
  def find_state_for(rule)
120
133
  states.find { |state| state.include?(rule) } or yield(rule)
121
134
  end
122
135
 
136
+ # Changes the IDs of the states into a more friendly format.
137
+ #
138
+ # @return [void]
123
139
  def redefine_state_ids
124
140
  states.each_with_index do |state, i|
125
141
  state.id = i
126
142
  end
127
143
  end
128
144
 
145
+ # Redefines all of the rule ids to make them more friendly.
146
+ # Every rule in every state is given a unique ID, regardless if
147
+ # the rules are equivalent.
148
+ #
149
+ # @return [void]
129
150
  def redefine_rule_ids
130
151
  start = 0
131
152
 
@@ -137,6 +158,15 @@ module Antelope
137
158
  end
138
159
  end
139
160
 
161
+ # Begins a fixed point iteration on the given enumerable. It
162
+ # initializes the added elements to one; then, while the number
163
+ # of added elements is not zero, it yields and checks for added
164
+ # elements.
165
+ #
166
+ # @param enum [Enumerable]
167
+ # @yield for every iteration. Guaranteed to do so at least
168
+ # once.
169
+ # @return [void]
140
170
  def fixed_point(enum)
141
171
  added = 1
142
172
 
@@ -1,30 +1,55 @@
1
1
  module Antelope
2
2
  module Generation
3
3
 
4
- class UnresolvableConflictError < StandardError; end
5
-
4
+ # Constructs the table required for the parser.
6
5
  class Tableizer
7
6
 
8
- attr_accessor :parser
7
+ # The grammar that the table is based off of.
8
+ #
9
+ # @return [Ace::Grammar]
10
+ attr_accessor :grammar
11
+
12
+ # The table itself.
13
+ #
14
+ # @return [Array<Hash<(Symbol, Array<(Symbol, Numeric)>)>>]
9
15
  attr_accessor :table
16
+
17
+ # All rules in the grammar.
18
+ #
19
+ # @return [Hash<(Numeric, Recognizer::Rule)>]
10
20
  attr_accessor :rules
11
21
 
12
- def initialize(parser)
13
- @parser = parser
22
+ # Initialize.
23
+ #
24
+ # @param grammar [Ace::Grammar]
25
+ def initialize(grammar)
26
+ @grammar = grammar
14
27
  end
15
28
 
29
+ # Construct the table, and then check the table for conflicts.
30
+ #
31
+ # @return [void]
32
+ # @see #tablize
33
+ # @see #conflictize
16
34
  def call
17
35
  tablize
18
36
  conflictize
19
37
  end
20
38
 
39
+ # Construct a table based on the grammar. The table itself is
40
+ # an array whose elements are hashes; the index of the array
41
+ # corresponds to the state ID, and the keys of the hashes
42
+ # correspond to acceptable tokens. The values of the hashes
43
+ # should be an array of arrays (at this point).
44
+ #
45
+ # @return [void]
21
46
  def tablize
22
- @table = Array.new(parser.states.size) do
47
+ @table = Array.new(grammar.states.size) do
23
48
  Hash.new { |h, k| h[k] = [] }
24
49
  end
25
50
  @rules = []
26
51
 
27
- parser.states.each do |state|
52
+ grammar.states.each do |state|
28
53
  state.transitions.each do |on, to|
29
54
  table[state.id][on] << [:state, to.id]
30
55
  end
@@ -47,6 +72,13 @@ module Antelope
47
72
  table
48
73
  end
49
74
 
75
+ # Resolve any conflicts through precedence, if we can. If we
76
+ # can't, let the user know. This makes sure that every value
77
+ # of the hashes is a single array.
78
+ #
79
+ # @raise [UnresolvableConflictError] if a conflict could not be
80
+ # resolved using precedence rules.
81
+ # @return [void]
50
82
  def conflictize
51
83
  @table.each_with_index do |v, state|
52
84
  v.each do |on, data|
@@ -55,16 +87,16 @@ module Antelope
55
87
  next
56
88
  end
57
89
 
58
- terminal = parser.presidence_for(on)
90
+ terminal = grammar.precedence_for(on)
59
91
 
60
92
  state_part = data.select { |(t, d)| t == :state }.first
61
93
  rule_part = data.select { |(t, d)| t == :reduce}.first
62
94
 
63
- result = @rules[rule_part[1]].presidence <=> terminal
95
+ result = @rules[rule_part[1]].precedence <=> terminal
64
96
 
65
97
  case result
66
98
  when 0
67
- p v, terminal, @rules[rule_part[1]].presidence
99
+ p v, terminal, @rules[rule_part[1]].precedence
68
100
  raise UnresolvableConflictError,
69
101
  "Could not determine move for #{on} in state #{state}"
70
102
  when 1
@@ -1,4 +1,4 @@
1
- require "antelope/generation/conflictor"
1
+ require "antelope/generation/errors"
2
2
  require "antelope/generation/constructor"
3
3
  require "antelope/generation/recognizer"
4
4
  require "antelope/generation/tableizer"
@@ -1,36 +1,37 @@
1
1
  Productions:
2
- % constructor.productions.each do |production|
3
- <%= production.to_s(false) %>
4
- % end
5
-
6
- Original Productions:
7
2
  % grammar.productions.each do |k, v|
8
3
  % v.each do |prod|
9
- <%= k %> → <%= prod[:items].join(" ") %>
10
- <%= prod[:block] %>
4
+ <%= k %> → <%= prod[:items].join(" ") %> <%= prod[:block] %>
11
5
  % end
12
6
  % end
13
7
 
14
- Conflicts:
15
- % conflictor.conflicts.each do |conflict|
16
- State <%= conflict.state.id %>:
17
- rules : <%= conflict.rules.map(&:id).join(", ") %>
18
- type : <%= conflict.type %>
19
- tokens: {<%= conflict.token.to_a.join(", ") %>}
8
+ Productions, Again:
9
+ % constructor.productions.each do |production|
10
+ <%= production.to_s(false) %>
20
11
  % end
21
12
 
22
- Presidence:
13
+ FOLLOW:
14
+ % constructor.instance_variable_get(:@follows).each do |k, v|
15
+ <%= k %>: {<%= v.map(&:to_s).join(", ") %>}
16
+ % end
17
+
18
+ Precedence:
23
19
  --- highest
24
- % grammar.presidence.each do |pr|
20
+ % grammar.precedence.each do |pr|
25
21
  <%= "%-8s" % pr.type %> <%= pr.level %>:
26
22
  {<%= pr.tokens.to_a.join(", ") %>}
27
23
  % end
28
24
  --- lowest
29
25
 
30
26
  Table:
31
- % PP.pp(Hash[tableizer.table.each_with_index.to_a.map(&:reverse)], _erbout)
32
-
33
- % PP.pp(tableizer.rules, _erbout)
27
+ % len = tableizer.table.flatten.map(&:keys).flatten.map(&:size).max
28
+ % tableizer.table.each_with_index do |v, i|
29
+ State <%= i %>:
30
+ % v.each do |token, action|
31
+ <%= "%-#{len}s" % token %>: <%= action[0] %> (<%= action[1] %>)
32
+ % end
33
+ % end
34
+ <%# PP.pp(Hash[tableizer.table.each_with_index.to_a.map(&:reverse)], _erbout) %>
34
35
 
35
36
  % grammar.states.each do |state|
36
37
  State <%= state.id %>:
@@ -1,4 +1,4 @@
1
1
  module Antelope
2
2
  # The current running version of antelope.
3
- VERSION = "0.0.1".freeze
3
+ VERSION = "0.1.0".freeze
4
4
  end
data/lib/antelope.rb CHANGED
@@ -1,9 +1,10 @@
1
- require "antelope/automaton"
1
+ require "antelope/errors"
2
2
  require "antelope/generation"
3
3
  require "antelope/generator"
4
4
  require "antelope/version"
5
5
  require "antelope/ace"
6
6
 
7
+ # Antelope, the compiler compiler.
7
8
  module Antelope
8
- # Your code goes here...
9
+
9
10
  end
metadata CHANGED
@@ -1,43 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: antelope
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jeremy Rodi
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-16 00:00:00.000000000 Z
11
+ date: 2014-06-18 00:00:00.000000000 Z
12
12
  dependencies:
13
- - !ruby/object:Gem::Dependency
14
- name: liquid
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - "~>"
18
- - !ruby/object:Gem::Version
19
- version: '2.6'
20
- type: :runtime
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - "~>"
25
- - !ruby/object:Gem::Version
26
- version: '2.6'
27
- - !ruby/object:Gem::Dependency
28
- name: sourcify
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '0.5'
34
- type: :runtime
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '0.5'
41
13
  - !ruby/object:Gem::Dependency
42
14
  name: hashie
43
15
  requirement: !ruby/object:Gem::Requirement
@@ -152,11 +124,11 @@ files:
152
124
  - lib/antelope/ace/grammar.rb
153
125
  - lib/antelope/ace/grammar/generation.rb
154
126
  - lib/antelope/ace/grammar/loading.rb
155
- - lib/antelope/ace/grammar/presidence.rb
127
+ - lib/antelope/ace/grammar/precedences.rb
156
128
  - lib/antelope/ace/grammar/production.rb
157
129
  - lib/antelope/ace/grammar/productions.rb
158
130
  - lib/antelope/ace/grammar/terminals.rb
159
- - lib/antelope/ace/presidence.rb
131
+ - lib/antelope/ace/precedence.rb
160
132
  - lib/antelope/ace/scanner.rb
161
133
  - lib/antelope/ace/scanner/first.rb
162
134
  - lib/antelope/ace/scanner/second.rb
@@ -166,15 +138,14 @@ files:
166
138
  - lib/antelope/ace/token/error.rb
167
139
  - lib/antelope/ace/token/nonterminal.rb
168
140
  - lib/antelope/ace/token/terminal.rb
169
- - lib/antelope/automaton.rb
141
+ - lib/antelope/cli.rb
142
+ - lib/antelope/errors.rb
170
143
  - lib/antelope/generation.rb
171
- - lib/antelope/generation/conflictor.rb
172
- - lib/antelope/generation/conflictor/conflict.rb
173
144
  - lib/antelope/generation/constructor.rb
174
145
  - lib/antelope/generation/constructor/first.rb
175
146
  - lib/antelope/generation/constructor/follow.rb
176
- - lib/antelope/generation/constructor/lookahead.rb
177
147
  - lib/antelope/generation/constructor/nullable.rb
148
+ - lib/antelope/generation/errors.rb
178
149
  - lib/antelope/generation/recognizer.rb
179
150
  - lib/antelope/generation/recognizer/rule.rb
180
151
  - lib/antelope/generation/recognizer/state.rb
@@ -1,59 +0,0 @@
1
- require "set"
2
-
3
- module Antelope
4
- module Ace
5
- class Grammar
6
-
7
- # Manages presidence for tokens.
8
- module Presidence
9
-
10
- # Accesses the generated presidence list. Lazily generates
11
- # the presidence rules on the go, and then caches it.
12
- #
13
- # @return [Array<Ace::Presidence>]
14
- def presidence
15
- @_presidence ||= generate_presidence
16
- end
17
-
18
- # Finds a presidence rule for a given token. If no direct
19
- # rule is defined for that token, it will check for a rule
20
- # defined for the special symbol, `:_`. By default, there
21
- # is always a rule defined for `:_`.
22
- #
23
- # @param token [Ace::Token, Symbol]
24
- # @return [Ace::Presidence]
25
- def presidence_for(token)
26
- token = token.name if token.is_a?(Token)
27
-
28
- set = Set.new([token, :_])
29
-
30
- presidence.
31
- select { |pr| set.intersect?(pr.tokens) }.
32
- first
33
- end
34
-
35
- private
36
-
37
- # Generates the presidence rules. Loops through the compiler
38
- # given presidence settings, and then adds two default
39
- # presidence rules; one for `:$` (level 0, nonassoc), and one
40
- # for `:_` (level 1, nonassoc).
41
- #
42
- # @return [Array<Ace::Presidence>]
43
- def generate_presidence
44
- size = @compiler.options[:prec].size + 1
45
- presidence = @compiler.options[:prec].
46
- each_with_index.map do |prec, i|
47
- Ace::Presidence.new(prec[0], prec[1..-1].to_set, size - i)
48
- end
49
-
50
- presidence <<
51
- Ace::Presidence.new(:nonassoc, [:"$"].to_set, 0) <<
52
- Ace::Presidence.new(:nonassoc, [:_].to_set, 1)
53
- presidence.sort_by { |_| _.level }.reverse
54
- end
55
-
56
- end
57
- end
58
- end
59
- end
@@ -1,36 +0,0 @@
1
- module Antelope
2
- class Automaton
3
- attr_accessor :states
4
- attr_accessor :alphabet
5
- attr_accessor :start
6
- attr_accessor :accept
7
- attr_accessor :transitions
8
- attr_accessor :stack
9
-
10
- def initialize(states = [], alphabet = [],
11
- start = nil, accept = [], transitions = {})
12
- @states = states
13
- @alphabet = alphabet
14
- @start = start
15
- @accept = accept
16
- @transitions = transitions
17
- @stack = []
18
- end
19
-
20
- def run(input, &block)
21
- block = block || method(:default_transition)
22
-
23
- @stack = [@start]
24
-
25
- input.each do |part|
26
- @stack.push(block.call(@stack.last, part))
27
- end
28
-
29
- @accept.include? @stack.last
30
- end
31
-
32
- def default_transition(state, part)
33
- @transitions[state][part]
34
- end
35
- end
36
- end
@@ -1,7 +0,0 @@
1
- module Antelope
2
- module Generation
3
- class Conflictor
4
- Conflict = Struct.new(:state, :type, :rules, :token)
5
- end
6
- end
7
- end
@@ -1,45 +0,0 @@
1
- require "antelope/generation/conflictor/conflict"
2
-
3
- module Antelope
4
- module Generation
5
- class Conflictor
6
-
7
- attr_accessor :parser
8
- attr_accessor :conflicts
9
-
10
- def initialize(parser)
11
- @parser = parser
12
- end
13
-
14
- def call
15
- recognize_conflicts
16
- end
17
-
18
- def recognize_conflicts
19
-
20
- @conflicts = []
21
-
22
- parser.states.each do |state|
23
- state.rules.each do |rule|
24
- if rule.lookahead.
25
- any? { |tok| state.transitions.key?(tok.name) }
26
- @conflicts << Conflict.new(state, :shift_reduce, [rule],
27
- rule.lookahead - state.transitions.keys)
28
- end
29
- end
30
-
31
- final_rules = state.rules.select(&:final?)
32
-
33
- final_rules.each_cons(2) do |r1, r2|
34
- if r1.lookahead.intersect? r2.lookahead
35
- @conflicts << Conflict.new(state,
36
- :reduce_reduce,
37
- [r1, r2],
38
- r1.lookahead.intersection(r2.lookahead))
39
- end
40
- end
41
- end
42
- end
43
- end
44
- end
45
- end
@@ -1,42 +0,0 @@
1
- module Antelope
2
- module Generation
3
- class Constructor
4
- module Lookahead
5
-
6
- def initialize
7
- @lookaheads = {}
8
- super
9
- end
10
-
11
- def lookahead(left, right = nil)
12
- @lookaheads.fetch([left, right]) do
13
- if right
14
- set = Set.new
15
-
16
- set += if nullable?(right)
17
- first(right) + follow(left)
18
- else
19
- first(right)
20
- end
21
- else
22
- set = lookahead_nonterminal(left)
23
- end
24
-
25
- @lookaheads[[left, right]] = set
26
- end
27
- end
28
-
29
- private
30
-
31
- def lookahead_nonterminal(left)
32
- set = Set.new
33
- parser.productions[left].each do |production|
34
- set += lookahead(left, production[:items])
35
- end
36
-
37
- set
38
- end
39
- end
40
- end
41
- end
42
- end