antelope 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +3 -0
  4. data/.yardopts +4 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +29 -0
  8. data/Rakefile +2 -0
  9. data/antelope.gemspec +30 -0
  10. data/bin/antelope +24 -0
  11. data/examples/deterministic.ace +27 -0
  12. data/examples/deterministic.output +229 -0
  13. data/examples/example.ace +45 -0
  14. data/examples/example.output +610 -0
  15. data/examples/simple.ace +26 -0
  16. data/examples/simple.output +194 -0
  17. data/lib/antelope/ace/compiler.rb +290 -0
  18. data/lib/antelope/ace/errors.rb +27 -0
  19. data/lib/antelope/ace/grammar/generation.rb +47 -0
  20. data/lib/antelope/ace/grammar/loading.rb +51 -0
  21. data/lib/antelope/ace/grammar/presidence.rb +59 -0
  22. data/lib/antelope/ace/grammar/production.rb +47 -0
  23. data/lib/antelope/ace/grammar/productions.rb +119 -0
  24. data/lib/antelope/ace/grammar/terminals.rb +41 -0
  25. data/lib/antelope/ace/grammar.rb +59 -0
  26. data/lib/antelope/ace/presidence.rb +51 -0
  27. data/lib/antelope/ace/scanner/first.rb +61 -0
  28. data/lib/antelope/ace/scanner/second.rb +160 -0
  29. data/lib/antelope/ace/scanner/third.rb +25 -0
  30. data/lib/antelope/ace/scanner.rb +110 -0
  31. data/lib/antelope/ace/token/epsilon.rb +22 -0
  32. data/lib/antelope/ace/token/error.rb +24 -0
  33. data/lib/antelope/ace/token/nonterminal.rb +15 -0
  34. data/lib/antelope/ace/token/terminal.rb +15 -0
  35. data/lib/antelope/ace/token.rb +171 -0
  36. data/lib/antelope/ace.rb +50 -0
  37. data/lib/antelope/automaton.rb +36 -0
  38. data/lib/antelope/generation/conflictor/conflict.rb +7 -0
  39. data/lib/antelope/generation/conflictor.rb +45 -0
  40. data/lib/antelope/generation/constructor/first.rb +52 -0
  41. data/lib/antelope/generation/constructor/follow.rb +46 -0
  42. data/lib/antelope/generation/constructor/lookahead.rb +42 -0
  43. data/lib/antelope/generation/constructor/nullable.rb +40 -0
  44. data/lib/antelope/generation/constructor.rb +81 -0
  45. data/lib/antelope/generation/recognizer/rule.rb +93 -0
  46. data/lib/antelope/generation/recognizer/state.rb +56 -0
  47. data/lib/antelope/generation/recognizer.rb +152 -0
  48. data/lib/antelope/generation/tableizer.rb +80 -0
  49. data/lib/antelope/generation.rb +12 -0
  50. data/lib/antelope/generator/output.rb +30 -0
  51. data/lib/antelope/generator/ruby.rb +57 -0
  52. data/lib/antelope/generator/templates/output.erb +49 -0
  53. data/lib/antelope/generator/templates/ruby.erb +62 -0
  54. data/lib/antelope/generator.rb +84 -0
  55. data/lib/antelope/version.rb +4 -0
  56. data/lib/antelope.rb +9 -0
  57. data/spec/antelope/ace/compiler_spec.rb +50 -0
  58. data/spec/antelope/ace/scanner_spec.rb +27 -0
  59. data/spec/antelope/automaton_spec.rb +29 -0
  60. data/spec/spec_helper.rb +38 -0
  61. data/spec/support/benchmark_helper.rb +5 -0
  62. metadata +223 -0
@@ -0,0 +1,93 @@
1
+ # encoding: utf-8
2
+
3
module Antelope
  module Generation
    class Recognizer

      # A production together with a position inside its right-hand
      # side.  The position is rendered as a dot by {#to_s}: everything
      # before it is printed to the left of the dot, everything from it
      # onwards to the right.
      class Rule
        include Comparable

        attr_reader :left, :right, :position, :block, :production
        attr_accessor :lookahead, :id, :presidence

        # Builds a rule for the given production.
        #
        # @param production [#label, #prec, #block, #items] the
        #   production the rule is based on.
        # @param position [Numeric] the index into the right-hand side.
        # @param inherited [Array, false] when truthy, used as the
        #   right-hand side as-is; otherwise the production's items are
        #   duplicated one by one and the resulting array is frozen.
        def initialize(production, position, inherited = false)
          @production = production
          @position   = position
          @left       = production.label
          @presidence = production.prec
          @block      = production.block
          @lookahead  = Set.new
          # A random placeholder id; the accessor lets it be replaced.
          @id         = SecureRandom.hex
          @right      = inherited || production.items.map(&:dup).freeze
        end

        # @return [String] a debugging representation of the rule.
        def inspect
          items = right.join(" ")

          "#<#{self.class} id=#{id} left=#{left} right=[#{items}] position=#{position}>"
        end

        # Renders the rule as `id/presidence: left → before • after`.
        #
        # @param dot [Boolean] whether to print the position marker.
        # @return [String]
        def to_s(dot = true)
          header = "#{id}/#{presidence.type.to_s[0]}#{presidence.level}: #{left} → "
          before = right[0, position].join(" ")
          marker = dot ? " • " : ""
          after  = right[position..-1].join(" ")

          "#{header}#{before}#{marker}#{after}"
        end

        # The item at the current position, or a blank token when the
        # position is past the end of the right-hand side.
        #
        # @return [Object]
        def active
          right[position] || Ace::Token.new(nil)
        end

        # @return [Rule] a rule for the same production, one position
        #   further along.
        def succ
          Rule.new(production, position + 1)
        end

        # @return [Boolean] whether there is still an item at the
        #   current position.
        def succ?
          position < right.size
        end

        # @return [Boolean] the opposite of {#succ?}.
        def final?
          !succ?
        end

        # Orders rules by their {#to_a} representation.  Anything that
        # is not a rule is handed to the inherited comparison.
        #
        # @param other [Object]
        # @return [Numeric, nil]
        def <=>(other)
          return super unless other.is_a? Rule

          to_a <=> other.to_a
        end

        # A memoized copy of this rule (same production and position).
        #
        # @return [Rule]
        def without_transitions
          @_without_transitions ||= Rule.new(production, position)
        end

        # Case equality: the left-hand sides must match and every item
        # of this rule's right-hand side must match the item at the
        # same index in the other rule.
        #
        # @param other [Object]
        # @return [Boolean]
        def ===(other)
          return super unless other.is_a? Rule

          left === other.left &&
            right.each_with_index.all? { |item, index| item === other.right[index] }
        end

        # @return [Numeric] a hash derived from {#to_a}, so that rules
        #   that compare equal share a hash.
        def hash
          to_a.hash
        end

        alias_method :eql?, :==

        # @return [Array] the left-hand side, right-hand side and
        #   position; the values that define the rule's identity.
        def to_a
          [left, right, position]
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require "forwardable"
2
+ require "securerandom"
3
+
4
module Antelope
  module Generation
    class Recognizer

      # A set of rules plus a table of outgoing transitions.  The
      # state is enumerable over its rules.
      class State
        include Enumerable
        extend Forwardable

        attr_reader :rules, :transitions
        attr_accessor :id

        def_delegator :@rules, :each

        # Creates an empty state with a random placeholder id.
        def initialize
          @id          = SecureRandom.hex
          @rules       = Set.new
          @transitions = {} #Hash.new { |hash, key| hash[key] = State.new }
        end

        # @return [String] a debugging representation listing the id,
        #   the transition keys and the rules.
        def inspect
          keys  = transitions.keys.join(", ")
          items = rules.to_a.join("} {")

          "#<#{self.class} id=#{id} transitions=[#{keys}] rules=[{#{items}}]>"
        end

        # Absorbs the rules and transitions of another state.  The
        # marker `:_ignore` is accepted and does nothing.
        #
        # @param other [State, Symbol]
        # @raise [ArgumentError] if `other` is neither a state nor
        #   `:_ignore`.
        # @return [self, nil] `nil` when `other` was `:_ignore`.
        def merge!(other)
          return if other == :_ignore

          unless other.is_a? State
            raise ArgumentError, "Expected #{self.class}, got #{other.class}"
          end

          self << other
          transitions.merge!(other.transitions)

          self
        end

        # Finds the first rule that the given object matches via
        # case equality.
        #
        # @param production [#===]
        # @return [Object, nil]
        def rule_for(production)
          rules.find { |candidate| production === candidate }
        end

        # Adds a rule to the state.  When handed another state, each
        # of that state's rules is added instead.
        #
        # @param rule [Object, State]
        # @return [Object]
        def <<(rule)
          return rule.rules.each { |inner| self << inner } if rule.is_a? State

          rules << rule unless rules.include? rule
        end

        alias_method :push, :<<

      end
    end
  end
end
@@ -0,0 +1,152 @@
1
+ require "antelope/generation/recognizer/rule"
2
+ require "antelope/generation/recognizer/state"
3
+
4
module Antelope
  module Generation

    # Builds every state of the grammar, starting from the `$start`
    # production.
    #
    # @see http://redjazz96.tumblr.com/post/88446352960
    class Recognizer

      # Every state created so far.
      #
      # @return [Set<State>]
      attr_reader :states

      # The state built from the first `$start` production.
      #
      # @return [State]
      attr_reader :start

      # The grammar the recognizer works on.
      #
      # @return [Ace::Grammar]
      attr_reader :grammar

      # Sets up a recognizer with no states.
      #
      # @param grammar [Ace::Grammar]
      def initialize(grammar)
        @grammar = grammar
        @states  = Set.new
      end

      # Runs the recognizer.  Once all states exist, the state ids and
      # then the rule ids are replaced with consecutive integers (both
      # start out as random hexadecimal strings, see {State#initialize}
      # and {Rule#initialize}), and the states are handed to the
      # grammar.
      #
      # @see #compute_initial_state
      # @return [void]
      def call
        @states = Set.new
        @start  = compute_initial_state
        redefine_state_ids
        redefine_rule_ids
        grammar.states = states
      end

      # Builds the initial state from the first `$start` production,
      # positioned at the very beginning, along with every state
      # reachable from it.
      #
      # @return [State]
      def compute_initial_state
        initial_rule = Rule.new(grammar.productions[:$start][0], 0)
        compute_whole_state(initial_rule)
      end

      # Creates a fresh state holding the given rule, closes it over,
      # registers it, and then computes all remaining states.
      #
      # @param rule [Rule] the initial rule.
      # @return [State] the state created for the rule.
      def compute_whole_state(rule)
        initial = State.new
        initial << rule
        compute_closure(initial)
        states << initial
        compute_states
        initial
      end

      # Computes all states, repeating until a pass adds no new state.
      # For every rule of every known state that still has a
      # successor, the state already containing the successor rule is
      # looked up; if there is none, a new state is built from the
      # successor, closed over and registered.  That state is then
      # recorded as the transition on the rule's active token name; if
      # a transition on that name already exists, the state is merged
      # into the existing target instead.
      #
      # @return [void]
      def compute_states
        fixed_point(states) do
          # Iterate over a copy, because new states are registered
          # while walking.
          states.dup.each do |state|
            state.rules.each do |rule|
              next unless rule.succ?

              target = find_state_for(rule.succ) do |succ|
                fresh = State.new
                fresh << succ
                compute_closure(fresh)
                states << fresh
                fresh
              end

              on       = rule.active.name
              existing = state.transitions[on]

              if existing
                existing.merge! target
              else
                state.transitions[on] = target
              end
            end
          end
        end
      end

      # Closes a state over: for every rule whose active token is a
      # nonterminal, a rule at position zero is added for each of that
      # nonterminal's productions.  Repeats until a pass adds no rule.
      #
      # @param state [State]
      # @return [void]
      def compute_closure(state)
        fixed_point(state.rules) do
          expandable = state.rules.select { |rule| rule.active.nonterminal? }

          expandable.each do |rule|
            grammar.productions[rule.active.name].each do |prod|
              state << Rule.new(prod, 0)
            end
          end
        end
      end

      private

      # Returns the first state that includes the rule, or the result
      # of the block (called with the rule) when there is none.
      def find_state_for(rule)
        states.find { |state| state.include?(rule) } or yield(rule)
      end

      # Renumbers the states 0, 1, 2, ... in iteration order.
      def redefine_state_ids
        states.each_with_index do |state, index|
          state.id = index
        end
      end

      # Renumbers all rules consecutively across all states.
      def redefine_rule_ids
        counter = 0

        states.each do |state|
          state.rules.each do |rule|
            rule.id  = counter
            counter += 1
          end
        end
      end

      # Calls the block at least once, and again for as long as the
      # size of `enum` changed during the previous call.
      def fixed_point(enum)
        changed = true

        while changed
          before  = enum.size
          yield
          changed = enum.size != before
        end
      end

    end
  end
end
@@ -0,0 +1,80 @@
1
module Antelope
  module Generation

    # Raised when a conflict in the table cannot be settled by
    # comparing the presidence of the reducing rule with the
    # presidence of the token the conflict occurs on.
    class UnresolvableConflictError < StandardError; end

    # Turns the states of a parser into an action table, and then
    # settles the entries that ended up with more than one candidate
    # action.
    class Tableizer

      # The object the table is built from.  It must respond to
      # `states` and `presidence_for`.
      #
      # @return [Object]
      attr_accessor :parser

      # The table: one hash per state (indexed by state id), mapping a
      # token name to its action(s).
      #
      # @return [Array<Hash>]
      attr_accessor :table

      # Every rule seen while building the table, indexed by rule id.
      #
      # @return [Array]
      attr_accessor :rules

      # @param parser [Object] see {#parser}.
      def initialize(parser)
        @parser = parser
      end

      # Builds the table and then resolves its conflicts.
      #
      # @raise [UnresolvableConflictError] see {#conflictize}.
      # @return [void]
      def call
        tablize
        conflictize
      end

      # Builds the raw table.  Every entry is a list of candidate
      # actions: `[:state, id]` for each transition, `[:reduce, id]`
      # for each lookahead of a final rule, and a lone
      # `[:accept, id]` on `$` for a final rule of production zero.
      #
      # @return [Array<Hash>] the table.
      def tablize
        @table = Array.new(parser.states.size) do
          Hash.new { |h, k| h[k] = [] }
        end
        @rules = []
        # Reduce actions carry a *production* id whereas {#rules} is
        # indexed by *rule* id, so the rule behind each reduce action is
        # remembered separately for conflict resolution.
        @reducers = {}

        parser.states.each do |state|
          state.transitions.each do |on, to|
            table[state.id][on] << [:state, to.id]
          end

          state.rules.each do |rule|
            @rules[rule.id] = rule
            next unless rule.final?

            @reducers[rule.production.id] = rule

            rule.lookahead.each do |look|
              table[state.id][look.name] <<
                [:reduce, rule.production.id]
            end

            if rule.production.id.zero?
              table[state.id][:"$"] = [[:accept, rule.production.id]]
            end
          end
        end

        table
      end

      # Collapses every entry of the table to a single action.  An
      # entry with one candidate is simply unwrapped.  Otherwise the
      # presidence of the first reducing rule is compared with the
      # presidence of the token: the reduce wins if it is higher, the
      # transition wins if it is lower.
      #
      # @raise [UnresolvableConflictError] if the comparison is a tie
      #   or yields no ordering, or if the winning side has no
      #   candidate action (which would otherwise leave a hole in the
      #   table).
      # @return [void]
      def conflictize
        @table.each_with_index do |row, state|
          row.each do |on, data|
            if data.size == 1
              row[on] = data[0]
              next
            end

            shift  = data.find { |(type, _)| type == :state }
            reduce = data.find { |(type, _)| type == :reduce }
            chosen = resolve(on, shift, reduce)

            if chosen.nil?
              raise UnresolvableConflictError,
                "Could not determine move for #{on} in state #{state}"
            end

            row[on] = chosen
          end
        end
      end

      private

      # Picks the winner between a transition and a reduction on the
      # given token, or nil if neither can be chosen.
      def resolve(on, shift, reduce)
        return nil unless reduce

        case @reducers[reduce[1]].presidence <=> parser.presidence_for(on)
        when 1  then reduce
        when -1 then shift
        end
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require "antelope/generation/conflictor"
2
+ require "antelope/generation/constructor"
3
+ require "antelope/generation/recognizer"
4
+ require "antelope/generation/tableizer"
5
+
6
+ module Antelope
7
+
8
+ # Contains the generation mods.
9
+ module Generation
10
+
11
+ end
12
+ end
@@ -0,0 +1,30 @@
1
+ require "pp"
2
+
3
module Antelope
  class Generator

    # Writes a report file, mainly useful for debugging.  Always
    # included as a generator for a grammar.
    class Output < Generator

      # Besides the regular initialization, exposes every entry of
      # `mods` as a singleton method named after its key, returning
      # its value.
      #
      # @see Generator#initialize
      def initialize(*)
        super

        mods.each do |name, mod|
          define_singleton_method(name) { mod }
        end
      end

      # Performs the generation: renders the `output.erb` template
      # into `<file>.output`.
      #
      # @return [void]
      def generate
        template("output.erb", "#{file}.output")
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require "pp"
2
+
3
module Antelope
  class Generator

    # Generates a ruby parser.
    class Ruby < Generator

      # Pretty-prints the table of the `:tableizer` mod.
      #
      # @return [String]
      def generate_action_table
        PP.pp(mods[:tableizer].table, "")
      end

      # Renders all productions of the grammar as the source of an
      # array literal.  Each entry holds the label name, the number of
      # items and a `proc`; the proc is built from the production's
      # block, or is empty when the production has no block.
      #
      # @return [String]
      def generate_productions_list
        entries = grammar.all_productions.map do |production|
          name  = production.label.name.inspect
          arity = production.items.size.inspect
          body  = production.block.empty? ? "proc {}" : "proc #{production.block}"

          "[#{name}, #{arity}, #{body}]"
        end

        "[#{entries.join(",\n")}]"
      end

      # Performs the generation: renders `ruby.erb`, substitutes the
      # result for the `write` reference in the compiler's body, and
      # outputs that to `<file>_parser.rb`.
      #
      # @return [void]
      def generate
        template("ruby.erb", "#{file}_parser.rb") do |body|
          format(grammar.compiler.body, :write => body)
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ Productions:
2
+ % constructor.productions.each do |production|
3
+ <%= production.to_s(false) %>
4
+ % end
5
+
6
+ Original Productions:
7
+ % grammar.productions.each do |k, v|
8
+ % v.each do |prod|
9
+ <%= k %> → <%= prod[:items].join(" ") %>
10
+ <%= prod[:block] %>
11
+ % end
12
+ % end
13
+
14
+ Conflicts:
15
+ % conflictor.conflicts.each do |conflict|
16
+ State <%= conflict.state.id %>:
17
+ rules : <%= conflict.rules.map(&:id).join(", ") %>
18
+ type : <%= conflict.type %>
19
+ tokens: {<%= conflict.token.to_a.join(", ") %>}
20
+ % end
21
+
22
+ Presidence:
23
+ --- highest
24
+ % grammar.presidence.each do |pr|
25
+ <%= "%-8s" % pr.type %> <%= pr.level %>:
26
+ {<%= pr.tokens.to_a.join(", ") %>}
27
+ % end
28
+ --- lowest
29
+
30
+ Table:
31
+ % PP.pp(Hash[tableizer.table.each_with_index.to_a.map(&:reverse)], _erbout)
32
+
33
+ % PP.pp(tableizer.rules, _erbout)
34
+
35
+ % grammar.states.each do |state|
36
+ State <%= state.id %>:
37
+ rules:
38
+ % state.rules.each do |rule|
39
+ <%= rule %>
40
+ {<%= rule.lookahead.to_a.join(", ") %>}
41
+ % end
42
+
43
+ transitions:
44
+ % max = state.transitions.keys.map(&:length).max || 0
45
+ % state.transitions.each do |on, to|
46
+ <%= "%-#{max}s" % on %>: State <%= to.id %>
47
+ % end
48
+
49
+ % end
@@ -0,0 +1,62 @@
1
+ # This file assumes that the output of the generator will be placed
2
+ # within a module or a class. However, the module/class requires a
3
+ # `type` method, which takes a terminal and gives its type, as a
4
+ # symbol. These types should line up with the terminals that were
5
+ # defined in the original grammar.
6
+
7
+ # The actions to take during parsing. In every state, there are a
8
+ # set of acceptable peek tokens; this table tells the parser what
9
+ # to do on each acceptable peek token. The possible actions include
10
+ # `:accept`, `:reduce`, and `:state`; `:accept` means to accept the
11
+ # input and return the value of the parsing. `:reduce` means to
12
+ # reduce the top of the stack into a given nonterminal. `:state`
13
+ # means to transition to another state.
14
+ #
15
+ # @return [Array<Hash<(Symbol, Array<(Symbol, Numeric)>)>>]
16
+ ACTION_TABLE = <%= generate_action_table %>.freeze # >
17
+
18
+ # A list of all of the productions. Only includes the left-hand side,
19
+ # the number of tokens on the right-hand side, and the block to call
20
+ # on reduction.
21
+ #
22
+ # @return [Array<Array<(Symbol, Numeric, Proc)>>]
23
+ PRODUCTIONS = <%= generate_productions_list %>.freeze # >
24
+
25
+ # Runs the parser.
26
+ #
27
+ # @param input [Array<Object>] the input to run the parser over.
28
+ # @return [Object] the result of the accept.
29
# Runs the parser over a copy of the input.
#
# The stack holds `[value, state]` pairs and starts out with state
# zero.  On every step the type of the next input element (or `$`
# once the input is used up) selects an action from the table row of
# the state on top of the stack.
#
# @param input [Array<Object>] the input to run the parser over.
# @return [Object] the result of the accept.
def parse(input)
  remaining = input.dup
  stack     = [[nil, 0]]
  result    = nil

  until stack.empty?
    lookahead = remaining.empty? ? :"$" : type(remaining.first)
    action    = ACTION_TABLE[stack.last.last].fetch(lookahead)
    verb      = action.first

    if verb == :accept
      # Take the value of the first popped entry as the result, then
      # drop the initial entry so that the loop ends.
      arity  = PRODUCTIONS[action.last][1]
      result = stack.pop(arity).first.first
      stack.pop
    elsif verb == :reduce
      label, arity, handler = PRODUCTIONS[action.last]
      popped = stack.pop(arity)
      value  = handler.call(*popped.map(&:first))
      # The row of the state uncovered by the pop says where to go
      # after reducing to this label.
      goto   = ACTION_TABLE[stack.last.last][label]
      stack.push([value, goto.last])
    elsif verb == :state
      stack.push([remaining.shift, action.last])
    else
      raise
    end
  end

  result
end
@@ -0,0 +1,84 @@
1
+ require "antelope/generator/output"
2
+ require "antelope/generator/ruby"
3
+ require "erb"
4
+ require "pathname"
5
+
6
module Antelope

  # Generates a parser.  This is normally the parent class, and the
  # specific implementations inherit from this.  The generated
  # parser should, ideally, be completely independent (not requiring
  # any external source code), as well as be under a permissive
  # license.
  class Generator

    # The modifiers that were applied to the grammar.
    #
    # @return [Hash<(Symbol, Object)>]
    attr_reader :mods

    # The file name (not including the extension) that the grammar
    # should output to.
    #
    # @return [String]
    attr_reader :file

    # The grammar that the generator is for.
    #
    # @return [Ace::Grammar]
    attr_reader :grammar

    # The source root directory for templates.  Overwrite to change.
    #
    # @return [Pathname]
    def self.source_root
      Pathname.new("../generator/templates").expand_path(__FILE__)
    end

    # Initialize the generator.  The output file name is taken from
    # the grammar's name.
    #
    # @param grammar [Grammar]
    # @param mods [Hash<(Symbol, Object)>]
    def initialize(grammar, mods)
      @file    = grammar.name
      @grammar = grammar
      @mods    = mods
    end

    # Actually does the generation.  A subclass should implement this.
    #
    # @raise [NotImplementedError]
    # @return [void]
    def generate
      raise NotImplementedError
    end

    protected

    # Copies a template from the source, runs it through erb (in the
    # context of this instance), and then outputs it at the
    # destination.  If given a block, it will call the block after
    # the template is run through erb with the content from erb; the
    # result of the block is then used as the content instead.
    #
    # @param source [String] the source file.  This should be in
    #   {.source_root}.
    # @param destination [String] the destination file.  This will be
    #   in {Ace::Grammar#output}.
    # @yieldparam [String] content The content that ERB created.
    # @yieldreturn [String] The new content to write to the output.
    # @return [void]
    def template(source, destination)
      src_file = self.class.source_root + source
      # Pathname#read closes the file once it has been read, so no
      # handle is left open.
      src      = src_file.read
      # A binding taken through instance_eval has this instance as
      # self but does not expose the local variables of this method
      # to the template.
      context  = instance_eval('binding')
      content  = erb_for(src).result(context)
      content  = yield content if block_given?

      dest_file = grammar.output + destination
      dest_file.open("w") do |f|
        f.write(content)
      end
    end

    private

    # Builds the ERB instance for a template, with lines starting with
    # `%` treated as ruby code.  The trim mode is passed by keyword
    # where ERB supports that, because the positional form is
    # deprecated; older versions only know the positional form.
    def erb_for(src)
      if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
        ERB.new(src, trim_mode: "%")
      else
        ERB.new(src, nil, "%")
      end
    end

  end
end
@@ -0,0 +1,4 @@
1
+ module Antelope
2
+ # The current running version of antelope.
3
+ VERSION = "0.0.1".freeze
4
+ end
data/lib/antelope.rb ADDED
@@ -0,0 +1,9 @@
1
+ require "antelope/automaton"
2
+ require "antelope/generation"
3
+ require "antelope/generator"
4
+ require "antelope/version"
5
+ require "antelope/ace"
6
+
7
+ module Antelope
8
+ # Your code goes here...
9
+ end