antelope 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (62) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +23 -0
  3. data/.rspec +3 -0
  4. data/.yardopts +4 -0
  5. data/Gemfile +7 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +29 -0
  8. data/Rakefile +2 -0
  9. data/antelope.gemspec +30 -0
  10. data/bin/antelope +24 -0
  11. data/examples/deterministic.ace +27 -0
  12. data/examples/deterministic.output +229 -0
  13. data/examples/example.ace +45 -0
  14. data/examples/example.output +610 -0
  15. data/examples/simple.ace +26 -0
  16. data/examples/simple.output +194 -0
  17. data/lib/antelope/ace/compiler.rb +290 -0
  18. data/lib/antelope/ace/errors.rb +27 -0
  19. data/lib/antelope/ace/grammar/generation.rb +47 -0
  20. data/lib/antelope/ace/grammar/loading.rb +51 -0
  21. data/lib/antelope/ace/grammar/presidence.rb +59 -0
  22. data/lib/antelope/ace/grammar/production.rb +47 -0
  23. data/lib/antelope/ace/grammar/productions.rb +119 -0
  24. data/lib/antelope/ace/grammar/terminals.rb +41 -0
  25. data/lib/antelope/ace/grammar.rb +59 -0
  26. data/lib/antelope/ace/presidence.rb +51 -0
  27. data/lib/antelope/ace/scanner/first.rb +61 -0
  28. data/lib/antelope/ace/scanner/second.rb +160 -0
  29. data/lib/antelope/ace/scanner/third.rb +25 -0
  30. data/lib/antelope/ace/scanner.rb +110 -0
  31. data/lib/antelope/ace/token/epsilon.rb +22 -0
  32. data/lib/antelope/ace/token/error.rb +24 -0
  33. data/lib/antelope/ace/token/nonterminal.rb +15 -0
  34. data/lib/antelope/ace/token/terminal.rb +15 -0
  35. data/lib/antelope/ace/token.rb +171 -0
  36. data/lib/antelope/ace.rb +50 -0
  37. data/lib/antelope/automaton.rb +36 -0
  38. data/lib/antelope/generation/conflictor/conflict.rb +7 -0
  39. data/lib/antelope/generation/conflictor.rb +45 -0
  40. data/lib/antelope/generation/constructor/first.rb +52 -0
  41. data/lib/antelope/generation/constructor/follow.rb +46 -0
  42. data/lib/antelope/generation/constructor/lookahead.rb +42 -0
  43. data/lib/antelope/generation/constructor/nullable.rb +40 -0
  44. data/lib/antelope/generation/constructor.rb +81 -0
  45. data/lib/antelope/generation/recognizer/rule.rb +93 -0
  46. data/lib/antelope/generation/recognizer/state.rb +56 -0
  47. data/lib/antelope/generation/recognizer.rb +152 -0
  48. data/lib/antelope/generation/tableizer.rb +80 -0
  49. data/lib/antelope/generation.rb +12 -0
  50. data/lib/antelope/generator/output.rb +30 -0
  51. data/lib/antelope/generator/ruby.rb +57 -0
  52. data/lib/antelope/generator/templates/output.erb +49 -0
  53. data/lib/antelope/generator/templates/ruby.erb +62 -0
  54. data/lib/antelope/generator.rb +84 -0
  55. data/lib/antelope/version.rb +4 -0
  56. data/lib/antelope.rb +9 -0
  57. data/spec/antelope/ace/compiler_spec.rb +50 -0
  58. data/spec/antelope/ace/scanner_spec.rb +27 -0
  59. data/spec/antelope/automaton_spec.rb +29 -0
  60. data/spec/spec_helper.rb +38 -0
  61. data/spec/support/benchmark_helper.rb +5 -0
  62. metadata +223 -0
@@ -0,0 +1,93 @@
1
+ # encoding: utf-8
2
+
3
module Antelope
  module Generation
    class Recognizer

      # A production paired with a position inside its right-hand side.
      # Rules are ordered and hashed by `[left, right, position]`; the
      # `id`, `lookahead` and `presidence` attributes do not take part
      # in comparisons.
      class Rule
        include Comparable

        attr_reader :left, :right, :position, :block, :production
        attr_accessor :lookahead, :id, :presidence

        # @param production [#label, #items, #prec, #block] the
        #   production this rule is built from.
        # @param position [Numeric] index of the active token in the
        #   right-hand side.
        # @param inherited [Object] when truthy, used as the right-hand
        #   side as-is; otherwise a frozen copy of `production.items`
        #   (each item duplicated) is used.
        def initialize(production, position, inherited = false)
          @production = production
          @left       = production.label
          @block      = production.block
          @presidence = production.prec
          @position   = position
          @lookahead  = Set.new
          # Random placeholder; the id is writable so it can be replaced.
          @id         = SecureRandom.hex
          @right      = inherited || production.items.map(&:dup).freeze
        end

        # @return [String] a debugging representation of the rule.
        def inspect
          tokens = right.join(" ")
          "#<#{self.class} id=#{id} left=#{left} right=[#{tokens}] position=#{position}>"
        end

        # Renders the rule as `id/precedence: left → right`.
        #
        # @param dot [Boolean] whether to mark the current position
        #   with a bullet.
        # @return [String]
        def to_s(dot = true)
          marker = dot ? " • " : ""
          before = right[0, position].join(" ")
          after  = right[position..-1].join(" ")
          "#{id}/#{presidence.type.to_s[0]}#{presidence.level}: #{left} → #{before}#{marker}#{after}"
        end

        # The token at the current position, or a blank token when the
        # position is past the end of the right-hand side.
        def active
          right[position] || Ace::Token.new(nil)
        end

        # @return [Rule] a new rule for the same production, advanced
        #   by one position.
        def succ
          Rule.new(production, position + 1)
        end

        # @return [Boolean] whether there is a token left to advance over.
        def succ?
          position < right.size
        end

        # @return [Boolean] whether the position is at (or past) the end.
        def final?
          !succ?
        end

        # Compares by {#to_a} when given another rule; defers to the
        # default comparison otherwise.
        def <=>(other)
          return super unless other.is_a?(Rule)

          to_a <=> other.to_a
        end

        # A memoized fresh rule for the same production and position.
        #
        # @return [Rule]
        def without_transitions
          @_without_transitions ||= Rule.new(production, position)
        end

        # Case equality: against another rule, the left-hand sides must
        # match and every token of this rule's right-hand side must
        # match the token at the same index in the other rule.
        def ===(other)
          return super unless other.is_a?(Rule)

          left === other.left &&
            right.each_with_index.all? { |token, index| token === other.right[index] }
        end

        # @return [Integer] hash derived from {#to_a}, so equal rules
        #   collapse inside a Set or Hash.
        def hash
          to_a.hash
        end

        alias_method :eql?, :==

        # @return [Array] the values that identify this rule.
        def to_a
          [left, right, position]
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
+ require "forwardable"
2
+ require "securerandom"
3
+
4
module Antelope
  module Generation
    class Recognizer

      # A set of rules together with the transitions leaving it.
      # Enumerating a state enumerates its rules.
      class State
        include Enumerable
        extend Forwardable

        attr_reader :rules, :transitions
        attr_accessor :id

        def_delegator :@rules, :each

        # Starts out with no rules, no transitions and a random
        # hexadecimal id (the id is writable so it can be replaced).
        def initialize
          @id          = SecureRandom.hex
          @rules       = Set.new
          @transitions = {} #Hash.new { |hash, key| hash[key] = State.new }
        end

        # @return [String] a debugging representation of the state.
        def inspect
          keys  = transitions.keys.join(", ")
          items = rules.to_a.join("} {")
          "#<#{self.class} id=#{id} transitions=[#{keys}] rules=[{#{items}}]>"
        end

        # Absorbs the rules and transitions of another state.
        #
        # @param other [State, Symbol] the state to absorb; the marker
        #   `:_ignore` is silently skipped.
        # @raise [ArgumentError] if `other` is neither a State nor
        #   `:_ignore`.
        # @return [self, nil] nil when `other` was `:_ignore`.
        def merge!(other)
          return if other == :_ignore

          unless other.is_a?(State)
            raise ArgumentError, "Expected #{self.class}, got #{other.class}"
          end

          self << other
          transitions.merge!(other.transitions)
          self
        end

        # Finds the first rule that the given object matches with `===`.
        #
        # @return [Object, nil]
        def rule_for(production)
          rules.find { |candidate| production === candidate }
        end

        # Adds a rule, or every rule of another state, to this state.
        # Rules already present are not added again.
        def <<(rule)
          return rule.rules.each { |member| self << member } if rule.is_a?(State)
          return if rules.include?(rule)

          rules << rule
        end

        alias_method :push, :<<

      end
    end
  end
end
@@ -0,0 +1,152 @@
1
+ require "antelope/generation/recognizer/rule"
2
+ require "antelope/generation/recognizer/state"
3
+
4
module Antelope
  module Generation

    # Recognizes all of the states in the grammar.
    #
    # @see http://redjazz96.tumblr.com/post/88446352960
    class Recognizer

      # A list of all of the states in the grammar.
      #
      # @return [Set<State>]
      attr_reader :states

      # The initial state. This is the state that is constructed from
      # the rule with the left-hand side being `$start`.
      #
      # @return [State]
      attr_reader :start

      # The grammar that the recognizer is running off of.
      #
      # @return [Ace::Grammar]
      attr_reader :grammar

      # Initialize the recognizer.
      #
      # @param grammar [Ace::Grammar]
      def initialize(grammar)
        @grammar = grammar
        @states  = Set.new
      end

      # Runs the recognizer. Once every state has been created, the
      # state ids and then the rule ids are replaced by sequential
      # integers (both start out as random hexadecimal strings), and
      # the resulting states are handed to the grammar.
      #
      # @see #compute_initial_state
      # @return [void]
      def call
        @states = Set.new
        @start  = compute_initial_state
        redefine_state_ids
        redefine_rule_ids
        grammar.states = states
      end

      # Computes the initial state. Starting with the first production
      # of `$start`, it generates the whole state and then the states
      # spawned from it.
      #
      # @return [State]
      def compute_initial_state
        first_production = grammar.productions[:$start][0]
        compute_whole_state(Rule.new(first_production, 0))
      end

      # Computes the entire initial state from the initial rule: a
      # blank state receives the rule, its closure is generated, it is
      # registered, and then the rest of the states are computed.
      #
      # @param rule [Rule] the initial rule.
      # @return [State]
      def compute_whole_state(rule)
        State.new.tap do |state|
          state << rule
          compute_closure(state)
          states << state
          compute_states
        end
      end

      # Computes all states, iterating until a pass adds no new state.
      # Every rule of every state that can still advance is advanced by
      # one position; the state holding the advanced rule is reused if
      # one exists and created otherwise, and it is recorded (or merged
      # into the existing entry) as the transition on the name of the
      # rule's active token.
      #
      # @return [void]
      def compute_states
        fixed_point(states) do
          states.dup.each do |state|
            state.rules.each do |rule|
              next unless rule.succ?

              target = find_state_for(rule.succ) { |advanced| spawn_state(advanced) }
              symbol = rule.active.name
              known  = state.transitions[symbol]

              if known
                known.merge!(target)
              else
                state.transitions[symbol] = target
              end
            end
          end
        end
      end

      # Expands a state in place: for every rule whose active token is
      # a nonterminal, a rule at position zero is added for each of
      # that nonterminal's productions. Repeats until the rule set
      # stops growing.
      #
      # @param state [State]
      # @return [void]
      def compute_closure(state)
        fixed_point(state.rules) do
          expandable = state.rules.select { |rule| rule.active.nonterminal? }

          expandable.each do |rule|
            grammar.productions[rule.active.name].each do |production|
              state << Rule.new(production, 0)
            end
          end
        end
      end

      private

      # Builds, closes and registers a new state seeded with one rule.
      def spawn_state(rule)
        fresh = State.new
        fresh << rule
        compute_closure(fresh)
        states << fresh
        fresh
      end

      # Returns the first known state containing the rule, or the
      # result of the block (called with the rule) when there is none.
      def find_state_for(rule)
        states.find { |state| state.include?(rule) } || yield(rule)
      end

      # Numbers the states sequentially from zero.
      def redefine_state_ids
        states.each_with_index { |state, index| state.id = index }
      end

      # Numbers every rule of every state sequentially from zero; the
      # counter keeps running across states.
      def redefine_rule_ids
        next_id = 0

        states.each do |state|
          state.rules.each do |rule|
            rule.id = next_id
            next_id += 1
          end
        end
      end

      # Runs the block at least once, and again for as long as the
      # size of `enum` changed during the previous run.
      def fixed_point(enum)
        changed = true

        while changed
          before = enum.size
          yield
          changed = enum.size != before
        end
      end

    end
  end
end
@@ -0,0 +1,80 @@
1
module Antelope
  module Generation

    # Raised when precedence cannot decide between the actions that
    # compete for the same token in the same state.
    class UnresolvableConflictError < StandardError; end

    # Builds the action table for a set of states and then resolves
    # the entries that hold more than one action.
    class Tableizer

      # The object providing `states` and `presidence_for`.
      attr_accessor :parser

      # One hash per state id, mapping a token name to its action(s).
      attr_accessor :table

      # The rules seen while building the table, indexed by rule id.
      attr_accessor :rules

      # @param parser [#states, #presidence_for]
      def initialize(parser)
        @parser = parser
      end

      # Builds the table and resolves its conflicts.
      #
      # @raise [UnresolvableConflictError] see {#conflictize}.
      # @return [Array<Hash>] the resolved table.
      def call
        tablize
        conflictize
      end

      # Builds the raw table. Every entry is a list of candidate
      # actions: `[:state, id]` for each transition, `[:reduce, id]`
      # for each lookahead of a final rule, and a single
      # `[:accept, id]` on `$` for a final rule of production zero.
      #
      # @return [Array<Hash>]
      def tablize
        @table = Array.new(parser.states.size) do
          Hash.new { |h, k| h[k] = [] }
        end
        @rules = []

        parser.states.each do |state|
          state.transitions.each do |on, to|
            table[state.id][on] << [:state, to.id]
          end

          state.rules.each do |rule|
            @rules[rule.id] = rule
            next unless rule.final?

            rule.lookahead.each do |look|
              table[state.id][look.name] << [:reduce, rule.production.id]
            end

            if rule.production.id.zero?
              table[state.id][:"$"] = [[:accept, rule.production.id]]
            end
          end
        end

        table
      end

      # Collapses every entry of the table to a single action. Entries
      # with one candidate keep it; other entries are resolved by
      # comparing the precedence of the reducing rule against the
      # precedence of the token: the reduction wins when the rule is
      # higher, the shift wins when it is lower.
      #
      # @raise [UnresolvableConflictError] when the precedences are
      #   equal or incomparable, or when the winning action is missing.
      # @return [Array<Hash>] the table.
      def conflictize
        @table.each_with_index do |actions, state|
          # Only existing keys are reassigned, which is safe while
          # iterating over the hash.
          actions.each do |on, data|
            actions[on] = data.size == 1 ? data[0] : resolve_conflict(state, on, data)
          end
        end
      end

      private

      # Picks the winning action among the candidates for one token.
      def resolve_conflict(state, on, data)
        terminal = parser.presidence_for(on)
        shift    = data.find { |(type, _)| type == :state }
        reduce   = data.find { |(type, _)| type == :reduce }
        rule     = reduce && rule_for_production(reduce[1])
        unresolvable!(state, on) unless rule

        case rule.presidence <=> terminal
        when 1  then reduce
        when -1 then shift || unresolvable!(state, on)
        else unresolvable!(state, on)
        end
      end

      # Reduce actions carry a production id while `rules` is indexed
      # by rule id, so the rule has to be searched for by production.
      # The first matching rule is used; this assumes rules sharing a
      # production share its precedence.
      def rule_for_production(production_id)
        @rules.find { |rule| rule && rule.production.id == production_id }
      end

      def unresolvable!(state, on)
        raise UnresolvableConflictError,
          "Could not determine move for #{on} in state #{state}"
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
+ require "antelope/generation/conflictor"
2
+ require "antelope/generation/constructor"
3
+ require "antelope/generation/recognizer"
4
+ require "antelope/generation/tableizer"
5
+
6
+ module Antelope
7
+
8
+ # Contains the generation mods.
9
+ module Generation
10
+
11
+ end
12
+ end
@@ -0,0 +1,30 @@
1
+ require "pp"
2
+
3
module Antelope
  class Generator

    # Generates an output file, mainly for debugging. Included always
    # as a generator for a grammar.
    class Output < Generator

      # Exposes every mod handed to the generator as a reader method on
      # this instance, named after the mod's key, so the template can
      # refer to the mods by name.
      #
      # @see Generator#initialize
      def initialize(*)
        super

        mods.each do |name, mod|
          define_singleton_method(name) { mod }
        end
      end

      # Actually performs the generation. Renders the template
      # output.erb into the file `<file>.output`.
      #
      # @return [void]
      def generate
        template("output.erb", "#{file}.output")
      end
    end
  end
end
@@ -0,0 +1,57 @@
1
+ require "pp"
2
+
3
module Antelope
  class Generator

    # Generates a ruby parser.
    class Ruby < Generator

      # Creates an action table for the parser by pretty-printing the
      # tableizer's table.
      #
      # @return [String]
      def generate_action_table
        PP.pp(mods[:tableizer].table, "")
      end

      # Outputs the source of an array listing all of the productions.
      # Each entry holds the label name, the number of items on the
      # right-hand side, and a proc built from the production's block
      # (an empty proc when the block is empty).
      #
      # @return [String]
      def generate_productions_list
        entries = grammar.all_productions.map do |production|
          action = production.block.empty? ? "proc {}" : "proc #{production.block}"
          name   = production.label.name.inspect
          arity  = production.items.size.inspect

          "[#{name}, #{arity}, #{action}]"
        end

        "[#{entries.join(",\n")}]"
      end

      # Actually performs the generation. Renders the template
      # ruby.erb, substitutes the result for the `write` reference in
      # the compiler body, and outputs it to `<file>_parser.rb`.
      #
      # @return [void]
      def generate
        template("ruby.erb", "#{file}_parser.rb") do |body|
          format(grammar.compiler.body, :write => body)
        end
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ Productions:
2
+ % constructor.productions.each do |production|
3
+ <%= production.to_s(false) %>
4
+ % end
5
+
6
+ Original Productions:
7
+ % grammar.productions.each do |k, v|
8
+ % v.each do |prod|
9
+ <%= k %> → <%= prod[:items].join(" ") %>
10
+ <%= prod[:block] %>
11
+ % end
12
+ % end
13
+
14
+ Conflicts:
15
+ % conflictor.conflicts.each do |conflict|
16
+ State <%= conflict.state.id %>:
17
+ rules : <%= conflict.rules.map(&:id).join(", ") %>
18
+ type : <%= conflict.type %>
19
+ tokens: {<%= conflict.token.to_a.join(", ") %>}
20
+ % end
21
+
22
+ Presidence:
23
+ --- highest
24
+ % grammar.presidence.each do |pr|
25
+ <%= "%-8s" % pr.type %> <%= pr.level %>:
26
+ {<%= pr.tokens.to_a.join(", ") %>}
27
+ % end
28
+ --- lowest
29
+
30
+ Table:
31
+ % PP.pp(Hash[tableizer.table.each_with_index.to_a.map(&:reverse)], _erbout)
32
+
33
+ % PP.pp(tableizer.rules, _erbout)
34
+
35
+ % grammar.states.each do |state|
36
+ State <%= state.id %>:
37
+ rules:
38
+ % state.rules.each do |rule|
39
+ <%= rule %>
40
+ {<%= rule.lookahead.to_a.join(", ") %>}
41
+ % end
42
+
43
+ transitions:
44
+ % max = state.transitions.keys.map(&:length).max || 0
45
+ % state.transitions.each do |on, to|
46
+ <%= "%-#{max}s" % on %>: State <%= to.id %>
47
+ % end
48
+
49
+ % end
@@ -0,0 +1,62 @@
1
+ # This file assumes that the output of the generator will be placed
2
+ # within a module or a class. However, the module/class requires a
3
+ # `type` method, which takes a terminal and gives its type, as a
4
+ # symbol. These types should line up with the terminals that were
5
+ # defined in the original grammar.
6
+
7
+ # The actions to take during parsing. In every state, there are a
8
+ # set of acceptable peek tokens; this table tells the parser what
9
+ # to do on each acceptable peek token. The possible actions include
10
+ # `:accept`, `:reduce`, and `:state`; `:accept` means to accept the
11
+ # input and return the value of the parsing. `:reduce` means to
12
+ # reduce the top of the stack into a given nonterminal. `:state`
13
+ # means to transition to another state.
14
+ #
15
+ # @return [Array<Hash<(Symbol, Array<(Symbol, Numeric)>)>>]
16
+ ACTION_TABLE = <%= generate_action_table %>.freeze # >
17
+
18
+ # A list of all of the productions. Only includes the left-hand side,
19
+ # the number of tokens on the right-hand side, and the block to call
20
+ # on reduction.
21
+ #
22
+ # @return [Array<Array<(Symbol, Numeric, Proc)>>]
23
+ PRODUCTIONS = <%= generate_productions_list %>.freeze # >
24
+
25
+ # Runs the parser.
26
+ #
27
+ # @param input [Array<Object>] the input to run the parser over.
28
+ # @return [Object] the result of the accept.
29
# Runs the parser over a copy of the input, driven by ACTION_TABLE.
# The stack holds `[value, state]` pairs and starts in state 0. The
# peek token is `:"$"` once the input is exhausted, and otherwise the
# `type` of the next input element.
#
# @param input [Array<Object>] the input to run the parser over.
# @raise [KeyError] if the current state has no action for the peek
#   token.
# @return [Object] the result of the accept.
def parse(input)
  remaining = input.dup
  stack     = [[nil, 0]]
  result    = nil

  until stack.empty?
    peek_token = remaining.empty? ? :"$" : type(remaining.first)
    action     = ACTION_TABLE[stack.last.last].fetch(peek_token)

    case action.first
    when :state
      # Shift: consume the token and enter the target state.
      stack.push([remaining.shift, action.last])
    when :reduce
      production = PRODUCTIONS[action.last]
      popped     = stack.pop(production[1])
      value      = production[2].call(*popped.map(&:first))
      # The state uncovered by the pop decides where to go next.
      goto       = ACTION_TABLE[stack.last.last][production[0]]
      stack.push([value, goto.last])
    when :accept
      production = PRODUCTIONS[action.last]
      result     = stack.pop(production[1]).first.first
      stack.pop
    else
      raise
    end
  end

  result
end
@@ -0,0 +1,84 @@
1
+ require "antelope/generator/output"
2
+ require "antelope/generator/ruby"
3
+ require "erb"
4
+ require "pathname"
5
+
6
module Antelope

  # Generates a parser. This is normally the parent class, and the
  # specific implementations inherit from this. The generated
  # parser should, ideally, be completely independent (not requiring
  # any external source code), as well as be under a permissive
  # license.
  class Generator

    # The modifiers that were applied to the grammar.
    #
    # @return [Hash<(Symbol, Object)>]
    attr_reader :mods

    # The file name (not including the extension) that the grammar
    # should output to.
    #
    # @return [String]
    attr_reader :file

    # The grammar that the generator is for.
    #
    # @return [Ace::Grammar]
    attr_reader :grammar

    # The source root directory for templates. Overwrite to change.
    #
    # @return [Pathname]
    def self.source_root
      Pathname.new("../generator/templates").expand_path(__FILE__)
    end

    # Initialize the generator. The output file name is taken from
    # the grammar's name.
    #
    # @param grammar [Grammar]
    # @param mods [Hash<(Symbol, Object)>]
    def initialize(grammar, mods)
      @file    = grammar.name
      @grammar = grammar
      @mods    = mods
    end

    # Actually does the generation. A subclass should implement this.
    #
    # @raise [NotImplementedError]
    # @return [void]
    def generate
      raise NotImplementedError
    end

    protected

    # Copies a template from the source, runs it through erb (in the
    # context of this instance, with `%` lines enabled), and then
    # outputs it at the destination. If given a block, it will call
    # the block after the template is run through erb with the content
    # from erb; the result of the block is then used as the content
    # instead.
    #
    # @param source [String] the source file. This should be in
    #   {.source_root}.
    # @param destination [String] the destination file. This will be
    #   in {Ace::Grammar#output}.
    # @yieldparam [String] content The content that ERB created.
    # @yieldreturn [String] The new content to write to the output.
    # @return [void]
    def template(source, destination)
      src_file = self.class.source_root + source
      context  = instance_eval('binding')
      # Pathname#read opens and closes the file itself, so no handle
      # is left open behind.
      content  = build_erb(src_file.read).result(context)
      content  = yield content if block_given?
      dest_file = grammar.output + destination
      dest_file.open("w") do |f|
        f.write(content)
      end
    end

    private

    # Builds the ERB instance with `%` line processing. Newer ERB
    # versions take the trim mode as a keyword and deprecate the
    # positional form; older ones only know the positional form.
    def build_erb(text)
      if ERB.instance_method(:initialize).parameters.include?([:key, :trim_mode])
        ERB.new(text, :trim_mode => "%")
      else
        ERB.new(text, nil, "%")
      end
    end

  end
end
@@ -0,0 +1,4 @@
1
+ module Antelope
2
+ # The current running version of antelope.
3
+ VERSION = "0.0.1".freeze
4
+ end
data/lib/antelope.rb ADDED
@@ -0,0 +1,9 @@
1
+ require "antelope/automaton"
2
+ require "antelope/generation"
3
+ require "antelope/generator"
4
+ require "antelope/version"
5
+ require "antelope/ace"
6
+
7
+ module Antelope
8
+ # Your code goes here...
9
+ end