dhaka 0.0.5 → 0.0.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,6 +21,12 @@
21
21
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
+ # An introduction to Dhaka and annotated examples can be found at the project homepage http://dhaka.rubyforge.org
25
+ #
26
+ # Further examples can be found in the test suites included with the gem.
27
+ module Dhaka
28
+ end
29
+
24
30
  require File.dirname(__FILE__)+'/grammar/grammar_symbol'
25
31
  require File.dirname(__FILE__)+'/grammar/production'
26
32
  require File.dirname(__FILE__)+'/grammar/closure_hash'
@@ -41,5 +47,3 @@ require File.dirname(__FILE__)+'/parser/compiled_parser'
41
47
 
42
48
  require File.dirname(__FILE__)+'/tokenizer/tokenizer'
43
49
  require File.dirname(__FILE__)+'/evaluator/evaluator'
44
-
45
-
@@ -1,21 +1,43 @@
1
1
  module Dhaka
2
+
3
+ # This is the abstract base evaluator class. It is not directly instantiated.
4
+ # When defining an evaluator for a specific grammar, we subclass it. For example, for FooGrammar
5
+ # we create a FooEvaluator that subclasses Evaluator. Note that FooEvaluator may not
6
+ # be further subclassed.
7
+ #
8
+ # An evaluation rule for a given production named +bar+ is defined by calling +for_bar+ with
9
+ # a block that performs the evaluation. For detailed examples, see the evaluators in the
10
+ # test suite.
11
+
2
12
  class Evaluator
3
13
 
14
+ # Instantiates a new evaluator with the syntax tree of a parsed expression. Only subclasses
15
+ # of Evaluator are directly instantiated.
4
16
  def initialize(syntax_tree)
5
17
  @syntax_tree = syntax_tree
6
18
  @node_stack = []
7
19
  end
8
20
 
21
+ # Returns the evaluation result.
9
22
  def result
10
23
  evaluate(@syntax_tree)
11
24
  end
12
25
 
26
+ private
27
+
13
28
  def child_nodes
14
29
  @node_stack[-1]
15
30
  end
16
-
17
- private
18
31
 
32
+ def evaluate node
33
+ return node if (ParseTreeLeafNode === node)
34
+ @node_stack << node.child_nodes.collect {|child_node| evaluate(child_node)}
35
+ proc = self.class.actions[node.production.name]
36
+ result = self.instance_eval(&proc)
37
+ @node_stack.pop
38
+ result
39
+ end
40
+
19
41
  def self.inherited(evaluator)
20
42
  class << evaluator
21
43
  attr_accessor :grammar, :actions
@@ -29,34 +51,25 @@ module Dhaka
29
51
  check_definitions
30
52
  end
31
53
 
54
+ def self.method_missing(method_name, &blk)
55
+ if method_name.to_s =~ /^for_*/
56
+ rule_name = method_name.to_s[4..-1]
57
+ self.for_rule_named(rule_name, &blk)
58
+ end
59
+ end
60
+
32
61
  def self.check_definitions
33
62
  non_trivial_productions_with_rules_undefined = self.grammar.productions.select {|production| production.expansion.size != 1}.collect {|production| production.name} - self.actions.keys
34
63
  raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
35
64
  end
36
65
 
37
- def evaluate node
38
- return node if (ParseTreeLeafNode === node)
39
- @node_stack << node.child_nodes.collect {|child_node| evaluate(child_node)}
40
- proc = self.class.actions[node.production.name]
41
- result = self.instance_eval(&proc)
42
- @node_stack.pop
43
- result
44
- end
45
-
46
66
  def self.for_rule_named(name, &blk)
47
67
  self.actions[name] = blk
48
68
  end
49
-
50
- def self.method_missing(method_name, &blk)
51
- if method_name.to_s =~ /^for_*/
52
- rule_name = method_name.to_s[4..-1]
53
- self.for_rule_named(rule_name, &blk)
54
- end
55
- end
56
69
 
57
70
  end
58
71
 
59
- class EvaluatorDefinitionError < StandardError
72
+ class EvaluatorDefinitionError < StandardError #:nodoc:
60
73
  def initialize(non_trivial_productions_with_rules_undefined)
61
74
  @non_trivial_productions_with_rules_undefined = non_trivial_productions_with_rules_undefined
62
75
  end
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
- class ClosureHash < Hash
3
+ # A subclass of Hash with a dirty flag
4
+ class ClosureHash < Hash #:nodoc:
4
5
  attr_accessor :dirty
5
6
  def initialize(&block)
6
7
  super(&block)
@@ -2,15 +2,24 @@
2
2
  require 'set'
3
3
  module Dhaka
4
4
 
5
+ # Reserved name for the start symbol for all grammars.
5
6
  START_SYMBOL_NAME = "_Start_"
6
- END_SYMBOL_NAME = "_End_"
7
+ END_SYMBOL_NAME = "_End_" #:nodoc:
7
8
 
9
+ # Productions for specific grammar symbols are defined in the context of this class.
8
10
  class ProductionBuilder
11
+
12
+ # +symbol+ is the grammar symbol that productions are being defined for.
9
13
  def initialize(grammar, symbol)
10
14
  @grammar = grammar
11
15
  @symbol = symbol
12
16
  end
13
17
 
18
+ # Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
19
+ # a directive <tt>:prec</tt>, the value of which is a grammar symbol name. The precedence of the production is then
20
+ # set to the precedence of the grammar symbol corresponding to that name.
21
+ #
22
+ # See the arithmetic precedence grammar in the test suites for an example.
14
23
  def method_missing(production_name, expansion, options = {})
15
24
  expansion_symbols = expansion.collect {|name| @grammar.symbols[name]}
16
25
  if precedence_symbol_name = options[:prec]
@@ -25,12 +34,23 @@ module Dhaka
25
34
  end
26
35
  end
27
36
 
37
+ # The precedence builder defines three methods, +left+, +right+ and +nonassoc+. These accept arrays of grammar
38
+ # symbol names, all of which are assigned the same precedence level and associativity. This works almost exactly like Yacc.
39
+ #
40
+ # See the arithmetic precedence grammar in the test suites for an example.
28
41
  class PrecedenceBuilder
29
- def initialize(grammar)
42
+ def initialize(grammar) #:nodoc:
30
43
  @grammar = grammar
31
44
  @precedence_level = 0
32
45
  end
33
- def method_missing(associativity, symbol_names)
46
+ [:left, :right, :nonassoc].each do |associativity|
47
+ define_method(associativity) do |symbols|
48
+ assign_precedences associativity, symbols
49
+ end
50
+ end
51
+
52
+ private
53
+ def assign_precedences(associativity, symbol_names)
34
54
  symbol_names.each do |symbol_name|
35
55
  symbol = @grammar.symbols[symbol_name]
36
56
  symbol.precedence = Precedence.new(@precedence_level, associativity)
@@ -39,8 +59,34 @@ module Dhaka
39
59
  end
40
60
  end
41
61
 
62
+ # This class is subclassed when specifying a grammar. Note that subclasses of this class may not be further subclassed.
42
63
  class Grammar
43
64
 
65
+ # Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
66
+ # evaluated in the context of a ProductionBuilder.
67
+ def self.for_symbol symbol, &blk
68
+ symbol = symbols[symbol]
69
+ symbol.non_terminal = true
70
+ ProductionBuilder.new(self, symbol).instance_eval(&blk)
71
+ end
72
+
73
+ # Used for defining the precedences and associativities of symbols. The block +blk+ is
74
+ # evaluated in the context of a PrecedenceBuilder.
75
+ def self.precedences &blk
76
+ PrecedenceBuilder.new(self).instance_eval(&blk)
77
+ end
78
+
79
+ # Returns the grammar symbol identified by +name+
80
+ def self.symbol_for_name(name)
81
+ if symbols.has_key? name
82
+ symbols[name]
83
+ else
84
+ raise "No symbol with name #{name} found"
85
+ end
86
+ end
87
+
88
+ private
89
+
44
90
  def self.inherited(grammar)
45
91
  class << grammar
46
92
  attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
@@ -53,13 +99,10 @@ module Dhaka
53
99
  grammar.__first_cache = {}
54
100
  end
55
101
 
56
- def self.for_symbol symbol, &blk
57
- symbol = symbols[symbol]
58
- symbol.non_terminal = true
59
- ProductionBuilder.new(self, symbol).instance_eval(&blk)
102
+ def self.productions_for_symbol(symbol)
103
+ productions_by_symbol[symbol]
60
104
  end
61
105
 
62
-
63
106
  def self.productions
64
107
  productions_by_name.values
65
108
  end
@@ -68,17 +111,6 @@ module Dhaka
68
111
  productions_by_name[name]
69
112
  end
70
113
 
71
- def self.productions_for_symbol(symbol)
72
- productions_by_symbol[symbol]
73
- end
74
-
75
- def self.symbol_for_name(name)
76
- if symbols.has_key? name
77
- symbols[name]
78
- else
79
- raise "No symbol with name #{name} found"
80
- end
81
- end
82
114
 
83
115
  def self.terminal_symbols
84
116
  symbols.values.select {|symbol| symbol.terminal}
@@ -102,10 +134,6 @@ module Dhaka
102
134
  return channels, result
103
135
  end
104
136
 
105
- def self.precedences &blk
106
- PrecedenceBuilder.new(self).instance_eval(&blk)
107
- end
108
-
109
137
  def self.first(given_symbol)
110
138
  cached_result = self.__first_cache[given_symbol]
111
139
  return cached_result if cached_result
@@ -146,7 +174,6 @@ module Dhaka
146
174
  end
147
175
  return closure_hash
148
176
  end
149
-
150
-
151
177
  end
178
+
152
179
  end
@@ -1,5 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
+ # Each grammar symbol is uniquely identified by a string name. The name of a symbol can
4
+ # be anything and need not correspond to its character representation. For example, an ampersand in the
5
+ # character stream could be tokenized as a symbol with a name 'whatever'. In general, it's best to choose
6
+ # symbol names that are descriptive.
3
7
  class GrammarSymbol
4
8
  attr_reader :name
5
9
  attr_accessor :non_terminal, :nullable, :precedence, :associativity
@@ -1,5 +1,5 @@
1
1
  module Dhaka
2
- class Precedence
2
+ class Precedence #:nodoc:
3
3
  include Comparable
4
4
  attr_reader :precedence_level, :associativity
5
5
  def initialize(precedence_level, associativity)
@@ -1,9 +1,10 @@
1
1
  module Dhaka
2
- class Action
2
+ # Encapsulates code for Parser actions.
3
+ class Action #:nodoc:
3
4
  attr_reader :action_code
4
5
  end
5
6
 
6
- class ShiftAction < Action
7
+ class ShiftAction < Action #:nodoc:
7
8
  attr_reader :destination_state
8
9
  def initialize destination_state
9
10
  @destination_state = destination_state
@@ -20,7 +21,7 @@ module Dhaka
20
21
  end
21
22
  end
22
23
 
23
- class ReduceAction < Action
24
+ class ReduceAction < Action #:nodoc:
24
25
  attr_reader :production
25
26
  def initialize(production)
26
27
  @production = production
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
- class Channel
3
+ # Represents channels for pumping of lookaheads between items
4
+ class Channel #:nodoc:
4
5
  attr_reader :start_item, :end_item
5
6
  def initialize(grammar, start_item, end_item)
6
7
  @grammar = grammar
@@ -23,7 +24,7 @@ module Dhaka
23
24
  end
24
25
  end
25
26
 
26
- class SpontaneousChannel < Channel
27
+ class SpontaneousChannel < Channel #:nodoc:
27
28
  def to_s
28
29
  "Spontaneous " + super.to_s
29
30
  end
@@ -40,7 +41,7 @@ module Dhaka
40
41
  end
41
42
  end
42
43
 
43
- class PassiveChannel < Channel
44
+ class PassiveChannel < Channel #:nodoc:
44
45
  def to_s
45
46
  "Passive " + super.to_s
46
47
  end
@@ -1,6 +1,8 @@
1
1
  module Dhaka
2
+ # This is the superclass of all compiled Parsers. It is only used by generated code.
2
3
  class CompiledParser
3
4
 
5
+ private
4
6
  def self.inherited(compiled_parser)
5
7
  class << compiled_parser
6
8
  attr_accessor :states, :grammar, :start_state_id
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
- class Item
3
+ # Represents parser state items
4
+ class Item #:nodoc:
4
5
  attr_reader :production, :next_item_index, :lookaheadset
5
6
  def initialize(production, next_item_index)
6
7
  @production = production
@@ -1,21 +1,26 @@
1
1
  module Dhaka
2
+ # Returned on successful parsing of the input token stream.
2
3
  class ParseSuccessResult
3
- attr_accessor :syntax_tree
4
- def initialize(syntax_tree)
4
+ # Contains the parse result.
5
+ attr_accessor :syntax_tree
6
+ def initialize(syntax_tree) #:nodoc:
5
7
  @syntax_tree = syntax_tree
6
8
  end
7
-
8
- def has_error?
9
+ # This is false.
10
+ def has_error?
9
11
  false
10
12
  end
11
13
  end
14
+
15
+ # Returned on unsuccessful parsing of the input token stream.
12
16
  class ParseErrorResult
13
- attr_reader :bad_token_index
14
- def initialize(bad_token_index)
17
+ # The index of the token that caused the parse error.
18
+ attr_reader :bad_token_index
19
+ def initialize(bad_token_index) #:nodoc:
15
20
  @bad_token_index = bad_token_index
16
21
  end
17
-
18
- def has_error?
22
+ # This is true.
23
+ def has_error?
19
24
  true
20
25
  end
21
26
  end
@@ -1,20 +1,18 @@
1
1
  module Dhaka
2
- class ParseTreeCompositeNode
2
+ # These are composite nodes of the syntax tree returned by the successful parsing of a token stream.
3
+ class ParseTreeCompositeNode
3
4
  attr_reader :production, :child_nodes
4
- def initialize(production)
5
+ def initialize(production) #:nodoc:
5
6
  @production = production
6
7
  @child_nodes = []
7
8
  end
8
- def linearize
9
+ def linearize #:nodoc:
9
10
  child_nodes.collect {|child_node| child_node.linearize}.flatten + [production.name]
10
11
  end
11
- def to_s
12
+ def to_s #:nodoc:
12
13
  "CompositeNode: #{production.symbol} --> [#{child_nodes.join(", ")}]"
13
14
  end
14
- def dot_name
15
- "Node#{object_id}"
16
- end
17
-
15
+ # Returns the dot representation of the syntax tree.
18
16
  def to_dot
19
17
  result = []
20
18
  result << ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"] if head_node?
@@ -28,31 +26,39 @@ module Dhaka
28
26
  result.join("\n")
29
27
  end
30
28
 
31
- def head_node?
29
+ def head_node? #:nodoc:
32
30
  production.symbol.name == START_SYMBOL_NAME
33
31
  end
32
+
33
+ def dot_name #:nodoc:
34
+ "Node#{object_id}"
35
+ end
36
+
34
37
  end
35
38
 
39
+ # These are leaf nodes of syntax trees. They contain tokens.
36
40
  class ParseTreeLeafNode
37
41
  attr_reader :token
38
- def initialize(token)
42
+ def initialize(token) #:nodoc:
39
43
  @token = token
40
44
  end
41
- def linearize
45
+ def linearize #:nodoc:
42
46
  []
43
47
  end
44
- def to_s
48
+ def to_s #:nodoc:
45
49
  "LeafNode: #{token}"
46
50
  end
47
- def dot_name
48
- "Node#{object_id}"
49
- end
51
+ # Returns the dot representation of this node.
50
52
  def to_dot
51
53
  label = "#{token}#{' : '+token.value.to_s if token.value}"
52
54
  "#{dot_name} [label=\"#{label}\"]"
53
55
  end
54
- def head_node?
56
+ def head_node? #:nodoc:
55
57
  false
56
58
  end
59
+
60
+ def dot_name #:nodoc:
61
+ "Node#{object_id}"
62
+ end
57
63
  end
58
64
  end
@@ -7,6 +7,9 @@ module Dhaka
7
7
  include ParserMethods
8
8
  attr_reader :grammar, :start_state
9
9
 
10
+ # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
11
+ # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
12
+ # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
10
13
  def initialize(grammar, logger = nil)
11
14
  if logger
12
15
  @logger = logger
@@ -34,20 +37,7 @@ module Dhaka
34
37
  initialize_states
35
38
  end
36
39
 
37
- def initialize_states
38
- start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
39
- raise NoStartProductionsError.new(@grammar) if start_productions.empty?
40
- start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
41
- start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
42
- @start_state = @states[start_items]
43
- @logger.debug("Pumping #{@channels.size} channels...")
44
- pump_channels
45
- @logger.debug("Generating shift actions...")
46
- generate_shift_actions
47
- @logger.debug("Generating reduce actions...")
48
- generate_reduce_actions
49
- end
50
-
40
+ # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
51
41
  def compile_to_ruby_source_as parser_class_name
52
42
  result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
53
43
  result << " self.grammar = #{@grammar.name}\n\n"
@@ -59,6 +49,9 @@ module Dhaka
59
49
  result
60
50
  end
61
51
 
52
+ # Returns the dot representation of the parser. If <tt>:hide_lookaheads</tt> is set to true in the
53
+ # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
54
+ # of lookahead symbols for every item in every state.
62
55
  def to_dot(options = {})
63
56
  result = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
64
57
  result += states.collect { |state| state.to_dot(options) }
@@ -70,10 +63,28 @@ module Dhaka
70
63
  result << ['}']
71
64
  result.join("\n")
72
65
  end
66
+
67
+ private :start_state
68
+ private
69
+
73
70
  def states
74
71
  @states.values
75
72
  end
76
-
73
+
74
+ def initialize_states
75
+ start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
76
+ raise NoStartProductionsError.new(@grammar) if start_productions.empty?
77
+ start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
78
+ start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
79
+ @start_state = @states[start_items]
80
+ @logger.debug("Pumping #{@channels.size} channels...")
81
+ pump_channels
82
+ @logger.debug("Generating shift actions...")
83
+ generate_shift_actions
84
+ @logger.debug("Generating reduce actions...")
85
+ generate_reduce_actions
86
+ end
87
+
77
88
  def generate_shift_actions
78
89
  @states.values.each do |state|
79
90
  @transitions[state].keys.each { |symbol|
@@ -157,21 +168,12 @@ module Dhaka
157
168
 
158
169
  end
159
170
 
160
-
161
- class ParserReduceReduceConflictError < StandardError
162
- def initialize(message)
163
- @message = message
164
- end
165
- def to_s
166
- @message
167
- end
168
- end
169
-
171
+ # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
170
172
  class NoStartProductionsError < StandardError
171
- def initialize(grammar)
173
+ def initialize(grammar) #:nodoc:
172
174
  @grammar = grammar
173
175
  end
174
- def to_s
176
+ def to_s #:nodoc:
175
177
  "No start productions defined for #{@grammar.name}"
176
178
  end
177
179
  end
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
+ # This module is included both in Parser and CompiledParser.
3
4
  module ParserMethods
5
+ # +token_stream+ is an Enumerable of Token-s. Returns either a ParseSuccessResult or a ParseErrorResult.
4
6
  def parse token_stream
5
7
  parser_run = ParserRun.new(grammar, start_state, token_stream)
6
8
  parser_run.run
@@ -1,5 +1,5 @@
1
1
  module Dhaka
2
- class ParserRun
2
+ class ParserRun #:nodoc:
3
3
 
4
4
  def initialize(grammar, start_state, token_stream)
5
5
  @grammar = grammar
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'set'
3
3
  module Dhaka
4
- class ParserState
4
+ class ParserState #:nodoc:
5
5
 
6
6
  attr_accessor :items, :actions, :id
7
7
 
@@ -55,7 +55,7 @@ module Dhaka
55
55
 
56
56
  end
57
57
 
58
- class ItemSet < Set
58
+ class ItemSet < Set #:nodoc:
59
59
  def hash
60
60
  self.collect{|item| item.hash}.inject{|result, hashcode| result ^ hashcode}
61
61
  end
@@ -1,4 +1,6 @@
1
1
  module Dhaka
2
+ # Represents a portion of the input character stream that is mapped by the tokenizer
3
+ # to a symbol in the grammar.
2
4
  class Token
3
5
  attr_accessor :grammar_symbol, :value
4
6
  def initialize(grammar_symbol, value)
@@ -1,4 +1,10 @@
1
1
  module Dhaka
2
+
3
+ # Reserved constant used to identify the idle state of the tokenizer.
4
+ TOKENIZER_IDLE_STATE = :idle_state
5
+
6
+ # Raised when the tokenizer encounters a character that has no corresponding action in
7
+ # its current state.
2
8
  class UnrecognizedInputCharacterException < StandardError
3
9
  attr_reader :input, :char_index
4
10
  def initialize(input, char_index)
@@ -10,6 +16,8 @@ module Dhaka
10
16
  end
11
17
  end
12
18
 
19
+ # A tokenizer state encapsulates actions that should be performed upon
20
+ # encountering each permissible character for that state.
13
21
  class TokenizerState
14
22
  attr_reader :actions
15
23
 
@@ -17,72 +25,83 @@ module Dhaka
17
25
  @actions = {}
18
26
  end
19
27
 
28
+ # Define the action (+blk+) to be performed when encountering any of +characters+ in the input character stream.
20
29
  def for_characters(characters, &blk)
21
30
  characters.each do |character|
22
31
  actions[character] = blk
23
32
  end
24
33
  end
25
34
 
26
- def for_character(character, &blk)
27
- actions[character[0]] = blk
28
- end
35
+ alias for_character for_characters
29
36
 
30
- def to_s
37
+ def to_s #:nodoc:
31
38
  actions.inspect
32
39
  end
33
40
 
34
41
  end
35
42
 
43
+ # This class contains a DSL for specifying tokenizers. Subclass it to implement tokenizers for specific grammars.
44
+ # Subclasses of this class may not be further subclassed.
45
+ #
46
+ # Tokenizers are state machines that are specified pretty much by hand. Each state of a tokenizer is identified
47
+ # by a Ruby symbol. The constant Dhaka::TOKENIZER_IDLE_STATE is reserved for the idle state of the tokenizer (the one
48
+ # that it starts in).
36
49
  class Tokenizer
37
-
38
- def self.inherited(tokenizer)
39
- class << tokenizer
40
- attr_accessor :states
41
- end
42
- tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
43
- end
44
-
50
+
51
+ # Define the actions for the state named +state_name+. The block +blk+ is evaluated in the context of that state's TokenizerState.
45
52
  def self.for_state(state_name, &blk)
46
53
  states[state_name].instance_eval(&blk)
47
54
  end
48
-
55
+
56
+ # Tokenizes a string +input+ and returns an array of Token-s.
49
57
  def self.tokenize(input)
50
- TokenizerRun.new(self, input).run
58
+ self.new(input).run
51
59
  end
52
- end
53
-
54
- class TokenizerRun
55
-
60
+
61
+ # A slot that can be used to accumulate characters when processing multi-character tokens.
56
62
  attr_accessor :accumulator
63
+ # The tokens shifted so far.
57
64
  attr_reader :tokens
58
- def initialize(tokenizer, input)
59
- @tokenizer = tokenizer
65
+
66
+ def initialize(input) #:nodoc:
60
67
  @input = input
61
- @current_state = tokenizer.states[:idle_state]
68
+ @current_state = self.class.states[TOKENIZER_IDLE_STATE]
62
69
  @curr_char_index = 0
63
70
  @tokens = []
64
71
  end
65
72
 
66
- def run
67
- while curr_char
68
- blk = @current_state.actions[curr_char]
69
- raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless blk
70
- instance_eval(&blk)
71
- end
72
- tokens
73
- end
74
-
73
+ # The character currently being processed.
75
74
  def curr_char
76
75
  @input[@curr_char_index] and @input[@curr_char_index].chr
77
76
  end
78
-
77
+
78
+ # Advance to the next character.
79
79
  def advance
80
80
  @curr_char_index += 1
81
81
  end
82
-
82
+
83
+ # Change the active state of the tokenizer to the state identified by the symbol +state_name+.
83
84
  def switch_to state_name
84
- @current_state = @tokenizer.states[state_name]
85
+ @current_state = self.class.states[state_name]
86
+ end
87
+
88
+ def run #:nodoc:
89
+ while curr_char
90
+ blk = @current_state.actions[curr_char]
91
+ raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless blk
92
+ instance_eval(&blk)
93
+ end
94
+ tokens
95
+ end
96
+
97
+ private
98
+ def self.inherited(tokenizer)
99
+ class << tokenizer
100
+ attr_accessor :states
101
+ end
102
+ tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
85
103
  end
86
104
 
87
105
  end
106
+
88
107
  end
@@ -13,7 +13,7 @@ class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
13
13
 
14
14
  all_characters = digits + parenths + operators + functions + arg_separator + whitespace
15
15
 
16
- for_state :idle_state do
16
+ for_state Dhaka::TOKENIZER_IDLE_STATE do
17
17
  for_characters(all_characters - (digits + whitespace)) do
18
18
  tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name(curr_char), nil)
19
19
  advance
@@ -29,15 +29,19 @@ class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
29
29
 
30
30
  for_state :get_integer_literal do
31
31
  for_characters all_characters - digits do
32
- tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), accumulator.to_i)
33
- switch_to :idle_state
32
+ tokens << integer_literal_token(accumulator.to_i)
33
+ switch_to Dhaka::TOKENIZER_IDLE_STATE
34
34
  end
35
35
  for_characters digits do
36
36
  self.accumulator += curr_char
37
37
  advance
38
- tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), accumulator.to_i) unless curr_char
38
+ tokens << integer_literal_token(accumulator.to_i) unless curr_char
39
39
  end
40
40
  end
41
41
 
42
+ def integer_literal_token(value)
43
+ Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), value)
44
+ end
45
+
42
46
  end
43
47
 
@@ -13,7 +13,7 @@ class ArithmeticTokenizer < Dhaka::Tokenizer
13
13
 
14
14
  all_characters = digits + parenths + operators + functions + arg_separator + whitespace
15
15
 
16
- for_state :idle_state do
16
+ for_state Dhaka::TOKENIZER_IDLE_STATE do
17
17
  for_characters(all_characters - (digits + whitespace)) do
18
18
  tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name(curr_char), nil)
19
19
  advance
@@ -29,15 +29,19 @@ class ArithmeticTokenizer < Dhaka::Tokenizer
29
29
 
30
30
  for_state :get_integer_literal do
31
31
  for_characters all_characters - digits do
32
- tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), accumulator.to_i)
33
- switch_to :idle_state
32
+ tokens << integer_literal_token(accumulator.to_i)
33
+ switch_to Dhaka::TOKENIZER_IDLE_STATE
34
34
  end
35
35
  for_characters digits do
36
36
  self.accumulator += curr_char
37
37
  advance
38
- tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), accumulator.to_i) unless curr_char
38
+ tokens << integer_literal_token(accumulator.to_i) unless curr_char
39
39
  end
40
40
  end
41
41
 
42
+ def integer_literal_token(value)
43
+ Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), value)
44
+ end
45
+
42
46
  end
43
47
 
@@ -6,7 +6,7 @@ class BracketTokenizer < Dhaka::Tokenizer
6
6
 
7
7
  all_characters = ['(', '[', '{', 'B', '}', ']', ')']
8
8
 
9
- for_state :idle_state do
9
+ for_state Dhaka::TOKENIZER_IDLE_STATE do
10
10
  for_characters(all_characters) do
11
11
  tokens << Dhaka::Token.new(BracketGrammar.symbol_for_name(curr_char), nil)
12
12
  advance
@@ -42,12 +42,17 @@ class ParserTest < Test::Unit::TestCase
42
42
  'E ::= E -> - T [)-]'])
43
43
  expected_states[10] = Set.new(['T ::= ( E ) -> [#)-]'])
44
44
  expected_states[11] = Set.new(['_Start_ ::= S # -> [_End_]'])
45
- actual_states = Set.new(@parser.states.collect {|state| Set.new(state.items.values.collect {|item| item.to_s})})
45
+ actual_states = Set.new(@parser.send('states').collect {|state| Set.new(state.items.values.collect {|item| item.to_s})})
46
46
  #write_parser(@parser)
47
47
  expected_states.values.each do |state|
48
48
  assert set_finder(state, actual_states), "expected #{state.to_a}"
49
49
  end
50
50
  end
51
+
52
+ def test_parser_can_be_exported_to_dot_format
53
+ dot_representation = @parser.to_dot
54
+ end
55
+
51
56
  def build_tokens(token_symbol_names, grammar)
52
57
  token_symbol_names.collect {|symbol_name| Dhaka::Token.new(grammar.symbol_for_name(symbol_name), nil)}
53
58
  end
@@ -71,6 +76,11 @@ class ParserTest < Test::Unit::TestCase
71
76
  "start"], syntax_tree.linearize
72
77
  end
73
78
 
79
+ def test_syntax_trees_can_be_exported_to_dot_format
80
+ syntax_tree = @parser.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#'], @grammar)).syntax_tree
81
+ syntax_tree.to_dot
82
+ end
83
+
74
84
  def get_linearized_parse_result(input, parser)
75
85
  parser.parse(build_tokens(input, parser.grammar)).syntax_tree.linearize
76
86
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: dhaka
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.5
7
- date: 2006-12-15 00:00:00 -05:00
6
+ version: 0.0.6
7
+ date: 2007-01-07 00:00:00 -05:00
8
8
  summary: An LALR1 parser generator written in Ruby
9
9
  require_paths:
10
10
  - lib
@@ -15,7 +15,7 @@ description:
15
15
  autorequire: dhaka
16
16
  default_executable:
17
17
  bindir: bin
18
- has_rdoc: false
18
+ has_rdoc: true
19
19
  required_ruby_version: !ruby/object:Gem::Version::Requirement
20
20
  requirements:
21
21
  - - ">"