dhaka 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,12 @@
21
21
  # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22
22
  #++
23
23
 
24
+ # An introduction to Dhaka and annotated examples can be found at the project homepage http://dhaka.rubyforge.org
25
+ #
26
+ # Further examples can be found in the test suites included with the gem.
27
+ module Dhaka
28
+ end
29
+
24
30
  require File.dirname(__FILE__)+'/grammar/grammar_symbol'
25
31
  require File.dirname(__FILE__)+'/grammar/production'
26
32
  require File.dirname(__FILE__)+'/grammar/closure_hash'
@@ -41,5 +47,3 @@ require File.dirname(__FILE__)+'/parser/compiled_parser'
41
47
 
42
48
  require File.dirname(__FILE__)+'/tokenizer/tokenizer'
43
49
  require File.dirname(__FILE__)+'/evaluator/evaluator'
44
-
45
-
@@ -1,21 +1,43 @@
1
1
  module Dhaka
2
+
3
+ # This is the abstract base evaluator class. It is not directly instantiated.
4
+ # When defining an evaluator for a specific grammar, we subclass it; e.g. for FooGrammar
5
+ # we create a FooEvaluator that subclasses Evaluator. Note that FooEvaluator may not
6
+ # be further subclassed.
7
+ #
8
+ # An evaluation rule for a given production named +bar+ is defined by calling +for_bar+ with
9
+ # a block that performs the evaluation. For detailed examples, see the evaluators in the
10
+ # test suite.
11
+
2
12
  class Evaluator
3
13
 
14
+ # Instantiates a new evaluator with the syntax tree of a parsed expression. Only subclasses
15
+ # of Evaluator are directly instantiated.
4
16
  def initialize(syntax_tree)
5
17
  @syntax_tree = syntax_tree
6
18
  @node_stack = []
7
19
  end
8
20
 
21
+ # Returns the evaluation result.
9
22
  def result
10
23
  evaluate(@syntax_tree)
11
24
  end
12
25
 
26
+ private
27
+
13
28
  def child_nodes
14
29
  @node_stack[-1]
15
30
  end
16
-
17
- private
18
31
 
32
+ def evaluate node
33
+ return node if (ParseTreeLeafNode === node)
34
+ @node_stack << node.child_nodes.collect {|child_node| evaluate(child_node)}
35
+ proc = self.class.actions[node.production.name]
36
+ result = self.instance_eval(&proc)
37
+ @node_stack.pop
38
+ result
39
+ end
40
+
19
41
  def self.inherited(evaluator)
20
42
  class << evaluator
21
43
  attr_accessor :grammar, :actions
@@ -29,34 +51,25 @@ module Dhaka
29
51
  check_definitions
30
52
  end
31
53
 
54
+ def self.method_missing(method_name, &blk)
55
+ if method_name.to_s =~ /^for_*/
56
+ rule_name = method_name.to_s[4..-1]
57
+ self.for_rule_named(rule_name, &blk)
58
+ end
59
+ end
60
+
32
61
  def self.check_definitions
33
62
  non_trivial_productions_with_rules_undefined = self.grammar.productions.select {|production| production.expansion.size != 1}.collect {|production| production.name} - self.actions.keys
34
63
  raise EvaluatorDefinitionError.new(non_trivial_productions_with_rules_undefined) unless non_trivial_productions_with_rules_undefined.empty?
35
64
  end
36
65
 
37
- def evaluate node
38
- return node if (ParseTreeLeafNode === node)
39
- @node_stack << node.child_nodes.collect {|child_node| evaluate(child_node)}
40
- proc = self.class.actions[node.production.name]
41
- result = self.instance_eval(&proc)
42
- @node_stack.pop
43
- result
44
- end
45
-
46
66
  def self.for_rule_named(name, &blk)
47
67
  self.actions[name] = blk
48
68
  end
49
-
50
- def self.method_missing(method_name, &blk)
51
- if method_name.to_s =~ /^for_*/
52
- rule_name = method_name.to_s[4..-1]
53
- self.for_rule_named(rule_name, &blk)
54
- end
55
- end
56
69
 
57
70
  end
58
71
 
59
- class EvaluatorDefinitionError < StandardError
72
+ class EvaluatorDefinitionError < StandardError #:nodoc:
60
73
  def initialize(non_trivial_productions_with_rules_undefined)
61
74
  @non_trivial_productions_with_rules_undefined = non_trivial_productions_with_rules_undefined
62
75
  end
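
The comments added above describe the evaluator DSL: a subclass declares one rule per non-trivial production by calling for_<production_name> with a block, and inside the block the already-evaluated children are available through child_nodes. A minimal sketch follows; FooGrammar, the production names, and the self.grammar wiring are illustrative assumptions (the test suites shipped with the gem contain complete, working evaluators).

    class FooEvaluator < Dhaka::Evaluator
      self.grammar = FooGrammar            # assumed binding to the grammar; not shown in this diff

      for_addition do                      # rule for a hypothetical production E ::= E + E
        child_nodes[0] + child_nodes[2]    # the children arrive already evaluated
      end

      for_literal do                       # rule for a hypothetical production E ::= n
        child_nodes[0].token.value         # its only child is a leaf node wrapping the token
      end

      # every other non-trivial production of FooGrammar needs a rule as well,
      # otherwise check_definitions raises an EvaluatorDefinitionError
    end

    FooEvaluator.new(syntax_tree).result   # syntax_tree comes from a successful parse
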
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
- class ClosureHash < Hash
3
+ # A subclass of Hash with a dirty flag
4
+ class ClosureHash < Hash #:nodoc:
4
5
  attr_accessor :dirty
5
6
  def initialize(&block)
6
7
  super(&block)
@@ -2,15 +2,24 @@
2
2
  require 'set'
3
3
  module Dhaka
4
4
 
5
+ # Reserved name for the start symbol for all grammars.
5
6
  START_SYMBOL_NAME = "_Start_"
6
- END_SYMBOL_NAME = "_End_"
7
+ END_SYMBOL_NAME = "_End_" #:nodoc:
7
8
 
9
+ # Productions for specific grammar symbols are defined in the context of this class.
8
10
  class ProductionBuilder
11
+
12
+ # +symbol+ is the grammar symbol that productions are being defined for.
9
13
  def initialize(grammar, symbol)
10
14
  @grammar = grammar
11
15
  @symbol = symbol
12
16
  end
13
17
 
18
+ # Creates a new production for +symbol+ with an expansion of +expansion+. The options hash can include
19
+ # a directive <tt>:prec</tt>, the value of which is a grammar symbol name. The precedence of the production is then
20
+ # set to the precedence of the grammar symbol corresponding to that name.
21
+ #
22
+ # See the arithmetic precedence grammar in the test suites for an example.
14
23
  def method_missing(production_name, expansion, options = {})
15
24
  expansion_symbols = expansion.collect {|name| @grammar.symbols[name]}
16
25
  if precedence_symbol_name = options[:prec]
@@ -25,12 +34,23 @@ module Dhaka
25
34
  end
26
35
  end
27
36
 
37
+ # The precedence builder defines three methods, +left+, +right+ and +nonassoc+. These accept arrays of grammar
38
+ # symbol names, all of which have the same precedence level and associativity. This works almost exactly like Yacc.
39
+ #
40
+ # See the arithmetic precedence grammar in the test suites for an example.
28
41
  class PrecedenceBuilder
29
- def initialize(grammar)
42
+ def initialize(grammar) #:nodoc:
30
43
  @grammar = grammar
31
44
  @precedence_level = 0
32
45
  end
33
- def method_missing(associativity, symbol_names)
46
+ [:left, :right, :nonassoc].each do |associativity|
47
+ define_method(associativity) do |symbols|
48
+ assign_precedences associativity, symbols
49
+ end
50
+ end
51
+
52
+ private
53
+ def assign_precedences(associativity, symbol_names)
34
54
  symbol_names.each do |symbol_name|
35
55
  symbol = @grammar.symbols[symbol_name]
36
56
  symbol.precedence = Precedence.new(@precedence_level, associativity)
@@ -39,8 +59,34 @@ module Dhaka
39
59
  end
40
60
  end
41
61
 
62
+ # This class is subclassed when specifying a grammar. Note that subclasses of this class may not be further subclassed.
42
63
  class Grammar
43
64
 
65
+ # Used for defining the productions for the symbol with name +symbol+. The block +blk+ is
66
+ # evaluated in the context of a ProductionBuilder.
67
+ def self.for_symbol symbol, &blk
68
+ symbol = symbols[symbol]
69
+ symbol.non_terminal = true
70
+ ProductionBuilder.new(self, symbol).instance_eval(&blk)
71
+ end
72
+
73
+ # Used for defining the precedences and associativities of symbols. The block +blk+ is
74
+ # evaluated in the context of a PrecedenceBuilder.
75
+ def self.precedences &blk
76
+ PrecedenceBuilder.new(self).instance_eval(&blk)
77
+ end
78
+
79
+ # Returns the grammar symbol identified by +name+
80
+ def self.symbol_for_name(name)
81
+ if symbols.has_key? name
82
+ symbols[name]
83
+ else
84
+ raise "No symbol with name #{name} found"
85
+ end
86
+ end
87
+
88
+ private
89
+
44
90
  def self.inherited(grammar)
45
91
  class << grammar
46
92
  attr_accessor :symbols, :productions_by_symbol, :productions_by_name, :start_symbol, :end_symbol, :__first_cache
@@ -53,13 +99,10 @@ module Dhaka
53
99
  grammar.__first_cache = {}
54
100
  end
55
101
 
56
- def self.for_symbol symbol, &blk
57
- symbol = symbols[symbol]
58
- symbol.non_terminal = true
59
- ProductionBuilder.new(self, symbol).instance_eval(&blk)
102
+ def self.productions_for_symbol(symbol)
103
+ productions_by_symbol[symbol]
60
104
  end
61
105
 
62
-
63
106
  def self.productions
64
107
  productions_by_name.values
65
108
  end
@@ -68,17 +111,6 @@ module Dhaka
68
111
  productions_by_name[name]
69
112
  end
70
113
 
71
- def self.productions_for_symbol(symbol)
72
- productions_by_symbol[symbol]
73
- end
74
-
75
- def self.symbol_for_name(name)
76
- if symbols.has_key? name
77
- symbols[name]
78
- else
79
- raise "No symbol with name #{name} found"
80
- end
81
- end
82
114
 
83
115
  def self.terminal_symbols
84
116
  symbols.values.select {|symbol| symbol.terminal}
@@ -102,10 +134,6 @@ module Dhaka
102
134
  return channels, result
103
135
  end
104
136
 
105
- def self.precedences &blk
106
- PrecedenceBuilder.new(self).instance_eval(&blk)
107
- end
108
-
109
137
  def self.first(given_symbol)
110
138
  cached_result = self.__first_cache[given_symbol]
111
139
  return cached_result if cached_result
@@ -146,7 +174,6 @@ module Dhaka
146
174
  end
147
175
  return closure_hash
148
176
  end
149
-
150
-
151
177
  end
178
+
152
179
  end
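
The ProductionBuilder, PrecedenceBuilder and Grammar documentation above adds up to a small DSL: for_symbol opens a block evaluated by a ProductionBuilder in which each method call names a production and passes its expansion (plus an optional :prec override), while precedences opens a block evaluated by a PrecedenceBuilder. A sketch of what the hypothetical FooGrammar used in the evaluator sketch might look like (all symbol and production names are illustrative; '#' is used as an explicit end marker, as in the gem's test grammars):

    class FooGrammar < Dhaka::Grammar
      precedences do
        left ['+', '-']                          # declared from lowest to highest, Yacc-style
        left ['*', '/']
      end

      for_symbol(Dhaka::START_SYMBOL_NAME) do
        expression ['E', '#']
      end

      for_symbol('E') do
        addition       ['E', '+', 'E']
        subtraction    ['E', '-', 'E']
        multiplication ['E', '*', 'E']
        division       ['E', '/', 'E']
        negation(['-', 'E'], :prec => '*')       # unary minus borrows the precedence of '*'
        literal        ['n']
      end
    end
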
@@ -1,5 +1,9 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
+ # Each grammar symbol is uniquely identified by a string name. The name of a symbol can
4
+ # be anything and need not correspond to its character representation. For example, an ampersand in the
5
+ # character stream could be tokenized as a symbol with the name 'whatever'. In general, it's best to choose
6
+ # symbol names that are descriptive.
3
7
  class GrammarSymbol
4
8
  attr_reader :name
5
9
  attr_accessor :non_terminal, :nullable, :precedence, :associativity
@@ -1,5 +1,5 @@
1
1
  module Dhaka
2
- class Precedence
2
+ class Precedence #:nodoc:
3
3
  include Comparable
4
4
  attr_reader :precedence_level, :associativity
5
5
  def initialize(precedence_level, associativity)
@@ -1,9 +1,10 @@
1
1
  module Dhaka
2
- class Action
2
+ # Encapsulates code for Parser actions.
3
+ class Action #:nodoc:
3
4
  attr_reader :action_code
4
5
  end
5
6
 
6
- class ShiftAction < Action
7
+ class ShiftAction < Action #:nodoc:
7
8
  attr_reader :destination_state
8
9
  def initialize destination_state
9
10
  @destination_state = destination_state
@@ -20,7 +21,7 @@ module Dhaka
20
21
  end
21
22
  end
22
23
 
23
- class ReduceAction < Action
24
+ class ReduceAction < Action #:nodoc:
24
25
  attr_reader :production
25
26
  def initialize(production)
26
27
  @production = production
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
- class Channel
3
+ # Represents channels for pumping of lookaheads between items
4
+ class Channel #:nodoc:
4
5
  attr_reader :start_item, :end_item
5
6
  def initialize(grammar, start_item, end_item)
6
7
  @grammar = grammar
@@ -23,7 +24,7 @@ module Dhaka
23
24
  end
24
25
  end
25
26
 
26
- class SpontaneousChannel < Channel
27
+ class SpontaneousChannel < Channel #:nodoc:
27
28
  def to_s
28
29
  "Spontaneous " + super.to_s
29
30
  end
@@ -40,7 +41,7 @@ module Dhaka
40
41
  end
41
42
  end
42
43
 
43
- class PassiveChannel < Channel
44
+ class PassiveChannel < Channel #:nodoc:
44
45
  def to_s
45
46
  "Passive " + super.to_s
46
47
  end
@@ -1,6 +1,8 @@
1
1
  module Dhaka
2
+ # This is the superclass of all compiled Parsers. It is only used by generated code.
2
3
  class CompiledParser
3
4
 
5
+ private
4
6
  def self.inherited(compiled_parser)
5
7
  class << compiled_parser
6
8
  attr_accessor :states, :grammar, :start_state_id
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
- class Item
3
+ # Represents parser state items
4
+ class Item #:nodoc:
4
5
  attr_reader :production, :next_item_index, :lookaheadset
5
6
  def initialize(production, next_item_index)
6
7
  @production = production
@@ -1,21 +1,26 @@
1
1
  module Dhaka
2
+ # Returned on successful parsing of the input token stream.
2
3
  class ParseSuccessResult
3
- attr_accessor :syntax_tree
4
- def initialize(syntax_tree)
4
+ # Contains the parse result.
5
+ attr_accessor :syntax_tree
6
+ def initialize(syntax_tree) #:nodoc:
5
7
  @syntax_tree = syntax_tree
6
8
  end
7
-
8
- def has_error?
9
+ # This is false.
10
+ def has_error?
9
11
  false
10
12
  end
11
13
  end
14
+
15
+ # Returned on unsuccessful parsing of the input token stream.
12
16
  class ParseErrorResult
13
- attr_reader :bad_token_index
14
- def initialize(bad_token_index)
17
+ # The index of the token that caused the parse error.
18
+ attr_reader :bad_token_index
19
+ def initialize(bad_token_index) #:nodoc:
15
20
  @bad_token_index = bad_token_index
16
21
  end
17
-
18
- def has_error?
22
+ # This is true.
23
+ def has_error?
19
24
  true
20
25
  end
21
26
  end
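
The two result classes above share only has_error?, so callers branch on it to reach either the syntax tree or the index of the offending token. A short sketch (parser and tokens are placeholders for a parser instance and an array of Dhaka::Token objects):

    result = parser.parse(tokens)
    if result.has_error?
      raise "could not parse at token #{tokens[result.bad_token_index]}"
    else
      syntax_tree = result.syntax_tree
    end
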
@@ -1,20 +1,18 @@
1
1
  module Dhaka
2
- class ParseTreeCompositeNode
2
+ # These are composite nodes of the syntax tree returned by the successful parsing of a token stream.
3
+ class ParseTreeCompositeNode
3
4
  attr_reader :production, :child_nodes
4
- def initialize(production)
5
+ def initialize(production) #:nodoc:
5
6
  @production = production
6
7
  @child_nodes = []
7
8
  end
8
- def linearize
9
+ def linearize #:nodoc:
9
10
  child_nodes.collect {|child_node| child_node.linearize}.flatten + [production.name]
10
11
  end
11
- def to_s
12
+ def to_s #:nodoc:
12
13
  "CompositeNode: #{production.symbol} --> [#{child_nodes.join(", ")}]"
13
14
  end
14
- def dot_name
15
- "Node#{object_id}"
16
- end
17
-
15
+ # Returns the dot representation of the syntax tree.
18
16
  def to_dot
19
17
  result = []
20
18
  result << ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"] if head_node?
@@ -28,31 +26,39 @@ module Dhaka
28
26
  result.join("\n")
29
27
  end
30
28
 
31
- def head_node?
29
+ def head_node? #:nodoc:
32
30
  production.symbol.name == START_SYMBOL_NAME
33
31
  end
32
+
33
+ def dot_name #:nodoc:
34
+ "Node#{object_id}"
35
+ end
36
+
34
37
  end
35
38
 
39
+ # These are leaf nodes of syntax trees. They contain tokens.
36
40
  class ParseTreeLeafNode
37
41
  attr_reader :token
38
- def initialize(token)
42
+ def initialize(token) #:nodoc:
39
43
  @token = token
40
44
  end
41
- def linearize
45
+ def linearize #:nodoc:
42
46
  []
43
47
  end
44
- def to_s
48
+ def to_s #:nodoc:
45
49
  "LeafNode: #{token}"
46
50
  end
47
- def dot_name
48
- "Node#{object_id}"
49
- end
51
+ # Returns the dot representation of this node.
50
52
  def to_dot
51
53
  label = "#{token}#{' : '+token.value.to_s if token.value}"
52
54
  "#{dot_name} [label=\"#{label}\"]"
53
55
  end
54
- def head_node?
56
+ def head_node? #:nodoc:
55
57
  false
56
58
  end
59
+
60
+ def dot_name #:nodoc:
61
+ "Node#{object_id}"
62
+ end
57
63
  end
58
64
  end
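
Both node classes respond to to_dot, and the node for the start production wraps the output in a complete digraph, so a successfully parsed tree can be written straight to a Graphviz file. A sketch (file names are arbitrary):

    File.open('syntax_tree.dot', 'w') do |file|
      file << parse_result.syntax_tree.to_dot
    end
    # then render it with, for example: dot -Tpng syntax_tree.dot -o syntax_tree.png
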
@@ -7,6 +7,9 @@ module Dhaka
7
7
  include ParserMethods
8
8
  attr_reader :grammar, :start_state
9
9
 
10
+ # Creates a new parser from the given grammar. Messages are logged by default to STDOUT
11
+ # and the log level is WARN. Shift-reduce conflicts are reported at WARN and reduce-reduce conflicts
12
+ # at ERROR. You may pass in your own logger. Logging at DEBUG shows a lot of progress output.
10
13
  def initialize(grammar, logger = nil)
11
14
  if logger
12
15
  @logger = logger
@@ -34,20 +37,7 @@ module Dhaka
34
37
  initialize_states
35
38
  end
36
39
 
37
- def initialize_states
38
- start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
39
- raise NoStartProductionsError.new(@grammar) if start_productions.empty?
40
- start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
41
- start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
42
- @start_state = @states[start_items]
43
- @logger.debug("Pumping #{@channels.size} channels...")
44
- pump_channels
45
- @logger.debug("Generating shift actions...")
46
- generate_shift_actions
47
- @logger.debug("Generating reduce actions...")
48
- generate_reduce_actions
49
- end
50
-
40
+ # Returns the Ruby source of the generated parser compiled as +parser_class_name+. This can be written out to a file.
51
41
  def compile_to_ruby_source_as parser_class_name
52
42
  result = "class #{parser_class_name} < Dhaka::CompiledParser\n\n"
53
43
  result << " self.grammar = #{@grammar.name}\n\n"
@@ -59,6 +49,9 @@ module Dhaka
59
49
  result
60
50
  end
61
51
 
52
+ # Returns the dot representation of the parser. If <tt>:hide_lookaheads</tt> is set to true in the
53
+ # options hash, lookaheads are not written out to the parser states, which is helpful when there are dozens
54
+ # of lookahead symbols for every item in every state.
62
55
  def to_dot(options = {})
63
56
  result = ["digraph x {", "node [fontsize=\"10\" shape=box size=\"5\"]"]
64
57
  result += states.collect { |state| state.to_dot(options) }
@@ -70,10 +63,28 @@ module Dhaka
70
63
  result << ['}']
71
64
  result.join("\n")
72
65
  end
66
+
67
+ private :start_state
68
+ private
69
+
73
70
  def states
74
71
  @states.values
75
72
  end
76
-
73
+
74
+ def initialize_states
75
+ start_productions = @grammar.productions_for_symbol(@grammar.start_symbol)
76
+ raise NoStartProductionsError.new(@grammar) if start_productions.empty?
77
+ start_items = ItemSet.new(start_productions.collect {|production| Item.new(production, 0)})
78
+ start_items.each {|start_item| start_item.lookaheadset << @grammar.end_symbol}
79
+ @start_state = @states[start_items]
80
+ @logger.debug("Pumping #{@channels.size} channels...")
81
+ pump_channels
82
+ @logger.debug("Generating shift actions...")
83
+ generate_shift_actions
84
+ @logger.debug("Generating reduce actions...")
85
+ generate_reduce_actions
86
+ end
87
+
77
88
  def generate_shift_actions
78
89
  @states.values.each do |state|
79
90
  @transitions[state].keys.each { |symbol|
@@ -157,21 +168,12 @@ module Dhaka
157
168
 
158
169
  end
159
170
 
160
-
161
- class ParserReduceReduceConflictError < StandardError
162
- def initialize(message)
163
- @message = message
164
- end
165
- def to_s
166
- @message
167
- end
168
- end
169
-
171
+ # Raised when trying to create a Parser for a grammar that has no productions for the start symbol
170
172
  class NoStartProductionsError < StandardError
171
- def initialize(grammar)
173
+ def initialize(grammar) #:nodoc:
172
174
  @grammar = grammar
173
175
  end
174
- def to_s
176
+ def to_s #:nodoc:
175
177
  "No start productions defined for #{@grammar.name}"
176
178
  end
177
179
  end
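
The comments added above cover the two main things done with a Parser besides parsing: exporting its state machine to dot and compiling it to Ruby source so the LALR table construction does not have to run every time an application starts. A sketch, with grammar, class and file names as placeholders:

    parser = Dhaka::Parser.new(FooGrammar)        # conflicts are logged to STDOUT at WARN by default

    File.open('foo_parser.rb', 'w') do |file|     # write out a compiled parser
      file << parser.compile_to_ruby_source_as(:FooParser)
    end

    File.open('foo_parser.dot', 'w') do |file|    # and a state diagram without the lookahead noise
      file << parser.to_dot(:hide_lookaheads => true)
    end
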
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env ruby
2
2
  module Dhaka
3
+ # This module is included both in Parser and CompiledParser.
3
4
  module ParserMethods
5
+ # +token_stream+ is an Enumerable of Token-s. Returns either a ParseSuccessResult or a ParseErrorResult.
4
6
  def parse token_stream
5
7
  parser_run = ParserRun.new(grammar, start_state, token_stream)
6
8
  parser_run.run
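
ParserMethods#parse is the shared entry point for Parser and the generated parsers: it takes an Enumerable of Token objects and returns one of the two result objects documented earlier. A sketch that builds the token stream by hand, the way the gem's tests do (a real application would normally get it from a Tokenizer subclass; FooGrammar is the hypothetical grammar from the earlier sketches):

    tokens = ['n', '+', 'n', '#'].collect do |name|
      Dhaka::Token.new(FooGrammar.symbol_for_name(name), nil)
    end

    result = parser.parse(tokens)
    result.syntax_tree.linearize unless result.has_error?   # bottom-up list of production names
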
@@ -1,5 +1,5 @@
1
1
  module Dhaka
2
- class ParserRun
2
+ class ParserRun #:nodoc:
3
3
 
4
4
  def initialize(grammar, start_state, token_stream)
5
5
  @grammar = grammar
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
  require 'set'
3
3
  module Dhaka
4
- class ParserState
4
+ class ParserState #:nodoc:
5
5
 
6
6
  attr_accessor :items, :actions, :id
7
7
 
@@ -55,7 +55,7 @@ module Dhaka
55
55
 
56
56
  end
57
57
 
58
- class ItemSet < Set
58
+ class ItemSet < Set #:nodoc:
59
59
  def hash
60
60
  self.collect{|item| item.hash}.inject{|result, hashcode| result ^ hashcode}
61
61
  end
@@ -1,4 +1,6 @@
1
1
  module Dhaka
2
+ # Represents a portion of the input character stream that is mapped by the tokenizer
3
+ # to a symbol in the grammar.
2
4
  class Token
3
5
  attr_accessor :grammar_symbol, :value
4
6
  def initialize(grammar_symbol, value)
@@ -1,4 +1,10 @@
1
1
  module Dhaka
2
+
3
+ # Reserved constant used to identify the idle state of the tokenizer.
4
+ TOKENIZER_IDLE_STATE = :idle_state
5
+
6
+ # Raised when the tokenizer encounters a character that has no corresponding action in
7
+ # its current state.
2
8
  class UnrecognizedInputCharacterException < StandardError
3
9
  attr_reader :input, :char_index
4
10
  def initialize(input, char_index)
@@ -10,6 +16,8 @@ module Dhaka
10
16
  end
11
17
  end
12
18
 
19
+ # A tokenizer state encapsulates actions that should be performed upon
20
+ # encountering each permissible character for that state.
13
21
  class TokenizerState
14
22
  attr_reader :actions
15
23
 
@@ -17,72 +25,83 @@ module Dhaka
17
25
  @actions = {}
18
26
  end
19
27
 
28
+ # Define the action (+blk+) to be performed when encountering any of +characters+ in the token stream.
20
29
  def for_characters(characters, &blk)
21
30
  characters.each do |character|
22
31
  actions[character] = blk
23
32
  end
24
33
  end
25
34
 
26
- def for_character(character, &blk)
27
- actions[character[0]] = blk
28
- end
35
+ alias for_character for_characters
29
36
 
30
- def to_s
37
+ def to_s #:nodoc:
31
38
  actions.inspect
32
39
  end
33
40
 
34
41
  end
35
42
 
43
+ # This class contains a DSL for specifying tokenizers. Subclass it to implement tokenizers for specific grammars.
44
+ # Subclasses of this class may not be further subclassed.
45
+ #
46
+ # Tokenizers are state machines that are specified pretty much by hand. Each state of a tokenizer is identified
47
+ # by a Ruby symbol. The constant Dhaka::TOKENIZER_IDLE_STATE is reserved for the idle state of the tokenizer (the one
48
+ # that it starts in).
36
49
  class Tokenizer
37
-
38
- def self.inherited(tokenizer)
39
- class << tokenizer
40
- attr_accessor :states
41
- end
42
- tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
43
- end
44
-
50
+
51
+ # Define the actions for the state named +state_name+.
45
52
  def self.for_state(state_name, &blk)
46
53
  states[state_name].instance_eval(&blk)
47
54
  end
48
-
55
+
56
+ # Tokenizes a string +input+ and returns an array of Token-s.
49
57
  def self.tokenize(input)
50
- TokenizerRun.new(self, input).run
58
+ self.new(input).run
51
59
  end
52
- end
53
-
54
- class TokenizerRun
55
-
60
+
61
+ # A slot that can be used to accumulate characters when processing multi-character tokens.
56
62
  attr_accessor :accumulator
63
+ # The tokens recognized so far.
57
64
  attr_reader :tokens
58
- def initialize(tokenizer, input)
59
- @tokenizer = tokenizer
65
+
66
+ def initialize(input) #:nodoc:
60
67
  @input = input
61
- @current_state = tokenizer.states[:idle_state]
68
+ @current_state = self.class.states[TOKENIZER_IDLE_STATE]
62
69
  @curr_char_index = 0
63
70
  @tokens = []
64
71
  end
65
72
 
66
- def run
67
- while curr_char
68
- blk = @current_state.actions[curr_char]
69
- raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless blk
70
- instance_eval(&blk)
71
- end
72
- tokens
73
- end
74
-
73
+ # The character currently being processed.
75
74
  def curr_char
76
75
  @input[@curr_char_index] and @input[@curr_char_index].chr
77
76
  end
78
-
77
+
78
+ # Advance to the next character.
79
79
  def advance
80
80
  @curr_char_index += 1
81
81
  end
82
-
82
+
83
+ # Change the active state of the tokenizer to the state identified by the symbol +state_name+.
83
84
  def switch_to state_name
84
- @current_state = @tokenizer.states[state_name]
85
+ @current_state = self.class.states[state_name]
86
+ end
87
+
88
+ def run #:nodoc:
89
+ while curr_char
90
+ blk = @current_state.actions[curr_char]
91
+ raise UnrecognizedInputCharacterException.new(@input, @curr_char_index) unless blk
92
+ instance_eval(&blk)
93
+ end
94
+ tokens
95
+ end
96
+
97
+ private
98
+ def self.inherited(tokenizer)
99
+ class << tokenizer
100
+ attr_accessor :states
101
+ end
102
+ tokenizer.states = Hash.new {|hash, key| hash[key] = TokenizerState.new}
85
103
  end
86
104
 
87
105
  end
106
+
88
107
  end
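
The rewritten Tokenizer above folds the old TokenizerRun into the Tokenizer instance itself: states are declared at the class level with for_state and for_characters, and the per-character action blocks are evaluated against the running instance, so they can use curr_char, advance, accumulator, tokens and switch_to directly. A sketch of a tokenizer for the hypothetical FooGrammar from the earlier sketches (the state layout mirrors the arithmetic tokenizers in the test suite):

    class FooTokenizer < Dhaka::Tokenizer
      digits    = ('0'..'9').to_a
      operators = ['+', '-', '*', '/', '#']

      for_state Dhaka::TOKENIZER_IDLE_STATE do
        for_characters(operators) do
          tokens << Dhaka::Token.new(FooGrammar.symbol_for_name(curr_char), nil)
          advance
        end
        for_characters(digits) do
          self.accumulator = ''       # start collecting a multi-digit literal
          switch_to :get_number
        end
        for_characters([' ']) do
          advance
        end
      end

      for_state :get_number do
        for_characters(digits) do
          self.accumulator += curr_char
          advance
          tokens << Dhaka::Token.new(FooGrammar.symbol_for_name('n'), accumulator.to_i) unless curr_char
        end
        for_characters(operators + [' ']) do
          tokens << Dhaka::Token.new(FooGrammar.symbol_for_name('n'), accumulator.to_i)
          switch_to Dhaka::TOKENIZER_IDLE_STATE
        end
      end
    end

    tokens = FooTokenizer.tokenize('12+34#')
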
@@ -13,7 +13,7 @@ class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
13
13
 
14
14
  all_characters = digits + parenths + operators + functions + arg_separator + whitespace
15
15
 
16
- for_state :idle_state do
16
+ for_state Dhaka::TOKENIZER_IDLE_STATE do
17
17
  for_characters(all_characters - (digits + whitespace)) do
18
18
  tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name(curr_char), nil)
19
19
  advance
@@ -29,15 +29,19 @@ class ArithmeticPrecedenceTokenizer < Dhaka::Tokenizer
29
29
 
30
30
  for_state :get_integer_literal do
31
31
  for_characters all_characters - digits do
32
- tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), accumulator.to_i)
33
- switch_to :idle_state
32
+ tokens << integer_literal_token(accumulator.to_i)
33
+ switch_to Dhaka::TOKENIZER_IDLE_STATE
34
34
  end
35
35
  for_characters digits do
36
36
  self.accumulator += curr_char
37
37
  advance
38
- tokens << Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), accumulator.to_i) unless curr_char
38
+ tokens << integer_literal_token(accumulator.to_i) unless curr_char
39
39
  end
40
40
  end
41
41
 
42
+ def integer_literal_token(value)
43
+ Dhaka::Token.new(ArithmeticPrecedenceGrammar.symbol_for_name('n'), value)
44
+ end
45
+
42
46
  end
43
47
 
@@ -13,7 +13,7 @@ class ArithmeticTokenizer < Dhaka::Tokenizer
13
13
 
14
14
  all_characters = digits + parenths + operators + functions + arg_separator + whitespace
15
15
 
16
- for_state :idle_state do
16
+ for_state Dhaka::TOKENIZER_IDLE_STATE do
17
17
  for_characters(all_characters - (digits + whitespace)) do
18
18
  tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name(curr_char), nil)
19
19
  advance
@@ -29,15 +29,19 @@ class ArithmeticTokenizer < Dhaka::Tokenizer
29
29
 
30
30
  for_state :get_integer_literal do
31
31
  for_characters all_characters - digits do
32
- tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), accumulator.to_i)
33
- switch_to :idle_state
32
+ tokens << integer_literal_token(accumulator.to_i)
33
+ switch_to Dhaka::TOKENIZER_IDLE_STATE
34
34
  end
35
35
  for_characters digits do
36
36
  self.accumulator += curr_char
37
37
  advance
38
- tokens << Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), accumulator.to_i) unless curr_char
38
+ tokens << integer_literal_token(accumulator.to_i) unless curr_char
39
39
  end
40
40
  end
41
41
 
42
+ def integer_literal_token(value)
43
+ Dhaka::Token.new(ArithmeticGrammar.symbol_for_name('n'), value)
44
+ end
45
+
42
46
  end
43
47
 
@@ -6,7 +6,7 @@ class BracketTokenizer < Dhaka::Tokenizer
6
6
 
7
7
  all_characters = ['(', '[', '{', 'B', '}', ']', ')']
8
8
 
9
- for_state :idle_state do
9
+ for_state Dhaka::TOKENIZER_IDLE_STATE do
10
10
  for_characters(all_characters) do
11
11
  tokens << Dhaka::Token.new(BracketGrammar.symbol_for_name(curr_char), nil)
12
12
  advance
@@ -42,12 +42,17 @@ class ParserTest < Test::Unit::TestCase
42
42
  'E ::= E -> - T [)-]'])
43
43
  expected_states[10] = Set.new(['T ::= ( E ) -> [#)-]'])
44
44
  expected_states[11] = Set.new(['_Start_ ::= S # -> [_End_]'])
45
- actual_states = Set.new(@parser.states.collect {|state| Set.new(state.items.values.collect {|item| item.to_s})})
45
+ actual_states = Set.new(@parser.send('states').collect {|state| Set.new(state.items.values.collect {|item| item.to_s})})
46
46
  #write_parser(@parser)
47
47
  expected_states.values.each do |state|
48
48
  assert set_finder(state, actual_states), "expected #{state.to_a}"
49
49
  end
50
50
  end
51
+
52
+ def test_parser_can_be_exported_to_dot_format
53
+ dot_representation = @parser.to_dot
54
+ end
55
+
51
56
  def build_tokens(token_symbol_names, grammar)
52
57
  token_symbol_names.collect {|symbol_name| Dhaka::Token.new(grammar.symbol_for_name(symbol_name), nil)}
53
58
  end
@@ -71,6 +76,11 @@ class ParserTest < Test::Unit::TestCase
71
76
  "start"], syntax_tree.linearize
72
77
  end
73
78
 
79
+ def test_syntax_trees_can_be_exported_to_dot_format
80
+ syntax_tree = @parser.parse(build_tokens(['(','n','-','(','n','-','n',')',')','-','n','#'], @grammar)).syntax_tree
81
+ syntax_tree.to_dot
82
+ end
83
+
74
84
  def get_linearized_parse_result(input, parser)
75
85
  parser.parse(build_tokens(input, parser.grammar)).syntax_tree.linearize
76
86
  end
metadata CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.9.0
3
3
  specification_version: 1
4
4
  name: dhaka
5
5
  version: !ruby/object:Gem::Version
6
- version: 0.0.5
7
- date: 2006-12-15 00:00:00 -05:00
6
+ version: 0.0.6
7
+ date: 2007-01-07 00:00:00 -05:00
8
8
  summary: An LALR1 parser generator written in Ruby
9
9
  require_paths:
10
10
  - lib
@@ -15,7 +15,7 @@ description:
15
15
  autorequire: dhaka
16
16
  default_executable:
17
17
  bindir: bin
18
- has_rdoc: false
18
+ has_rdoc: true
19
19
  required_ruby_version: !ruby/object:Gem::Version::Requirement
20
20
  requirements:
21
21
  - - ">"