rley 0.0.18 → 0.1.00

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- N2Y1Yjg4Mzg5ZWI5YjU1NzBkMzYzOWNkNTMzMzA4NGJjYjgwYWE2Yw==
4
+ MzdkNmMxNWFhMmZjYWY2MzUwM2JhZjUyYzQxYmJjMzdiZmFjMjRhOQ==
5
5
  data.tar.gz: !binary |-
6
- NDk0ZDFjNTkxZjNjZTE0NTRhNGFkMTc3YzYxNjY3NWExNzI0ZWMzOQ==
6
+ ODFlMzBlNjNjYjQyODIyNjQwYzY2OTUzMTAyNDI3NGIyMmFkOTcxMg==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- ZGRkMzE5Y2ExNmIwNThkNmZhYmVmZTNmMTc5ZjQxODEyZjU0MTdkZDMyNGVl
10
- ZmNlYzc4MjY4ZDJjNTA0NzU2MWUzMTFhNTVjNjAxZWVhYTMxYmM3NDI0MjM1
11
- Yjg0MTA3ODkzNTA5MjQ2NTVhNDY2ODdmNWIyMTAxNDlmMmJiMWE=
9
+ M2EyMDU5MDQ2YzFiYTMyNGU1OWYxYWQ2ODBjNjllZjNhZTUzZjU3NmY0YzE0
10
+ ODYzMTRiZjZmNzg2ZjZmMTIzZWI2MmNiNzY4M2ZiYjljZTcyN2E4MGNjZDNl
11
+ ZTQ5YTQ0OWRjMmFlNGIwNTFmMjU2Y2E4YzJkMjFiZGM4NjU4NDI=
12
12
  data.tar.gz: !binary |-
13
- NGQ4NjBkNGExZTdlNTkxMDEzNDAwYWEyNjE5NTJlZjhlYWI2Mjc0OTdjNjVl
14
- YWU2MzJiOWJkZmViNGMyNGJhZTYxM2Q4Y2FkMGJhOTM0YjJkNjczOTExYTgy
15
- NjQwNDVhODZiODUwY2EwOTBlMzJhY2JjODc3OTZkY2QwZTQ3MWE=
13
+ ZTRlMjdhNGM3MTIzZTUyZmQ5MDVjZGMxZjFlNzI0MTI3NTIyZjNlZDBiYmEz
14
+ ZTU1MDQ4NTBlNzJjYmRiMDRlZTdmODdhYTUwY2NiZDcxMTVlYTM5NjgwZDcz
15
+ MzM0YTBjZDJiMGI1YWMxMDI0MzYzOGQ4ZDU5ZjIwNWZkZDY2ZWY=
data/CHANGELOG.md CHANGED
@@ -1,3 +1,16 @@
1
+ ### 0.1.00 / 2014-12-05
2
+ * [CHANGE] Bumped version number: it is the first version able to generate a parse tree.
3
+ * [NEW] `Grammar#name2symbol` attribute and accessor. Retrieve a grammar symbol from its name.
4
+ * [NEW] Methods `DottedItem#prev_symbol`, `DottedItem#prev_position` to find symbol on left of dot.
5
+ * [NEW] Method `ParseState#precedes?`, predicate to check whether self is a predecessor of given parse state.
6
+ * [NEW] Method `Parsing#parse_tree` returns a ParseTree object that represents the result of a parse.
7
+
8
+
9
+ ### 0.0.18 / 2014-11-23
10
+ * [CHANGE] `EarleyParser#parse`: Optimization to prevent repeated prediction of the same non-terminal for the same state set.
11
+ * [CHANGE] File `earley_parser_spec.rb`: Added new test for nullable grammar.
12
+ * [CHANGE] Style refactoring in classes `EarleyParser`, `StateSet`, `Grammar`, `NonTerminal`
13
+
1
14
  ### 0.0.17 / 2014-11-23
2
15
  * [CHANGE] File `earley_parser_spec.rb`: Added step-by-step test of ambiguous grammar parsing.
3
16
 
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.0.18'
6
+ Version = '0.1.00'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm"
@@ -21,7 +21,7 @@ module Rley # This module is used as a namespace
21
21
  # Index of the next symbol (from the rhs) after the 'dot'.
22
22
  # If the dot is at the end of the rhs (i.e. there is no next
23
23
  # symbol), then the position takes the value -1.
24
- # It the rhs is empty, then the postion is -2
24
+ # If the rhs is empty, then the position is -2
25
25
  attr_reader(:position)
26
26
 
27
27
  # @param aProduction
@@ -64,11 +64,37 @@ module Rley # This module is used as a namespace
64
64
  return production.lhs
65
65
  end
66
66
 
67
+ # Return the symbol before the dot.
68
+ # nil is returned if the dot is at the start of the rhs
69
+ def prev_symbol()
70
+ before_position = prev_position
71
+ if before_position.nil?
72
+ result = nil
73
+ else
74
+ result = production.rhs[before_position]
75
+ end
76
+
77
+ return result
78
+ end
79
+
67
80
  # Return the symbol after the dot.
68
81
  # nil is returned if the dot is at the end
69
82
  def next_symbol()
70
83
  return (position < 0) ? nil : production.rhs[position]
71
84
  end
85
+
86
+ # Calculate the position of the dot if it were moved
87
+ # one step to the left.
88
+ def prev_position()
89
+ case position
90
+ when -2, 0
91
+ result = nil
92
+ when -1
93
+ result = (production.rhs.size == 1) ? 0 : (production.rhs.size - 1)
94
+ else
95
+ result = position-1
96
+ end
97
+ end
72
98
 
73
99
  # An item with the dot in front of a terminal is called a shift item
74
100
  def shift_item?()
@@ -24,18 +24,36 @@ module Rley # This module is used as a namespace
24
24
 
25
25
  return result
26
26
  end
27
-
27
+
28
28
  # Returns true if the dot is at the end of the rhs of the production.
29
29
  # In other words, the complete rhs matches the input.
30
30
  def complete?()
31
31
  return dotted_rule.reduce_item?
32
32
  end
33
-
33
+
34
34
  # Next expected symbol in the production
35
35
  def next_symbol()
36
36
  return dotted_rule.next_symbol
37
37
  end
38
-
38
+
39
+ # Does this parse state have the 'other' as successor?
40
+ def precedes?(other)
41
+ return false if self == other
42
+
43
+ return false unless origin == other.origin
44
+ other_production = other.dotted_rule.production
45
+ return false unless dotted_rule.production == other_production
46
+
47
+ prev_position = other.dotted_rule.prev_position
48
+ if prev_position.nil?
49
+ result = false
50
+ else
51
+ result = dotted_rule.position == prev_position
52
+ end
53
+
54
+ return result
55
+ end
56
+
39
57
  # Give a String representation of itself.
40
58
  # The format of the text representation is
41
59
  # "format of dotted rule" + " | " + origin
@@ -43,13 +61,14 @@ module Rley # This module is used as a namespace
43
61
  def to_s()
44
62
  return dotted_rule.to_s + " | #{origin}"
45
63
  end
46
-
64
+
65
+
47
66
  private
48
-
67
+
49
68
  # Return the validated dotted item(rule)
50
69
  def valid_dotted_rule(aDottedRule)
51
70
  fail StandardError, 'Dotted item cannot be nil' if aDottedRule.nil?
52
-
71
+
53
72
  return aDottedRule
54
73
  end
55
74
  end # class
@@ -1,4 +1,5 @@
1
1
  require_relative 'chart'
2
+ require_relative '../ptree/parse_tree'
2
3
 
3
4
  module Rley # This module is used as a namespace
4
5
  module Parser # This module is used as a namespace
@@ -17,18 +18,61 @@ module Rley # This module is used as a namespace
17
18
  # followed the syntax specified by the grammar)
18
19
  def success?()
19
20
  # Success can be detected as follows:
20
- # The last chart entry has a parse state
21
- # that involves the start production and
22
- # has a dot positioned at the end of its rhs.
23
-
24
- start_dotted_rule = chart.start_dotted_rule
25
- start_production = start_dotted_rule.production
26
- last_chart_entry = chart.state_sets.last
27
- candidate_states = last_chart_entry.states_for(start_production)
28
- found = candidate_states.find(&:complete?)
29
-
21
+ # The last chart entry has a complete parse state
22
+ # with the start symbol as lhs
23
+ found = end_parse_state
30
24
  return !found.nil?
31
25
  end
26
+
27
+ # Factory method. Builds a ParseTree from the parse result.
28
+ # @return [ParseTree]
29
+ # Algorithm:
30
+ # set state_set_index = index of last state set in chart
31
+ # Search the completed parse state that corresponds to the full parse
32
+ def parse_tree()
33
+ state_set_index = chart.state_sets.size - 1
34
+ parse_state = end_parse_state
35
+ curr_dotted_item = parse_state.dotted_rule
36
+ full_range = { low: 0, high: state_set_index }
37
+ ptree = PTree::ParseTree.new(curr_dotted_item.production, full_range)
38
+ loop do
39
+ # Look at the symbol on left of the dot
40
+ curr_symbol = curr_dotted_item.prev_symbol
41
+ case curr_symbol
42
+ when Syntax::Terminal
43
+ state_set_index -= 1
44
+ ptree.step_back(state_set_index)
45
+ parse_state = chart[state_set_index].predecessor_state(parse_state)
46
+ curr_dotted_item = parse_state.dotted_rule
47
+
48
+ when Syntax::NonTerminal
49
+ # Retrieve complete states
50
+ new_states = chart[state_set_index].states_rewriting(curr_symbol)
51
+ # TODO: make this more robust
52
+ parse_state = new_states[0]
53
+ curr_dotted_item = parse_state.dotted_rule
54
+ ptree.current_node.range = { low: parse_state.origin }
55
+ node_range = ptree.current_node.range
56
+ ptree.add_children(curr_dotted_item.production, node_range)
57
+
58
+ when NilClass
59
+ lhs = curr_dotted_item.production.lhs
60
+ new_states = chart[state_set_index].states_expecting(lhs)
61
+ break if new_states.empty?
62
+ # TODO: make this more robust
63
+ parse_state = new_states[0]
64
+ curr_dotted_item = parse_state.dotted_rule
65
+ ptree.step_up(state_set_index)
66
+ ptree.current_node.range = { low: parse_state.origin }
67
+ break if ptree.root == ptree.current_node
68
+ else
69
+ msg = "Unexpected grammar symbol type #{curr_symbol.class}"
70
+ raise StandardError, msg
71
+ end
72
+ end
73
+
74
+ return ptree
75
+ end
32
76
 
33
77
 
34
78
  # Push a parse state (dotted item + origin) to the
@@ -91,6 +135,20 @@ module Rley # This module is used as a namespace
91
135
  def states_expecting(aTerminal, aPosition)
92
136
  return chart[aPosition].states_expecting(aTerminal)
93
137
  end
138
+
139
+ private
140
+
141
+ # Retrieve full parse state.
142
+ # After a successful parse, the last chart entry
143
+ # has a parse state that involves the start production and
144
+ # has a dot positioned at the end of its rhs.
145
+ def end_parse_state()
146
+ start_dotted_rule = chart.start_dotted_rule
147
+ start_production = start_dotted_rule.production
148
+ last_chart_entry = chart.state_sets[-1]
149
+ candidate_states = last_chart_entry.states_for(start_production)
150
+ return candidate_states.find(&:complete?)
151
+ end
94
152
  end # class
95
153
  end # module
96
154
  end # module
@@ -26,10 +26,31 @@ module Rley # This module is used as a namespace
26
26
  return states.select { |s| s.dotted_rule.next_symbol == aTerminal }
27
27
  end
28
28
 
29
+ # The list of complete ParseState that have the symbol as the lhs of their
30
+ # production
31
+ def states_rewriting(aNonTerm)
32
+ return states.select do |s|
33
+ (s.dotted_rule.production.lhs == aNonTerm) && s.complete?
34
+ end
35
+ end
36
+
29
37
  # The list of ParseState that involve the given production
30
38
  def states_for(aProduction)
31
39
  return states.select { |s| s.dotted_rule.production == aProduction }
32
40
  end
41
+
42
+ # Retrieve the parse state that is the predecessor of the given one.
43
+ def predecessor_state(aParseState)
44
+ if aParseState.dotted_rule.prev_position.nil?
45
+ raise StandardError, "#{aParseState}"
46
+ else
47
+ prod = aParseState.dotted_rule.production
48
+ candidate = states.find { |s| s.precedes?(aParseState) }
49
+ end
50
+
51
+ return candidate
52
+ end
53
+
33
54
 
34
55
  private
35
56
 
@@ -0,0 +1,21 @@
1
+ require_relative 'parse_tree_node' # Load superclass
2
+
3
+ module Rley # This module is used as a namespace
4
+ module PTree # This module is used as a namespace
5
+ class NonTerminalNode < ParseTreeNode
6
+ # The child nodes of this node
7
+ attr_reader(:children)
8
+
9
+ def initialize(aSymbol, aRange)
10
+ super(aSymbol, aRange)
11
+ @children = []
12
+ end
13
+
14
+ # @param aChildNode [ParseTreeNode-like] a child node.
15
+ def add_child(aChildNode)
16
+ children << aChildNode
17
+ end
18
+ end # class
19
+ end # module
20
+ end # module
21
+ # End of file
@@ -0,0 +1,95 @@
1
+ require_relative 'terminal_node'
2
+ require_relative 'non_terminal_node'
3
+
4
+ module Rley # This module is used as a namespace
5
+ module PTree # This module is used as a namespace
6
+ class ParseTree
7
+ # The root node of the tree
8
+ attr_reader(:root)
9
+
10
+ # The path to current node
11
+ attr_reader(:current_path)
12
+
13
+ def initialize(aProduction, aRange)
14
+ @root = NonTerminalNode.new(aProduction.lhs, aRange)
15
+ @current_path = [ @root ]
16
+ add_children(aProduction, aRange)
17
+ end
18
+
19
+ # Return the active node.
20
+ def current_node()
21
+ return current_path.last
22
+ end
23
+
24
+
25
+ def add_children(aProduction, aRange)
26
+ aProduction.rhs.each do |symb|
27
+ case symb
28
+ when Syntax::Terminal
29
+ new_node = TerminalNode.new(symb,{})
30
+ when Syntax::NonTerminal
31
+ new_node = NonTerminalNode.new(symb,{})
32
+ else
33
+ fail Standard, "Unknown grammar symbol type #{symb.class}"
34
+ end
35
+
36
+ current_node.add_child(new_node)
37
+ end
38
+
39
+ children = current_node.children
40
+ children.first.range = low_bound(aRange)
41
+ children.last.range = high_bound(aRange)
42
+ unless children.empty?
43
+ path_increment = [children.size - 1, children.last]
44
+ @current_path.concat(path_increment)
45
+ end
46
+ end
47
+
48
+ # Move the current node to the parent node.
49
+ # @param tokenPos [Fixnum] position of the matching input token
50
+ def step_up(tokenPos)
51
+ (pos, last_node) = current_path.pop(2)
52
+ #last_node.range = low_bound({low: tokenPos})
53
+ end
54
+
55
+
56
+
57
+ # Move the current node to the previous sibling node.
58
+ # @param tokenPos [Fixnum] position of the matching input token
59
+ def step_back(tokenPos)
60
+ (pos, last_node) = current_path[-2, 2]
61
+ last_node.range = low_bound({low: tokenPos})
62
+
63
+ if pos > 0
64
+ current_path.pop(2)
65
+ new_pos = pos - 1
66
+ new_curr_node = current_path.last.children[new_pos]
67
+ current_path << new_pos
68
+ current_path << new_curr_node
69
+ new_curr_node.range = high_bound({high: tokenPos})
70
+ end
71
+ end
72
+
73
+ private
74
+ def low_bound(aRange)
75
+ result = case aRange
76
+ when Hash then aRange[:low]
77
+ when TokenRange then aRange.low
78
+ end
79
+
80
+ return { low: result }
81
+ end
82
+
83
+ def high_bound(aRange)
84
+ result = case aRange
85
+ when Hash then aRange[:high]
86
+ when TokenRange then aRange.high
87
+ end
88
+
89
+ return { high: result }
90
+ end
91
+
92
+ end # class
93
+ end # module
94
+ end # module
95
+ # End of file
@@ -0,0 +1,25 @@
1
+ require_relative 'token_range'
2
+
3
+ module Rley # This module is used as a namespace
4
+ module PTree # This module is used as a namespace
5
+ class ParseTreeNode
6
+ # Link to the grammar symbol
7
+ attr_reader(:symbol)
8
+
9
+ # A range of indices for tokens corresponding to the node.
10
+ attr_reader(:range)
11
+
12
+
13
+ def initialize(aSymbol, aRange)
14
+ @symbol = aSymbol
15
+ @range = TokenRange.new(aRange)
16
+ end
17
+
18
+ def range=(aRange)
19
+ range.assign(aRange)
20
+ end
21
+
22
+ end # class
23
+ end # module
24
+ end # module
25
+ # End of file
@@ -0,0 +1,16 @@
1
+ require_relative 'parse_tree_node' # Load superclass
2
+
3
+ module Rley # This module is used as a namespace
4
+ module PTree # This module is used as a namespace
5
+ class TerminalNode < ParseTreeNode
6
+ # Link to the input token
7
+ attr_writer(:token)
8
+
9
+ def initialize(aTerminalSymbol, aRange)
10
+ super(aTerminalSymbol, aRange)
11
+ end
12
+
13
+ end # class
14
+ end # module
15
+ end # module
16
+ # End of file
@@ -0,0 +1,61 @@
1
+ module Rley # This module is used as a namespace
2
+ module PTree # This module is used as a namespace
3
+ class TokenRange
4
+ # The index of the lower bound of token range
5
+ attr_reader(:low)
6
+
7
+ # The index of the upper bound of token range
8
+ attr_reader(:high)
9
+
10
+ # @param aRangeRep [Hash]
11
+ def initialize(aRangeRep)
12
+ assign_low(aRangeRep)
13
+ assign_high(aRangeRep)
14
+ end
15
+
16
+
17
+ def ==(other)
18
+ return true if object_id == other.object_id
19
+
20
+ case other
21
+ when Hash
22
+ result = low == other[:low] && high == other[:high]
23
+ when TokenRange
24
+ result = low == other.low && high == other.high
25
+ end
26
+
27
+ return result
28
+ end
29
+
30
+ # true when both bounds aren't nil.
31
+ def bounded?()
32
+ return !(low.nil? || high.nil?)
33
+ end
34
+
35
+ # Conditional assign
36
+ def assign(aRange)
37
+ return if bounded?
38
+
39
+ assign_low(aRange) if low.nil?
40
+ assign_high(aRange) if high.nil?
41
+ end
42
+
43
+ private
44
+ def assign_low(aRange)
45
+ case aRange
46
+ when Hash then @low = aRange.fetch(:low, nil)
47
+ when TokenRange then @low = aRange.low
48
+ end
49
+ end
50
+
51
+ def assign_high(aRange)
52
+ case aRange
53
+ when Hash then @high = aRange.fetch(:high, nil)
54
+ when TokenRange then @high = aRange.high
55
+ end
56
+ end
57
+
58
+ end # class
59
+ end # module
60
+ end # module
61
+ # End of file
@@ -20,12 +20,16 @@ module Rley # This module is used as a namespace
20
20
 
21
21
  # The list of grammar symbols in the language.
22
22
  attr_reader(:symbols)
23
+
24
+ # A Hash with pairs of the kind: symbol name => grammar symbol
25
+ attr_reader(:name2symbol)
23
26
 
24
27
  # @param theProductions [Array of Production] the list of productions
25
28
  # of the grammar.
26
29
  def initialize(theProductions)
27
30
  @rules = []
28
31
  @symbols = []
32
+ @name2symbol = {}
29
33
  valid_productions = validate_productions(theProductions)
30
34
  # TODO: use topological sorting
31
35
  @start_symbol = valid_productions[0].lhs
@@ -50,13 +54,10 @@ module Rley # This module is used as a namespace
50
54
  def add_production(aProduction)
51
55
  @rules << aProduction
52
56
  the_lhs = aProduction.lhs
53
- @symbols << the_lhs unless @symbols.include? the_lhs
57
+ add_symbol(the_lhs)
54
58
 
55
59
  # TODO: remove quadratic execution time
56
- aProduction.rhs.each do |symb|
57
- next if symbols.include? symb
58
- @symbols << symb
59
- end
60
+ aProduction.rhs.each { |symb| add_symbol(symb) }
60
61
  end
61
62
 
62
63
 
@@ -104,6 +105,16 @@ module Rley # This module is used as a namespace
104
105
 
105
106
  return nullable
106
107
  end
108
+
109
+ private
110
+
111
+ def add_symbol(aSymbol)
112
+ its_name = aSymbol.name
113
+ unless name2symbol.include? its_name
114
+ @symbols << aSymbol
115
+ @name2symbol[its_name] = aSymbol
116
+ end
117
+ end
107
118
  end # class
108
119
  end # module
109
120
  end # module
@@ -16,7 +16,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
16
16
  def build_prod(theLHS, *theRHSSymbols)
17
17
  return Syntax::Production.new(theLHS, theRHSSymbols)
18
18
  end
19
-
19
+
20
20
  let(:t_a) { Syntax::Terminal.new('A') }
21
21
  let(:t_b) { Syntax::Terminal.new('B') }
22
22
  let(:t_c) { Syntax::Terminal.new('C') }
@@ -43,7 +43,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
43
43
  it 'should know its production' do
44
44
  expect(subject.production).to eq(sample_prod)
45
45
  end
46
-
46
+
47
47
  it 'should know the lhs of the production' do
48
48
  expect(subject.lhs).to eq(sample_prod.lhs)
49
49
  end
@@ -71,11 +71,11 @@ module Rley # Open this namespace to avoid module qualifier prefixes
71
71
  context 'Provided service:' do
72
72
  it 'should whether its dot is at start position' do
73
73
  expect(subject).not_to be_at_start
74
-
74
+
75
75
  # At start position
76
76
  instance1 = DottedItem.new(sample_prod, 0)
77
77
  expect(instance1).to be_at_start
78
-
78
+
79
79
  # At start/end at the same time (production is empty)
80
80
  instance2 = DottedItem.new(build_prod(nt_sentence), 0)
81
81
  expect(instance2).to be_at_start
@@ -91,10 +91,38 @@ module Rley # Open this namespace to avoid module qualifier prefixes
91
91
  expect(second_instance).to be_reduce_item
92
92
  end
93
93
 
94
+ it 'should know the symbol before the dot' do
95
+ expect(subject.prev_symbol).to eq(t_a)
96
+
97
+ # Case of an empty production
98
+ instance = DottedItem.new(empty_prod, 0)
99
+ expect(instance.prev_symbol).to be_nil
100
+
101
+ # Case of a dot at start position
102
+ instance = DottedItem.new(sample_prod, 0)
103
+ expect(instance.prev_symbol).to be_nil
104
+ end
105
+
94
106
  it 'should know the symbol after the dot' do
95
107
  expect(subject.next_symbol).to eq(t_b)
96
108
  end
97
109
 
110
+ it 'should calculate the previous position of the dot' do
111
+ expect(subject.prev_position).to eq(0)
112
+
113
+ # Case of an empty production
114
+ instance = DottedItem.new(empty_prod, 0)
115
+ expect(instance.prev_position).to be_nil
116
+
117
+ # Case of a dot at start position
118
+ instance = DottedItem.new(sample_prod, 0)
119
+ expect(instance.prev_position).to be_nil
120
+
121
+ # Case of single symbol production
122
+ instance = DottedItem.new(other_prod, 1)
123
+ expect(instance.prev_position).to eq(0)
124
+ end
125
+
98
126
  it 'should give its text representation' do
99
127
  expectation = 'sentence => A . B C'
100
128
  expect(subject.to_s).to eq(expectation)
@@ -91,6 +91,15 @@ module Rley # Open this namespace to avoid module qualifier prefixes
91
91
  expect(subject.next_symbol).to eq(t_c)
92
92
  end
93
93
 
94
+ it 'should know whether another instance follows this one' do
95
+ expect(subject.precedes?(subject)).to eq(false)
96
+ state1 = ParseState.new(DottedItem.new(sample_prod, 1), origin_val)
97
+ expect(state1.precedes?(subject)).to eq(true)
98
+ state0 = ParseState.new(DottedItem.new(sample_prod, 0), origin_val)
99
+ expect(state0.precedes?(state1)).to eq(true)
100
+ expect(state0.precedes?(subject)).to eq(false)
101
+ end
102
+
94
103
  it 'should know its text representation' do
95
104
  expected = 'sentence => A B . C | 3'
96
105
  expect(subject.to_s).to eq(expected)
@@ -3,8 +3,10 @@ require_relative '../../spec_helper'
3
3
  require_relative '../../../lib/rley/syntax/non_terminal'
4
4
  require_relative '../../../lib/rley/syntax/verbatim_symbol'
5
5
  require_relative '../../../lib/rley/syntax/production'
6
+ require_relative '../../../lib/rley/syntax/grammar_builder'
6
7
  require_relative '../../../lib/rley/parser/dotted_item'
7
8
  require_relative '../../../lib/rley/parser/token'
9
+ require_relative '../../../lib/rley/parser/earley_parser'
8
10
  # Load the class under test
9
11
  require_relative '../../../lib/rley/parser/parsing'
10
12
 
@@ -48,7 +50,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
48
50
  tokens = grm1_tokens
49
51
  expect { Parsing.new(start_rule, tokens) }.not_to raise_error
50
52
  end
51
-
53
+
52
54
  it 'should know the input tokens' do
53
55
  expect(subject.tokens).to eq(grm1_tokens)
54
56
  end
@@ -58,28 +60,28 @@ module Rley # Open this namespace to avoid module qualifier prefixes
58
60
  end
59
61
 
60
62
  end # context
61
-
63
+
62
64
  context 'Parsing:' do
63
65
  it 'should push a state to a given chart entry' do
64
66
  expect(subject.chart[1]).to be_empty
65
67
  item = DottedItem.new(prod_A1, 1)
66
-
68
+
67
69
  subject.push_state(item, 1, 1)
68
70
  expect(subject.chart[1]).not_to be_empty
69
71
  expect(subject.chart[1].first.dotted_rule).to eq(item)
70
-
72
+
71
73
  # Pushing twice the same state must be no-op
72
74
  subject.push_state(item, 1, 1)
73
75
  expect(subject.chart[1].size).to eq(1)
74
76
  end
75
-
77
+
76
78
  it 'should complain when trying to push a nil dotted item' do
77
79
  err = StandardError
78
80
  msg = 'Dotted item may not be nil'
79
81
  expect { subject.push_state(nil, 1, 1) }.to raise_error(err, msg)
80
82
  end
81
-
82
-
83
+
84
+
83
85
  it 'should retrieve the parse states that expect a given terminal' do
84
86
  item1 = DottedItem.new(prod_A1, 2)
85
87
  item2 = DottedItem.new(prod_A1, 1)
@@ -89,27 +91,51 @@ module Rley # Open this namespace to avoid module qualifier prefixes
89
91
  expect(states.size).to eq(1)
90
92
  expect(states[0].dotted_rule).to eq(item1)
91
93
  end
92
-
94
+
93
95
  it 'should update the states upon token match' do
94
96
  # When a input token matches an expected terminal symbol
95
97
  # then new parse states must be pushed to the following chart slot
96
98
  expect(subject.chart[1]).to be_empty
97
-
99
+
98
100
  item1 = DottedItem.new(prod_A1, 0)
99
101
  item2 = DottedItem.new(prod_A2, 0)
100
102
  subject.push_state(item1, 0, 0)
101
103
  subject.push_state(item2, 0, 0)
102
104
  subject.scanning(a_, 0) { |i| i } # Code block is mock
103
-
105
+
104
106
  # Expected side effect: a new state at chart[1]
105
107
  expect(subject.chart[1].size).to eq(1)
106
108
  new_state = subject.chart[1].states[0]
107
109
  expect(new_state.dotted_rule).to eq(item1)
108
110
  expect(new_state.origin).to eq(0)
109
111
  end
110
-
111
- end
112
-
112
+
113
+ end # context
114
+
115
+ context 'Parse tree building:' do
116
+ let(:sample_grammar1) do
117
+ builder = Syntax::GrammarBuilder.new
118
+ builder.add_terminals('a', 'b', 'c')
119
+ builder.add_production('S' => ['A'])
120
+ builder.add_production('A' => %w(a A c))
121
+ builder.add_production('A' => ['b'])
122
+ builder.grammar
123
+ end
124
+
125
+ let(:token_seq1) do
126
+ %w(a a b c c).map do |letter|
127
+ Token.new(letter, sample_grammar1.name2symbol[letter])
128
+ end
129
+ end
130
+
131
+
132
+ it 'should build the parse tree for a non-ambiguous grammar' do
133
+ parser = EarleyParser.new(sample_grammar1)
134
+ instance = parser.parse(token_seq1)
135
+ ptree = instance.parse_tree
136
+ expect(ptree).to be_kind_of(PTree::ParseTree)
137
+ end
138
+ end # context
113
139
  end # describe
114
140
  end # module
115
141
  end # module
@@ -56,6 +56,22 @@ module Rley # Open this namespace to avoid module qualifier prefixes
56
56
  allow(dotted_rule2).to receive(:production).and_return(a_prod)
57
57
  expect(subject.states_for(a_prod)).to eq([state2])
58
58
  end
59
+
60
+ it 'should list the states that rewrite a given non-terminal' do
61
+ non_term = double('fake-non-terminal')
62
+ prod1 = double('fake-production1')
63
+ prod2 = double('fake-production2')
64
+
65
+ # Adding states
66
+ subject.push_state(state1)
67
+ subject.push_state(state2)
68
+ allow(dotted_rule1).to receive(:production).and_return(prod1)
69
+ allow(prod1).to receive(:lhs).and_return(:dummy)
70
+ allow(dotted_rule2).to receive(:production).and_return(prod2)
71
+ allow(dotted_rule2).to receive(:reduce_item?).and_return(true)
72
+ allow(prod2).to receive(:lhs).and_return(non_term)
73
+ expect(subject.states_rewriting(non_term)).to eq([state2])
74
+ end
59
75
 
60
76
  end # context
61
77
 
@@ -0,0 +1,36 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ # Load the class under test
4
+ require_relative '../../../lib/rley/ptree/non_terminal_node'
5
+
6
+ module Rley # Open this namespace to avoid module qualifier prefixes
7
+ module PTree # Open this namespace to avoid module qualifier prefixes
8
+ describe NonTerminalNode do
9
+ let(:sample_symbol) { double('fake-symbol') }
10
+ let(:sample_range) { double('fake-range') }
11
+
12
+ subject { NonTerminalNode.new(sample_symbol, sample_range) }
13
+
14
+ context 'Initialization:' do
15
+ it "shouldn't have children yet" do
16
+ expect(subject.children).to be_empty
17
+ end
18
+ end # context
19
+
20
+ context 'Provided services:' do
21
+ it 'should accept children' do
22
+ child1 = double('first_child')
23
+ child2 = double('second_child')
24
+ child3 = double('third_child')
25
+ expect { subject.add_child(child1) }.not_to raise_error
26
+ subject.add_child(child2)
27
+ subject.add_child(child3)
28
+ expect(subject.children).to eq([child1, child2, child3])
29
+ end
30
+ end # context
31
+
32
+ end # describe
33
+ end # module
34
+ end # module
35
+
36
+ # End of file
@@ -0,0 +1,32 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ # Load the class under test
4
+ require_relative '../../../lib/rley/ptree/parse_tree_node'
5
+
6
+ module Rley # Open this namespace to avoid module qualifier prefixes
7
+ module PTree # Open this namespace to avoid module qualifier prefixes
8
+ describe ParseTreeNode do
9
+ let(:sample_symbol) { double('fake-symbol') }
10
+ let(:sample_range) { {low: 0, high: 5} }
11
+
12
+ subject { ParseTreeNode.new(sample_symbol, sample_range) }
13
+
14
+ context 'Initialization:' do
15
+ it 'should be created with a symbol and a range' do
16
+ expect { ParseTreeNode.new(sample_symbol, sample_range) }.not_to raise_error
17
+ end
18
+
19
+ it 'should know its symbol' do
20
+ expect(subject.symbol).to eq(sample_symbol)
21
+ end
22
+
23
+ it "should know its range" do
24
+ expect(subject.range).to eq(sample_range)
25
+ end
26
+ end # context
27
+
28
+ end # describe
29
+ end # module
30
+ end # module
31
+
32
+ # End of file
@@ -0,0 +1,95 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/grammar_builder'
4
+ # Load the class under test
5
+ require_relative '../../../lib/rley/ptree/parse_tree'
6
+
7
+ module Rley # Open this namespace to avoid module qualifier prefixes
8
+ module PTree # Open this namespace to avoid module qualifier prefixes
9
+ describe ParseTree do
10
+ let(:sample_grammar) do
11
+ builder = Syntax::GrammarBuilder.new
12
+ builder.add_terminals('a', 'b', 'c')
13
+ builder.add_production('S' => ['A'])
14
+ builder.add_production('A' => %w(a A c))
15
+ builder.add_production('A' => ['b'])
16
+ builder.grammar
17
+ end
18
+
19
+ let(:sample_prod) { sample_grammar.rules[0] }
20
+ let(:sample_range) { {low:0, high:5} }
21
+ subject { ParseTree.new(sample_prod, sample_range) }
22
+
23
+ context 'Initialization:' do
24
+ it 'should be created with a production and a range' do
25
+ construction = -> { ParseTreeNode.new(sample_prod, sample_range) }
26
+ expect(construction).not_to raise_error
27
+ end
28
+
29
+ it 'should know its root node' do
30
+ its_root = subject.root
31
+ expect(its_root.symbol.name).to eq('S')
32
+ expect(its_root.range).to eq(sample_range)
33
+ expect(its_root.children.size).to eq(1)
34
+ expect(its_root.children[0].symbol.name).to eq('A')
35
+ expect(its_root.children[0].range).to eq(sample_range)
36
+ end
37
+
38
+ it 'should know its current path' do
39
+ path = subject.current_path
40
+
41
+ # Given the tree:
42
+ # S[0,5]
43
+ # +- A[0,5] <- current node
44
+ # Expected path: [S[0,5], 0, A[0,5]]
45
+ expect(path.size).to eq(3)
46
+ expect(path[0]).to eq(subject.root)
47
+ expect(path[1]).to eq(0)
48
+ expect(path[2]).to eq(subject.root.children[0])
49
+ expect(path[2].range).to eq(sample_range)
50
+ end
51
+ end # context
52
+
53
+ context 'Provided service:' do
54
+ it 'should add children to current node' do
55
+ subject.add_children(sample_grammar.rules[1], sample_range)
56
+
57
+ # Given the tree:
58
+ # S[0,5]
59
+ # +- A[0,5]
60
+ # +-a[0,nil]
61
+ # +-A[nil, nil]
62
+ # +-c[nil, 5] <- current node
63
+ # Expected path: [S[0,5], 0, A[0,5], 2, c[nil, 5]]
64
+ path = subject.current_path
65
+ expect(path.size).to eq(5)
66
+ expect(path[3]).to eq(2)
67
+ expect(path[4].symbol.name).to eq('c')
68
+ expect(path[4].range.low).to be_nil
69
+ expect(path[4].range.high).to eq(5)
70
+ end
71
+
72
+ it 'should step back to a previous sibling node' do
73
+ subject.add_children(sample_grammar.rules[1], sample_range)
74
+ subject.step_back(4)
75
+ # Expected tree:
76
+ # S[0,5]
77
+ # +- A[0,5]
78
+ # +-a[0,nil]
79
+ # +-A[nil, 4] <- current node
80
+ # +-c[4, 5]
81
+ # Expected path: [S[0,5], 0, A[0,5], 1, A[nil, 4]]
82
+ path = subject.current_path
83
+ expect(path.size).to eq(5)
84
+ expect(path[3]).to eq(1)
85
+ expect(path[4].symbol.name).to eq('A')
86
+ expect(path[4].range.low).to be_nil
87
+ expect(path[4].range.high).to eq(4)
88
+ end
89
+ end
90
+
91
+ end # describe
92
+ end # module
93
+ end # module
94
+
95
+ # End of file
@@ -0,0 +1,103 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ # Load the class under test
4
+ require_relative '../../../lib/rley/ptree/token_range'
5
+
6
+ module Rley # Open this namespace to avoid module qualifier prefixes
7
+ module PTree # Open this namespace to avoid module qualifier prefixes
8
+ describe TokenRange do
9
+
10
+ let(:sample_range) { {low: 0, high: 5} }
11
+
12
+ # Default instantiation rule
13
+ subject { TokenRange.new(sample_range) }
14
+
15
+ context 'Initialization:' do
16
+ it 'should be created with a range Hash' do
17
+ # No bounds provided
18
+ expect { TokenRange.new({}) }.not_to raise_error
19
+
20
+ # Low bound provided
21
+ expect { TokenRange.new({low: 0}) }.not_to raise_error
22
+
23
+ # High bound provided
24
+ expect { TokenRange.new({high: 5}) }.not_to raise_error
25
+
26
+ # Both bounds provided
27
+ expect { TokenRange.new({low: 0, high: 5}) }.not_to raise_error
28
+ end
29
+
30
+ it 'should know its low bound' do
31
+ expect(subject.low).to eq(0)
32
+ end
33
+
34
+ it 'should know its low bound' do
35
+ expect(subject.high).to eq(5)
36
+ end
37
+ end # context
38
+
39
+ context 'Provided services:' do
40
+ it 'should know whether it is bounded or not' do
41
+ expect(subject).to be_bounded
42
+
43
+ # Case: only low bound is set
44
+ instance = TokenRange.new({low: 0})
45
+ expect(instance).not_to be_bounded
46
+
47
+ # Case: only high bound is set
48
+ instance = TokenRange.new({high: 5})
49
+ expect(instance).not_to be_bounded
50
+
51
+ # Case: no bound is set
52
+ instance = TokenRange.new({})
53
+ expect(instance).not_to be_bounded
54
+ end
55
+
56
+ it 'should assign it open bounds' do
57
+ some_range = {low: 1, high: 4}
58
+
59
+ ###########
60
+ # Case of bounded token range...
61
+ subject.assign(some_range)
62
+
63
+ # ... should be unchanged
64
+ expect(subject.low).to eq(sample_range[:low])
65
+ expect(subject.high).to eq(sample_range[:high])
66
+
67
+ ###########
68
+ # Case: only low bound is set
69
+ instance = TokenRange.new({low: 0})
70
+ instance.assign(some_range)
71
+
72
+ # Expectation: high is assigned the new value
73
+ expect(instance).to be_bounded
74
+ expect(instance.low).to eq(0)
75
+ expect(instance.high).to eq(4)
76
+
77
+ ###########
78
+ # Case: only high bound is set
79
+ instance = TokenRange.new({high: 5})
80
+ instance.assign(some_range)
81
+
82
+ # Expectation: low is assigned the new value
83
+ expect(instance).to be_bounded
84
+ expect(instance.low).to eq(1)
85
+ expect(instance.high).to eq(5)
86
+
87
+ ###########
88
+ # Case: no bound is set
89
+ instance = TokenRange.new({})
90
+ instance.assign(some_range)
91
+
92
+ # Expectation: both low and high are assigned the new values
93
+ expect(instance).to be_bounded
94
+ expect(instance.low).to eq(1)
95
+ expect(instance.high).to eq(4)
96
+ end
97
+ end
98
+
99
+ end # describe
100
+ end # module
101
+ end # module
102
+
103
+ # End of file
@@ -117,11 +117,12 @@ module Rley # Open this namespace to avoid module qualifier prefixes
117
117
  let(:nominal_prods) { Production}
118
118
  =end
119
119
 
120
+ subject do
121
+ productions = [prod_S, prod_A1, prod_A2]
122
+ Grammar.new(productions)
123
+ end
124
+
120
125
  context 'Initialization:' do
121
- subject do
122
- productions = [prod_S, prod_A1, prod_A2]
123
- Grammar.new(productions)
124
- end
125
126
 
126
127
  it 'should be created with a list of productions' do
127
128
  expect { Grammar.new([prod_S, prod_A1, prod_A2]) }.not_to raise_error
@@ -144,11 +145,23 @@ module Rley # Open this namespace to avoid module qualifier prefixes
144
145
  end
145
146
  end # context
146
147
 
147
- context 'Non-nullable grammar:' do
148
- subject do
149
- productions = [prod_S, prod_A1, prod_A2]
150
- Grammar.new(productions)
148
+ # let(:nt_S) { NonTerminal.new('S') }
149
+ # let(:nt_A) { NonTerminal.new('A') }
150
+ # let(:a_) { VerbatimSymbol.new('a') }
151
+ # let(:b_) { VerbatimSymbol.new('b') }
152
+ # let(:c_) { VerbatimSymbol.new('c') }
153
+
154
+ context 'Provided services:' do
155
+ it 'should retrieve its symbols from their name' do
156
+ expect(subject.name2symbol['S']).to eq(nt_S)
157
+ expect(subject.name2symbol['A']).to eq(nt_A)
158
+ expect(subject.name2symbol['a']).to eq(a_)
159
+ expect(subject.name2symbol['b']).to eq(b_)
160
+ expect(subject.name2symbol['c']).to eq(c_)
151
161
  end
162
+ end # context
163
+
164
+ context 'Non-nullable grammar:' do
152
165
 
153
166
  it 'should mark all its nonterminals as non-nullable' do
154
167
  nonterms = subject.non_terminals
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.18
4
+ version: 0.1.00
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-11-23 00:00:00.000000000 Z
11
+ date: 2014-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -96,6 +96,11 @@ files:
96
96
  - lib/rley/parser/parsing.rb
97
97
  - lib/rley/parser/state_set.rb
98
98
  - lib/rley/parser/token.rb
99
+ - lib/rley/ptree/non_terminal_node.rb
100
+ - lib/rley/ptree/parse_tree.rb
101
+ - lib/rley/ptree/parse_tree_node.rb
102
+ - lib/rley/ptree/terminal_node.rb
103
+ - lib/rley/ptree/token_range.rb
99
104
  - lib/rley/syntax/grammar.rb
100
105
  - lib/rley/syntax/grammar_builder.rb
101
106
  - lib/rley/syntax/grm_symbol.rb
@@ -112,6 +117,10 @@ files:
112
117
  - spec/rley/parser/parsing_spec.rb
113
118
  - spec/rley/parser/state_set_spec.rb
114
119
  - spec/rley/parser/token_spec.rb
120
+ - spec/rley/ptree/non_terminal_node_spec.rb
121
+ - spec/rley/ptree/parse_tree_node_spec.rb
122
+ - spec/rley/ptree/parse_tree_spec.rb
123
+ - spec/rley/ptree/token_range_spec.rb
115
124
  - spec/rley/syntax/grammar_builder_spec.rb
116
125
  - spec/rley/syntax/grammar_spec.rb
117
126
  - spec/rley/syntax/grm_symbol_spec.rb
@@ -161,6 +170,10 @@ test_files:
161
170
  - spec/rley/parser/parsing_spec.rb
162
171
  - spec/rley/parser/state_set_spec.rb
163
172
  - spec/rley/parser/token_spec.rb
173
+ - spec/rley/ptree/non_terminal_node_spec.rb
174
+ - spec/rley/ptree/parse_tree_node_spec.rb
175
+ - spec/rley/ptree/parse_tree_spec.rb
176
+ - spec/rley/ptree/token_range_spec.rb
164
177
  - spec/rley/syntax/grammar_builder_spec.rb
165
178
  - spec/rley/syntax/grammar_spec.rb
166
179
  - spec/rley/syntax/grm_symbol_spec.rb