rley 0.0.02

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +15 -0
  2. data/.rspec +1 -0
  3. data/.rubocop.yml +74 -0
  4. data/.ruby-gemset +1 -0
  5. data/.ruby-version +1 -0
  6. data/.simplecov +7 -0
  7. data/.travis.yml +21 -0
  8. data/.yardopts +6 -0
  9. data/CHANGELOG.md +10 -0
  10. data/Gemfile +8 -0
  11. data/LICENSE.txt +19 -0
  12. data/README.md +19 -0
  13. data/Rakefile +32 -0
  14. data/lib/rley/constants.rb +26 -0
  15. data/lib/rley/parser/chart.rb +39 -0
  16. data/lib/rley/parser/dotted_item.rb +80 -0
  17. data/lib/rley/parser/earley_parser.rb +177 -0
  18. data/lib/rley/parser/parse_state.rb +54 -0
  19. data/lib/rley/parser/parsing.rb +101 -0
  20. data/lib/rley/parser/state_set.rb +47 -0
  21. data/lib/rley/parser/token.rb +21 -0
  22. data/lib/rley/syntax/grammar.rb +59 -0
  23. data/lib/rley/syntax/grm_symbol.rb +18 -0
  24. data/lib/rley/syntax/literal.rb +20 -0
  25. data/lib/rley/syntax/non_terminal.rb +18 -0
  26. data/lib/rley/syntax/production.rb +42 -0
  27. data/lib/rley/syntax/symbol_seq.rb +36 -0
  28. data/lib/rley/syntax/terminal.rb +18 -0
  29. data/lib/rley/syntax/verbatim_symbol.rb +21 -0
  30. data/spec/rley/parser/chart_spec.rb +47 -0
  31. data/spec/rley/parser/dotted_item_spec.rb +108 -0
  32. data/spec/rley/parser/earley_parser_spec.rb +271 -0
  33. data/spec/rley/parser/parse_state_spec.rb +99 -0
  34. data/spec/rley/parser/parsing_spec.rb +118 -0
  35. data/spec/rley/parser/state_set_spec.rb +68 -0
  36. data/spec/rley/parser/token_spec.rb +40 -0
  37. data/spec/rley/syntax/grammar_spec.rb +149 -0
  38. data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
  39. data/spec/rley/syntax/literal_spec.rb +32 -0
  40. data/spec/rley/syntax/non_terminal_spec.rb +29 -0
  41. data/spec/rley/syntax/production_spec.rb +50 -0
  42. data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
  43. data/spec/rley/syntax/terminal_spec.rb +29 -0
  44. data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
  45. data/spec/spec_helper.rb +21 -0
  46. metadata +166 -0
@@ -0,0 +1,99 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/terminal'
4
+ require_relative '../../../lib/rley/syntax/non_terminal'
5
+ require_relative '../../../lib/rley/syntax/production'
6
+ require_relative '../../../lib/rley/parser/dotted_item'
7
+
8
+ # Load the class under test
9
+ require_relative '../../../lib/rley/parser/parse_state'
10
+
11
+ module Rley # Open this namespace to avoid module qualifier prefixes
12
+ module Parser # Open this namespace to avoid module qualifier prefixes
13
+
14
+ describe ParseState do
15
+
16
+ let(:t_a) { Syntax::Terminal.new('A') }
17
+ let(:t_b) { Syntax::Terminal.new('B') }
18
+ let(:t_c) { Syntax::Terminal.new('C') }
19
+ let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
20
+
21
+ let(:sample_prod) do
22
+ Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
23
+ end
24
+
25
+ let(:other_prod) do
26
+ Syntax::Production.new(nt_sentence, [t_a])
27
+ end
28
+
29
+ let(:empty_prod) do
30
+ Syntax::Production.new(nt_sentence,[])
31
+ end
32
+
33
+ let(:origin_val) { 3 }
34
+ let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
35
+ let(:other_dotted_rule) { double('mock-dotted-item') }
36
+
37
+ # Default instantiation rule
38
+ subject { ParseState.new(dotted_rule, origin_val) }
39
+
40
+ context 'Initialization:' do
41
+
42
+ it 'should be created with a dotted item and a origin position' do
43
+ expect { ParseState.new(dotted_rule, origin_val) }.not_to raise_error
44
+ end
45
+
46
+ it 'should complain when the dotted rule is nil' do
47
+ err = StandardError
48
+ msg = 'Dotted item cannot be nil'
49
+ expect { ParseState.new(nil, 2) }.to raise_error(err, msg)
50
+ end
51
+
52
+ it 'should know the related dotted rule' do
53
+ expect(subject.dotted_rule).to eq(dotted_rule)
54
+ end
55
+
56
+ it 'should know the origin value' do
57
+ expect(subject.origin).to eq(origin_val)
58
+ end
59
+
60
+
61
+ end # context
62
+
63
+ context 'Provided services:' do
64
+ it 'should compare with itself' do
65
+ expect(subject == subject).to eq(true)
66
+ end
67
+
68
+ it 'should compare with another' do
69
+ equal = ParseState.new(dotted_rule, origin_val)
70
+ expect(subject == equal).to eq(true)
71
+
72
+ # Same dotted_rule, different origin
73
+ diff_origin = ParseState.new(dotted_rule, 2)
74
+ expect(subject == diff_origin).to eq(false)
75
+
76
+ # Different dotted item, same origin
77
+ diff_rule = ParseState.new(other_dotted_rule, 3)
78
+ expect(subject == diff_rule).to eq(false)
79
+ end
80
+
81
+ it 'should know if the parsing reached the end of the production' do
82
+ expect(subject).not_to be_complete
83
+ at_end = DottedItem.new(sample_prod, 3)
84
+
85
+ instance = ParseState.new(at_end, 2)
86
+ expect(instance).to be_complete
87
+ end
88
+
89
+ it 'should know the next expected symbol' do
90
+ expect(subject.next_symbol).to eq(t_c)
91
+ end
92
+ end # context
93
+
94
+ end # describe
95
+
96
+ end # module
97
+ end # module
98
+
99
+ # End of file
@@ -0,0 +1,118 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/non_terminal'
4
+ require_relative '../../../lib/rley/syntax/verbatim_symbol'
5
+ require_relative '../../../lib/rley/syntax/production'
6
+ require_relative '../../../lib/rley/parser/dotted_item'
7
+ require_relative '../../../lib/rley/parser/token'
8
+ # Load the class under test
9
+ require_relative '../../../lib/rley/parser/parsing'
10
+
11
+ module Rley # Open this namespace to avoid module qualifier prefixes
12
+ module Parser # Open this namespace to avoid module qualifier prefixes
13
+
14
+ describe Parsing do
15
+
16
+ # Grammar 1: A very simple language
17
+ # S ::= A.
18
+ # A ::= "a" A "c".
19
+ # A ::= "b".
20
+ let(:nt_S) { Syntax::NonTerminal.new('S') }
21
+ let(:nt_A) { Syntax::NonTerminal.new('A') }
22
+ let(:a_) { Syntax::VerbatimSymbol.new('a') }
23
+ let(:b_) { Syntax::VerbatimSymbol.new('b') }
24
+ let(:c_) { Syntax::VerbatimSymbol.new('c') }
25
+ let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
26
+ let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
27
+ let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
28
+ let(:start_dotted_rule) { DottedItem.new(prod_S, 0) }
29
+
30
+ # Helper method that mimicks the output of a tokenizer
31
+ # for the language specified by gramma_abc
32
+ let(:grm1_tokens) do
33
+ [
34
+ Token.new('a', a_),
35
+ Token.new('a', a_),
36
+ Token.new('b', b_),
37
+ Token.new('c', c_),
38
+ Token.new('c', c_)
39
+ ]
40
+ end
41
+
42
+ # Default instantiation rule
43
+ subject { Parsing.new(start_dotted_rule, grm1_tokens) }
44
+
45
+ context 'Initialization:' do
46
+
47
+ it 'should be created with a list of tokens and a start dotted rule' do
48
+ expect { Parsing.new(start_dotted_rule, grm1_tokens) }.not_to raise_error
49
+ end
50
+
51
+ it 'should know the input tokens' do
52
+ expect(subject.tokens).to eq(grm1_tokens)
53
+ end
54
+
55
+ it 'should know its chart object' do
56
+ expect(subject.chart).to be_kind_of(Chart)
57
+ end
58
+
59
+ end # context
60
+
61
+ context 'Parsing:' do
62
+ it 'should push a state to a given chart entry' do
63
+ expect(subject.chart[1]).to be_empty
64
+ item = DottedItem.new(prod_A1, 1)
65
+
66
+ subject.push_state(item, 1, 1)
67
+ expect(subject.chart[1]).not_to be_empty
68
+ expect(subject.chart[1].first.dotted_rule).to eq(item)
69
+
70
+ # Pushing twice the same state must be no-op
71
+ subject.push_state(item, 1, 1)
72
+ expect(subject.chart[1].size).to eq(1)
73
+ end
74
+
75
+ it 'should complain when trying to push a nil dotted item' do
76
+ err = StandardError
77
+ msg = 'Dotted item may not be nil'
78
+ expect { subject.push_state(nil, 1, 1) }.to raise_error(err, msg)
79
+ end
80
+
81
+
82
+ it 'should retrieve the parse states that expect a given terminal' do
83
+ item1 = DottedItem.new(prod_A1, 2)
84
+ item2 = DottedItem.new(prod_A1, 1)
85
+ subject.push_state(item1, 2, 2)
86
+ subject.push_state(item2, 2, 2)
87
+ states = subject.states_expecting(c_, 2)
88
+ expect(states.size).to eq(1)
89
+ expect(states[0].dotted_rule).to eq(item1)
90
+ end
91
+
92
+ it 'should update the states upon token match' do
93
+ # When a input token matches an expected terminal symbol
94
+ # then new parse states must be pushed to the following chart slot
95
+ expect(subject.chart[1]).to be_empty
96
+
97
+ item1 = DottedItem.new(prod_A1, 0)
98
+ item2 = DottedItem.new(prod_A2, 0)
99
+ subject.push_state(item1, 0, 0)
100
+ subject.push_state(item2, 0, 0)
101
+ subject.scanning(a_, 0) { |i| i } # Code block is mock
102
+
103
+ # Expected side effect: a new state at chart[1]
104
+ expect(subject.chart[1].size).to eq(1)
105
+ new_state = subject.chart[1].states[0]
106
+ expect(new_state.dotted_rule).to eq(item1)
107
+ expect(new_state.origin).to eq(0)
108
+ end
109
+
110
+ # completion(aState, aPosition, &nextMapping)
111
+ end
112
+
113
+ end # describe
114
+
115
+ end # module
116
+ end # module
117
+
118
+ # End of file
@@ -0,0 +1,68 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/parser/parse_state'
4
+
5
+ # Load the class under test
6
+ require_relative '../../../lib/rley/parser/state_set'
7
+
8
+ module Rley # Open this namespace to avoid module qualifier prefixes
9
+ module Parser # Open this namespace to avoid module qualifier prefixes
10
+
11
+ describe StateSet do
12
+ let(:dotted_rule1) { double('fake_dotted_rule1') }
13
+ let(:state1) { ParseState.new(dotted_rule1, 2) }
14
+ let(:dotted_rule2) { double('fake_dotted_rule2') }
15
+ let(:state2) { ParseState.new(dotted_rule2, 5) }
16
+
17
+ context 'Initialization:' do
18
+
19
+ it 'should be created without argument' do
20
+ expect { StateSet.new }.not_to raise_error
21
+ end
22
+ end # context
23
+
24
+ context 'Provided services:' do
25
+
26
+ it 'should push a state' do
27
+ expect(subject.states).to be_empty
28
+ expect { subject.push_state(state1) }.not_to raise_error
29
+ expect(subject).not_to be_empty
30
+ subject.push_state(state2)
31
+ expect(subject.states).to eq([state1, state2])
32
+ end
33
+
34
+ it 'should list the states expecting a given terminal' do
35
+ # Case of no state
36
+ expect(subject.states_expecting(:a)).to be_empty
37
+
38
+ # Adding states
39
+ subject.push_state(state1)
40
+ subject.push_state(state2)
41
+ allow(dotted_rule1).to receive(:next_symbol).and_return(:b)
42
+ allow(dotted_rule2).to receive(:next_symbol).and_return(:a)
43
+ expect(subject.states_expecting(:a)).to eq([state2])
44
+ expect(subject.states_expecting(:b)).to eq([state1])
45
+ end
46
+
47
+ it 'should list the states related to a production' do
48
+ a_prod = double('fake-production')
49
+
50
+ # Case of no state
51
+ expect(subject.states_for(a_prod)).to be_empty
52
+
53
+ # Adding states
54
+ subject.push_state(state1)
55
+ subject.push_state(state2)
56
+ allow(dotted_rule1).to receive(:production).and_return(:dummy)
57
+ allow(dotted_rule2).to receive(:production).and_return(a_prod)
58
+ expect(subject.states_for(a_prod)).to eq([state2])
59
+ end
60
+
61
+ end # context
62
+
63
+ end # describe
64
+
65
+ end # module
66
+ end # module
67
+
68
+ # End of file
@@ -0,0 +1,40 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/terminal'
4
+
5
+ # Load the class under test
6
+ require_relative '../../../lib/rley/parser/token'
7
+
8
+ module Rley # Open this namespace to avoid module qualifier prefixes
9
+ module Parser # Open this namespace to avoid module qualifier prefixes
10
+
11
+ describe Token do
12
+
13
+ let(:lexeme) { '"some text"' }
14
+ let(:sample_terminal) { Syntax::Terminal.new('if') }
15
+
16
+ context 'Initialization:' do
17
+
18
+
19
+ # Default instantiation rule
20
+ subject { Token.new(lexeme, sample_terminal) }
21
+
22
+ it 'should be created with a lexeme and a terminal argument' do
23
+ expect { Token.new(lexeme, sample_terminal) }.not_to raise_error
24
+ end
25
+
26
+ it 'should know its lexeme' do
27
+ expect(subject.lexeme).to eq(lexeme)
28
+ end
29
+
30
+ it 'should know its terminal' do
31
+ expect(subject.terminal).to eq(sample_terminal)
32
+ end
33
+ end # context
34
+
35
+ end # describe
36
+
37
+ end # module
38
+ end # module
39
+
40
+ # End of file
@@ -0,0 +1,149 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/verbatim_symbol'
4
+ require_relative '../../../lib/rley/syntax/non_terminal'
5
+ require_relative '../../../lib/rley/syntax/production'
6
+
7
+ # Load the class under test
8
+ require_relative '../../../lib/rley/syntax/grammar'
9
+
10
+ module Rley # Open this namespace to avoid module qualifier prefixes
11
+ module Syntax # Open this namespace to avoid module qualifier prefixes
12
+
13
+ describe Grammar do
14
+
15
# Factory method. Builds a list of productions that all share the same
# left-hand side, one production per entry of `sequences`.
#
# @param aNonTerminal [Object] the lhs, passed unchanged to every
#   Production.new call.
# @param sequences [Enumerable] each element is passed as-is to
#   Production.new as the rhs of one production.
# @return [Array] the created productions, in the order of `sequences`.
#
# NOTE(review): the add_op/mult_op callers pass a flat list of symbols
# rather than a list of symbol sequences, so each production receives a
# single symbol as its rhs -- confirm Production accepts that.
def alternate_prods(aNonTerminal, sequences)
  sequences.map { |symbs| Production.new(aNonTerminal, symbs) }
end
25
+
26
# Builds a lookup table of verbatim symbols.
#
# @param symbols [Enumerable] the texts to wrap; each one is passed to
#   VerbatimSymbol.new.
# @return [Hash] maps every element of `symbols` to the VerbatimSymbol
#   created from it, in iteration order.
def build_verbatim_symbols(symbols)
  symbols.each_with_object({}) do |symb, result|
    result[symb] = VerbatimSymbol.new(symb)
  end
end
31
+
32
+ # Grammar 1: arithmetical expressions with integers
33
+ let(:grm1_ops) do
34
+ operators = %w[+ - * / ( ) ]
35
+ build_verbatim_symbols(operators)
36
+ end
37
+
38
+ # Grammar symbols for integer arithmetic expressions
39
+ let(:number) { Literal.new('number', /\d+/)} # Limited to positive integers
40
+ let(:add_op) { NonTerminal.new('add_op') }
41
+ let(:add_operators) { [grm1_ops['+'], grm1_ops['-']] }
42
+ let(:mult_op) { NonTerminal.new('mult_op') }
43
+ let(:mult_operators) { [grm1_ops['*'], grm1_ops['/']] }
44
+ let(:factor) { NonTerminal.new('factor') }
45
+ let(:term) { NonTerminal.new('term') }
46
+ let(:expression) { NonTerminal.new('expression') }
47
+
48
+
49
+ # Productions for grammar 1
50
+ let(:add_op_prods) { alternate_prods(add_op, add_operators) }
51
+ let(:mult_op_prods) { alternate_prods(mult_op, mult_operators) }
52
+ let(:factor_prods) do
53
+ alternatives = [
54
+ [number],
55
+ [grm1_ops['-'], factor],
56
+ [grm1_ops['('], expression, grm1_ops[')']]
57
+ ]
58
+ alternate_prods(factor, alternatives)
59
+ end
60
+ let(:term_prods) do
61
+ alternatives = [[factor], [term, mult_op, factor]]
62
+ alternate_prods(term, alternatives)
63
+ end
64
+ let(:expression_prods) do
65
+ alternatives = [ [term], [expression, add_op, term]]
66
+ alternate_prods(expression, alternatives)
67
+ end
68
+
69
+ # Grammar 2: A very simple language
70
+ # S ::= A.
71
+ # A ::= "a" A "c".
72
+ # A ::= "b".
73
+ let(:nt_S) { NonTerminal.new('S') }
74
+ let(:nt_A) { NonTerminal.new('A') }
75
+ let(:a_) { VerbatimSymbol.new('a') }
76
+ let(:b_) { VerbatimSymbol.new('b') }
77
+ let(:c_) { VerbatimSymbol.new('c') }
78
+ let(:prod_S) { Production.new(nt_S, [nt_A]) }
79
+ let(:prod_A1) { Production.new(nt_A, [a_, nt_A, c_]) }
80
+ let(:prod_A2) { Production.new(nt_A, [b_]) }
81
+
82
+ =begin
83
+ # Non-terminals that specify the lexicon of the language
84
+ let(:noun) { NonTerminal.new('Noun') }
85
+ let(:noun_list) { %w(flights breeze trip morning) }
86
+ let(:verb) { NonTerminal.new('Verb') }
87
+ let(:verb_list) { %w(is prefer like need want fly) }
88
+ let(:adjective) { NonTerminal.new('Adjective') }
89
+ let(:adjective_list) { %w(cheapest non-stop first latest other direct) }
90
+ let(:pronoun) { NonTerminal.new('Pronoun') }
91
+ let(:pronoun_list) { %w(me I you it) }
92
+ let(:proper_noun) { NonTerminal.new('Proper_noun') }
93
+ let(:proper_noun_list) do [ 'Alaska', 'Baltimore', 'Los Angeles',
94
+ 'Chicago', 'United', 'American' ]
95
+ end
96
+ let(:determiner) { NonTerminal.new('Determiner') }
97
+ let(:determiner_list) { %w(the a an this these that) }
98
+ let(:preposition) { NonTerminal.new('Preposition') }
99
+ let(:preposition_list) { %w(from to on near) }
100
+ let(:conjunction) { NonTerminal.new('Conjunction') }
101
+ let(:conjunction_list) { %w(and or but) }
102
+
103
+
104
+
105
+
106
+ let(:noun_prods) { prods_for_list(noun, noun_list) }
107
+ let(:verb_prods) { prods_for_list(verb, verb_list) }
108
+ let(:adjective_prods) { prods_for_list(adjective, adjective_list) }
109
+ let(:pronoun_prods) { prods_for_list(pronoun, pronoun_list) }
110
+ let(:proper_pronoun_prods) do
111
+ prods_for_list(proper_pronoun, proper_pronoun_list)
112
+ end
113
+ let(:determiner_prods) { prods_for_list(determiner, determiner_list) }
114
+ let(:preposition_prods) { prods_for_list(preposition, preposition_list) }
115
+ let(:conjunction_prods) { prods_for_list(conjunction, conjunction_list) }
116
+
117
+ # Productions for the L0 language (from Jurafsky & Martin)
118
+ let(:nominal_prods) { Production}
119
+ =end
120
+
121
+ context 'Initialization:' do
122
+ subject do
123
+ productions = [prod_S, prod_A1, prod_A2]
124
+ Grammar.new(productions)
125
+ end
126
+
127
+ it 'should be created with a list of productions' do
128
+ expect { Grammar.new([prod_S, prod_A1, prod_A2]) }.not_to raise_error
129
+ end
130
+
131
+ it 'should know its productions' do
132
+ expect(subject.rules).to eq([prod_S, prod_A1, prod_A2])
133
+ end
134
+
135
+ it 'should know its start symbol' do
136
+ expect(subject.start_symbol).to eq(nt_S)
137
+ end
138
+
139
+ it 'should know all its symbols' do
140
+ expect(subject.symbols).to eq([nt_S, nt_A, a_, c_, b_])
141
+ end
142
+ end # context
143
+
144
+ end # describe
145
+
146
+ end # module
147
+ end # module
148
+
149
+ # End of file