rley 0.0.02

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +15 -0
  2. data/.rspec +1 -0
  3. data/.rubocop.yml +74 -0
  4. data/.ruby-gemset +1 -0
  5. data/.ruby-version +1 -0
  6. data/.simplecov +7 -0
  7. data/.travis.yml +21 -0
  8. data/.yardopts +6 -0
  9. data/CHANGELOG.md +10 -0
  10. data/Gemfile +8 -0
  11. data/LICENSE.txt +19 -0
  12. data/README.md +19 -0
  13. data/Rakefile +32 -0
  14. data/lib/rley/constants.rb +26 -0
  15. data/lib/rley/parser/chart.rb +39 -0
  16. data/lib/rley/parser/dotted_item.rb +80 -0
  17. data/lib/rley/parser/earley_parser.rb +177 -0
  18. data/lib/rley/parser/parse_state.rb +54 -0
  19. data/lib/rley/parser/parsing.rb +101 -0
  20. data/lib/rley/parser/state_set.rb +47 -0
  21. data/lib/rley/parser/token.rb +21 -0
  22. data/lib/rley/syntax/grammar.rb +59 -0
  23. data/lib/rley/syntax/grm_symbol.rb +18 -0
  24. data/lib/rley/syntax/literal.rb +20 -0
  25. data/lib/rley/syntax/non_terminal.rb +18 -0
  26. data/lib/rley/syntax/production.rb +42 -0
  27. data/lib/rley/syntax/symbol_seq.rb +36 -0
  28. data/lib/rley/syntax/terminal.rb +18 -0
  29. data/lib/rley/syntax/verbatim_symbol.rb +21 -0
  30. data/spec/rley/parser/chart_spec.rb +47 -0
  31. data/spec/rley/parser/dotted_item_spec.rb +108 -0
  32. data/spec/rley/parser/earley_parser_spec.rb +271 -0
  33. data/spec/rley/parser/parse_state_spec.rb +99 -0
  34. data/spec/rley/parser/parsing_spec.rb +118 -0
  35. data/spec/rley/parser/state_set_spec.rb +68 -0
  36. data/spec/rley/parser/token_spec.rb +40 -0
  37. data/spec/rley/syntax/grammar_spec.rb +149 -0
  38. data/spec/rley/syntax/grm_symbol_spec.rb +29 -0
  39. data/spec/rley/syntax/literal_spec.rb +32 -0
  40. data/spec/rley/syntax/non_terminal_spec.rb +29 -0
  41. data/spec/rley/syntax/production_spec.rb +50 -0
  42. data/spec/rley/syntax/symbol_seq_spec.rb +65 -0
  43. data/spec/rley/syntax/terminal_spec.rb +29 -0
  44. data/spec/rley/syntax/verbatim_symbol_spec.rb +32 -0
  45. data/spec/spec_helper.rb +21 -0
  46. metadata +166 -0
@@ -0,0 +1,99 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/terminal'
4
+ require_relative '../../../lib/rley/syntax/non_terminal'
5
+ require_relative '../../../lib/rley/syntax/production'
6
+ require_relative '../../../lib/rley/parser/dotted_item'
7
+
8
+ # Load the class under test
9
+ require_relative '../../../lib/rley/parser/parse_state'
10
+
11
+ module Rley # Open this namespace to avoid module qualifier prefixes
12
+ module Parser # Open this namespace to avoid module qualifier prefixes
13
+
14
+ describe ParseState do
15
+
16
+ let(:t_a) { Syntax::Terminal.new('A') }
17
+ let(:t_b) { Syntax::Terminal.new('B') }
18
+ let(:t_c) { Syntax::Terminal.new('C') }
19
+ let(:nt_sentence) { Syntax::NonTerminal.new('sentence') }
20
+
21
+ let(:sample_prod) do
22
+ Syntax::Production.new(nt_sentence, [t_a, t_b, t_c])
23
+ end
24
+
25
+ let(:other_prod) do
26
+ Syntax::Production.new(nt_sentence, [t_a])
27
+ end
28
+
29
+ let(:empty_prod) do
30
+ Syntax::Production.new(nt_sentence,[])
31
+ end
32
+
33
+ let(:origin_val) { 3 }
34
+ let(:dotted_rule) { DottedItem.new(sample_prod, 2) }
35
+ let(:other_dotted_rule) { double('mock-dotted-item') }
36
+
37
+ # Default instantiation rule
38
+ subject { ParseState.new(dotted_rule, origin_val) }
39
+
40
+ context 'Initialization:' do
41
+
42
+ it 'should be created with a dotted item and a origin position' do
43
+ expect { ParseState.new(dotted_rule, origin_val) }.not_to raise_error
44
+ end
45
+
46
+ it 'should complain when the dotted rule is nil' do
47
+ err = StandardError
48
+ msg = 'Dotted item cannot be nil'
49
+ expect { ParseState.new(nil, 2) }.to raise_error(err, msg)
50
+ end
51
+
52
+ it 'should know the related dotted rule' do
53
+ expect(subject.dotted_rule).to eq(dotted_rule)
54
+ end
55
+
56
+ it 'should know the origin value' do
57
+ expect(subject.origin).to eq(origin_val)
58
+ end
59
+
60
+
61
+ end # context
62
+
63
+ context 'Provided services:' do
64
+ it 'should compare with itself' do
65
+ expect(subject == subject).to eq(true)
66
+ end
67
+
68
+ it 'should compare with another' do
69
+ equal = ParseState.new(dotted_rule, origin_val)
70
+ expect(subject == equal).to eq(true)
71
+
72
+ # Same dotted_rule, different origin
73
+ diff_origin = ParseState.new(dotted_rule, 2)
74
+ expect(subject == diff_origin).to eq(false)
75
+
76
+ # Different dotted item, same origin
77
+ diff_rule = ParseState.new(other_dotted_rule, 3)
78
+ expect(subject == diff_rule).to eq(false)
79
+ end
80
+
81
+ it 'should know if the parsing reached the end of the production' do
82
+ expect(subject).not_to be_complete
83
+ at_end = DottedItem.new(sample_prod, 3)
84
+
85
+ instance = ParseState.new(at_end, 2)
86
+ expect(instance).to be_complete
87
+ end
88
+
89
+ it 'should know the next expected symbol' do
90
+ expect(subject.next_symbol).to eq(t_c)
91
+ end
92
+ end # context
93
+
94
+ end # describe
95
+
96
+ end # module
97
+ end # module
98
+
99
+ # End of file
@@ -0,0 +1,118 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/non_terminal'
4
+ require_relative '../../../lib/rley/syntax/verbatim_symbol'
5
+ require_relative '../../../lib/rley/syntax/production'
6
+ require_relative '../../../lib/rley/parser/dotted_item'
7
+ require_relative '../../../lib/rley/parser/token'
8
+ # Load the class under test
9
+ require_relative '../../../lib/rley/parser/parsing'
10
+
11
+ module Rley # Open this namespace to avoid module qualifier prefixes
12
+ module Parser # Open this namespace to avoid module qualifier prefixes
13
+
14
+ describe Parsing do
15
+
16
+ # Grammar 1: A very simple language
17
+ # S ::= A.
18
+ # A ::= "a" A "c".
19
+ # A ::= "b".
20
+ let(:nt_S) { Syntax::NonTerminal.new('S') }
21
+ let(:nt_A) { Syntax::NonTerminal.new('A') }
22
+ let(:a_) { Syntax::VerbatimSymbol.new('a') }
23
+ let(:b_) { Syntax::VerbatimSymbol.new('b') }
24
+ let(:c_) { Syntax::VerbatimSymbol.new('c') }
25
+ let(:prod_S) { Syntax::Production.new(nt_S, [nt_A]) }
26
+ let(:prod_A1) { Syntax::Production.new(nt_A, [a_, nt_A, c_]) }
27
+ let(:prod_A2) { Syntax::Production.new(nt_A, [b_]) }
28
+ let(:start_dotted_rule) { DottedItem.new(prod_S, 0) }
29
+
30
+ # Helper method that mimics the output of a tokenizer
31
+ # for the language specified by Grammar 1
32
+ let(:grm1_tokens) do
33
+ [
34
+ Token.new('a', a_),
35
+ Token.new('a', a_),
36
+ Token.new('b', b_),
37
+ Token.new('c', c_),
38
+ Token.new('c', c_)
39
+ ]
40
+ end
41
+
42
+ # Default instantiation rule
43
+ subject { Parsing.new(start_dotted_rule, grm1_tokens) }
44
+
45
+ context 'Initialization:' do
46
+
47
+ it 'should be created with a list of tokens and a start dotted rule' do
48
+ expect { Parsing.new(start_dotted_rule, grm1_tokens) }.not_to raise_error
49
+ end
50
+
51
+ it 'should know the input tokens' do
52
+ expect(subject.tokens).to eq(grm1_tokens)
53
+ end
54
+
55
+ it 'should know its chart object' do
56
+ expect(subject.chart).to be_kind_of(Chart)
57
+ end
58
+
59
+ end # context
60
+
61
+ context 'Parsing:' do
62
+ it 'should push a state to a given chart entry' do
63
+ expect(subject.chart[1]).to be_empty
64
+ item = DottedItem.new(prod_A1, 1)
65
+
66
+ subject.push_state(item, 1, 1)
67
+ expect(subject.chart[1]).not_to be_empty
68
+ expect(subject.chart[1].first.dotted_rule).to eq(item)
69
+
70
+ # Pushing the same state twice must be a no-op
71
+ subject.push_state(item, 1, 1)
72
+ expect(subject.chart[1].size).to eq(1)
73
+ end
74
+
75
+ it 'should complain when trying to push a nil dotted item' do
76
+ err = StandardError
77
+ msg = 'Dotted item may not be nil'
78
+ expect { subject.push_state(nil, 1, 1) }.to raise_error(err, msg)
79
+ end
80
+
81
+
82
+ it 'should retrieve the parse states that expect a given terminal' do
83
+ item1 = DottedItem.new(prod_A1, 2)
84
+ item2 = DottedItem.new(prod_A1, 1)
85
+ subject.push_state(item1, 2, 2)
86
+ subject.push_state(item2, 2, 2)
87
+ states = subject.states_expecting(c_, 2)
88
+ expect(states.size).to eq(1)
89
+ expect(states[0].dotted_rule).to eq(item1)
90
+ end
91
+
92
+ it 'should update the states upon token match' do
93
+ # When an input token matches an expected terminal symbol
94
+ # then new parse states must be pushed to the following chart slot
95
+ expect(subject.chart[1]).to be_empty
96
+
97
+ item1 = DottedItem.new(prod_A1, 0)
98
+ item2 = DottedItem.new(prod_A2, 0)
99
+ subject.push_state(item1, 0, 0)
100
+ subject.push_state(item2, 0, 0)
101
+ subject.scanning(a_, 0) { |i| i } # Code block is mock
102
+
103
+ # Expected side effect: a new state at chart[1]
104
+ expect(subject.chart[1].size).to eq(1)
105
+ new_state = subject.chart[1].states[0]
106
+ expect(new_state.dotted_rule).to eq(item1)
107
+ expect(new_state.origin).to eq(0)
108
+ end
109
+
110
+ # completion(aState, aPosition, &nextMapping)
111
+ end
112
+
113
+ end # describe
114
+
115
+ end # module
116
+ end # module
117
+
118
+ # End of file
@@ -0,0 +1,68 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/parser/parse_state'
4
+
5
+ # Load the class under test
6
+ require_relative '../../../lib/rley/parser/state_set'
7
+
8
+ module Rley # Open this namespace to avoid module qualifier prefixes
9
+ module Parser # Open this namespace to avoid module qualifier prefixes
10
+
11
+ describe StateSet do
12
+ let(:dotted_rule1) { double('fake_dotted_rule1') }
13
+ let(:state1) { ParseState.new(dotted_rule1, 2) }
14
+ let(:dotted_rule2) { double('fake_dotted_rule2') }
15
+ let(:state2) { ParseState.new(dotted_rule2, 5) }
16
+
17
+ context 'Initialization:' do
18
+
19
+ it 'should be created without argument' do
20
+ expect { StateSet.new }.not_to raise_error
21
+ end
22
+ end # context
23
+
24
+ context 'Provided services:' do
25
+
26
+ it 'should push a state' do
27
+ expect(subject.states).to be_empty
28
+ expect { subject.push_state(state1) }.not_to raise_error
29
+ expect(subject).not_to be_empty
30
+ subject.push_state(state2)
31
+ expect(subject.states).to eq([state1, state2])
32
+ end
33
+
34
+ it 'should list the states expecting a given terminal' do
35
+ # Case of no state
36
+ expect(subject.states_expecting(:a)).to be_empty
37
+
38
+ # Adding states
39
+ subject.push_state(state1)
40
+ subject.push_state(state2)
41
+ allow(dotted_rule1).to receive(:next_symbol).and_return(:b)
42
+ allow(dotted_rule2).to receive(:next_symbol).and_return(:a)
43
+ expect(subject.states_expecting(:a)).to eq([state2])
44
+ expect(subject.states_expecting(:b)).to eq([state1])
45
+ end
46
+
47
+ it 'should list the states related to a production' do
48
+ a_prod = double('fake-production')
49
+
50
+ # Case of no state
51
+ expect(subject.states_for(a_prod)).to be_empty
52
+
53
+ # Adding states
54
+ subject.push_state(state1)
55
+ subject.push_state(state2)
56
+ allow(dotted_rule1).to receive(:production).and_return(:dummy)
57
+ allow(dotted_rule2).to receive(:production).and_return(a_prod)
58
+ expect(subject.states_for(a_prod)).to eq([state2])
59
+ end
60
+
61
+ end # context
62
+
63
+ end # describe
64
+
65
+ end # module
66
+ end # module
67
+
68
+ # End of file
@@ -0,0 +1,40 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/terminal'
4
+
5
+ # Load the class under test
6
+ require_relative '../../../lib/rley/parser/token'
7
+
8
+ module Rley # Open this namespace to avoid module qualifier prefixes
9
+ module Parser # Open this namespace to avoid module qualifier prefixes
10
+
11
+ describe Token do
12
+
13
+ let(:lexeme) { '"some text"' }
14
+ let(:sample_terminal) { Syntax::Terminal.new('if') }
15
+
16
+ context 'Initialization:' do
17
+
18
+
19
+ # Default instantiation rule
20
+ subject { Token.new(lexeme, sample_terminal) }
21
+
22
+ it 'should be created with a lexeme and a terminal argument' do
23
+ expect { Token.new(lexeme, sample_terminal) }.not_to raise_error
24
+ end
25
+
26
+ it 'should know its lexeme' do
27
+ expect(subject.lexeme).to eq(lexeme)
28
+ end
29
+
30
+ it 'should know its terminal' do
31
+ expect(subject.terminal).to eq(sample_terminal)
32
+ end
33
+ end # context
34
+
35
+ end # describe
36
+
37
+ end # module
38
+ end # module
39
+
40
+ # End of file
@@ -0,0 +1,149 @@
1
+ require_relative '../../spec_helper'
2
+
3
+ require_relative '../../../lib/rley/syntax/verbatim_symbol'
4
+ require_relative '../../../lib/rley/syntax/non_terminal'
5
+ require_relative '../../../lib/rley/syntax/production'
6
+
7
+ # Load the class under test
8
+ require_relative '../../../lib/rley/syntax/grammar'
9
+
10
+ module Rley # Open this namespace to avoid module qualifier prefixes
11
+ module Syntax # Open this namespace to avoid module qualifier prefixes
12
+
13
+ describe Grammar do
14
+
15
+ # Factory method. Builds one production per entry of 'sequences':
16
+ # each production has aNonTerminal as its lhs and
17
+ # the corresponding symbol sequence as its rhs.
18
+ def alternate_prods(aNonTerminal, sequences)
19
+ prods = sequences.map do |symbs|
20
+ Production.new(aNonTerminal, symbs)
21
+ end
22
+
23
+ return prods
24
+ end
25
+
26
+ def build_verbatim_symbols(symbols) # Returns a Hash mapping each given text to a new VerbatimSymbol
27
+ result = {}
28
+ symbols.each { |symb| result[symb] = VerbatimSymbol.new(symb) }
29
+ result
30
+ end
31
+
32
+ # Grammar 1: arithmetical expressions with integers
33
+ let(:grm1_ops) do
34
+ operators = %w[+ - * / ( ) ]
35
+ build_verbatim_symbols(operators)
36
+ end
37
+
38
+ # Grammar symbols for integer arithmetic expressions
39
+ let(:number) { Literal.new('number', /\d+/)} # Limited to unsigned (non-negative) integers
40
+ let(:add_op) { NonTerminal.new('add_op') }
41
+ let(:add_operators) { [grm1_ops['+'], grm1_ops['-']] }
42
+ let(:mult_op) { NonTerminal.new('mult_op') }
43
+ let(:mult_operators) { [grm1_ops['*'], grm1_ops['/']] }
44
+ let(:factor) { NonTerminal.new('factor') }
45
+ let(:term) { NonTerminal.new('term') }
46
+ let(:expression) { NonTerminal.new('expression') }
47
+
48
+
49
+ # Productions for grammar 1
50
+ let(:add_op_prods) { alternate_prods(add_op, add_operators) }
51
+ let(:mult_op_prods) { alternate_prods(mult_op, mult_operators) }
52
+ let(:factor_prods) do
53
+ alternatives = [
54
+ [number],
55
+ [grm1_ops['-'], factor],
56
+ [grm1_ops['('], expression, grm1_ops[')']]
57
+ ]
58
+ alternate_prods(factor, alternatives)
59
+ end
60
+ let(:term_prods) do
61
+ alternatives = [[factor], [term, mult_op, factor]]
62
+ alternate_prods(term, alternatives)
63
+ end
64
+ let(:expression_prods) do
65
+ alternatives = [ [term], [expression, add_op, term]]
66
+ alternate_prods(expression, alternatives)
67
+ end
68
+
69
+ # Grammar 2: A very simple language
70
+ # S ::= A.
71
+ # A ::= "a" A "c".
72
+ # A ::= "b".
73
+ let(:nt_S) { NonTerminal.new('S') }
74
+ let(:nt_A) { NonTerminal.new('A') }
75
+ let(:a_) { VerbatimSymbol.new('a') }
76
+ let(:b_) { VerbatimSymbol.new('b') }
77
+ let(:c_) { VerbatimSymbol.new('c') }
78
+ let(:prod_S) { Production.new(nt_S, [nt_A]) }
79
+ let(:prod_A1) { Production.new(nt_A, [a_, nt_A, c_]) }
80
+ let(:prod_A2) { Production.new(nt_A, [b_]) }
81
+
82
+ =begin
83
+ # Non-terminals that specify the lexicon of the language
84
+ let(:noun) { NonTerminal.new('Noun') }
85
+ let(:noun_list) { %w(flights breeze trip morning) }
86
+ let(:verb) { NonTerminal.new('Verb') }
87
+ let(:verb_list) { %w(is prefer like need want fly) }
88
+ let(:adjective) { NonTerminal.new('Adjective') }
89
+ let(:adjective_list) { %w(cheapest non-stop first latest other direct) }
90
+ let(:pronoun) { NonTerminal.new('Pronoun') }
91
+ let(:pronoun_list) { %w(me I you it) }
92
+ let(:proper_noun) { NonTerminal.new('Proper_noun') }
93
+ let(:proper_noun_list) do [ 'Alaska', 'Baltimore', 'Los Angeles',
94
+ 'Chicago', 'United', 'American' ]
95
+ end
96
+ let(:determiner) { NonTerminal.new('Determiner') }
97
+ let(:determiner_list) { %w(the a an this these that) }
98
+ let(:preposition) { NonTerminal.new('Preposition') }
99
+ let(:preposition_list) { %w(from to on near) }
100
+ let(:conjunction) { NonTerminal.new('Conjunction') }
101
+ let(:conjunction_list) { %w(and or but) }
102
+
103
+
104
+
105
+
106
+ let(:noun_prods) { prods_for_list(noun, noun_list) }
107
+ let(:verb_prods) { prods_for_list(verb, verb_list) }
108
+ let(:adjective_prods) { prods_for_list(adjective, adjective_list) }
109
+ let(:pronoun_prods) { prods_for_list(pronoun, pronoun_list) }
110
+ let(:proper_noun_prods) do
111
+ prods_for_list(proper_noun, proper_noun_list)
112
+ end
113
+ let(:determiner_prods) { prods_for_list(determiner, determiner_list) }
114
+ let(:preposition_prods) { prods_for_list(preposition, preposition_list) }
115
+ let(:conjunction_prods) { prods_for_list(conjunction, conjunction_list) }
116
+
117
+ # Productions for the L0 language (from Jurafsky & Martin)
118
+ let(:nominal_prods) { Production}
119
+ =end
120
+
121
+ context 'Initialization:' do
122
+ subject do
123
+ productions = [prod_S, prod_A1, prod_A2]
124
+ Grammar.new(productions)
125
+ end
126
+
127
+ it 'should be created with a list of productions' do
128
+ expect { Grammar.new([prod_S, prod_A1, prod_A2]) }.not_to raise_error
129
+ end
130
+
131
+ it 'should know its productions' do
132
+ expect(subject.rules).to eq([prod_S, prod_A1, prod_A2])
133
+ end
134
+
135
+ it 'should know its start symbol' do
136
+ expect(subject.start_symbol).to eq(nt_S)
137
+ end
138
+
139
+ it 'should know all its symbols' do
140
+ expect(subject.symbols).to eq([nt_S, nt_A, a_, c_, b_])
141
+ end
142
+ end # context
143
+
144
+ end # describe
145
+
146
+ end # module
147
+ end # module
148
+
149
+ # End of file