rley 0.3.06 → 0.3.07
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_forest_builder.rb +4 -3
- data/spec/rley/parser/groucho_spec.rb +156 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9b70296885dc161ee4fdedf19ff1c2e04256086
|
4
|
+
data.tar.gz: 993b5b46f0edf140140d4713161c88b3cc846ca6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c390b234b9412d2b48d92ad725bfad8860dea3c1b9d21ef75ffb6191f05cc23a3bccfa8975c6a79e9651b29fba345298e1c053454523390d118eeab9f8b6ad24
|
7
|
+
data.tar.gz: 5358fb0d0201b821be251d47f804dd9362ea970e4765462346658106ecf8e452e6088d239b0ce63073bc7bc9db9299018f523ac9024de6cac3b85e664ba36fe8
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.3.07 / 2016-11-08
|
2
|
+
* [FIX] The sharing of a forest node could be repeated in a production during a revisit event.
|
3
|
+
* [CHANGE] Method `ParseForestBuilder#process_end_entry`. Added a guard condition to avoid repeated node sharing.
|
4
|
+
* [NEW] RSpec file `groucho_spec.rb` added in order to test the fix.
|
5
|
+
|
1
6
|
### 0.3.06 / 2016-11-06
|
2
7
|
* [FIX] There were missing links to shared parse forest nodes for ambiguous parses.
|
3
8
|
* [NEW] RSpec file `ambiguous_parse_spec.rb` added in order to test the parse forest building for an ambiguous parse.
|
data/lib/rley/parser/parse_forest_builder.rb
CHANGED
@@ -92,9 +92,10 @@ module Rley # This module is used as a namespace
|
|
92
92
|
# Retrieve the already existing node corresponding
|
93
93
|
# to re-visited entry
|
94
94
|
popular = @entry2node[anEntry]
|
95
|
-
|
96
|
-
# Share with parent
|
97
|
-
curr_parent.add_subnode(popular)
|
95
|
+
|
96
|
+
# Share with parent (if needed)...
|
97
|
+
children = curr_parent.subnodes
|
98
|
+
curr_parent.add_subnode(popular) unless children.include? popular
|
98
99
|
|
99
100
|
else
|
100
101
|
raise NotImplementedError
|
data/spec/rley/parser/groucho_spec.rb
ADDED
@@ -0,0 +1,156 @@
|
|
1
|
+
# Purpose: to test the parse forest generation for an emblematic
|
2
|
+
# ambiguous sentence
|
3
|
+
# Based on example found at: http://www.nltk.org/book_1ed/ch08.html
|
4
|
+
require_relative '../../spec_helper'
|
5
|
+
|
6
|
+
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
7
|
+
require_relative '../../../lib/rley/parser/parse_walker_factory'
|
8
|
+
|
9
|
+
require_relative '../support/grammar_helper'
|
10
|
+
require_relative '../support/expectation_helper'
|
11
|
+
require_relative '../support/grammar_ambig01_helper'
|
12
|
+
|
13
|
+
# Load the class under test
|
14
|
+
require_relative '../../../lib/rley/parser/parse_forest_builder'
|
15
|
+
|
16
|
+
module Rley # Open this namespace to avoid module qualifier prefixes
|
17
|
+
module Parser
|
18
|
+
describe 'Coping with a NLP ambiguous toy grammar' do
|
19
|
+
include GrammarHelper # Mix-in with token factory method
|
20
|
+
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
21
|
+
|
22
|
+
let(:sample_grammar) do
|
23
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
24
|
+
builder.add_terminals('N', 'V', 'Pro') # N(oun), V(erb), Pro(noun)
|
25
|
+
builder.add_terminals('Det', 'P') # Det(erminer), P(reposition)
|
26
|
+
builder.add_production('S' => %w(NP VP))
|
27
|
+
builder.add_production('NP' => %w(Det N))
|
28
|
+
builder.add_production('NP' => %w(Det N PP))
|
29
|
+
builder.add_production('NP' => 'Pro')
|
30
|
+
builder.add_production('VP' => %w(V NP))
|
31
|
+
builder.add_production('VP' => %w(VP PP))
|
32
|
+
builder.add_production('PP' => %w(P NP))
|
33
|
+
builder.grammar
|
34
|
+
end
|
35
|
+
|
36
|
+
# The lexicon is just a Hash with pairs of the form:
|
37
|
+
# word => terminal symbol name
|
38
|
+
Groucho_lexicon = {
|
39
|
+
'elephant' => 'N',
|
40
|
+
'pajamas' => 'N',
|
41
|
+
'shot' => 'V',
|
42
|
+
'I' => 'Pro',
|
43
|
+
'an' => 'Det',
|
44
|
+
'my' => 'Det',
|
45
|
+
'in' => 'P'
|
46
|
+
}.freeze
|
47
|
+
|
48
|
+
# Highly simplified tokenizer implementation.
|
49
|
+
def tokenizer(aText, aGrammar)
|
50
|
+
tokens = aText.scan(/\S+/).map do |word|
|
51
|
+
term_name = Groucho_lexicon[word]
|
52
|
+
if term_name.nil?
|
53
|
+
raise StandardError, "Word '#{word}' not found in lexicon"
|
54
|
+
end
|
55
|
+
terminal = aGrammar.name2symbol[term_name]
|
56
|
+
Rley::Parser::Token.new(word, terminal)
|
57
|
+
end
|
58
|
+
|
59
|
+
return tokens
|
60
|
+
end
|
61
|
+
|
62
|
+
let(:sentence_tokens) do
|
63
|
+
sentence = 'I shot an elephant in my pajamas'
|
64
|
+
tokenizer(sentence, sample_grammar)
|
65
|
+
end
|
66
|
+
|
67
|
+
let(:sentence_result) do
|
68
|
+
parser = Parser::GFGEarleyParser.new(sample_grammar)
|
69
|
+
parser.parse(sentence_tokens)
|
70
|
+
end
|
71
|
+
|
72
|
+
# Emit a text representation of the current path.
|
73
|
+
def path_to_s()
|
74
|
+
text_parts = subject.curr_path.map do |path_element|
|
75
|
+
path_element.to_string(0)
|
76
|
+
end
|
77
|
+
return text_parts.join('/')
|
78
|
+
end
|
79
|
+
|
80
|
+
def next_event(eventType, anEntryText)
|
81
|
+
event = @walker.next
|
82
|
+
subject.receive_event(*event)
|
83
|
+
expect(event[0]).to eq(eventType)
|
84
|
+
expect(event[1].to_s).to eq(anEntryText)
|
85
|
+
end
|
86
|
+
|
87
|
+
def expected_curr_parent(anExpectation)
|
88
|
+
expect(subject.curr_parent.to_string(0)).to eq(anExpectation)
|
89
|
+
end
|
90
|
+
|
91
|
+
def expected_curr_path(anExpectation)
|
92
|
+
expect(path_to_s).to eq(anExpectation)
|
93
|
+
end
|
94
|
+
|
95
|
+
def expected_first_child(anExpectation)
|
96
|
+
child = subject.curr_parent.subnodes.first
|
97
|
+
expect(child.to_string(0)).to eq(anExpectation)
|
98
|
+
end
|
99
|
+
|
100
|
+
def root_children
|
101
|
+
subject.forest.root.subnodes
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
before(:each) do
|
106
|
+
factory = ParseWalkerFactory.new
|
107
|
+
accept_entry = sentence_result.accepting_entry
|
108
|
+
accept_index = sentence_result.chart.last_index
|
109
|
+
@walker = factory.build_walker(accept_entry, accept_index)
|
110
|
+
end
|
111
|
+
|
112
|
+
context 'Parse ambiguous sentence' do
|
113
|
+
|
114
|
+
subject { ParseForestBuilder.new(sentence_tokens) }
|
115
|
+
|
116
|
+
it 'should build a parse forest with a correct root node' do
|
117
|
+
next_event(:visit, 'S. | 0') # Event 1
|
118
|
+
expected_curr_path('S[0, 7]')
|
119
|
+
# Root node should have no child
|
120
|
+
expect(root_children.size).to be_zero
|
121
|
+
|
122
|
+
next_event(:visit, 'S => NP VP . | 0') # Event 2
|
123
|
+
expected_curr_path('S[0, 7]')
|
124
|
+
|
125
|
+
next_event(:visit, 'VP. | 1') # Event 3
|
126
|
+
expected_curr_path('S[0, 7]/VP[1, 7]')
|
127
|
+
# Root node should have one child
|
128
|
+
expect(root_children.size).to eq(1)
|
129
|
+
expect(root_children.first.to_string(0)).to eq('VP[1, 7]')
|
130
|
+
|
131
|
+
25.times do
|
132
|
+
event = @walker.next
|
133
|
+
subject.receive_event(*event)
|
134
|
+
end
|
135
|
+
|
136
|
+
next_event(:visit, 'NP. | 0') # Event 29
|
137
|
+
expected_curr_path('S[0, 7]/NP[0, 1]')
|
138
|
+
# Root node should have two children
|
139
|
+
expect(root_children.size).to eq(2)
|
140
|
+
expect(root_children.first.to_string(0)).to eq('NP[0, 1]')
|
141
|
+
|
142
|
+
18.times do
|
143
|
+
event = @walker.next
|
144
|
+
subject.receive_event(*event)
|
145
|
+
end
|
146
|
+
|
147
|
+
next_event(:revisit, 'NP. | 0') # Event 48
|
148
|
+
expected_curr_path('S[0, 7]')
|
149
|
+
# Root node should still have two children
|
150
|
+
expect(root_children.size).to eq(2)
|
151
|
+
end
|
152
|
+
end # context
|
153
|
+
end # describe
|
154
|
+
end # module
|
155
|
+
end # module
|
156
|
+
# End of file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.06
|
4
|
+
version: 0.3.07
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-11-06 00:00:00.000000000 Z
|
11
|
+
date: 2016-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -233,6 +233,7 @@ files:
|
|
233
233
|
- spec/rley/parser/gfg_earley_parser_spec.rb
|
234
234
|
- spec/rley/parser/gfg_parsing_spec.rb
|
235
235
|
- spec/rley/parser/grm_items_builder_spec.rb
|
236
|
+
- spec/rley/parser/groucho_spec.rb
|
236
237
|
- spec/rley/parser/parse_entry_set_spec.rb
|
237
238
|
- spec/rley/parser/parse_entry_spec.rb
|
238
239
|
- spec/rley/parser/parse_forest_builder_spec.rb
|
@@ -320,6 +321,7 @@ test_files:
|
|
320
321
|
- spec/rley/parser/gfg_earley_parser_spec.rb
|
321
322
|
- spec/rley/parser/gfg_parsing_spec.rb
|
322
323
|
- spec/rley/parser/grm_items_builder_spec.rb
|
324
|
+
- spec/rley/parser/groucho_spec.rb
|
323
325
|
- spec/rley/parser/parse_entry_set_spec.rb
|
324
326
|
- spec/rley/parser/parse_entry_spec.rb
|
325
327
|
- spec/rley/parser/parse_forest_builder_spec.rb
|