rley 0.3.06 → 0.3.07
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_forest_builder.rb +4 -3
- data/spec/rley/parser/groucho_spec.rb +156 -0
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b9b70296885dc161ee4fdedf19ff1c2e04256086
|
4
|
+
data.tar.gz: 993b5b46f0edf140140d4713161c88b3cc846ca6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c390b234b9412d2b48d92ad725bfad8860dea3c1b9d21ef75ffb6191f05cc23a3bccfa8975c6a79e9651b29fba345298e1c053454523390d118eeab9f8b6ad24
|
7
|
+
data.tar.gz: 5358fb0d0201b821be251d47f804dd9362ea970e4765462346658106ecf8e452e6088d239b0ce63073bc7bc9db9299018f523ac9024de6cac3b85e664ba36fe8
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
### 0.3.07 / 2016-11-08
|
2
|
+
* [FIX] The sharing of a forest node could be repeated in a production in a revisit event.
|
3
|
+
* [CHANGE] Method `ParseWalkerFactory#process_end_entry`. Added a guard condition to avoid repeated node sharing
|
4
|
+
* [NEW] RSpec file `groucho_spec.rb` added in order to test the fix.
|
5
|
+
|
1
6
|
### 0.3.06 / 2016-11-06
|
2
7
|
* [FIX] There were missing links to shared parse forest nodes for ambiguous parses.
|
3
8
|
* [NEW] RSpec file `ambiguous_parse_spec.rb` added in order to test the parse forest building for an ambiguous parse.
|
data/lib/rley/constants.rb
CHANGED
@@ -92,9 +92,10 @@ module Rley # This module is used as a namespace
|
|
92
92
|
# Retrieve the already existing node corresponding
|
93
93
|
# to re-visited entry
|
94
94
|
popular = @entry2node[anEntry]
|
95
|
-
|
96
|
-
# Share with parent
|
97
|
-
curr_parent.add_subnode(popular)
|
95
|
+
|
96
|
+
# Share with parent (if needed)...
|
97
|
+
children = curr_parent.subnodes
|
98
|
+
curr_parent.add_subnode(popular) unless children.include? popular
|
98
99
|
|
99
100
|
else
|
100
101
|
raise NotImplementedError
|
@@ -0,0 +1,156 @@
|
|
1
|
+
# Purpose: to test the parse forest generation for an emblematic
|
2
|
+
# ambiguous sentence
|
3
|
+
# Based on example found at: http://www.nltk.org/book_1ed/ch08.html
|
4
|
+
require_relative '../../spec_helper'
|
5
|
+
|
6
|
+
require_relative '../../../lib/rley/parser/gfg_earley_parser'
|
7
|
+
require_relative '../../../lib/rley/parser/parse_walker_factory'
|
8
|
+
|
9
|
+
require_relative '../support/grammar_helper'
|
10
|
+
require_relative '../support/expectation_helper'
|
11
|
+
require_relative '../support/grammar_ambig01_helper'
|
12
|
+
|
13
|
+
# Load the class under test
|
14
|
+
require_relative '../../../lib/rley/parser/parse_forest_builder'
|
15
|
+
|
16
|
+
module Rley # Open this namespace to avoid module qualifier prefixes
|
17
|
+
module Parser
|
18
|
+
describe 'Coping with a NLP ambiguous toy grammar' do
|
19
|
+
include GrammarHelper # Mix-in with token factory method
|
20
|
+
include ExpectationHelper # Mix-in with expectation on parse entry sets
|
21
|
+
|
22
|
+
let(:sample_grammar) do
|
23
|
+
builder = Rley::Syntax::GrammarBuilder.new
|
24
|
+
builder.add_terminals('N', 'V', 'Pro') # N(oun), V(erb), Pro(noun)
|
25
|
+
builder.add_terminals('Det', 'P') # Det(erminer), P(reposition)
|
26
|
+
builder.add_production('S' => %w(NP VP))
|
27
|
+
builder.add_production('NP' => %w(Det N))
|
28
|
+
builder.add_production('NP' => %w(Det N PP))
|
29
|
+
builder.add_production('NP' => 'Pro')
|
30
|
+
builder.add_production('VP' => %w(V NP))
|
31
|
+
builder.add_production('VP' => %w(VP PP))
|
32
|
+
builder.add_production('PP' => %w(P NP))
|
33
|
+
builder.grammar
|
34
|
+
end
|
35
|
+
|
36
|
+
# The lexicon is just a Hash with pairs of the form:
|
37
|
+
# word => terminal symbol name
|
38
|
+
Groucho_lexicon = {
|
39
|
+
'elephant' => 'N',
|
40
|
+
'pajamas' => 'N',
|
41
|
+
'shot' => 'V',
|
42
|
+
'I' => 'Pro',
|
43
|
+
'an' => 'Det',
|
44
|
+
'my' => 'Det',
|
45
|
+
'in' => 'P'
|
46
|
+
}.freeze
|
47
|
+
|
48
|
+
# Highly simplified tokenizer implementation.
|
49
|
+
def tokenizer(aText, aGrammar)
|
50
|
+
tokens = aText.scan(/\S+/).map do |word|
|
51
|
+
term_name = Groucho_lexicon[word]
|
52
|
+
if term_name.nil?
|
53
|
+
raise StandardError, "Word '#{word}' not found in lexicon"
|
54
|
+
end
|
55
|
+
terminal = aGrammar.name2symbol[term_name]
|
56
|
+
Rley::Parser::Token.new(word, terminal)
|
57
|
+
end
|
58
|
+
|
59
|
+
return tokens
|
60
|
+
end
|
61
|
+
|
62
|
+
let(:sentence_tokens) do
|
63
|
+
sentence = 'I shot an elephant in my pajamas'
|
64
|
+
tokenizer(sentence, sample_grammar)
|
65
|
+
end
|
66
|
+
|
67
|
+
let(:sentence_result) do
|
68
|
+
parser = Parser::GFGEarleyParser.new(sample_grammar)
|
69
|
+
parser.parse(sentence_tokens)
|
70
|
+
end
|
71
|
+
|
72
|
+
# Emit a text representation of the current path.
|
73
|
+
def path_to_s()
|
74
|
+
text_parts = subject.curr_path.map do |path_element|
|
75
|
+
path_element.to_string(0)
|
76
|
+
end
|
77
|
+
return text_parts.join('/')
|
78
|
+
end
|
79
|
+
|
80
|
+
def next_event(eventType, anEntryText)
|
81
|
+
event = @walker.next
|
82
|
+
subject.receive_event(*event)
|
83
|
+
expect(event[0]).to eq(eventType)
|
84
|
+
expect(event[1].to_s).to eq(anEntryText)
|
85
|
+
end
|
86
|
+
|
87
|
+
def expected_curr_parent(anExpectation)
|
88
|
+
expect(subject.curr_parent.to_string(0)).to eq(anExpectation)
|
89
|
+
end
|
90
|
+
|
91
|
+
def expected_curr_path(anExpectation)
|
92
|
+
expect(path_to_s).to eq(anExpectation)
|
93
|
+
end
|
94
|
+
|
95
|
+
def expected_first_child(anExpectation)
|
96
|
+
child = subject.curr_parent.subnodes.first
|
97
|
+
expect(child.to_string(0)).to eq(anExpectation)
|
98
|
+
end
|
99
|
+
|
100
|
+
def root_children
|
101
|
+
subject.forest.root.subnodes
|
102
|
+
end
|
103
|
+
|
104
|
+
|
105
|
+
before(:each) do
|
106
|
+
factory = ParseWalkerFactory.new
|
107
|
+
accept_entry = sentence_result.accepting_entry
|
108
|
+
accept_index = sentence_result.chart.last_index
|
109
|
+
@walker = factory.build_walker(accept_entry, accept_index)
|
110
|
+
end
|
111
|
+
|
112
|
+
context 'Parse ambiguous sentence' do
|
113
|
+
|
114
|
+
subject { ParseForestBuilder.new(sentence_tokens) }
|
115
|
+
|
116
|
+
it 'should build a parse forest with a correct root node' do
|
117
|
+
next_event(:visit, 'S. | 0') # Event 1
|
118
|
+
expected_curr_path('S[0, 7]')
|
119
|
+
# Root node should have no child
|
120
|
+
expect(root_children.size).to be_zero
|
121
|
+
|
122
|
+
next_event(:visit, 'S => NP VP . | 0') # Event 2
|
123
|
+
expected_curr_path('S[0, 7]')
|
124
|
+
|
125
|
+
next_event(:visit, 'VP. | 1') # Event 3
|
126
|
+
expected_curr_path('S[0, 7]/VP[1, 7]')
|
127
|
+
# Root node should have one child
|
128
|
+
expect(root_children.size).to eq(1)
|
129
|
+
expect(root_children.first.to_string(0)).to eq('VP[1, 7]')
|
130
|
+
|
131
|
+
25.times do
|
132
|
+
event = @walker.next
|
133
|
+
subject.receive_event(*event)
|
134
|
+
end
|
135
|
+
|
136
|
+
next_event(:visit, 'NP. | 0') # Event 29
|
137
|
+
expected_curr_path('S[0, 7]/NP[0, 1]')
|
138
|
+
# Root node should have two children
|
139
|
+
expect(root_children.size).to eq(2)
|
140
|
+
expect(root_children.first.to_string(0)).to eq('NP[0, 1]')
|
141
|
+
|
142
|
+
18.times do
|
143
|
+
event = @walker.next
|
144
|
+
subject.receive_event(*event)
|
145
|
+
end
|
146
|
+
|
147
|
+
next_event(:revisit, 'NP. | 0') # Event 48
|
148
|
+
expected_curr_path('S[0, 7]')
|
149
|
+
# Root node should still have two children
|
150
|
+
expect(root_children.size).to eq(2)
|
151
|
+
end
|
152
|
+
end # context
|
153
|
+
end # describe
|
154
|
+
end # module
|
155
|
+
end # module
|
156
|
+
# End of file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.06
|
4
|
+
version: 0.3.07
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-11-
|
11
|
+
date: 2016-11-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -233,6 +233,7 @@ files:
|
|
233
233
|
- spec/rley/parser/gfg_earley_parser_spec.rb
|
234
234
|
- spec/rley/parser/gfg_parsing_spec.rb
|
235
235
|
- spec/rley/parser/grm_items_builder_spec.rb
|
236
|
+
- spec/rley/parser/groucho_spec.rb
|
236
237
|
- spec/rley/parser/parse_entry_set_spec.rb
|
237
238
|
- spec/rley/parser/parse_entry_spec.rb
|
238
239
|
- spec/rley/parser/parse_forest_builder_spec.rb
|
@@ -320,6 +321,7 @@ test_files:
|
|
320
321
|
- spec/rley/parser/gfg_earley_parser_spec.rb
|
321
322
|
- spec/rley/parser/gfg_parsing_spec.rb
|
322
323
|
- spec/rley/parser/grm_items_builder_spec.rb
|
324
|
+
- spec/rley/parser/groucho_spec.rb
|
323
325
|
- spec/rley/parser/parse_entry_set_spec.rb
|
324
326
|
- spec/rley/parser/parse_entry_spec.rb
|
325
327
|
- spec/rley/parser/parse_forest_builder_spec.rb
|