rley 0.3.06 → 0.3.07

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1bbd7b9c764c5f24a73b8f3f158e6c25f6f7ccd6
4
- data.tar.gz: 63676212ceb56eb1b09f511a6fade48abcd7e3ab
3
+ metadata.gz: b9b70296885dc161ee4fdedf19ff1c2e04256086
4
+ data.tar.gz: 993b5b46f0edf140140d4713161c88b3cc846ca6
5
5
  SHA512:
6
- metadata.gz: 2a17d1c3447e72f59f4f8268b7e90ccfda098fb0769b73219866d42fa77095e2e06f6bc980e6b0e5928ca1217c9c1eb8b5b973ff12b511fa3327745353caf5bd
7
- data.tar.gz: e03b4dc4cebf8fca9049e367641248601850398ed77a725ab92e3cb22f2552224dc143a8121d142f0e38522a5cd1dc5ef570341542d0b39845a3ab7b02596a97
6
+ metadata.gz: c390b234b9412d2b48d92ad725bfad8860dea3c1b9d21ef75ffb6191f05cc23a3bccfa8975c6a79e9651b29fba345298e1c053454523390d118eeab9f8b6ad24
7
+ data.tar.gz: 5358fb0d0201b821be251d47f804dd9362ea970e4765462346658106ecf8e452e6088d239b0ce63073bc7bc9db9299018f523ac9024de6cac3b85e664ba36fe8
@@ -1,3 +1,8 @@
1
+ ### 0.3.07 / 2016-11-08
2
+ * [FIX] The sharing of a forest node could be repeated in a production in a revisit event.
3
+ * [CHANGE] Method `ParseWalkerFactory#process_end_entry`. Added a guard condition to avoid repeated node sharing
4
+ * [NEW] RSpec file `groucho_spec.rb` added in order to test the fix.
5
+
1
6
  ### 0.3.06 / 2016-11-06
2
7
  * [FIX] There were missing links to shared parse forest nodes for ambiguous parses.
3
8
  * [NEW] RSpec file `ambiguous_parse_spec.rb` added in order to test the parse forest building for an ambiguous parse.
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.3.06'.freeze
6
+ Version = '0.3.07'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -92,9 +92,10 @@ module Rley # This module is used as a namespace
92
92
  # Retrieve the already existing node corresponding
93
93
  # to re-visited entry
94
94
  popular = @entry2node[anEntry]
95
-
96
- # Share with parent
97
- curr_parent.add_subnode(popular)
95
+
96
+ # Share with parent (if needed)...
97
+ children = curr_parent.subnodes
98
+ curr_parent.add_subnode(popular) unless children.include? popular
98
99
 
99
100
  else
100
101
  raise NotImplementedError
@@ -0,0 +1,156 @@
1
+ # Purpose: to test the parse forest generation for an emblematic
2
+ # ambiguous sentence
3
+ # Based on example found at: http://www.nltk.org/book_1ed/ch08.html
4
+ require_relative '../../spec_helper'
5
+
6
+ require_relative '../../../lib/rley/parser/gfg_earley_parser'
7
+ require_relative '../../../lib/rley/parser/parse_walker_factory'
8
+
9
+ require_relative '../support/grammar_helper'
10
+ require_relative '../support/expectation_helper'
11
+ require_relative '../support/grammar_ambig01_helper'
12
+
13
+ # Load the class under test
14
+ require_relative '../../../lib/rley/parser/parse_forest_builder'
15
+
16
+ module Rley # Open this namespace to avoid module qualifier prefixes
17
+ module Parser
18
+ describe 'Coping with a NLP ambiguous toy grammar' do
19
+ include GrammarHelper # Mix-in with token factory method
20
+ include ExpectationHelper # Mix-in with expectation on parse entry sets
21
+
22
+ let(:sample_grammar) do
23
+ builder = Rley::Syntax::GrammarBuilder.new
24
+ builder.add_terminals('N', 'V', 'Pro') # N(oun), V(erb), Pro(noun)
25
+ builder.add_terminals('Det', 'P') # Det(erminer), P(reposition)
26
+ builder.add_production('S' => %w(NP VP))
27
+ builder.add_production('NP' => %w(Det N))
28
+ builder.add_production('NP' => %w(Det N PP))
29
+ builder.add_production('NP' => 'Pro')
30
+ builder.add_production('VP' => %w(V NP))
31
+ builder.add_production('VP' => %w(VP PP))
32
+ builder.add_production('PP' => %w(P NP))
33
+ builder.grammar
34
+ end
35
+
36
+ # The lexicon is just a Hash with pairs of the form:
37
+ # word => terminal symbol name
38
+ Groucho_lexicon = {
39
+ 'elephant' => 'N',
40
+ 'pajamas' => 'N',
41
+ 'shot' => 'V',
42
+ 'I' => 'Pro',
43
+ 'an' => 'Det',
44
+ 'my' => 'Det',
45
+ 'in' => 'P'
46
+ }.freeze
47
+
48
+ # Highly simplified tokenizer implementation.
49
+ def tokenizer(aText, aGrammar)
50
+ tokens = aText.scan(/\S+/).map do |word|
51
+ term_name = Groucho_lexicon[word]
52
+ if term_name.nil?
53
+ raise StandardError, "Word '#{word}' not found in lexicon"
54
+ end
55
+ terminal = aGrammar.name2symbol[term_name]
56
+ Rley::Parser::Token.new(word, terminal)
57
+ end
58
+
59
+ return tokens
60
+ end
61
+
62
+ let(:sentence_tokens) do
63
+ sentence = 'I shot an elephant in my pajamas'
64
+ tokenizer(sentence, sample_grammar)
65
+ end
66
+
67
+ let(:sentence_result) do
68
+ parser = Parser::GFGEarleyParser.new(sample_grammar)
69
+ parser.parse(sentence_tokens)
70
+ end
71
+
72
+ # Emit a text representation of the current path.
73
+ def path_to_s()
74
+ text_parts = subject.curr_path.map do |path_element|
75
+ path_element.to_string(0)
76
+ end
77
+ return text_parts.join('/')
78
+ end
79
+
80
+ def next_event(eventType, anEntryText)
81
+ event = @walker.next
82
+ subject.receive_event(*event)
83
+ expect(event[0]).to eq(eventType)
84
+ expect(event[1].to_s).to eq(anEntryText)
85
+ end
86
+
87
+ def expected_curr_parent(anExpectation)
88
+ expect(subject.curr_parent.to_string(0)).to eq(anExpectation)
89
+ end
90
+
91
+ def expected_curr_path(anExpectation)
92
+ expect(path_to_s).to eq(anExpectation)
93
+ end
94
+
95
+ def expected_first_child(anExpectation)
96
+ child = subject.curr_parent.subnodes.first
97
+ expect(child.to_string(0)).to eq(anExpectation)
98
+ end
99
+
100
+ def root_children
101
+ subject.forest.root.subnodes
102
+ end
103
+
104
+
105
+ before(:each) do
106
+ factory = ParseWalkerFactory.new
107
+ accept_entry = sentence_result.accepting_entry
108
+ accept_index = sentence_result.chart.last_index
109
+ @walker = factory.build_walker(accept_entry, accept_index)
110
+ end
111
+
112
+ context 'Parse ambiguous sentence' do
113
+
114
+ subject { ParseForestBuilder.new(sentence_tokens) }
115
+
116
+ it 'should build a parse forest with a correct root node' do
117
+ next_event(:visit, 'S. | 0') # Event 1
118
+ expected_curr_path('S[0, 7]')
119
+ # Root node should have no child
120
+ expect(root_children.size).to be_zero
121
+
122
+ next_event(:visit, 'S => NP VP . | 0') # Event 2
123
+ expected_curr_path('S[0, 7]')
124
+
125
+ next_event(:visit, 'VP. | 1') # Event 3
126
+ expected_curr_path('S[0, 7]/VP[1, 7]')
127
+ # Root node should have one child
128
+ expect(root_children.size).to eq(1)
129
+ expect(root_children.first.to_string(0)).to eq('VP[1, 7]')
130
+
131
+ 25.times do
132
+ event = @walker.next
133
+ subject.receive_event(*event)
134
+ end
135
+
136
+ next_event(:visit, 'NP. | 0') # Event 29
137
+ expected_curr_path('S[0, 7]/NP[0, 1]')
138
+ # Root node should have two children
139
+ expect(root_children.size).to eq(2)
140
+ expect(root_children.first.to_string(0)).to eq('NP[0, 1]')
141
+
142
+ 18.times do
143
+ event = @walker.next
144
+ subject.receive_event(*event)
145
+ end
146
+
147
+ next_event(:revisit, 'NP. | 0') # Event 48
148
+ expected_curr_path('S[0, 7]')
149
+ # Root node should still have two children
150
+ expect(root_children.size).to eq(2)
151
+ end
152
+ end # context
153
+ end # describe
154
+ end # module
155
+ end # module
156
+ # End of file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.06
4
+ version: 0.3.07
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-06 00:00:00.000000000 Z
11
+ date: 2016-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -233,6 +233,7 @@ files:
233
233
  - spec/rley/parser/gfg_earley_parser_spec.rb
234
234
  - spec/rley/parser/gfg_parsing_spec.rb
235
235
  - spec/rley/parser/grm_items_builder_spec.rb
236
+ - spec/rley/parser/groucho_spec.rb
236
237
  - spec/rley/parser/parse_entry_set_spec.rb
237
238
  - spec/rley/parser/parse_entry_spec.rb
238
239
  - spec/rley/parser/parse_forest_builder_spec.rb
@@ -320,6 +321,7 @@ test_files:
320
321
  - spec/rley/parser/gfg_earley_parser_spec.rb
321
322
  - spec/rley/parser/gfg_parsing_spec.rb
322
323
  - spec/rley/parser/grm_items_builder_spec.rb
324
+ - spec/rley/parser/groucho_spec.rb
323
325
  - spec/rley/parser/parse_entry_set_spec.rb
324
326
  - spec/rley/parser/parse_entry_spec.rb
325
327
  - spec/rley/parser/parse_forest_builder_spec.rb