rley 0.3.06 → 0.3.07

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1bbd7b9c764c5f24a73b8f3f158e6c25f6f7ccd6
4
- data.tar.gz: 63676212ceb56eb1b09f511a6fade48abcd7e3ab
3
+ metadata.gz: b9b70296885dc161ee4fdedf19ff1c2e04256086
4
+ data.tar.gz: 993b5b46f0edf140140d4713161c88b3cc846ca6
5
5
  SHA512:
6
- metadata.gz: 2a17d1c3447e72f59f4f8268b7e90ccfda098fb0769b73219866d42fa77095e2e06f6bc980e6b0e5928ca1217c9c1eb8b5b973ff12b511fa3327745353caf5bd
7
- data.tar.gz: e03b4dc4cebf8fca9049e367641248601850398ed77a725ab92e3cb22f2552224dc143a8121d142f0e38522a5cd1dc5ef570341542d0b39845a3ab7b02596a97
6
+ metadata.gz: c390b234b9412d2b48d92ad725bfad8860dea3c1b9d21ef75ffb6191f05cc23a3bccfa8975c6a79e9651b29fba345298e1c053454523390d118eeab9f8b6ad24
7
+ data.tar.gz: 5358fb0d0201b821be251d47f804dd9362ea970e4765462346658106ecf8e452e6088d239b0ce63073bc7bc9db9299018f523ac9024de6cac3b85e664ba36fe8
@@ -1,3 +1,8 @@
1
+ ### 0.3.07 / 2016-11-08
2
+ * [FIX] The sharing of a forest node could be repeated in a production in a revisit event.
3
+ * [CHANGE] Method `ParseWalkerFactory#process_end_entry`. Added a guard condition to avoid repeated node sharing
4
+ * [NEW] RSpec file `groucho_spec.rb` added in order to test the fix.
5
+
1
6
  ### 0.3.06 / 2016-11-06
2
7
  * [FIX] There were missing links to shared parse forest nodes for ambiguous parses.
3
8
  * [NEW] RSpec file `ambiguous_parse_spec.rb` added in order to test the parse forest building for an ambiguous parse.
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.3.06'.freeze
6
+ Version = '0.3.07'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -92,9 +92,10 @@ module Rley # This module is used as a namespace
92
92
  # Retrieve the already existing node corresponding
93
93
  # to re-visited entry
94
94
  popular = @entry2node[anEntry]
95
-
96
- # Share with parent
97
- curr_parent.add_subnode(popular)
95
+
96
+ # Share with parent (if needed)...
97
+ children = curr_parent.subnodes
98
+ curr_parent.add_subnode(popular) unless children.include? popular
98
99
 
99
100
  else
100
101
  raise NotImplementedError
@@ -0,0 +1,156 @@
1
+ # Purpose: to test the parse forest generation for an emblematic
2
+ # ambiguous sentence
3
+ # Based on example found at: http://www.nltk.org/book_1ed/ch08.html
4
+ require_relative '../../spec_helper'
5
+
6
+ require_relative '../../../lib/rley/parser/gfg_earley_parser'
7
+ require_relative '../../../lib/rley/parser/parse_walker_factory'
8
+
9
+ require_relative '../support/grammar_helper'
10
+ require_relative '../support/expectation_helper'
11
+ require_relative '../support/grammar_ambig01_helper'
12
+
13
+ # Load the class under test
14
+ require_relative '../../../lib/rley/parser/parse_forest_builder'
15
+
16
+ module Rley # Open this namespace to avoid module qualifier prefixes
17
+ module Parser
18
+ describe 'Coping with a NLP ambiguous toy grammar' do
19
+ include GrammarHelper # Mix-in with token factory method
20
+ include ExpectationHelper # Mix-in with expectation on parse entry sets
21
+
22
+ let(:sample_grammar) do
23
+ builder = Rley::Syntax::GrammarBuilder.new
24
+ builder.add_terminals('N', 'V', 'Pro') # N(oun), V(erb), Pro(noun)
25
+ builder.add_terminals('Det', 'P') # Det(erminer), P(reposition)
26
+ builder.add_production('S' => %w(NP VP))
27
+ builder.add_production('NP' => %w(Det N))
28
+ builder.add_production('NP' => %w(Det N PP))
29
+ builder.add_production('NP' => 'Pro')
30
+ builder.add_production('VP' => %w(V NP))
31
+ builder.add_production('VP' => %w(VP PP))
32
+ builder.add_production('PP' => %w(P NP))
33
+ builder.grammar
34
+ end
35
+
36
+ # The lexicon is just a Hash with pairs of the form:
37
+ # word => terminal symbol name
38
+ Groucho_lexicon = {
39
+ 'elephant' => 'N',
40
+ 'pajamas' => 'N',
41
+ 'shot' => 'V',
42
+ 'I' => 'Pro',
43
+ 'an' => 'Det',
44
+ 'my' => 'Det',
45
+ 'in' => 'P'
46
+ }.freeze
47
+
48
+ # Highly simplified tokenizer implementation.
49
+ def tokenizer(aText, aGrammar)
50
+ tokens = aText.scan(/\S+/).map do |word|
51
+ term_name = Groucho_lexicon[word]
52
+ if term_name.nil?
53
+ raise StandardError, "Word '#{word}' not found in lexicon"
54
+ end
55
+ terminal = aGrammar.name2symbol[term_name]
56
+ Rley::Parser::Token.new(word, terminal)
57
+ end
58
+
59
+ return tokens
60
+ end
61
+
62
+ let(:sentence_tokens) do
63
+ sentence = 'I shot an elephant in my pajamas'
64
+ tokenizer(sentence, sample_grammar)
65
+ end
66
+
67
+ let(:sentence_result) do
68
+ parser = Parser::GFGEarleyParser.new(sample_grammar)
69
+ parser.parse(sentence_tokens)
70
+ end
71
+
72
+ # Emit a text representation of the current path.
73
+ def path_to_s()
74
+ text_parts = subject.curr_path.map do |path_element|
75
+ path_element.to_string(0)
76
+ end
77
+ return text_parts.join('/')
78
+ end
79
+
80
+ def next_event(eventType, anEntryText)
81
+ event = @walker.next
82
+ subject.receive_event(*event)
83
+ expect(event[0]).to eq(eventType)
84
+ expect(event[1].to_s).to eq(anEntryText)
85
+ end
86
+
87
+ def expected_curr_parent(anExpectation)
88
+ expect(subject.curr_parent.to_string(0)).to eq(anExpectation)
89
+ end
90
+
91
+ def expected_curr_path(anExpectation)
92
+ expect(path_to_s).to eq(anExpectation)
93
+ end
94
+
95
+ def expected_first_child(anExpectation)
96
+ child = subject.curr_parent.subnodes.first
97
+ expect(child.to_string(0)).to eq(anExpectation)
98
+ end
99
+
100
+ def root_children
101
+ subject.forest.root.subnodes
102
+ end
103
+
104
+
105
+ before(:each) do
106
+ factory = ParseWalkerFactory.new
107
+ accept_entry = sentence_result.accepting_entry
108
+ accept_index = sentence_result.chart.last_index
109
+ @walker = factory.build_walker(accept_entry, accept_index)
110
+ end
111
+
112
+ context 'Parse ambiguous sentence' do
113
+
114
+ subject { ParseForestBuilder.new(sentence_tokens) }
115
+
116
+ it 'should build a parse forest with a correct root node' do
117
+ next_event(:visit, 'S. | 0') # Event 1
118
+ expected_curr_path('S[0, 7]')
119
+ # Root node should have no child
120
+ expect(root_children.size).to be_zero
121
+
122
+ next_event(:visit, 'S => NP VP . | 0') # Event 2
123
+ expected_curr_path('S[0, 7]')
124
+
125
+ next_event(:visit, 'VP. | 1') # Event 3
126
+ expected_curr_path('S[0, 7]/VP[1, 7]')
127
+ # Root node should have one child
128
+ expect(root_children.size).to eq(1)
129
+ expect(root_children.first.to_string(0)).to eq('VP[1, 7]')
130
+
131
+ 25.times do
132
+ event = @walker.next
133
+ subject.receive_event(*event)
134
+ end
135
+
136
+ next_event(:visit, 'NP. | 0') # Event 29
137
+ expected_curr_path('S[0, 7]/NP[0, 1]')
138
+ # Root node should have two children
139
+ expect(root_children.size).to eq(2)
140
+ expect(root_children.first.to_string(0)).to eq('NP[0, 1]')
141
+
142
+ 18.times do
143
+ event = @walker.next
144
+ subject.receive_event(*event)
145
+ end
146
+
147
+ next_event(:revisit, 'NP. | 0') # Event 48
148
+ expected_curr_path('S[0, 7]')
149
+ # Root node should still have two children
150
+ expect(root_children.size).to eq(2)
151
+ end
152
+ end # context
153
+ end # describe
154
+ end # module
155
+ end # module
156
+ # End of file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.06
4
+ version: 0.3.07
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-06 00:00:00.000000000 Z
11
+ date: 2016-11-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -233,6 +233,7 @@ files:
233
233
  - spec/rley/parser/gfg_earley_parser_spec.rb
234
234
  - spec/rley/parser/gfg_parsing_spec.rb
235
235
  - spec/rley/parser/grm_items_builder_spec.rb
236
+ - spec/rley/parser/groucho_spec.rb
236
237
  - spec/rley/parser/parse_entry_set_spec.rb
237
238
  - spec/rley/parser/parse_entry_spec.rb
238
239
  - spec/rley/parser/parse_forest_builder_spec.rb
@@ -320,6 +321,7 @@ test_files:
320
321
  - spec/rley/parser/gfg_earley_parser_spec.rb
321
322
  - spec/rley/parser/gfg_parsing_spec.rb
322
323
  - spec/rley/parser/grm_items_builder_spec.rb
324
+ - spec/rley/parser/groucho_spec.rb
323
325
  - spec/rley/parser/parse_entry_set_spec.rb
324
326
  - spec/rley/parser/parse_entry_spec.rb
325
327
  - spec/rley/parser/parse_forest_builder_spec.rb