rley 0.3.09 → 0.3.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +16 -19
- data/examples/NLP/mini_en_demo.rb +15 -15
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/parser/parse_forest_builder.rb +21 -20
- data/lib/rley/sppf/parse_forest.rb +12 -0
- data/lib/rley/syntax/grammar_builder.rb +6 -1
- data/spec/rley/parser/parse_forest_factory_spec.rb +1 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +30 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20ceec50ffa7359dbf635a00184c604654cdf9bb
|
4
|
+
data.tar.gz: 76e9432f62d88127257c66277e21be37fd1b00bd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a80c8e38ede7dd3908a52b73fd8fa13fc7087537d8272c8d648764902cb29f0d5db23c157a2b704eb66e2c8322d7e2ee4822dfd36fadb25ac22e6ec9f517f3eb
|
7
|
+
data.tar.gz: 9d23a806c510790c6fcc19566520e11c9292d642b2debe20784c4a884d646c173a789ae3060345ad659698554fce6748078ef7d5cc2353f7e1c3495ecd156104
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
### 0.3.10 / 2016-12-04
|
2
|
+
* [NEW] Method `ParseForest#ambiguous?`. Indicates whether the parse is ambiguous.
|
3
|
+
* [CHANGE] File `README.md` updated with new grammar builder syntax & typo fixes.
|
4
|
+
* [CHANGE] Method `GrammarBuilder#initialize`: Accepts a block argument that allows lighter construction.
|
5
|
+
|
6
|
+
### 0.3.09 / 2016-11-27
|
7
|
+
* [CHANGE] File `README.md` fully rewritten and added an example.
|
8
|
+
* [CHANGE] Directory `examples` completely reorganized.
|
9
|
+
|
1
10
|
### 0.3.09 / 2016-11-27
|
2
11
|
* [CHANGE] File `README.md` fully rewritten and added an example.
|
3
12
|
* [CHANGE] Directory `examples` completely reorganized.
|
data/README.md
CHANGED
@@ -9,7 +9,7 @@
|
|
9
9
|
[Rley](https://github.com/famished-tiger/Rley)
|
10
10
|
======
|
11
11
|
|
12
|
-
A Ruby library for constructing general parsers for _any_ context-free
|
12
|
+
A Ruby library for constructing general parsers for _any_ context-free language.
|
13
13
|
|
14
14
|
|
15
15
|
What is Rley?
|
@@ -69,7 +69,7 @@ The tour is organized into the following steps:
|
|
69
69
|
1. [Defining the language grammar](#defining-the-language-grammar)
|
70
70
|
2. [Creating a lexicon](#creating-a-lexicon)
|
71
71
|
3. [Creating a tokenizer](#creating-a-tokenizer)
|
72
|
-
4. [Building the parser](building-the-parser)
|
72
|
+
4. [Building the parser](#building-the-parser)
|
73
73
|
5. [Parsing some input](#parsing-some-input)
|
74
74
|
6. [Generating the parse forest](#generating-the-parse-forest)
|
75
75
|
|
@@ -84,21 +84,20 @@ The subset of English grammar is based on an example from the NLTK book.
|
|
84
84
|
require 'rley' # Load Rley library
|
85
85
|
|
86
86
|
# Instantiate a builder object that will build the grammar for us
|
87
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
87
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
88
|
+
# Terminal symbols (= word categories in lexicon)
|
89
|
+
add_terminals('Noun', 'Proper-Noun', 'Verb')
|
90
|
+
add_terminals('Determiner', 'Preposition')
|
91
|
+
|
92
|
+
# Here we define the productions (= grammar rules)
|
93
|
+
rule 'S' => %w[NP VP]
|
94
|
+
rule 'NP' => 'Proper-Noun'
|
95
|
+
rule 'NP' => %w[Determiner Noun]
|
96
|
+
rule 'NP' => %w[Determiner Noun PP]
|
97
|
+
rule 'VP' => %w[Verb NP]
|
98
|
+
rule 'VP' => %w[Verb NP PP]
|
99
|
+
rule 'PP' => %w[Preposition NP]
|
100
|
+
end
|
102
101
|
# And now, let's build the grammar...
|
103
102
|
grammar = builder.grammar
|
104
103
|
```
|
@@ -208,8 +207,6 @@ Here are a few other ones:
|
|
208
207
|
## Thanks to:
|
209
208
|
* Professor Keshav Pingali, one of the creators of the Grammar Flow Graph parsing approach for his encouraging e-mail exchanges.
|
210
209
|
|
211
|
-
---
|
212
|
-
|
213
210
|
Copyright
|
214
211
|
---------
|
215
212
|
Copyright (c) 2014-2016, Dimitri Geshef.
|
@@ -9,20 +9,20 @@ require 'rley' # Load Rley library
|
|
9
9
|
# with a very simplified syntax.
|
10
10
|
|
11
11
|
# Instantiate a builder object that will build the grammar for us
|
12
|
-
builder = Rley::Syntax::GrammarBuilder.new
|
12
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
13
|
+
# Next 2 lines we define the terminal symbols (=word categories in the lexicon)
|
14
|
+
add_terminals('Noun', 'Proper-Noun', 'Verb')
|
15
|
+
add_terminals('Determiner', 'Preposition')
|
13
16
|
|
14
|
-
#
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
builder.add_production('VP' => %w[Verb NP])
|
24
|
-
builder.add_production('VP' => %w[Verb NP PP])
|
25
|
-
builder.add_production('PP' => %w[Preposition NP])
|
17
|
+
# Here we define the productions (= grammar rules)
|
18
|
+
rule 'S' => %w[NP VP]
|
19
|
+
rule 'NP' => 'Proper-Noun'
|
20
|
+
rule 'NP' => %w[Determiner Noun]
|
21
|
+
rule 'NP' => %w[Determiner Noun PP]
|
22
|
+
rule 'VP' => %w[Verb NP]
|
23
|
+
rule 'VP' => %w[Verb NP PP]
|
24
|
+
rule 'PP' => %w[Preposition NP]
|
25
|
+
end
|
26
26
|
|
27
27
|
# And now, let's build the grammar...
|
28
28
|
grammar = builder.grammar
|
@@ -70,8 +70,6 @@ def tokenizer(aTextToParse, aGrammar)
|
|
70
70
|
return tokens
|
71
71
|
end
|
72
72
|
|
73
|
-
More realistic NLP will will most probably
|
74
|
-
|
75
73
|
########################################
|
76
74
|
# Step 4. Create a parser for that grammar
|
77
75
|
# Easy with Rley...
|
@@ -80,6 +78,7 @@ parser = Rley::Parser::GFGEarleyParser.new(grammar)
|
|
80
78
|
########################################
|
81
79
|
# Step 5. Parsing the input
|
82
80
|
input_to_parse = 'John saw Mary with a telescope'
|
81
|
+
# input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
|
83
82
|
# Convert input text into a sequence of token objects...
|
84
83
|
tokens = tokenizer(input_to_parse, grammar)
|
85
84
|
result = parser.parse(tokens)
|
@@ -90,3 +89,4 @@ puts "Parsing successful? #{result.success?}" # => Parsing successful? true
|
|
90
89
|
# Step 6. Generating the parse forest
|
91
90
|
pforest = result.parse_forest
|
92
91
|
|
92
|
+
puts "Parsing ambiguous? #{pforest.ambiguous?}" # => Parsing ambiguous? false
|
data/lib/rley/constants.rb
CHANGED
@@ -29,7 +29,7 @@ module Rley # This module is used as a namespace
|
|
29
29
|
# A hash with pairs of the form: visited parse entry => forest node
|
30
30
|
attr_reader(:entry2node)
|
31
31
|
|
32
|
-
# A hash with pairs of the form:
|
32
|
+
# A hash with pairs of the form:
|
33
33
|
# parent end entry => path to alternative node
|
34
34
|
# This is needed for synchronizing backtracking
|
35
35
|
attr_reader(:entry2path_to_alt)
|
@@ -89,12 +89,12 @@ module Rley # This module is used as a namespace
|
|
89
89
|
|
90
90
|
|
91
91
|
when :revisit
|
92
|
-
# Retrieve the already existing node corresponding
|
92
|
+
# Retrieve the already existing node corresponding
|
93
93
|
# to re-visited entry
|
94
94
|
popular = @entry2node[anEntry]
|
95
|
-
|
95
|
+
|
96
96
|
# Share with parent (if needed)...
|
97
|
-
children = curr_parent.subnodes
|
97
|
+
children = curr_parent.subnodes
|
98
98
|
curr_parent.add_subnode(popular) unless children.include? popular
|
99
99
|
|
100
100
|
else
|
@@ -105,7 +105,7 @@ module Rley # This module is used as a namespace
|
|
105
105
|
|
106
106
|
def process_item_entry(anEvent, anEntry, anIndex)
|
107
107
|
case anEvent
|
108
|
-
when :visit
|
108
|
+
when :visit
|
109
109
|
if anEntry.exit_entry?
|
110
110
|
# Previous entry was an end entry (X. pattern)
|
111
111
|
# Does the previous entry have multiple antecedent?
|
@@ -119,7 +119,7 @@ module Rley # This module is used as a namespace
|
|
119
119
|
create_alternative_node(anEntry)
|
120
120
|
end
|
121
121
|
end
|
122
|
-
|
122
|
+
|
123
123
|
# Does this entry have multiple antecedent?
|
124
124
|
if anEntry.antecedents.size > 1
|
125
125
|
# Store current path for later backtracking
|
@@ -129,7 +129,7 @@ module Rley # This module is used as a namespace
|
|
129
129
|
# curr_parent.refinement = :or
|
130
130
|
|
131
131
|
create_alternative_node(anEntry)
|
132
|
-
end
|
132
|
+
end
|
133
133
|
|
134
134
|
# Retrieve the grammar symbol before the dot (if any)
|
135
135
|
prev_symbol = anEntry.prev_symbol
|
@@ -146,7 +146,7 @@ module Rley # This module is used as a namespace
|
|
146
146
|
end
|
147
147
|
curr_path.pop if curr_parent.kind_of?(SPPF::AlternativeNode)
|
148
148
|
end
|
149
|
-
|
149
|
+
|
150
150
|
when :backtrack
|
151
151
|
# Restore path
|
152
152
|
@curr_path = entry2path_to_alt[anEntry].dup
|
@@ -154,9 +154,9 @@ module Rley # This module is used as a namespace
|
|
154
154
|
antecedent_index = curr_parent.subnodes.size
|
155
155
|
# puts "Current parent #{curr_parent.to_string(0)}"
|
156
156
|
# puts "Antecedent index #{antecedent_index}"
|
157
|
-
|
158
|
-
create_alternative_node(anEntry)
|
159
|
-
|
157
|
+
|
158
|
+
create_alternative_node(anEntry)
|
159
|
+
|
160
160
|
when :revisit
|
161
161
|
# Retrieve the grammar symbol before the dot (if any)
|
162
162
|
prev_symbol = anEntry.prev_symbol
|
@@ -172,7 +172,7 @@ module Rley # This module is used as a namespace
|
|
172
172
|
create_epsilon_node(anEntry, anIndex)
|
173
173
|
end
|
174
174
|
curr_path.pop if curr_parent.kind_of?(SPPF::AlternativeNode)
|
175
|
-
end
|
175
|
+
end
|
176
176
|
end
|
177
177
|
end
|
178
178
|
|
@@ -187,8 +187,8 @@ module Rley # This module is used as a namespace
|
|
187
187
|
non_terminal = nonTSymb.nil? ? anEntry.vertex.non_terminal : nonTSymb
|
188
188
|
new_node = Rley::SPPF::NonTerminalNode.new(non_terminal, aRange)
|
189
189
|
entry2node[anEntry] = new_node
|
190
|
-
# puts "FOREST ADD #{curr_parent.key if curr_parent}/#{new_node.key}"
|
191
190
|
add_subnode(new_node)
|
191
|
+
# puts "FOREST ADD #{curr_parent.key if curr_parent}/#{new_node.key}"
|
192
192
|
|
193
193
|
return new_node
|
194
194
|
end
|
@@ -200,6 +200,7 @@ module Rley # This module is used as a namespace
|
|
200
200
|
range = curr_parent.range
|
201
201
|
alternative = Rley::SPPF::AlternativeNode.new(vertex, range)
|
202
202
|
add_subnode(alternative)
|
203
|
+
forest.is_ambiguous = true
|
203
204
|
# puts "FOREST ADD #{alternative.key}"
|
204
205
|
|
205
206
|
return alternative
|
@@ -231,13 +232,13 @@ module Rley # This module is used as a namespace
|
|
231
232
|
# Add the given node if not yet present in parse forest
|
232
233
|
def add_node_to_forest(aNode)
|
233
234
|
key_node = aNode.key
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
235
|
+
if forest.include?(key_node)
|
236
|
+
new_node = forest.key2node[key_node]
|
237
|
+
else
|
238
|
+
new_node = aNode
|
239
|
+
forest.key2node[key_node] = new_node
|
240
|
+
# puts "FOREST ADD #{key_node}"
|
241
|
+
end
|
241
242
|
add_subnode(new_node, false)
|
242
243
|
|
243
244
|
return new_node
|
@@ -1,5 +1,6 @@
|
|
1
1
|
require_relative 'token_node'
|
2
2
|
require_relative 'non_terminal_node'
|
3
|
+
require_relative 'alternative_node'
|
3
4
|
|
4
5
|
module Rley # This module is used as a namespace
|
5
6
|
module SPPF # This module is used as a namespace
|
@@ -18,17 +19,28 @@ module Rley # This module is used as a namespace
|
|
18
19
|
|
19
20
|
# A Hash with pairs of the kind node key => node
|
20
21
|
attr_reader(:key2node)
|
22
|
+
|
23
|
+
# A setter that tells that the parse is ambiguous.
|
24
|
+
attr_writer(:is_ambiguous)
|
25
|
+
|
21
26
|
|
22
27
|
# @param theRootNode [ParseForestNode] The root node of the parse tree.
|
23
28
|
def initialize(theRootNode)
|
24
29
|
@root = theRootNode
|
25
30
|
@key2node = {}
|
31
|
+
@is_ambiguous = false
|
26
32
|
end
|
27
33
|
|
28
34
|
# Returns true if the given node is present in the forest.
|
29
35
|
def include?(aNode)
|
30
36
|
return key2node.include?(aNode)
|
31
37
|
end
|
38
|
+
|
39
|
+
# Returns true if the parse encountered a structural ambiguity
|
40
|
+
# (i.e. more than one parse tree for the given input)
|
41
|
+
def ambiguous?()
|
42
|
+
return @is_ambiguous
|
43
|
+
end
|
32
44
|
|
33
45
|
|
34
46
|
# Part of the 'visitee' role in the Visitor design pattern.
|
@@ -18,9 +18,12 @@ module Rley # This module is used as a namespace
|
|
18
18
|
# The list of production rules for the grammar to build
|
19
19
|
attr_reader(:productions)
|
20
20
|
|
21
|
-
|
21
|
+
|
22
|
+
def initialize(&aBlock)
|
22
23
|
@symbols = {}
|
23
24
|
@productions = []
|
25
|
+
|
26
|
+
instance_exec(&aBlock) if block_given?
|
24
27
|
end
|
25
28
|
|
26
29
|
# Retrieve a grammar symbol from its name.
|
@@ -87,6 +90,8 @@ module Rley # This module is used as a namespace
|
|
87
90
|
|
88
91
|
return @grammar
|
89
92
|
end
|
93
|
+
|
94
|
+
alias rule add_production
|
90
95
|
|
91
96
|
private
|
92
97
|
|
@@ -17,7 +17,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
17
17
|
|
18
18
|
let(:sample_grammar) do
|
19
19
|
# Grammar based on paper from Elisabeth Scott
|
20
|
-
# "SPPF
|
20
|
+
# "SPPF-Style Parsing From Earley Recognizers" in
|
21
21
|
# Notes in Theoretical Computer Science 203, (2008), pp. 53-67
|
22
22
|
# contains a hidden left recursion and a cycle
|
23
23
|
builder = Syntax::GrammarBuilder.new
|
@@ -6,10 +6,10 @@ require_relative '../../../lib/rley/syntax/grammar_builder'
|
|
6
6
|
module Rley # Open this namespace to avoid module qualifier prefixes
|
7
7
|
module Syntax # Open this namespace to avoid module qualifier prefixes
|
8
8
|
describe GrammarBuilder do
|
9
|
-
context 'Initialization:' do
|
10
|
-
it '
|
9
|
+
context 'Initialization without argument:' do
|
10
|
+
it 'could be created without argument' do
|
11
11
|
expect { GrammarBuilder.new }.not_to raise_error
|
12
|
-
end
|
12
|
+
end
|
13
13
|
|
14
14
|
it 'should have no grammar symbols at start' do
|
15
15
|
expect(subject.symbols).to be_empty
|
@@ -19,6 +19,24 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
19
19
|
expect(subject.productions).to be_empty
|
20
20
|
end
|
21
21
|
end # context
|
22
|
+
|
23
|
+
context 'Initialization with argument:' do
|
24
|
+
it 'could be created with a block argument' do
|
25
|
+
expect do GrammarBuilder.new { nil }
|
26
|
+
end.not_to raise_error
|
27
|
+
end
|
28
|
+
|
29
|
+
it 'could have grammar symbols from block argument' do
|
30
|
+
instance = GrammarBuilder.new do
|
31
|
+
add_terminals('a', 'b', 'c')
|
32
|
+
end
|
33
|
+
expect(instance.symbols.size).to eq(3)
|
34
|
+
end
|
35
|
+
|
36
|
+
it 'should have no productions at start' do
|
37
|
+
expect(subject.productions).to be_empty
|
38
|
+
end
|
39
|
+
end # context
|
22
40
|
|
23
41
|
context 'Adding symbols:' do
|
24
42
|
it 'should build terminals from their names' do
|
@@ -79,7 +97,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
79
97
|
expect_rhs = [ subject['a'], subject['A'], subject['c'] ]
|
80
98
|
expect(new_prod.rhs.members).to eq(expect_rhs)
|
81
99
|
|
82
|
-
|
100
|
+
# GrammarBuilder#rule is an alias of add_production
|
101
|
+
subject.rule('A' => ['b'])
|
83
102
|
expect(subject.productions.size).to eq(3)
|
84
103
|
new_prod = subject.productions.last
|
85
104
|
expect(new_prod.lhs).to eq(subject['A'])
|
@@ -89,11 +108,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
89
108
|
|
90
109
|
context 'Building grammar:' do
|
91
110
|
subject do
|
92
|
-
instance = GrammarBuilder.new
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
111
|
+
instance = GrammarBuilder.new do
|
112
|
+
add_terminals('a', 'b', 'c')
|
113
|
+
add_production('S' => ['A'])
|
114
|
+
add_production('A' => %w(a A c))
|
115
|
+
add_production('A' => ['b'])
|
116
|
+
end
|
117
|
+
|
97
118
|
instance
|
98
119
|
end
|
99
120
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-12-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|