rley 0.6.03 → 0.6.04
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +6 -6
- data/examples/NLP/pico_en_demo.rb +7 -7
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +10 -2
- data/lib/rley/parse_rep/ast_base_builder.rb +2 -2
- data/lib/rley/parse_rep/parse_tree_builder.rb +1 -0
- data/lib/rley/parser/error_reason.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +18 -8
- data/lib/rley/parser/parse_walker_factory.rb +2 -0
- data/lib/rley/syntax/grammar_builder.rb +5 -4
- data/lib/rley/syntax/production.rb +11 -1
- data/spec/rley/support/grammar_abc_helper.rb +1 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +9 -1
- data/spec/rley/syntax/production_spec.rb +8 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7dde019f49699000c53d19d500ccf1e5497ab155
|
4
|
+
data.tar.gz: 967a24d5da4566776c5d011971801aec07e0dbc8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6415d986dd8296c28f7af9d32b8329a03c8861386830eb915ffa196de311848420cd35a171d661a7ce68948d428b224f1a8c2f009aa2b734477eecb9bf32a679
|
7
|
+
data.tar.gz: a0453cd6917b6658d549053216cfb9134ec5dc3e7de7e6cf0e2f3240502532077897264d456be748c7bdd8a5adeb9104f266b8004f7d19fef0e9fe9db3ade1c9
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
### 0.6.04 / 2018-03-30
|
2
|
+
* [CHANGE] Method `GrammarBuilder#add_production` Simplified rule entry syntax
|
3
|
+
* [CHANGE] File `examples/NLP/pico_en_demo.rb` updated to reflect the simplified rule syntax.
|
4
|
+
* [CHANGE] File `README.md` updated to reflect the simplified rule syntax.
|
5
|
+
* [CHANGE] Struct EngineConfig added the diagnose field.
|
6
|
+
* [FIX] Method `Production#initialize`: Now raises an exception when rhs is nil
|
7
|
+
* [FIX] Method `ParseTreeBuilder#create_tree`: Now raises an exception when root node of tree is nil
|
8
|
+
* [FIX] Method `GFGParsing#nullable_rule`: Nasty bug with antecedent link fixed.
|
9
|
+
|
1
10
|
### 0.6.03 / 2018-03-10
|
2
11
|
* [CHANGE] File `lib/parser/error_reason.rb` Improved error location reporting
|
3
12
|
* [CHANGE] File `README.md` updated to reflect the improved error location reporting.
|
data/README.md
CHANGED
@@ -107,13 +107,13 @@ The subset of English grammar is based on an example from the NLTK book.
|
|
107
107
|
add_terminals('Determiner', 'Preposition')
|
108
108
|
|
109
109
|
# Here we define the productions (= grammar rules)
|
110
|
-
rule 'S' =>
|
110
|
+
rule 'S' => 'NP VP'
|
111
111
|
rule 'NP' => 'Proper-Noun'
|
112
|
-
rule 'NP' =>
|
113
|
-
rule 'NP' =>
|
114
|
-
rule 'VP' =>
|
115
|
-
rule 'VP' =>
|
116
|
-
rule 'PP' =>
|
112
|
+
rule 'NP' => 'Determiner Noun'
|
113
|
+
rule 'NP' => 'Determiner Noun PP'
|
114
|
+
rule 'VP' => 'Verb NP'
|
115
|
+
rule 'VP' => 'Verb NP PP'
|
116
|
+
rule 'PP' => 'Preposition NP'
|
117
117
|
end
|
118
118
|
```
|
119
119
|
|
@@ -21,13 +21,13 @@ engine.build_grammar do
|
|
21
21
|
add_terminals('Determiner', 'Preposition')
|
22
22
|
|
23
23
|
# Here we define the productions (= grammar rules)
|
24
|
-
rule 'S' =>
|
24
|
+
rule 'S' => 'NP VP'
|
25
25
|
rule 'NP' => 'Proper-Noun'
|
26
|
-
rule 'NP' =>
|
27
|
-
rule 'NP' =>
|
28
|
-
rule 'VP' =>
|
29
|
-
rule 'VP' =>
|
30
|
-
rule 'PP' =>
|
26
|
+
rule 'NP' => 'Determiner Noun'
|
27
|
+
rule 'NP' => 'Determiner Noun PP'
|
28
|
+
rule 'VP' => 'Verb NP'
|
29
|
+
rule 'VP' => 'Verb NP PP'
|
30
|
+
rule 'PP' => 'Preposition NP'
|
31
31
|
end
|
32
32
|
|
33
33
|
########################################
|
@@ -98,7 +98,7 @@ end
|
|
98
98
|
|
99
99
|
########################################
|
100
100
|
# Step 5. Parse the input
|
101
|
-
input_to_parse = 'John saw Mary with a '
|
101
|
+
input_to_parse = 'John saw Mary with a telescope'
|
102
102
|
# input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
|
103
103
|
# Convert input text into a sequence of token objects...
|
104
104
|
tokens = tokenizer(input_to_parse)
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/engine.rb
CHANGED
@@ -5,12 +5,14 @@ require_relative './parse_rep/parse_tree_factory'
|
|
5
5
|
module Rley # This module is used as a namespace
|
6
6
|
EngineConfig = Struct.new(
|
7
7
|
:parse_repr,
|
8
|
-
:repr_builder
|
8
|
+
:repr_builder,
|
9
|
+
:diagnose
|
9
10
|
) do
|
10
11
|
def initialize()
|
11
12
|
super()
|
12
13
|
self.parse_repr = :parse_tree
|
13
14
|
self.repr_builder = :default
|
15
|
+
self.diagnose = false
|
14
16
|
end
|
15
17
|
end
|
16
18
|
|
@@ -18,7 +20,12 @@ module Rley # This module is used as a namespace
|
|
18
20
|
# an Engine object provides a higher-level interface that shields
|
19
21
|
# Rley client code from the lower-level classes.
|
20
22
|
class Engine
|
23
|
+
# @!attribute [r] configuration
|
24
|
+
# @return [EngineConfig] the engine's configuration
|
21
25
|
attr_reader :configuration
|
26
|
+
|
27
|
+
# @!attribute [r] grammar
|
28
|
+
# @return [Rley::Syntax::Grammar] the grammar of the language to parse
|
22
29
|
attr_reader :grammar
|
23
30
|
|
24
31
|
# Constructor.
|
@@ -28,7 +35,7 @@ module Rley # This module is used as a namespace
|
|
28
35
|
end
|
29
36
|
|
30
37
|
# Factory method.
|
31
|
-
# @param
|
38
|
+
# @param aBlock [Proc, Lambda] Code block for creating the grammar.
|
32
39
|
def build_grammar(&aBlock)
|
33
40
|
builder = Rley::Syntax::GrammarBuilder.new(&aBlock)
|
34
41
|
@grammar = builder.grammar
|
@@ -53,6 +60,7 @@ module Rley # This module is used as a namespace
|
|
53
60
|
tokens << a_token
|
54
61
|
end
|
55
62
|
parser = build_parser(grammar)
|
63
|
+
parser.gf_graph.diagnose if configuration.diagnose
|
56
64
|
return parser.parse(tokens)
|
57
65
|
end
|
58
66
|
|
@@ -68,7 +68,7 @@ module Rley # This module is used as a namespace
|
|
68
68
|
# Simply return an epsilon symbol
|
69
69
|
# @param _range [Lexical::TokenRange]
|
70
70
|
# @param _tokens [Array<Lexical::Token>]
|
71
|
-
# @param
|
71
|
+
# @param _children [Array<Object>]
|
72
72
|
def return_epsilon(_range, _tokens, _children)
|
73
73
|
return nil
|
74
74
|
end
|
@@ -84,7 +84,7 @@ module Rley # This module is used as a namespace
|
|
84
84
|
|
85
85
|
# Factory method for creating a node object for the given
|
86
86
|
# input token.
|
87
|
-
# @param
|
87
|
+
# @param aProduction [Syntax::Production] Relevant production rule
|
88
88
|
# @param aTerminal [Syntax::Terminal] Terminal associated with the token
|
89
89
|
# @param aTokenPosition [Integer] Position of token in the input stream
|
90
90
|
# @param aToken [Lexical::Token] The input token
|
@@ -79,6 +79,7 @@ module Rley # This module is used as a namespace
|
|
79
79
|
# Create a parse tree object with given
|
80
80
|
# node as root node.
|
81
81
|
def create_tree(aRootNode)
|
82
|
+
raise StandardError, 'Root node of parse tree is nil!' if aRootNode.nil?
|
82
83
|
return Rley::PTree::ParseTree.new(aRootNode)
|
83
84
|
end
|
84
85
|
|
@@ -9,7 +9,7 @@ module Rley # Module used as a namespace
|
|
9
9
|
attr_reader(:rank)
|
10
10
|
|
11
11
|
# Constructor
|
12
|
-
# @param
|
12
|
+
# @param aRank [Fixnum] The sequence number of the offending input token.
|
13
13
|
def initialize(aRank)
|
14
14
|
@rank = aRank
|
15
15
|
end
|
@@ -66,18 +66,29 @@ module Rley # This module is used as a namespace
|
|
66
66
|
# This method is invoked when a dotted entry is added
|
67
67
|
# to the parse entry set of the form [A => alpha . B beta, k]
|
68
68
|
# and B is nullable
|
69
|
-
# Then the
|
70
|
-
#
|
69
|
+
# Then the following entries are added to the current sigma set:
|
70
|
+
# [.B, i]
|
71
|
+
# [B => ., i] TODO: what if indirectly nullable?
|
72
|
+
# [B., i]
|
73
|
+
# [A => alpha B . beta, k]
|
71
74
|
def nullable_rule(anEntry, aPosition)
|
72
75
|
next_symbol = anEntry.next_symbol
|
73
|
-
end_vertex = gf_graph.end_vertex_for[next_symbol]
|
74
76
|
pos = aPosition
|
75
|
-
|
77
|
+
start = gf_graph.start_vertex_for[next_symbol]
|
78
|
+
start_entry = apply_rule(anEntry, start, pos, pos, :nullable_rule)
|
79
|
+
|
80
|
+
end_vertex = gf_graph.end_vertex_for[next_symbol]
|
81
|
+
end_entry = push_entry(end_vertex, pos, pos, :nullable_rule)
|
82
|
+
|
83
|
+
start.edges.each do |edge|
|
84
|
+
succ = edge.successor # succ always an ItemVertex
|
85
|
+
next if succ.dotted_item.production.generative?
|
86
|
+
succ_entry = apply_rule(start_entry, succ, pos, pos, :nullable_rule)
|
87
|
+
apply_rule(succ_entry, end_vertex, pos, pos, :nullable_rule)
|
88
|
+
end
|
89
|
+
|
76
90
|
curr_vertex = anEntry.vertex
|
77
91
|
next_vertex = curr_vertex.shortcut.successor
|
78
|
-
|
79
|
-
# first pos == origin
|
80
|
-
# second pos == position
|
81
92
|
apply_rule(end_entry, next_vertex, anEntry.origin, pos, :nullable_rule)
|
82
93
|
end
|
83
94
|
|
@@ -249,7 +260,6 @@ END_MSG
|
|
249
260
|
def unexpected_token(aPosition)
|
250
261
|
unexpected = tokens[aPosition]
|
251
262
|
expected = chart.sets[aPosition].expected_terminals
|
252
|
-
|
253
263
|
reason = UnexpectedToken.new(aPosition, unexpected, expected)
|
254
264
|
faulty(reason)
|
255
265
|
end
|
@@ -63,6 +63,8 @@ module Rley # This module is used as a namespace
|
|
63
63
|
receiver << event unless event.nil?
|
64
64
|
|
65
65
|
if ctx.curr_entry.orphan? # No antecedent?...
|
66
|
+
err_msg = "No antecedent for #{ctx.curr_entry} at rank #{ctx.entry_set_index}"
|
67
|
+
raise StandardError, err_msg unless ctx.curr_entry.start_entry?
|
66
68
|
break if ctx.backtrack_points.empty?
|
67
69
|
receiver << use_backtrack_point(ctx)
|
68
70
|
receiver << visit_entry(ctx.curr_entry, ctx)
|
@@ -73,13 +73,14 @@ module Rley # This module is used as a namespace
|
|
73
73
|
lhs = get_nonterminal(lhs_name)
|
74
74
|
case rhs_repr
|
75
75
|
when Array
|
76
|
-
|
76
|
+
rhs_members = rhs_repr.map { |name| get_nonterminal(name) }
|
77
77
|
when String
|
78
|
-
|
78
|
+
# rhs_members = [get_nonterminal(rhs_repr)]
|
79
|
+
rhs_members = rhs_repr.scan(/\S+/).map { |name| get_nonterminal(name) }
|
79
80
|
when Terminal
|
80
|
-
|
81
|
+
rhs_members = [rhs_repr]
|
81
82
|
end
|
82
|
-
new_prod = Production.new(lhs,
|
83
|
+
new_prod = Production.new(lhs, rhs_members)
|
83
84
|
productions << new_prod
|
84
85
|
end
|
85
86
|
|
@@ -35,7 +35,7 @@ module Rley # This module is used as a namespace
|
|
35
35
|
# @param theSymbols [list<Terminal | NonTerminal>] symbols of rhs.
|
36
36
|
def initialize(aNonTerminal, theSymbols)
|
37
37
|
@lhs = valid_lhs(aNonTerminal)
|
38
|
-
@rhs =
|
38
|
+
@rhs = valid_rhs(theSymbols)
|
39
39
|
end
|
40
40
|
|
41
41
|
# Is the rhs empty?
|
@@ -83,6 +83,16 @@ module Rley # This module is used as a namespace
|
|
83
83
|
|
84
84
|
return aNonTerminal
|
85
85
|
end
|
86
|
+
|
87
|
+
def valid_rhs(theSymbols)
|
88
|
+
if theSymbols.nil?
|
89
|
+
msg_prefix = 'Right side of a production of the kind '
|
90
|
+
msg_suffix = "'#{lhs.name}' => ... is nil."
|
91
|
+
raise StandardError, msg_prefix + msg_suffix
|
92
|
+
end
|
93
|
+
|
94
|
+
return SymbolSeq.new(theSymbols)
|
95
|
+
end
|
86
96
|
end # class
|
87
97
|
end # module
|
88
98
|
end # module
|
@@ -97,10 +97,18 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
97
97
|
expect(new_prod.lhs).to eq(subject['A'])
|
98
98
|
expect_rhs = [subject['a'], subject['A'], subject['c']]
|
99
99
|
expect(new_prod.rhs.members).to eq(expect_rhs)
|
100
|
+
|
101
|
+
# Try another syntax
|
102
|
+
subject.add_production('A' => 'a A c')
|
103
|
+
expect(subject.productions.size).to eq(3)
|
104
|
+
new_prod = subject.productions.last
|
105
|
+
expect(new_prod.lhs).to eq(subject['A'])
|
106
|
+
expect_rhs = [subject['a'], subject['A'], subject['c']]
|
107
|
+
expect(new_prod.rhs.members).to eq(expect_rhs)
|
100
108
|
|
101
109
|
# GrammarBuilder#rule is an alias of add_production
|
102
110
|
subject.rule('A' => ['b'])
|
103
|
-
expect(subject.productions.size).to eq(
|
111
|
+
expect(subject.productions.size).to eq(4)
|
104
112
|
new_prod = subject.productions.last
|
105
113
|
expect(new_prod.lhs).to eq(subject['A'])
|
106
114
|
expect(new_prod.rhs[0]).to eq(subject['b'])
|
@@ -22,6 +22,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
22
22
|
it 'should be created with a non-terminal and a symbol sequence' do
|
23
23
|
expect { Production.new(sentence, sequence) }.not_to raise_error
|
24
24
|
end
|
25
|
+
|
26
|
+
it 'should complain when its rhs is nil' do
|
27
|
+
err = StandardError
|
28
|
+
msg_prefix = 'Right side of a production of the kind '
|
29
|
+
msg_suffix = "'Sentence' => ... is nil."
|
30
|
+
msg = msg_prefix + msg_suffix
|
31
|
+
expect { Production.new(sentence, nil) }.to raise_error(err, msg)
|
32
|
+
end
|
25
33
|
|
26
34
|
it 'should know its lhs' do
|
27
35
|
expect(subject.lhs).to eq(sentence)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.04
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-03-
|
11
|
+
date: 2018-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: coveralls
|