rley 0.6.03 → 0.6.04
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/README.md +6 -6
- data/examples/NLP/pico_en_demo.rb +7 -7
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +10 -2
- data/lib/rley/parse_rep/ast_base_builder.rb +2 -2
- data/lib/rley/parse_rep/parse_tree_builder.rb +1 -0
- data/lib/rley/parser/error_reason.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +18 -8
- data/lib/rley/parser/parse_walker_factory.rb +2 -0
- data/lib/rley/syntax/grammar_builder.rb +5 -4
- data/lib/rley/syntax/production.rb +11 -1
- data/spec/rley/support/grammar_abc_helper.rb +1 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +9 -1
- data/spec/rley/syntax/production_spec.rb +8 -0
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7dde019f49699000c53d19d500ccf1e5497ab155
|
4
|
+
data.tar.gz: 967a24d5da4566776c5d011971801aec07e0dbc8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6415d986dd8296c28f7af9d32b8329a03c8861386830eb915ffa196de311848420cd35a171d661a7ce68948d428b224f1a8c2f009aa2b734477eecb9bf32a679
|
7
|
+
data.tar.gz: a0453cd6917b6658d549053216cfb9134ec5dc3e7de7e6cf0e2f3240502532077897264d456be748c7bdd8a5adeb9104f266b8004f7d19fef0e9fe9db3ade1c9
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,12 @@
|
|
1
|
+
### 0.6.04 / 2018-03-30
|
2
|
+
* [CHANGE] Method `GrammarBuilder#add_production`: Simplified rule entry syntax.
|
3
|
+
* [CHANGE] File `examples/NLP/pico_en_demo.rb` updated to reflect the simplified rule syntax.
|
4
|
+
* [CHANGE] File `README.md` updated to reflect the simplified rule syntax.
|
5
|
+
* [CHANGE] Struct `EngineConfig`: added the `diagnose` field.
|
6
|
+
* [FIX] Method `Production#initialize`: Now raises an exception when rhs is nil
|
7
|
+
* [FIX] Method `ParseTreeBuilder#create_tree`: Now raises an exception when root node of tree is nil
|
8
|
+
* [FIX] Method `GFGParsing#nullable_rule`: Nasty bug with antecedent link fixed.
|
9
|
+
|
1
10
|
### 0.6.03 / 2018-03-10
|
2
11
|
* [CHANGE] File `lib/parser/error_reason.rb` Improved error location reporting
|
3
12
|
* [CHANGE] File `README.md` updated to reflect the improved error location reporting.
|
data/README.md
CHANGED
@@ -107,13 +107,13 @@ The subset of English grammar is based on an example from the NLTK book.
|
|
107
107
|
add_terminals('Determiner', 'Preposition')
|
108
108
|
|
109
109
|
# Here we define the productions (= grammar rules)
|
110
|
-
rule 'S' =>
|
110
|
+
rule 'S' => 'NP VP'
|
111
111
|
rule 'NP' => 'Proper-Noun'
|
112
|
-
rule 'NP' =>
|
113
|
-
rule 'NP' =>
|
114
|
-
rule 'VP' =>
|
115
|
-
rule 'VP' =>
|
116
|
-
rule 'PP' =>
|
112
|
+
rule 'NP' => 'Determiner Noun'
|
113
|
+
rule 'NP' => 'Determiner Noun PP'
|
114
|
+
rule 'VP' => 'Verb NP'
|
115
|
+
rule 'VP' => 'Verb NP PP'
|
116
|
+
rule 'PP' => 'Preposition NP'
|
117
117
|
end
|
118
118
|
```
|
119
119
|
|
@@ -21,13 +21,13 @@ engine.build_grammar do
|
|
21
21
|
add_terminals('Determiner', 'Preposition')
|
22
22
|
|
23
23
|
# Here we define the productions (= grammar rules)
|
24
|
-
rule 'S' =>
|
24
|
+
rule 'S' => 'NP VP'
|
25
25
|
rule 'NP' => 'Proper-Noun'
|
26
|
-
rule 'NP' =>
|
27
|
-
rule 'NP' =>
|
28
|
-
rule 'VP' =>
|
29
|
-
rule 'VP' =>
|
30
|
-
rule 'PP' =>
|
26
|
+
rule 'NP' => 'Determiner Noun'
|
27
|
+
rule 'NP' => 'Determiner Noun PP'
|
28
|
+
rule 'VP' => 'Verb NP'
|
29
|
+
rule 'VP' => 'Verb NP PP'
|
30
|
+
rule 'PP' => 'Preposition NP'
|
31
31
|
end
|
32
32
|
|
33
33
|
########################################
|
@@ -98,7 +98,7 @@ end
|
|
98
98
|
|
99
99
|
########################################
|
100
100
|
# Step 5. Parse the input
|
101
|
-
input_to_parse = 'John saw Mary with a '
|
101
|
+
input_to_parse = 'John saw Mary with a telescope'
|
102
102
|
# input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
|
103
103
|
# Convert input text into a sequence of token objects...
|
104
104
|
tokens = tokenizer(input_to_parse)
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/engine.rb
CHANGED
@@ -5,12 +5,14 @@ require_relative './parse_rep/parse_tree_factory'
|
|
5
5
|
module Rley # This module is used as a namespace
|
6
6
|
EngineConfig = Struct.new(
|
7
7
|
:parse_repr,
|
8
|
-
:repr_builder
|
8
|
+
:repr_builder,
|
9
|
+
:diagnose
|
9
10
|
) do
|
10
11
|
def initialize()
|
11
12
|
super()
|
12
13
|
self.parse_repr = :parse_tree
|
13
14
|
self.repr_builder = :default
|
15
|
+
self.diagnose = false
|
14
16
|
end
|
15
17
|
end
|
16
18
|
|
@@ -18,7 +20,12 @@ module Rley # This module is used as a namespace
|
|
18
20
|
# an Engine object provides a higher-level interface that shields
|
19
21
|
# Rley client code from the lower-level classes.
|
20
22
|
class Engine
|
23
|
+
# @!attribute [r] configuration
|
24
|
+
# @return [EngineConfig] the engine's configuration
|
21
25
|
attr_reader :configuration
|
26
|
+
|
27
|
+
# @!attribute [r] grammar
|
28
|
+
# @return [Rley::Syntax::Grammar] the grammar of the language to parse
|
22
29
|
attr_reader :grammar
|
23
30
|
|
24
31
|
# Constructor.
|
@@ -28,7 +35,7 @@ module Rley # This module is used as a namespace
|
|
28
35
|
end
|
29
36
|
|
30
37
|
# Factory method.
|
31
|
-
# @param
|
38
|
+
# @param aBlock [Proc, Lambda] Code block for creating the grammar.
|
32
39
|
def build_grammar(&aBlock)
|
33
40
|
builder = Rley::Syntax::GrammarBuilder.new(&aBlock)
|
34
41
|
@grammar = builder.grammar
|
@@ -53,6 +60,7 @@ module Rley # This module is used as a namespace
|
|
53
60
|
tokens << a_token
|
54
61
|
end
|
55
62
|
parser = build_parser(grammar)
|
63
|
+
parser.gf_graph.diagnose if configuration.diagnose
|
56
64
|
return parser.parse(tokens)
|
57
65
|
end
|
58
66
|
|
@@ -68,7 +68,7 @@ module Rley # This module is used as a namespace
|
|
68
68
|
# Simply return an epsilon symbol
|
69
69
|
# @param _range [Lexical::TokenRange]
|
70
70
|
# @param _tokens [Array<Lexical::Token>]
|
71
|
-
# @param
|
71
|
+
# @param _children [Array<Object>]
|
72
72
|
def return_epsilon(_range, _tokens, _children)
|
73
73
|
return nil
|
74
74
|
end
|
@@ -84,7 +84,7 @@ module Rley # This module is used as a namespace
|
|
84
84
|
|
85
85
|
# Factory method for creating a node object for the given
|
86
86
|
# input token.
|
87
|
-
# @param
|
87
|
+
# @param aProduction [Syntax::Production] Relevant production rule
|
88
88
|
# @param aTerminal [Syntax::Terminal] Terminal associated with the token
|
89
89
|
# @param aTokenPosition [Integer] Position of token in the input stream
|
90
90
|
# @param aToken [Lexical::Token] The input token
|
@@ -79,6 +79,7 @@ module Rley # This module is used as a namespace
|
|
79
79
|
# Create a parse tree object with given
|
80
80
|
# node as root node.
|
81
81
|
def create_tree(aRootNode)
|
82
|
+
raise StandardError, 'Root node of parse tree is nil!' if aRootNode.nil?
|
82
83
|
return Rley::PTree::ParseTree.new(aRootNode)
|
83
84
|
end
|
84
85
|
|
@@ -9,7 +9,7 @@ module Rley # Module used as a namespace
|
|
9
9
|
attr_reader(:rank)
|
10
10
|
|
11
11
|
# Constructor
|
12
|
-
# @param
|
12
|
+
# @param aRank [Fixnum] The sequence number of the offending input token.
|
13
13
|
def initialize(aRank)
|
14
14
|
@rank = aRank
|
15
15
|
end
|
@@ -66,18 +66,29 @@ module Rley # This module is used as a namespace
|
|
66
66
|
# This method is invoked when a dotted entry is added
|
67
67
|
# to the parse entry set of the form [A => alpha . B beta, k]
|
68
68
|
# and B is nullable
|
69
|
-
# Then the
|
70
|
-
#
|
69
|
+
# Then the following entries are added to the current sigma set:
|
70
|
+
# [.B, i]
|
71
|
+
# [B => ., i] TODO: what if indirectly nullable?
|
72
|
+
# [B., i]
|
73
|
+
# [A => alpha B . beta, k]
|
71
74
|
def nullable_rule(anEntry, aPosition)
|
72
75
|
next_symbol = anEntry.next_symbol
|
73
|
-
end_vertex = gf_graph.end_vertex_for[next_symbol]
|
74
76
|
pos = aPosition
|
75
|
-
|
77
|
+
start = gf_graph.start_vertex_for[next_symbol]
|
78
|
+
start_entry = apply_rule(anEntry, start, pos, pos, :nullable_rule)
|
79
|
+
|
80
|
+
end_vertex = gf_graph.end_vertex_for[next_symbol]
|
81
|
+
end_entry = push_entry(end_vertex, pos, pos, :nullable_rule)
|
82
|
+
|
83
|
+
start.edges.each do |edge|
|
84
|
+
succ = edge.successor # succ always an ItemVertex
|
85
|
+
next if succ.dotted_item.production.generative?
|
86
|
+
succ_entry = apply_rule(start_entry, succ, pos, pos, :nullable_rule)
|
87
|
+
apply_rule(succ_entry, end_vertex, pos, pos, :nullable_rule)
|
88
|
+
end
|
89
|
+
|
76
90
|
curr_vertex = anEntry.vertex
|
77
91
|
next_vertex = curr_vertex.shortcut.successor
|
78
|
-
|
79
|
-
# first pos == origin
|
80
|
-
# second pos == position
|
81
92
|
apply_rule(end_entry, next_vertex, anEntry.origin, pos, :nullable_rule)
|
82
93
|
end
|
83
94
|
|
@@ -249,7 +260,6 @@ END_MSG
|
|
249
260
|
def unexpected_token(aPosition)
|
250
261
|
unexpected = tokens[aPosition]
|
251
262
|
expected = chart.sets[aPosition].expected_terminals
|
252
|
-
|
253
263
|
reason = UnexpectedToken.new(aPosition, unexpected, expected)
|
254
264
|
faulty(reason)
|
255
265
|
end
|
@@ -63,6 +63,8 @@ module Rley # This module is used as a namespace
|
|
63
63
|
receiver << event unless event.nil?
|
64
64
|
|
65
65
|
if ctx.curr_entry.orphan? # No antecedent?...
|
66
|
+
err_msg = "No antecedent for #{ctx.curr_entry} at rank #{ctx.entry_set_index}"
|
67
|
+
raise StandardError, err_msg unless ctx.curr_entry.start_entry?
|
66
68
|
break if ctx.backtrack_points.empty?
|
67
69
|
receiver << use_backtrack_point(ctx)
|
68
70
|
receiver << visit_entry(ctx.curr_entry, ctx)
|
@@ -73,13 +73,14 @@ module Rley # This module is used as a namespace
|
|
73
73
|
lhs = get_nonterminal(lhs_name)
|
74
74
|
case rhs_repr
|
75
75
|
when Array
|
76
|
-
|
76
|
+
rhs_members = rhs_repr.map { |name| get_nonterminal(name) }
|
77
77
|
when String
|
78
|
-
|
78
|
+
# rhs_members = [get_nonterminal(rhs_repr)]
|
79
|
+
rhs_members = rhs_repr.scan(/\S+/).map { |name| get_nonterminal(name) }
|
79
80
|
when Terminal
|
80
|
-
|
81
|
+
rhs_members = [rhs_repr]
|
81
82
|
end
|
82
|
-
new_prod = Production.new(lhs,
|
83
|
+
new_prod = Production.new(lhs, rhs_members)
|
83
84
|
productions << new_prod
|
84
85
|
end
|
85
86
|
|
@@ -35,7 +35,7 @@ module Rley # This module is used as a namespace
|
|
35
35
|
# @param theSymbols [list<Terminal | NonTerminal>] symbols of rhs.
|
36
36
|
def initialize(aNonTerminal, theSymbols)
|
37
37
|
@lhs = valid_lhs(aNonTerminal)
|
38
|
-
@rhs =
|
38
|
+
@rhs = valid_rhs(theSymbols)
|
39
39
|
end
|
40
40
|
|
41
41
|
# Is the rhs empty?
|
@@ -83,6 +83,16 @@ module Rley # This module is used as a namespace
|
|
83
83
|
|
84
84
|
return aNonTerminal
|
85
85
|
end
|
86
|
+
|
87
|
+
def valid_rhs(theSymbols)
|
88
|
+
if theSymbols.nil?
|
89
|
+
msg_prefix = 'Right side of a production of the kind '
|
90
|
+
msg_suffix = "'#{lhs.name}' => ... is nil."
|
91
|
+
raise StandardError, msg_prefix + msg_suffix
|
92
|
+
end
|
93
|
+
|
94
|
+
return SymbolSeq.new(theSymbols)
|
95
|
+
end
|
86
96
|
end # class
|
87
97
|
end # module
|
88
98
|
end # module
|
@@ -97,10 +97,18 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
97
97
|
expect(new_prod.lhs).to eq(subject['A'])
|
98
98
|
expect_rhs = [subject['a'], subject['A'], subject['c']]
|
99
99
|
expect(new_prod.rhs.members).to eq(expect_rhs)
|
100
|
+
|
101
|
+
# Try another syntax
|
102
|
+
subject.add_production('A' => 'a A c')
|
103
|
+
expect(subject.productions.size).to eq(3)
|
104
|
+
new_prod = subject.productions.last
|
105
|
+
expect(new_prod.lhs).to eq(subject['A'])
|
106
|
+
expect_rhs = [subject['a'], subject['A'], subject['c']]
|
107
|
+
expect(new_prod.rhs.members).to eq(expect_rhs)
|
100
108
|
|
101
109
|
# GrammarBuilder#rule is an alias of add_production
|
102
110
|
subject.rule('A' => ['b'])
|
103
|
-
expect(subject.productions.size).to eq(
|
111
|
+
expect(subject.productions.size).to eq(4)
|
104
112
|
new_prod = subject.productions.last
|
105
113
|
expect(new_prod.lhs).to eq(subject['A'])
|
106
114
|
expect(new_prod.rhs[0]).to eq(subject['b'])
|
@@ -22,6 +22,14 @@ module Rley # Open this namespace to avoid module qualifier prefixes
|
|
22
22
|
it 'should be created with a non-terminal and a symbol sequence' do
|
23
23
|
expect { Production.new(sentence, sequence) }.not_to raise_error
|
24
24
|
end
|
25
|
+
|
26
|
+
it 'should complain when its rhs is nil' do
|
27
|
+
err = StandardError
|
28
|
+
msg_prefix = 'Right side of a production of the kind '
|
29
|
+
msg_suffix = "'Sentence' => ... is nil."
|
30
|
+
msg = msg_prefix + msg_suffix
|
31
|
+
expect { Production.new(sentence, nil) }.to raise_error(err, msg)
|
32
|
+
end
|
25
33
|
|
26
34
|
it 'should know its lhs' do
|
27
35
|
expect(subject.lhs).to eq(sentence)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.
|
4
|
+
version: 0.6.04
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-03-
|
11
|
+
date: 2018-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: coveralls
|