rley 0.5.10 → 0.5.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -1
- data/appveyor.yml +6 -5
- data/examples/NLP/engtagger.rb +176 -0
- data/examples/general/SRL/lib/ast_builder.rb +217 -21
- data/examples/general/SRL/lib/grammar.rb +33 -5
- data/examples/general/SRL/lib/regex/alternation.rb +30 -0
- data/examples/general/SRL/lib/regex/char_class.rb +28 -22
- data/examples/general/SRL/lib/regex/char_shorthand.rb +50 -0
- data/examples/general/SRL/lib/regex/character.rb +5 -3
- data/examples/general/SRL/lib/regex/concatenation.rb +32 -0
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +29 -0
- data/examples/general/SRL/lib/regex/wildcard.rb +26 -0
- data/examples/general/SRL/lib/regex_repr.rb +5 -0
- data/examples/general/SRL/lib/tokenizer.rb +28 -3
- data/examples/general/SRL/spec/integration_spec.rb +151 -8
- data/examples/general/SRL/spec/tokenizer_spec.rb +12 -0
- data/examples/general/left.rb +36 -0
- data/examples/general/right.rb +36 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/edge.rb +12 -1
- data/lib/rley/gfg/grm_flow_graph.rb +21 -1
- data/lib/rley/gfg/item_vertex.rb +1 -1
- data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
- data/lib/rley/gfg/start_vertex.rb +1 -0
- data/lib/rley/gfg/vertex.rb +27 -0
- data/lib/rley/lexical/token.rb +1 -0
- data/lib/rley/parser/error_reason.rb +2 -1
- data/lib/rley/parser/gfg_chart.rb +14 -0
- data/lib/rley/parser/gfg_earley_parser.rb +0 -1
- data/lib/rley/parser/gfg_parsing.rb +4 -3
- data/lib/rley/parser/parse_entry.rb +33 -3
- data/lib/rley/parser/parse_entry_set.rb +14 -2
- data/lib/rley/parser/parse_tree_builder.rb +1 -1
- data/lib/rley/parser/parse_walker_factory.rb +0 -1
- data/lib/rley/syntax/grm_symbol.rb +2 -0
- data/lib/rley/syntax/production.rb +15 -3
- data/lib/rley/syntax/symbol_seq.rb +16 -1
- data/spec/rley/gfg/end_vertex_spec.rb +9 -1
- data/spec/rley/gfg/grm_flow_graph_spec.rb +9 -0
- data/spec/rley/gfg/item_vertex_spec.rb +9 -0
- data/spec/rley/gfg/start_vertex_spec.rb +9 -1
- data/spec/rley/parser/gfg_parsing_spec.rb +0 -1
- data/spec/rley/parser/parse_entry_set_spec.rb +15 -0
- data/spec/rley/parser/parse_entry_spec.rb +24 -13
- data/spec/rley/parser/parse_tracer_spec.rb +1 -1
- data/spec/rley/syntax/production_spec.rb +10 -0
- data/spec/rley/syntax/symbol_seq_spec.rb +5 -0
- metadata +10 -2
@@ -62,6 +62,18 @@ module SRL
|
|
62
62
|
end
|
63
63
|
end # context
|
64
64
|
|
65
|
+
context 'String literal tokenization:' do
|
66
|
+
it "should recognize 'literally ...'" do
|
67
|
+
input = 'literally "hello"'
|
68
|
+
subject.scanner.string = input
|
69
|
+
expectations = [
|
70
|
+
['LITERALLY', 'literally'],
|
71
|
+
['STRING_LIT', 'hello']
|
72
|
+
]
|
73
|
+
match_expectations(subject, expectations)
|
74
|
+
end
|
75
|
+
end # context
|
76
|
+
|
65
77
|
context 'Character range tokenization:' do
|
66
78
|
it "should recognize 'letter from ... to ...'" do
|
67
79
|
input = 'letter a to f'
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Purpose: define a grammar with left-recursive rule
|
2
|
+
require 'rley' # Load Rley library
|
3
|
+
|
4
|
+
# Instantiate a builder object that will build the grammar for us
|
5
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
6
|
+
add_terminals('DOT')
|
7
|
+
|
8
|
+
# Grammar with left recursive rule.
|
9
|
+
rule 'l_dots' => []
|
10
|
+
rule 'l_dots' => %w[l_dots DOT]
|
11
|
+
end
|
12
|
+
|
13
|
+
# And now, let's build the grammar...
|
14
|
+
grammar = builder.grammar
|
15
|
+
|
16
|
+
# Highly simplified tokenizer implementation.
|
17
|
+
def tokenizer(aText, aGrammar)
|
18
|
+
tokens = aText.scan(/\./).map do |dot|
|
19
|
+
terminal = aGrammar.name2symbol['DOT']
|
20
|
+
Rley::Lexical::Token.new(dot, terminal)
|
21
|
+
end
|
22
|
+
|
23
|
+
return tokens
|
24
|
+
end
|
25
|
+
|
26
|
+
input_to_parse = '.' * 500 # Input = 500 consecutive dots
|
27
|
+
|
28
|
+
parser = Rley::Parser::GFGEarleyParser.new(grammar)
|
29
|
+
tokens = tokenizer(input_to_parse, grammar)
|
30
|
+
result = parser.parse(tokens)
|
31
|
+
|
32
|
+
puts "Parsing successful? #{result.success?}"
|
33
|
+
unless result.success?
|
34
|
+
puts result.failure_reason.message
|
35
|
+
exit(1)
|
36
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Purpose: define a grammar with right-recursive rule
|
2
|
+
require 'rley' # Load Rley library
|
3
|
+
|
4
|
+
# Instantiate a builder object that will build the grammar for us
|
5
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
6
|
+
add_terminals('DOT')
|
7
|
+
|
8
|
+
# Grammar with right recursive rule.
|
9
|
+
rule 'r_dots' => []
|
10
|
+
rule 'r_dots' => %w[DOT r_dots]
|
11
|
+
end
|
12
|
+
|
13
|
+
# And now, let's build the grammar...
|
14
|
+
grammar = builder.grammar
|
15
|
+
|
16
|
+
# Highly simplified tokenizer implementation.
|
17
|
+
def tokenizer(aText, aGrammar)
|
18
|
+
tokens = aText.scan(/\./).map do |dot|
|
19
|
+
terminal = aGrammar.name2symbol['DOT']
|
20
|
+
Rley::Lexical::Token.new(dot, terminal)
|
21
|
+
end
|
22
|
+
|
23
|
+
return tokens
|
24
|
+
end
|
25
|
+
|
26
|
+
input_to_parse = '.' * 500 # Input = 500 consecutive dots
|
27
|
+
|
28
|
+
parser = Rley::Parser::GFGEarleyParser.new(grammar)
|
29
|
+
tokens = tokenizer(input_to_parse, grammar)
|
30
|
+
result = parser.parse(tokens) # Takes about 20 seconds on my computer!!!!
|
31
|
+
|
32
|
+
puts "Parsing successful? #{result.success?}"
|
33
|
+
unless result.success?
|
34
|
+
puts result.failure_reason.message
|
35
|
+
exit(1)
|
36
|
+
end
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/gfg/edge.rb
CHANGED
@@ -4,17 +4,28 @@ module Rley # This module is used as a namespace
|
|
4
4
|
# Responsibilities:
|
5
5
|
# - To know the successor vertex
|
6
6
|
class Edge
|
7
|
-
# The destination vertex of the edge .
|
7
|
+
# @return [Vertex] The destination vertex of the edge.
|
8
8
|
attr_reader :successor
|
9
9
|
|
10
|
+
# Construct a directed edge between two given vertices
|
11
|
+
# @param thePredecessor [Vertex] The vertex the edge starts from
|
12
|
+
# @param theSuccessor [Vertex] The destination vertex of the edge
|
10
13
|
def initialize(thePredecessor, theSuccessor)
|
11
14
|
@successor = theSuccessor
|
12
15
|
thePredecessor.add_edge(self)
|
13
16
|
end
|
14
17
|
|
18
|
+
# @return [String]
|
15
19
|
def to_s()
|
16
20
|
" --> #{successor.label}"
|
17
21
|
end
|
22
|
+
|
23
|
+
# Returns a string containing a human-readable representation of the
|
24
|
+
# edge.
|
25
|
+
# @return [String]
|
26
|
+
def inspect()
|
27
|
+
to_s
|
28
|
+
end
|
18
29
|
end # class
|
19
30
|
end # module
|
20
31
|
end # module
|
@@ -35,10 +35,11 @@ module Rley # This module is used as a namespace
|
|
35
35
|
# A GFG has three types of directed edges linking the vertices.
|
36
36
|
# call edge, return edge and scan edge.
|
37
37
|
class GrmFlowGraph
|
38
|
-
# The set of all vertices in the graph
|
38
|
+
# @return [Array<Vertex>] The set of all vertices in the graph
|
39
39
|
attr_reader :vertices
|
40
40
|
|
41
41
|
# The vertex marked as start node of the graph
|
42
|
+
# @return [StartVertex]
|
42
43
|
attr_reader :start_vertex
|
43
44
|
|
44
45
|
# A Hash with pairs of the form: non-terminal symbol => start node
|
@@ -57,6 +58,25 @@ module Rley # This module is used as a namespace
|
|
57
58
|
|
58
59
|
build_graph(theDottedItems)
|
59
60
|
end
|
61
|
+
|
62
|
+
# Returns a string containing a human-readable representation of the
|
63
|
+
# grammar flow graph.
|
64
|
+
# @return [String]
|
65
|
+
def inspect()
|
66
|
+
result = "#<#{self.class.name}:#{self.object_id}"
|
67
|
+
result << ' @vertices=['
|
68
|
+
list = vertices.map { |v| "#<#{v.selfie}>" }
|
69
|
+
result << list.join(', ')
|
70
|
+
result << '] '
|
71
|
+
edges = []
|
72
|
+
vertices.each do |v|
|
73
|
+
edges << v.edges do |e|
|
74
|
+
result << "#{v.object_id} #{e.inspect}"
|
75
|
+
end
|
76
|
+
end
|
77
|
+
result << "edges=[#{edges.join(",\n ")}]>"
|
78
|
+
return result
|
79
|
+
end
|
60
80
|
|
61
81
|
# Retrieve the vertex with given vertex label.
|
62
82
|
# @param aVertexLabel [String] the label of a vertex from the graph
|
data/lib/rley/gfg/item_vertex.rb
CHANGED
data/lib/rley/gfg/vertex.rb
CHANGED
@@ -28,6 +28,28 @@ module Rley # This module is used as a namespace
|
|
28
28
|
return false # Default implementation
|
29
29
|
end
|
30
30
|
|
31
|
+
# Returns a string containing a human-readable representation of the
|
32
|
+
# vertex.
|
33
|
+
# @return [String]
|
34
|
+
def inspect()
|
35
|
+
result = '#<'
|
36
|
+
result << selfie
|
37
|
+
edges.each { |e| result << e.inspect }
|
38
|
+
result << specific_inspect()
|
39
|
+
result << '>'
|
40
|
+
|
41
|
+
return result
|
42
|
+
end
|
43
|
+
|
44
|
+
# Returns a string containing a human-readable representation of the
|
45
|
+
# vertex without the edges.
|
46
|
+
# @return [String]
|
47
|
+
def selfie()
|
48
|
+
result = "#{self.class.name}:#{self.object_id}"
|
49
|
+
result << %Q[ label="#{self.label}"]
|
50
|
+
return result
|
51
|
+
end
|
52
|
+
|
31
53
|
# Retrieve the grammar symbol before the dot.
|
32
54
|
# @return [GrmSymbol, NilClass] The symbol or otherwise nil.
|
33
55
|
def prev_symbol()
|
@@ -49,6 +71,11 @@ module Rley # This module is used as a namespace
|
|
49
71
|
raise StandardError, 'At most one edge accepted' unless edges.empty?
|
50
72
|
return anEdge
|
51
73
|
end
|
74
|
+
|
75
|
+
def specific_inspect()
|
76
|
+
return ''
|
77
|
+
end
|
78
|
+
|
52
79
|
end # class
|
53
80
|
end # module
|
54
81
|
end # module
|
data/lib/rley/lexical/token.rb
CHANGED
@@ -22,6 +22,7 @@ module Rley # This module is used as a namespace
|
|
22
22
|
# @param theLexeme [String] the lexeme (= piece of text from input)
|
23
23
|
# @param aTerminal [Syntax::Terminal] The terminal symbol corresponding to the lexeme.
|
24
24
|
def initialize(theLexeme, aTerminal)
|
25
|
+
raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
|
25
26
|
@lexeme = theLexeme
|
26
27
|
@terminal = aTerminal
|
27
28
|
end
|
@@ -16,7 +16,7 @@ module Rley # Module used as a namespace
|
|
16
16
|
|
17
17
|
# Returns the result of invoking reason.to_s.
|
18
18
|
def message()
|
19
|
-
return to_s
|
19
|
+
return self.to_s
|
20
20
|
end
|
21
21
|
|
22
22
|
# Return this reason's class name and message
|
@@ -51,6 +51,7 @@ module Rley # Module used as a namespace
|
|
51
51
|
|
52
52
|
def initialize(aPosition, lastToken, expectedTerminals)
|
53
53
|
super(aPosition)
|
54
|
+
raise StandardError, 'Internal error: nil token' if lastToken.nil?
|
54
55
|
@last_token = lastToken.dup
|
55
56
|
@expected_terminals = expectedTerminals.dup
|
56
57
|
end
|
@@ -44,7 +44,14 @@ module Rley # This module is used as a namespace
|
|
44
44
|
end
|
45
45
|
|
46
46
|
# Push a parse entry for the chart entry with given index
|
47
|
+
# @param anIndex [Integer] The rank of the token in the input stream.
|
48
|
+
# @return [ParseEntry] the passed parse entry if it is pushed
|
47
49
|
def push_entry(aVertex, anOrigin, anIndex, _reason)
|
50
|
+
# puts "push_entry:"
|
51
|
+
# puts " aVertex #{aVertex.inspect}"
|
52
|
+
# puts " anOrigin: #{anOrigin}"
|
53
|
+
# puts " anIndex: #{anIndex}"
|
54
|
+
# puts " _reason: #{_reason}"
|
48
55
|
new_entry = ParseEntry.new(aVertex, anOrigin)
|
49
56
|
pushed = self[anIndex].push_entry(new_entry)
|
50
57
|
|
@@ -66,6 +73,13 @@ module Rley # This module is used as a namespace
|
|
66
73
|
|
67
74
|
# Retrieve all the end entries (i.e. of the form
|
68
75
|
last_entries = sets[last_index].entries.select(&:end_entry?)
|
76
|
+
# last_entries.each_with_index do |entry, index|
|
77
|
+
# if entry.nil?
|
78
|
+
# puts "Nil entry at index #{index}"
|
79
|
+
# else
|
80
|
+
# puts entry
|
81
|
+
# end
|
82
|
+
# end
|
69
83
|
|
70
84
|
# ... now find the end vertex for start symbol and with origin at zero.
|
71
85
|
success_entries = last_entries.select do |entry|
|
@@ -28,7 +28,7 @@ module Rley # This module is used as a namespace
|
|
28
28
|
# @return [Hash{ParseEntry => Array<ParseEntry>}]
|
29
29
|
attr_reader(:antecedence)
|
30
30
|
|
31
|
-
# The reason of a parse failure
|
31
|
+
# @return [ErrorReason] The reason of a parse failure
|
32
32
|
attr_reader(:failure_reason)
|
33
33
|
|
34
34
|
# Constructor
|
@@ -163,8 +163,9 @@ module Rley # This module is used as a namespace
|
|
163
163
|
# Return true if the parse was successful (= input tokens
|
164
164
|
# followed the syntax specified by the grammar)
|
165
165
|
def success?()
|
166
|
+
return false if @failure_reason
|
166
167
|
return chart.accepting_entry ? true : false
|
167
|
-
end
|
168
|
+
end
|
168
169
|
|
169
170
|
# Return true if there is more than one complete state
|
170
171
|
# for the same lhs and same origin in any state set.
|
@@ -210,7 +211,7 @@ module Rley # This module is used as a namespace
|
|
210
211
|
end
|
211
212
|
|
212
213
|
# A notification that the parsing reached an end
|
213
|
-
def done
|
214
|
+
def done()
|
214
215
|
# Parse not successful and no reason identified
|
215
216
|
# Assuming that parse failed because of a premature end
|
216
217
|
premature_end unless success? || failure_reason
|
@@ -8,22 +8,39 @@ module Rley # This module is used as a namespace
|
|
8
8
|
# - To know whether the vertex is a start, end or item vertex
|
9
9
|
# - To know the next symbol to expect
|
10
10
|
class ParseEntry
|
11
|
-
# Link to a vertex of the GFG
|
11
|
+
# @return [GFG::Vertex] Link to a vertex of the GFG
|
12
12
|
attr_reader(:vertex)
|
13
13
|
|
14
|
-
# Links to preceding parse entries
|
14
|
+
# @return [Array<ParseEntry>] Links to preceding parse entries
|
15
15
|
attr_reader(:antecedents)
|
16
16
|
|
17
17
|
# the position in the input that matches the beginning of the rhs
|
18
18
|
# of the production.
|
19
|
+
# @return [Integer]
|
19
20
|
attr_reader(:origin)
|
20
21
|
|
22
|
+
# @param aVertex [GFG::Vertex]
|
23
|
+
# @param theOrigin [Integer]
|
21
24
|
def initialize(aVertex, theOrigin)
|
22
25
|
@vertex = valid_vertex(aVertex)
|
23
26
|
@origin = theOrigin
|
24
27
|
@antecedents = []
|
25
28
|
end
|
26
29
|
|
30
|
+
# Returns a string containing a human-readable representation of the
|
31
|
+
# parse entry.
|
32
|
+
# @return [String]
|
33
|
+
def inspect()
|
34
|
+
result = selfie()
|
35
|
+
result << " @antecedents=["
|
36
|
+
antecedents.each do |antec|
|
37
|
+
result << antec.selfie
|
38
|
+
end
|
39
|
+
result << ']>'
|
40
|
+
|
41
|
+
return result
|
42
|
+
end
|
43
|
+
|
27
44
|
# Add a link to an antecedent parse entry
|
28
45
|
def add_antecedent(anAntecedent)
|
29
46
|
antecedents << anAntecedent
|
@@ -75,7 +92,7 @@ module Rley # This module is used as a namespace
|
|
75
92
|
def next_symbol()
|
76
93
|
return vertex.next_symbol
|
77
94
|
end
|
78
|
-
|
95
|
+
|
79
96
|
# Return true if the entry has no antecedent entry
|
80
97
|
def orphan?()
|
81
98
|
return antecedents.empty?
|
@@ -125,9 +142,22 @@ module Rley # This module is used as a namespace
|
|
125
142
|
return vertex.label + " | #{origin}"
|
126
143
|
end
|
127
144
|
|
145
|
+
protected
|
146
|
+
|
147
|
+
# Returns a human-readable and partial representation of itself.
|
148
|
+
# @return [String]
|
149
|
+
def selfie()
|
150
|
+
result = "#<#{self.class.name}:#{self.object_id}"
|
151
|
+
result << " @vertex=<#{vertex.class.name}:#{vertex.object_id}"
|
152
|
+
result << " label=#{vertex.label}>"
|
153
|
+
result << " @origin=#{origin}"
|
154
|
+
|
155
|
+
return result
|
156
|
+
end
|
128
157
|
|
129
158
|
private
|
130
159
|
|
160
|
+
|
131
161
|
# Return the validated GFG vertex
|
132
162
|
def valid_vertex(aVertex)
|
133
163
|
raise StandardError, 'GFG vertex cannot be nil' if aVertex.nil?
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'forwardable' # Delegation
|
1
|
+
require 'forwardable' # For the Delegation pattern
|
2
2
|
|
3
3
|
require_relative '../syntax/terminal'
|
4
4
|
require_relative '../syntax/non_terminal'
|
@@ -11,14 +11,26 @@ module Rley # This module is used as a namespace
|
|
11
11
|
extend Forwardable
|
12
12
|
def_delegators :entries, :empty?, :size, :first, :last, :pop, :each
|
13
13
|
|
14
|
-
# The set of parse entries
|
14
|
+
# @return [Array<ParseEntry>] The set of parse entries
|
15
15
|
attr_reader :entries
|
16
16
|
|
17
|
+
# Constructor.
|
17
18
|
def initialize()
|
18
19
|
@entries = []
|
19
20
|
@entries4term = Hash.new { |hash, key| hash[key] = [] }
|
20
21
|
@entries4n_term = Hash.new { |hash, key| hash[key] = [] }
|
21
22
|
end
|
23
|
+
|
24
|
+
# Returns a string containing a human-readable representation of the
|
25
|
+
# set of parse entries.
|
26
|
+
# @return [String]
|
27
|
+
def inspect()
|
28
|
+
result = "#<#{self.class.name}:#{self.object_id}"
|
29
|
+
result << ' @entries=['
|
30
|
+
entries.each { |e| result << e.inspect }
|
31
|
+
result << ']>'
|
32
|
+
return result
|
33
|
+
end
|
22
34
|
|
23
35
|
# Access the entry at given position
|
24
36
|
def [](index)
|