rley 0.5.10 → 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +15 -0
- data/LICENSE.txt +1 -1
- data/README.md +2 -1
- data/appveyor.yml +6 -5
- data/examples/NLP/engtagger.rb +176 -0
- data/examples/general/SRL/lib/ast_builder.rb +217 -21
- data/examples/general/SRL/lib/grammar.rb +33 -5
- data/examples/general/SRL/lib/regex/alternation.rb +30 -0
- data/examples/general/SRL/lib/regex/char_class.rb +28 -22
- data/examples/general/SRL/lib/regex/char_shorthand.rb +50 -0
- data/examples/general/SRL/lib/regex/character.rb +5 -3
- data/examples/general/SRL/lib/regex/concatenation.rb +32 -0
- data/examples/general/SRL/lib/regex/non_capturing_group.rb +29 -0
- data/examples/general/SRL/lib/regex/wildcard.rb +26 -0
- data/examples/general/SRL/lib/regex_repr.rb +5 -0
- data/examples/general/SRL/lib/tokenizer.rb +28 -3
- data/examples/general/SRL/spec/integration_spec.rb +151 -8
- data/examples/general/SRL/spec/tokenizer_spec.rb +12 -0
- data/examples/general/left.rb +36 -0
- data/examples/general/right.rb +36 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/gfg/edge.rb +12 -1
- data/lib/rley/gfg/grm_flow_graph.rb +21 -1
- data/lib/rley/gfg/item_vertex.rb +1 -1
- data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
- data/lib/rley/gfg/start_vertex.rb +1 -0
- data/lib/rley/gfg/vertex.rb +27 -0
- data/lib/rley/lexical/token.rb +1 -0
- data/lib/rley/parser/error_reason.rb +2 -1
- data/lib/rley/parser/gfg_chart.rb +14 -0
- data/lib/rley/parser/gfg_earley_parser.rb +0 -1
- data/lib/rley/parser/gfg_parsing.rb +4 -3
- data/lib/rley/parser/parse_entry.rb +33 -3
- data/lib/rley/parser/parse_entry_set.rb +14 -2
- data/lib/rley/parser/parse_tree_builder.rb +1 -1
- data/lib/rley/parser/parse_walker_factory.rb +0 -1
- data/lib/rley/syntax/grm_symbol.rb +2 -0
- data/lib/rley/syntax/production.rb +15 -3
- data/lib/rley/syntax/symbol_seq.rb +16 -1
- data/spec/rley/gfg/end_vertex_spec.rb +9 -1
- data/spec/rley/gfg/grm_flow_graph_spec.rb +9 -0
- data/spec/rley/gfg/item_vertex_spec.rb +9 -0
- data/spec/rley/gfg/start_vertex_spec.rb +9 -1
- data/spec/rley/parser/gfg_parsing_spec.rb +0 -1
- data/spec/rley/parser/parse_entry_set_spec.rb +15 -0
- data/spec/rley/parser/parse_entry_spec.rb +24 -13
- data/spec/rley/parser/parse_tracer_spec.rb +1 -1
- data/spec/rley/syntax/production_spec.rb +10 -0
- data/spec/rley/syntax/symbol_seq_spec.rb +5 -0
- metadata +10 -2
@@ -62,6 +62,18 @@ module SRL
|
|
62
62
|
end
|
63
63
|
end # context
|
64
64
|
|
65
|
+
context 'String literal tokenization:' do
|
66
|
+
it "should recognize 'literally ...'" do
|
67
|
+
input = 'literally "hello"'
|
68
|
+
subject.scanner.string = input
|
69
|
+
expectations = [
|
70
|
+
['LITERALLY', 'literally'],
|
71
|
+
['STRING_LIT', 'hello']
|
72
|
+
]
|
73
|
+
match_expectations(subject, expectations)
|
74
|
+
end
|
75
|
+
end # context
|
76
|
+
|
65
77
|
context 'Character range tokenization:' do
|
66
78
|
it "should recognize 'letter from ... to ...'" do
|
67
79
|
input = 'letter a to f'
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Purpose: define a grammar with left-recursive rule
|
2
|
+
require 'rley' # Load Rley library
|
3
|
+
|
4
|
+
# Instantiate a builder object that will build the grammar for us
|
5
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
6
|
+
add_terminals('DOT')
|
7
|
+
|
8
|
+
# Grammar with left recursive rule.
|
9
|
+
rule 'l_dots' => []
|
10
|
+
rule 'l_dots' => %w[l_dots DOT]
|
11
|
+
end
|
12
|
+
|
13
|
+
# And now, let's build the grammar...
|
14
|
+
grammar = builder.grammar
|
15
|
+
|
16
|
+
# Highly simplified tokenizer implementation.
|
17
|
+
def tokenizer(aText, aGrammar)
  # Every literal dot found in the text becomes one token bound to the
  # grammar symbol registered under the name 'DOT'; any other character
  # is ignored.
  aText.scan(/\./).map do |lexeme|
    Rley::Lexical::Token.new(lexeme, aGrammar.name2symbol['DOT'])
  end
end
|
25
|
+
|
26
|
+
input_to_parse = '.' * 500 # Input = 500 consecutive dots
|
27
|
+
|
28
|
+
parser = Rley::Parser::GFGEarleyParser.new(grammar)
|
29
|
+
tokens = tokenizer(input_to_parse, grammar)
|
30
|
+
result = parser.parse(tokens)
|
31
|
+
|
32
|
+
puts "Parsing successful? #{result.success?}"
|
33
|
+
unless result.success?
|
34
|
+
puts result.failure_reason.message
|
35
|
+
exit(1)
|
36
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
# Purpose: define a grammar with right-recursive rule
|
2
|
+
require 'rley' # Load Rley library
|
3
|
+
|
4
|
+
# Instantiate a builder object that will build the grammar for us
|
5
|
+
builder = Rley::Syntax::GrammarBuilder.new do
|
6
|
+
add_terminals('DOT')
|
7
|
+
|
8
|
+
# Grammar with right recursive rule.
|
9
|
+
rule 'r_dots' => []
|
10
|
+
rule 'r_dots' => %w[DOT r_dots]
|
11
|
+
end
|
12
|
+
|
13
|
+
# And now, let's build the grammar...
|
14
|
+
grammar = builder.grammar
|
15
|
+
|
16
|
+
# Highly simplified tokenizer implementation.
|
17
|
+
def tokenizer(aText, aGrammar)
  # Every literal dot found in the text becomes one token bound to the
  # grammar symbol registered under the name 'DOT'; any other character
  # is ignored.
  aText.scan(/\./).map do |lexeme|
    Rley::Lexical::Token.new(lexeme, aGrammar.name2symbol['DOT'])
  end
end
|
25
|
+
|
26
|
+
input_to_parse = '.' * 500 # Input = 500 consecutive dots
|
27
|
+
|
28
|
+
parser = Rley::Parser::GFGEarleyParser.new(grammar)
|
29
|
+
tokens = tokenizer(input_to_parse, grammar)
|
30
|
+
result = parser.parse(tokens) # Takes about 20 seconds on my computer!!!!
|
31
|
+
|
32
|
+
puts "Parsing successful? #{result.success?}"
|
33
|
+
unless result.success?
|
34
|
+
puts result.failure_reason.message
|
35
|
+
exit(1)
|
36
|
+
end
|
data/lib/rley/constants.rb
CHANGED
data/lib/rley/gfg/edge.rb
CHANGED
@@ -4,17 +4,28 @@ module Rley # This module is used as a namespace
|
|
4
4
|
# Responsibilities:
|
5
5
|
# - To know the successor vertex
|
6
6
|
class Edge
|
7
|
-
# The destination vertex of the edge .
|
7
|
+
# @return [Vertex] The destination vertex of the edge.
|
8
8
|
attr_reader :successor
|
9
9
|
|
10
|
+
# Construct a directed edge between two given vertices
|
11
|
+
# @param thePredecessor [Vertex] The vertex the edge starts from
|
12
|
+
# @param theSuccessor [Vertex] The destination vertex of the edge
|
10
13
|
def initialize(thePredecessor, theSuccessor)
|
11
14
|
@successor = theSuccessor
|
12
15
|
thePredecessor.add_edge(self)
|
13
16
|
end
|
14
17
|
|
18
|
+
# @return [String]
|
15
19
|
def to_s()
|
16
20
|
" --> #{successor.label}"
|
17
21
|
end
|
22
|
+
|
23
|
+
# Returns a string containing a human-readable representation of the
|
24
|
+
# edge.
|
25
|
+
# @return [String]
|
26
|
+
def inspect()
  # The inspection text is exactly the text produced by to_s.
  return to_s
end
|
18
29
|
end # class
|
19
30
|
end # module
|
20
31
|
end # module
|
@@ -35,10 +35,11 @@ module Rley # This module is used as a namespace
|
|
35
35
|
# A GFG has three types of directed edges linking the vertices.
|
36
36
|
# call edge, return edge and scan edge.
|
37
37
|
class GrmFlowGraph
|
38
|
-
# The set of all vertices in the graph
|
38
|
+
# @return [Array<Vertex>] The set of all vertices in the graph
|
39
39
|
attr_reader :vertices
|
40
40
|
|
41
41
|
# The vertex marked as start node of the graph
|
42
|
+
# @return [StartVertex]
|
42
43
|
attr_reader :start_vertex
|
43
44
|
|
44
45
|
# A Hash with pairs of the form: non-terminal symbol => start node
|
@@ -57,6 +58,25 @@ module Rley # This module is used as a namespace
|
|
57
58
|
|
58
59
|
build_graph(theDottedItems)
|
59
60
|
end
|
61
|
+
|
62
|
+
# Returns a string containing a human-readable representation of the
|
63
|
+
# grammar flow graph.
|
64
|
+
# @return [String]
|
65
|
+
def inspect()
  # Header: class name and object id of the graph itself.
  result = "#<#{self.class.name}:#{object_id}"

  # Vertices are shown through their edge-less summary (selfie) so that
  # the vertex list stays short.
  result << ' @vertices=['
  result << vertices.map { |v| "#<#{v.selfie}>" }.join(', ')
  result << '] '

  # One description per edge, prefixed with the object id of the vertex
  # the edge leaves from. The descriptions are built explicitly because
  # `v.edges` is a plain reader: a block handed to it is never executed.
  edge_texts = vertices.flat_map do |v|
    v.edges.map { |e| "#{v.object_id} #{e.inspect}" }
  end
  result << "edges=[#{edge_texts.join(",\n ")}]>"

  return result
end
|
60
80
|
|
61
81
|
# Retrieve the vertex with given vertex label.
|
62
82
|
# @param aVertexLabel [String] the label of a vertex from the graph
|
data/lib/rley/gfg/item_vertex.rb
CHANGED
data/lib/rley/gfg/vertex.rb
CHANGED
@@ -28,6 +28,28 @@ module Rley # This module is used as a namespace
|
|
28
28
|
return false # Default implementation
|
29
29
|
end
|
30
30
|
|
31
|
+
# Returns a string containing a human-readable representation of the
|
32
|
+
# vertex.
|
33
|
+
# @return [String]
|
34
|
+
def inspect()
  # Layout: '#<' + edge-less summary + every edge + subclass-specific
  # details + '>'. The pieces are concatenated without separators.
  edge_texts = edges.map { |an_edge| an_edge.inspect }
  "#<#{selfie}#{edge_texts.join}#{specific_inspect}>"
end
|
43
|
+
|
44
|
+
# Returns a string containing a human-readable representation of the
|
45
|
+
# vertex without the edges.
|
46
|
+
# @return [String]
|
47
|
+
def selfie()
  # Class name, object id and quoted label; the edges are deliberately
  # left out.
  %(#{self.class.name}:#{object_id} label="#{label}")
end
|
52
|
+
|
31
53
|
# Retrieve the grammar symbol before the dot.
|
32
54
|
# @return [GrmSymbol, NilClass] The symbol or otherwise nil.
|
33
55
|
def prev_symbol()
|
@@ -49,6 +71,11 @@ module Rley # This module is used as a namespace
|
|
49
71
|
raise StandardError, 'At most one edge accepted' unless edges.empty?
|
50
72
|
return anEdge
|
51
73
|
end
|
74
|
+
|
75
|
+
def specific_inspect()
  # Default: nothing to add to the inspection text.
  ''
end
|
78
|
+
|
52
79
|
end # class
|
53
80
|
end # module
|
54
81
|
end # module
|
data/lib/rley/lexical/token.rb
CHANGED
@@ -22,6 +22,7 @@ module Rley # This module is used as a namespace
|
|
22
22
|
# @param theLexeme [String] the lexeme (= piece of text from input)
|
23
23
|
# @param aTerminal [Syntax::Terminal] The terminal symbol corresponding to the lexeme.
|
24
24
|
def initialize(theLexeme, aTerminal)
|
25
|
+
raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
|
25
26
|
@lexeme = theLexeme
|
26
27
|
@terminal = aTerminal
|
27
28
|
end
|
@@ -16,7 +16,7 @@ module Rley # Module used as a namespace
|
|
16
16
|
|
17
17
|
# Returns the result of invoking reason.to_s.
|
18
18
|
def message()
|
19
|
-
return to_s
|
19
|
+
return self.to_s
|
20
20
|
end
|
21
21
|
|
22
22
|
# Return this reason's class name and message
|
@@ -51,6 +51,7 @@ module Rley # Module used as a namespace
|
|
51
51
|
|
52
52
|
def initialize(aPosition, lastToken, expectedTerminals)
|
53
53
|
super(aPosition)
|
54
|
+
raise StandardError, 'Internal error: nil token' if lastToken.nil?
|
54
55
|
@last_token = lastToken.dup
|
55
56
|
@expected_terminals = expectedTerminals.dup
|
56
57
|
end
|
@@ -44,7 +44,14 @@ module Rley # This module is used as a namespace
|
|
44
44
|
end
|
45
45
|
|
46
46
|
# Push a parse entry for the chart entry with given index
|
47
|
+
# @param anIndex [Integer] The rank of the token in the input stream.
|
48
|
+
# @return [ParseEntry] the passed parse entry if it is pushed
|
47
49
|
def push_entry(aVertex, anOrigin, anIndex, _reason)
|
50
|
+
# puts "push_entry:"
|
51
|
+
# puts " aVertex #{aVertex.inspect}"
|
52
|
+
# puts " anOrigin: #{anOrigin}"
|
53
|
+
# puts " anIndex: #{anIndex}"
|
54
|
+
# puts " _reason: #{_reason}"
|
48
55
|
new_entry = ParseEntry.new(aVertex, anOrigin)
|
49
56
|
pushed = self[anIndex].push_entry(new_entry)
|
50
57
|
|
@@ -66,6 +73,13 @@ module Rley # This module is used as a namespace
|
|
66
73
|
|
67
74
|
# Retrieve all the end entries (i.e. of the form
|
68
75
|
last_entries = sets[last_index].entries.select(&:end_entry?)
|
76
|
+
# last_entries.each_with_index do |entry, index|
|
77
|
+
# if entry.nil?
|
78
|
+
# puts "Nil entry at index #{index}"
|
79
|
+
# else
|
80
|
+
# puts entry
|
81
|
+
# end
|
82
|
+
# end
|
69
83
|
|
70
84
|
# ... now find the end vertex for start symbol and with origin at zero.
|
71
85
|
success_entries = last_entries.select do |entry|
|
@@ -28,7 +28,7 @@ module Rley # This module is used as a namespace
|
|
28
28
|
# @return [Hash{ParseEntry => Array<ParseEntry>}]
|
29
29
|
attr_reader(:antecedence)
|
30
30
|
|
31
|
-
# The reason of a parse failure
|
31
|
+
# @return [ErrorReason] The reason of a parse failure
|
32
32
|
attr_reader(:failure_reason)
|
33
33
|
|
34
34
|
# Constructor
|
@@ -163,8 +163,9 @@ module Rley # This module is used as a namespace
|
|
163
163
|
# Return true if the parse was successful (= input tokens
|
164
164
|
# followed the syntax specified by the grammar)
|
165
165
|
def success?()
|
166
|
+
return false if @failure_reason
|
166
167
|
return chart.accepting_entry ? true : false
|
167
|
-
end
|
168
|
+
end
|
168
169
|
|
169
170
|
# Return true if there are more than one complete state
|
170
171
|
# for the same lhs and same origin in any state set.
|
@@ -210,7 +211,7 @@ module Rley # This module is used as a namespace
|
|
210
211
|
end
|
211
212
|
|
212
213
|
# A notification that the parsing reached an end
|
213
|
-
def done
|
214
|
+
def done()
|
214
215
|
# Parse not successful and no reason identified
|
215
216
|
# Assuming that parse failed because of a premature end
|
216
217
|
premature_end unless success? || failure_reason
|
@@ -8,22 +8,39 @@ module Rley # This module is used as a namespace
|
|
8
8
|
# - To know whether the vertex is a start, end or item vertex
|
9
9
|
# - To know the next symbol to expect
|
10
10
|
class ParseEntry
|
11
|
-
# Link to a vertex of the GFG
|
11
|
+
# @return [GFG::Vertex] Link to a vertex of the GFG
|
12
12
|
attr_reader(:vertex)
|
13
13
|
|
14
|
-
# Links to preceding parse entries
|
14
|
+
# @return [Array<ParseEntry>] Links to preceding parse entries
|
15
15
|
attr_reader(:antecedents)
|
16
16
|
|
17
17
|
# the position in the input that matches the beginning of the rhs
|
18
18
|
# of the production.
|
19
|
+
# @return [Integer]
|
19
20
|
attr_reader(:origin)
|
20
21
|
|
22
|
+
# @param aVertex [GFG::Vertex]
|
23
|
+
# @param theOrigin [Integer]
|
21
24
|
def initialize(aVertex, theOrigin)
|
22
25
|
@vertex = valid_vertex(aVertex)
|
23
26
|
@origin = theOrigin
|
24
27
|
@antecedents = []
|
25
28
|
end
|
26
29
|
|
30
|
+
# Returns a string containing a human-readable representation of the
|
31
|
+
# parse entry.
|
32
|
+
# @return [String]
|
33
|
+
def inspect()
  # Own summary first, then the summaries of all antecedents placed back
  # to back between the square brackets.
  # An explicit block (not &:selfie) is used so that the call is made from
  # within an instance method, as the original code does.
  antecedent_texts = antecedents.map { |an_entry| an_entry.selfie }
  "#{selfie} @antecedents=[#{antecedent_texts.join}]>"
end
|
43
|
+
|
27
44
|
# Add a link to an antecedent parse entry
|
28
45
|
def add_antecedent(anAntecedent)
|
29
46
|
antecedents << anAntecedent
|
@@ -75,7 +92,7 @@ module Rley # This module is used as a namespace
|
|
75
92
|
def next_symbol()
|
76
93
|
return vertex.next_symbol
|
77
94
|
end
|
78
|
-
|
95
|
+
|
79
96
|
# Return true if the entry has no antecedent entry
|
80
97
|
def orphan?()
|
81
98
|
return antecedents.empty?
|
@@ -125,9 +142,22 @@ module Rley # This module is used as a namespace
|
|
125
142
|
return vertex.label + " | #{origin}"
|
126
143
|
end
|
127
144
|
|
145
|
+
protected
|
146
|
+
|
147
|
+
# Returns a human-readable and partial representation of itself.
|
148
|
+
# @return [String]
|
149
|
+
def selfie()
  # The text is intentionally left without a closing '>' so that a caller
  # can append further details to it.
  vertex_text = "<#{vertex.class.name}:#{vertex.object_id} label=#{vertex.label}>"
  "#<#{self.class.name}:#{object_id} @vertex=#{vertex_text} @origin=#{origin}"
end
|
128
157
|
|
129
158
|
private
|
130
159
|
|
160
|
+
|
131
161
|
# Return the validated GFG vertex
|
132
162
|
def valid_vertex(aVertex)
|
133
163
|
raise StandardError, 'GFG vertex cannot be nil' if aVertex.nil?
|
@@ -1,4 +1,4 @@
|
|
1
|
-
require 'forwardable' # Delegation
|
1
|
+
require 'forwardable' # For the Delegation pattern
|
2
2
|
|
3
3
|
require_relative '../syntax/terminal'
|
4
4
|
require_relative '../syntax/non_terminal'
|
@@ -11,14 +11,26 @@ module Rley # This module is used as a namespace
|
|
11
11
|
extend Forwardable
|
12
12
|
def_delegators :entries, :empty?, :size, :first, :last, :pop, :each
|
13
13
|
|
14
|
-
# The set of parse entries
|
14
|
+
# @return [Array<ParseEntry>] The set of parse entries
|
15
15
|
attr_reader :entries
|
16
16
|
|
17
|
+
# Constructor.
|
17
18
|
def initialize()
|
18
19
|
@entries = []
|
19
20
|
@entries4term = Hash.new { |hash, key| hash[key] = [] }
|
20
21
|
@entries4n_term = Hash.new { |hash, key| hash[key] = [] }
|
21
22
|
end
|
23
|
+
|
24
|
+
# Returns a string containing a human-readable representation of the
|
25
|
+
# set of parse entries.
|
26
|
+
# @return [String]
|
27
|
+
def inspect()
  # Class name and object id, followed by the inspection text of every
  # entry placed back to back between the square brackets.
  entry_texts = entries.map { |an_entry| an_entry.inspect }
  "#<#{self.class.name}:#{object_id} @entries=[#{entry_texts.join}]>"
end
|
22
34
|
|
23
35
|
# Access the entry at given position
|
24
36
|
def [](index)
|