rley 0.5.10 → 0.5.11

Sign up to get free protection for your applications and to get access to all the features.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +2 -1
  5. data/appveyor.yml +6 -5
  6. data/examples/NLP/engtagger.rb +176 -0
  7. data/examples/general/SRL/lib/ast_builder.rb +217 -21
  8. data/examples/general/SRL/lib/grammar.rb +33 -5
  9. data/examples/general/SRL/lib/regex/alternation.rb +30 -0
  10. data/examples/general/SRL/lib/regex/char_class.rb +28 -22
  11. data/examples/general/SRL/lib/regex/char_shorthand.rb +50 -0
  12. data/examples/general/SRL/lib/regex/character.rb +5 -3
  13. data/examples/general/SRL/lib/regex/concatenation.rb +32 -0
  14. data/examples/general/SRL/lib/regex/non_capturing_group.rb +29 -0
  15. data/examples/general/SRL/lib/regex/wildcard.rb +26 -0
  16. data/examples/general/SRL/lib/regex_repr.rb +5 -0
  17. data/examples/general/SRL/lib/tokenizer.rb +28 -3
  18. data/examples/general/SRL/spec/integration_spec.rb +151 -8
  19. data/examples/general/SRL/spec/tokenizer_spec.rb +12 -0
  20. data/examples/general/left.rb +36 -0
  21. data/examples/general/right.rb +36 -0
  22. data/lib/rley/constants.rb +1 -1
  23. data/lib/rley/gfg/edge.rb +12 -1
  24. data/lib/rley/gfg/grm_flow_graph.rb +21 -1
  25. data/lib/rley/gfg/item_vertex.rb +1 -1
  26. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  27. data/lib/rley/gfg/start_vertex.rb +1 -0
  28. data/lib/rley/gfg/vertex.rb +27 -0
  29. data/lib/rley/lexical/token.rb +1 -0
  30. data/lib/rley/parser/error_reason.rb +2 -1
  31. data/lib/rley/parser/gfg_chart.rb +14 -0
  32. data/lib/rley/parser/gfg_earley_parser.rb +0 -1
  33. data/lib/rley/parser/gfg_parsing.rb +4 -3
  34. data/lib/rley/parser/parse_entry.rb +33 -3
  35. data/lib/rley/parser/parse_entry_set.rb +14 -2
  36. data/lib/rley/parser/parse_tree_builder.rb +1 -1
  37. data/lib/rley/parser/parse_walker_factory.rb +0 -1
  38. data/lib/rley/syntax/grm_symbol.rb +2 -0
  39. data/lib/rley/syntax/production.rb +15 -3
  40. data/lib/rley/syntax/symbol_seq.rb +16 -1
  41. data/spec/rley/gfg/end_vertex_spec.rb +9 -1
  42. data/spec/rley/gfg/grm_flow_graph_spec.rb +9 -0
  43. data/spec/rley/gfg/item_vertex_spec.rb +9 -0
  44. data/spec/rley/gfg/start_vertex_spec.rb +9 -1
  45. data/spec/rley/parser/gfg_parsing_spec.rb +0 -1
  46. data/spec/rley/parser/parse_entry_set_spec.rb +15 -0
  47. data/spec/rley/parser/parse_entry_spec.rb +24 -13
  48. data/spec/rley/parser/parse_tracer_spec.rb +1 -1
  49. data/spec/rley/syntax/production_spec.rb +10 -0
  50. data/spec/rley/syntax/symbol_seq_spec.rb +5 -0
  51. metadata +10 -2
@@ -62,6 +62,18 @@ module SRL
62
62
  end
63
63
  end # context
64
64
 
65
+ context 'String literal tokenization:' do
66
+ it "should recognize 'literally ...'" do
67
+ input = 'literally "hello"'
68
+ subject.scanner.string = input
69
+ expectations = [
70
+ ['LITERALLY', 'literally'],
71
+ ['STRING_LIT', 'hello']
72
+ ]
73
+ match_expectations(subject, expectations)
74
+ end
75
+ end # context
76
+
65
77
  context 'Character range tokenization:' do
66
78
  it "should recognize 'letter from ... to ...'" do
67
79
  input = 'letter a to f'
@@ -0,0 +1,36 @@
1
+ # Purpose: define a grammar with left-recursive rule
2
+ require 'rley' # Load Rley library
3
+
4
+ # Instantiate a builder object that will build the grammar for us
5
+ builder = Rley::Syntax::GrammarBuilder.new do
6
+ add_terminals('DOT')
7
+
8
+ # Grammar with left recursive rule.
9
+ rule 'l_dots' => []
10
+ rule 'l_dots' => %w[l_dots DOT]
11
+ end
12
+
13
+ # And now, let's build the grammar...
14
+ grammar = builder.grammar
15
+
16
+ # Highly simplified tokenizer implementation.
17
+ def tokenizer(aText, aGrammar)
18
+ tokens = aText.scan(/\./).map do |dot|
19
+ terminal = aGrammar.name2symbol['DOT']
20
+ Rley::Lexical::Token.new(dot, terminal)
21
+ end
22
+
23
+ return tokens
24
+ end
25
+
26
+ input_to_parse = '.' * 500 # Input = 500 consecutive dots
27
+
28
+ parser = Rley::Parser::GFGEarleyParser.new(grammar)
29
+ tokens = tokenizer(input_to_parse, grammar)
30
+ result = parser.parse(tokens)
31
+
32
+ puts "Parsing successful? #{result.success?}"
33
+ unless result.success?
34
+ puts result.failure_reason.message
35
+ exit(1)
36
+ end
@@ -0,0 +1,36 @@
1
+ # Purpose: define a grammar with right-recursive rule
2
+ require 'rley' # Load Rley library
3
+
4
+ # Instantiate a builder object that will build the grammar for us
5
+ builder = Rley::Syntax::GrammarBuilder.new do
6
+ add_terminals('DOT')
7
+
8
+ # Grammar with right recursive rule.
9
+ rule 'r_dots' => []
10
+ rule 'r_dots' => %w[DOT r_dots]
11
+ end
12
+
13
+ # And now, let's build the grammar...
14
+ grammar = builder.grammar
15
+
16
+ # Highly simplified tokenizer implementation.
17
+ def tokenizer(aText, aGrammar)
18
+ tokens = aText.scan(/\./).map do |dot|
19
+ terminal = aGrammar.name2symbol['DOT']
20
+ Rley::Lexical::Token.new(dot, terminal)
21
+ end
22
+
23
+ return tokens
24
+ end
25
+
26
+ input_to_parse = '.' * 500 # Input = 500 consecutive dots
27
+
28
+ parser = Rley::Parser::GFGEarleyParser.new(grammar)
29
+ tokens = tokenizer(input_to_parse, grammar)
30
+ result = parser.parse(tokens) # Takes about 20 seconds on my computer!!!!
31
+
32
+ puts "Parsing successful? #{result.success?}"
33
+ unless result.success?
34
+ puts result.failure_reason.message
35
+ exit(1)
36
+ end
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.10'.freeze
6
+ Version = '0.5.11'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -4,17 +4,28 @@ module Rley # This module is used as a namespace
4
4
  # Responsibilities:
5
5
  # - To know the successor vertex
6
6
  class Edge
7
- # The destination vertex of the edge .
7
+ # @return [Vertex] The destination vertex of the edge.
8
8
  attr_reader :successor
9
9
 
10
+ # Construct a directed edge between two given vertices
11
+ # @param thePredecessor [Vertex] the vertex the edge departs from
12
+ # @param theSuccessor [Vertex] the destination vertex of the edge
10
13
  def initialize(thePredecessor, theSuccessor)
11
14
  @successor = theSuccessor
12
15
  thePredecessor.add_edge(self)
13
16
  end
14
17
 
18
+ # @return [String]
15
19
  def to_s()
16
20
  " --> #{successor.label}"
17
21
  end
22
+
23
+ # Returns a string containing a human-readable representation of the
24
+ # edge.
25
+ # @return [String]
26
+ def inspect()
27
+ to_s
28
+ end
18
29
  end # class
19
30
  end # module
20
31
  end # module
@@ -35,10 +35,11 @@ module Rley # This module is used as a namespace
35
35
  # A GFG has three types of directed edges linking the vertices.
36
36
  # call edge, return edge and scan edge.
37
37
  class GrmFlowGraph
38
- # The set of all vertices in the graph
38
+ # @return [Array<Vertex>] The set of all vertices in the graph
39
39
  attr_reader :vertices
40
40
 
41
41
  # The vertex marked as start node of the graph
42
+ # @return [StartVertex]
42
43
  attr_reader :start_vertex
43
44
 
44
45
  # A Hash with pairs of the form: non-terminal symbol => start node
@@ -57,6 +58,25 @@ module Rley # This module is used as a namespace
57
58
 
58
59
  build_graph(theDottedItems)
59
60
  end
61
+
62
+ # Returns a string containing a human-readable representation of the
63
+ # grammar flow graph.
64
+ # @return [String]
65
+ def inspect()
66
+ result = "#<#{self.class.name}:#{self.object_id}"
67
+ result << ' @vertices=['
68
+ list = vertices.map { |v| "#<#{v.selfie}>" }
69
+ result << list.join(', ')
70
+ result << '] '
71
+ edges = []
72
+ vertices.each do |v|
73
+ edges << v.edges do |e|
74
+ result << "#{v.object_id} #{e.inspect}"
75
+ end
76
+ end
77
+ result << "edges=[#{edges.join(",\n ")}]>"
78
+ return result
79
+ end
60
80
 
61
81
  # Retrieve the vertex with given vertex label.
62
82
  # @param aVertexLabel [String] the label of a vertex from the graph
@@ -74,7 +74,7 @@ module Rley # This module is used as a namespace
74
74
  # @return [Syntax::GrmSymbol] The non-terminal symbol at left side of production.
75
75
  def lhs()
76
76
  return dotted_item.lhs
77
- end
77
+ end
78
78
  end # class
79
79
  end # module
80
80
  end # module
@@ -26,7 +26,7 @@ module Rley # This module is used as a namespace
26
26
  # A start vertex may accept an indegree and outdegree greater than one
27
27
  def check_add_edge(anEdge)
28
28
  return anEdge
29
- end
29
+ end
30
30
  end # class
31
31
  end # module
32
32
  end # module
@@ -12,6 +12,7 @@ module Rley # This module is used as a namespace
12
12
  super(aNonTerminal)
13
13
  end
14
14
 
15
+ # @return [String]
15
16
  def label()
16
17
  return ".#{non_terminal}"
17
18
  end
@@ -28,6 +28,28 @@ module Rley # This module is used as a namespace
28
28
  return false # Default implementation
29
29
  end
30
30
 
31
+ # Returns a string containing a human-readable representation of the
32
+ # vertex.
33
+ # @return [String]
34
+ def inspect()
35
+ result = '#<'
36
+ result << selfie
37
+ edges.each { |e| result << e.inspect }
38
+ result << specific_inspect()
39
+ result << '>'
40
+
41
+ return result
42
+ end
43
+
44
+ # Returns a string containing a human-readable representation of the
45
+ # vertex without the edges.
46
+ # @return [String]
47
+ def selfie()
48
+ result = "#{self.class.name}:#{self.object_id}"
49
+ result << %Q[ label="#{self.label}"]
50
+ return result
51
+ end
52
+
31
53
  # Retrieve the grammar symbol before the dot.
32
54
  # @return [GrmSymbol, NilClass] The symbol or otherwise nil.
33
55
  def prev_symbol()
@@ -49,6 +71,11 @@ module Rley # This module is used as a namespace
49
71
  raise StandardError, 'At most one edge accepted' unless edges.empty?
50
72
  return anEdge
51
73
  end
74
+
75
+ def specific_inspect()
76
+ return ''
77
+ end
78
+
52
79
  end # class
53
80
  end # module
54
81
  end # module
@@ -22,6 +22,7 @@ module Rley # This module is used as a namespace
22
22
  # @param theLexeme [String] the lexeme (= piece of text from input)
23
23
  # @param aTerminal [Syntax::Terminal] The terminal symbol corresponding to the lexeme.
24
24
  def initialize(theLexeme, aTerminal)
25
+ raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
25
26
  @lexeme = theLexeme
26
27
  @terminal = aTerminal
27
28
  end
@@ -16,7 +16,7 @@ module Rley # Module used as a namespace
16
16
 
17
17
  # Returns the result of invoking reason.to_s.
18
18
  def message()
19
- return to_s
19
+ return self.to_s
20
20
  end
21
21
 
22
22
  # Return this reason's class name and message
@@ -51,6 +51,7 @@ module Rley # Module used as a namespace
51
51
 
52
52
  def initialize(aPosition, lastToken, expectedTerminals)
53
53
  super(aPosition)
54
+ raise StandardError, 'Internal error: nil token' if lastToken.nil?
54
55
  @last_token = lastToken.dup
55
56
  @expected_terminals = expectedTerminals.dup
56
57
  end
@@ -44,7 +44,14 @@ module Rley # This module is used as a namespace
44
44
  end
45
45
 
46
46
  # Push a parse entry for the chart entry with given index
47
+ # @param anIndex [Integer] The rank of the token in the input stream.
48
+ # @return [ParseEntry] the passed parse entry if it is pushed
47
49
  def push_entry(aVertex, anOrigin, anIndex, _reason)
50
+ # puts "push_entry:"
51
+ # puts " aVertex #{aVertex.inspect}"
52
+ # puts " anOrigin: #{anOrigin}"
53
+ # puts " anIndex: #{anIndex}"
54
+ # puts " _reason: #{_reason}"
48
55
  new_entry = ParseEntry.new(aVertex, anOrigin)
49
56
  pushed = self[anIndex].push_entry(new_entry)
50
57
 
@@ -66,6 +73,13 @@ module Rley # This module is used as a namespace
66
73
 
67
74
  # Retrieve all the end entries (i.e. of the form
68
75
  last_entries = sets[last_index].entries.select(&:end_entry?)
76
+ # last_entries.each_with_index do |entry, index|
77
+ # if entry.nil?
78
+ # puts "Nil entry at index #{index}"
79
+ # else
80
+ # puts entry
81
+ # end
82
+ # end
69
83
 
70
84
  # ... now find the end vertex for start symbol and with origin at zero.
71
85
  success_entries = last_entries.select do |entry|
@@ -46,7 +46,6 @@ module Rley # This module is used as a namespace
46
46
  break unless scan_success
47
47
  end
48
48
  end
49
-
50
49
  result.done # End of parsing process
51
50
  return result
52
51
  end
@@ -28,7 +28,7 @@ module Rley # This module is used as a namespace
28
28
  # @return [Hash{ParseEntry => Array<ParseEntry>}]
29
29
  attr_reader(:antecedence)
30
30
 
31
- # The reason of a parse failure
31
+ # @return [ErrorReason] The reason of a parse failure
32
32
  attr_reader(:failure_reason)
33
33
 
34
34
  # Constructor
@@ -163,8 +163,9 @@ module Rley # This module is used as a namespace
163
163
  # Return true if the parse was successful (= input tokens
164
164
  # followed the syntax specified by the grammar)
165
165
  def success?()
166
+ return false if @failure_reason
166
167
  return chart.accepting_entry ? true : false
167
- end
168
+ end
168
169
 
169
170
  # Return true if there are more than one complete state
170
171
  # for the same lhs and same origin in any state set.
@@ -210,7 +211,7 @@ module Rley # This module is used as a namespace
210
211
  end
211
212
 
212
213
  # A notification that the parsing reached an end
213
- def done
214
+ def done()
214
215
  # Parse not successful and no reason identified
215
216
  # Assuming that parse failed because of a premature end
216
217
  premature_end unless success? || failure_reason
@@ -8,22 +8,39 @@ module Rley # This module is used as a namespace
8
8
  # - To know whether the vertex is a start, end or item vertex
9
9
  # - To know the next symbol to expect
10
10
  class ParseEntry
11
- # Link to a vertex of the GFG
11
+ # @return [GFG::Vertex] Link to a vertex of the GFG
12
12
  attr_reader(:vertex)
13
13
 
14
- # Links to preceding parse entries
14
+ # @return [Array<ParseEntry>] Links to preceding parse entries
15
15
  attr_reader(:antecedents)
16
16
 
17
17
  # the position in the input that matches the beginning of the rhs
18
18
  # of the production.
19
+ # @return [Integer]
19
20
  attr_reader(:origin)
20
21
 
22
+ # @param aVertex [GFG::Vertex]
23
+ # @param theOrigin [Integer]
21
24
  def initialize(aVertex, theOrigin)
22
25
  @vertex = valid_vertex(aVertex)
23
26
  @origin = theOrigin
24
27
  @antecedents = []
25
28
  end
26
29
 
30
+ # Returns a string containing a human-readable representation of the
31
+ # parse entry.
32
+ # @return [String]
33
+ def inspect()
34
+ result = selfie()
35
+ result << " @antecedents=["
36
+ antecedents.each do |antec|
37
+ result << antec.selfie
38
+ end
39
+ result << ']>'
40
+
41
+ return result
42
+ end
43
+
27
44
  # Add a link to an antecedent parse entry
28
45
  def add_antecedent(anAntecedent)
29
46
  antecedents << anAntecedent
@@ -75,7 +92,7 @@ module Rley # This module is used as a namespace
75
92
  def next_symbol()
76
93
  return vertex.next_symbol
77
94
  end
78
-
95
+
79
96
  # Return true if the entry has no antecedent entry
80
97
  def orphan?()
81
98
  return antecedents.empty?
@@ -125,9 +142,22 @@ module Rley # This module is used as a namespace
125
142
  return vertex.label + " | #{origin}"
126
143
  end
127
144
 
145
+ protected
146
+
147
+ # Returns a human-readable and partial representation of itself.
148
+ # @return [String]
149
+ def selfie()
150
+ result = "#<#{self.class.name}:#{self.object_id}"
151
+ result << " @vertex=<#{vertex.class.name}:#{vertex.object_id}"
152
+ result << " label=#{vertex.label}>"
153
+ result << " @origin=#{origin}"
154
+
155
+ return result
156
+ end
128
157
 
129
158
  private
130
159
 
160
+
131
161
  # Return the validated GFG vertex
132
162
  def valid_vertex(aVertex)
133
163
  raise StandardError, 'GFG vertex cannot be nil' if aVertex.nil?
@@ -1,4 +1,4 @@
1
- require 'forwardable' # Delegation
1
+ require 'forwardable' # For the Delegation pattern
2
2
 
3
3
  require_relative '../syntax/terminal'
4
4
  require_relative '../syntax/non_terminal'
@@ -11,14 +11,26 @@ module Rley # This module is used as a namespace
11
11
  extend Forwardable
12
12
  def_delegators :entries, :empty?, :size, :first, :last, :pop, :each
13
13
 
14
- # The set of parse entries
14
+ # @return [Array<ParseEntry>] The set of parse entries
15
15
  attr_reader :entries
16
16
 
17
+ # Constructor.
17
18
  def initialize()
18
19
  @entries = []
19
20
  @entries4term = Hash.new { |hash, key| hash[key] = [] }
20
21
  @entries4n_term = Hash.new { |hash, key| hash[key] = [] }
21
22
  end
23
+
24
+ # Returns a string containing a human-readable representation of the
25
+ # set of parse entries.
26
+ # @return [String]
27
+ def inspect()
28
+ result = "#<#{self.class.name}:#{self.object_id}"
29
+ result << ' @entries=['
30
+ entries.each { |e| result << e.inspect }
31
+ result << ']>'
32
+ return result
33
+ end
22
34
 
23
35
  # Access the entry at given position
24
36
  def [](index)
@@ -135,7 +135,7 @@ module Rley # This module is used as a namespace
135
135
  process_middle_entry(anEntry, anIndex)
136
136
  end
137
137
  else
138
- $stderr.puts "waiko '#{anEvent}'"
138
+ $stderr.puts "Internal Errore '#{anEvent}'"
139
139
  raise NotImplementedError
140
140
  end
141
141
  end