rley 0.5.10 → 0.5.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +15 -0
  3. data/LICENSE.txt +1 -1
  4. data/README.md +2 -1
  5. data/appveyor.yml +6 -5
  6. data/examples/NLP/engtagger.rb +176 -0
  7. data/examples/general/SRL/lib/ast_builder.rb +217 -21
  8. data/examples/general/SRL/lib/grammar.rb +33 -5
  9. data/examples/general/SRL/lib/regex/alternation.rb +30 -0
  10. data/examples/general/SRL/lib/regex/char_class.rb +28 -22
  11. data/examples/general/SRL/lib/regex/char_shorthand.rb +50 -0
  12. data/examples/general/SRL/lib/regex/character.rb +5 -3
  13. data/examples/general/SRL/lib/regex/concatenation.rb +32 -0
  14. data/examples/general/SRL/lib/regex/non_capturing_group.rb +29 -0
  15. data/examples/general/SRL/lib/regex/wildcard.rb +26 -0
  16. data/examples/general/SRL/lib/regex_repr.rb +5 -0
  17. data/examples/general/SRL/lib/tokenizer.rb +28 -3
  18. data/examples/general/SRL/spec/integration_spec.rb +151 -8
  19. data/examples/general/SRL/spec/tokenizer_spec.rb +12 -0
  20. data/examples/general/left.rb +36 -0
  21. data/examples/general/right.rb +36 -0
  22. data/lib/rley/constants.rb +1 -1
  23. data/lib/rley/gfg/edge.rb +12 -1
  24. data/lib/rley/gfg/grm_flow_graph.rb +21 -1
  25. data/lib/rley/gfg/item_vertex.rb +1 -1
  26. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  27. data/lib/rley/gfg/start_vertex.rb +1 -0
  28. data/lib/rley/gfg/vertex.rb +27 -0
  29. data/lib/rley/lexical/token.rb +1 -0
  30. data/lib/rley/parser/error_reason.rb +2 -1
  31. data/lib/rley/parser/gfg_chart.rb +14 -0
  32. data/lib/rley/parser/gfg_earley_parser.rb +0 -1
  33. data/lib/rley/parser/gfg_parsing.rb +4 -3
  34. data/lib/rley/parser/parse_entry.rb +33 -3
  35. data/lib/rley/parser/parse_entry_set.rb +14 -2
  36. data/lib/rley/parser/parse_tree_builder.rb +1 -1
  37. data/lib/rley/parser/parse_walker_factory.rb +0 -1
  38. data/lib/rley/syntax/grm_symbol.rb +2 -0
  39. data/lib/rley/syntax/production.rb +15 -3
  40. data/lib/rley/syntax/symbol_seq.rb +16 -1
  41. data/spec/rley/gfg/end_vertex_spec.rb +9 -1
  42. data/spec/rley/gfg/grm_flow_graph_spec.rb +9 -0
  43. data/spec/rley/gfg/item_vertex_spec.rb +9 -0
  44. data/spec/rley/gfg/start_vertex_spec.rb +9 -1
  45. data/spec/rley/parser/gfg_parsing_spec.rb +0 -1
  46. data/spec/rley/parser/parse_entry_set_spec.rb +15 -0
  47. data/spec/rley/parser/parse_entry_spec.rb +24 -13
  48. data/spec/rley/parser/parse_tracer_spec.rb +1 -1
  49. data/spec/rley/syntax/production_spec.rb +10 -0
  50. data/spec/rley/syntax/symbol_seq_spec.rb +5 -0
  51. metadata +10 -2
@@ -62,6 +62,18 @@ module SRL
62
62
  end
63
63
  end # context
64
64
 
65
+ context 'String literal tokenization:' do
66
+ it "should recognize 'literally ...'" do
67
+ input = 'literally "hello"'
68
+ subject.scanner.string = input
69
+ expectations = [
70
+ ['LITERALLY', 'literally'],
71
+ ['STRING_LIT', 'hello']
72
+ ]
73
+ match_expectations(subject, expectations)
74
+ end
75
+ end # context
76
+
65
77
  context 'Character range tokenization:' do
66
78
  it "should recognize 'letter from ... to ...'" do
67
79
  input = 'letter a to f'
@@ -0,0 +1,36 @@
1
+ # Purpose: define a grammar with a left-recursive rule
2
+ require 'rley' # Load Rley library
3
+
4
+ # Instantiate a builder object that will build the grammar for us
5
+ builder = Rley::Syntax::GrammarBuilder.new do
6
+ add_terminals('DOT')
7
+
8
+ # Grammar with left recursive rule.
9
+ rule 'l_dots' => []
10
+ rule 'l_dots' => %w[l_dots DOT]
11
+ end
12
+
13
+ # And now, let's build the grammar...
14
+ grammar = builder.grammar
15
+
16
+ # Highly simplified tokenizer implementation.
17
+ def tokenizer(aText, aGrammar)
18
+ tokens = aText.scan(/\./).map do |dot|
19
+ terminal = aGrammar.name2symbol['DOT']
20
+ Rley::Lexical::Token.new(dot, terminal)
21
+ end
22
+
23
+ return tokens
24
+ end
25
+
26
+ input_to_parse = '.' * 500 # Input = 500 consecutive dots
27
+
28
+ parser = Rley::Parser::GFGEarleyParser.new(grammar)
29
+ tokens = tokenizer(input_to_parse, grammar)
30
+ result = parser.parse(tokens)
31
+
32
+ puts "Parsing successful? #{result.success?}"
33
+ unless result.success?
34
+ puts result.failure_reason.message
35
+ exit(1)
36
+ end
@@ -0,0 +1,36 @@
1
+ # Purpose: define a grammar with a right-recursive rule
2
+ require 'rley' # Load Rley library
3
+
4
+ # Instantiate a builder object that will build the grammar for us
5
+ builder = Rley::Syntax::GrammarBuilder.new do
6
+ add_terminals('DOT')
7
+
8
+ # Grammar with right recursive rule.
9
+ rule 'r_dots' => []
10
+ rule 'r_dots' => %w[DOT r_dots]
11
+ end
12
+
13
+ # And now, let's build the grammar...
14
+ grammar = builder.grammar
15
+
16
+ # Highly simplified tokenizer implementation.
17
+ def tokenizer(aText, aGrammar)
18
+ tokens = aText.scan(/\./).map do |dot|
19
+ terminal = aGrammar.name2symbol['DOT']
20
+ Rley::Lexical::Token.new(dot, terminal)
21
+ end
22
+
23
+ return tokens
24
+ end
25
+
26
+ input_to_parse = '.' * 500 # Input = 500 consecutive dots
27
+
28
+ parser = Rley::Parser::GFGEarleyParser.new(grammar)
29
+ tokens = tokenizer(input_to_parse, grammar)
30
+ result = parser.parse(tokens) # Takes about 20 seconds on my computer!!!!
31
+
32
+ puts "Parsing successful? #{result.success?}"
33
+ unless result.success?
34
+ puts result.failure_reason.message
35
+ exit(1)
36
+ end
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.10'.freeze
6
+ Version = '0.5.11'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -4,17 +4,28 @@ module Rley # This module is used as a namespace
4
4
  # Responsibilities:
5
5
  # - To know the successor vertex
6
6
  class Edge
7
- # The destination vertex of the edge .
7
+ # @return [Vertex] The destination vertex of the edge.
8
8
  attr_reader :successor
9
9
 
10
+ # Construct a directed edge between two given vertices
11
+ # @param thePredecessor [Vertex] The vertex the edge is added to (its source)
12
+ # @param theSuccessor [Vertex] The destination vertex of the edge
10
13
  def initialize(thePredecessor, theSuccessor)
11
14
  @successor = theSuccessor
12
15
  thePredecessor.add_edge(self)
13
16
  end
14
17
 
18
+ # @return [String]
15
19
  def to_s()
16
20
  " --> #{successor.label}"
17
21
  end
22
+
23
+ # Returns a string containing a human-readable representation of the
24
+ # edge.
25
+ # @return [String]
26
+ def inspect()
27
+ to_s
28
+ end
18
29
  end # class
19
30
  end # module
20
31
  end # module
@@ -35,10 +35,11 @@ module Rley # This module is used as a namespace
35
35
  # A GFG has three types of directed edges linking the vertices.
36
36
  # call edge, return edge and scan edge.
37
37
  class GrmFlowGraph
38
- # The set of all vertices in the graph
38
+ # @return [Array<Vertex>] The set of all vertices in the graph
39
39
  attr_reader :vertices
40
40
 
41
41
  # The vertex marked as start node of the graph
42
+ # @return [StartVertex]
42
43
  attr_reader :start_vertex
43
44
 
44
45
  # A Hash with pairs of the form: non-terminal symbol => start node
@@ -57,6 +58,25 @@ module Rley # This module is used as a namespace
57
58
 
58
59
  build_graph(theDottedItems)
59
60
  end
61
+
62
+ # Returns a string containing a human-readable representation of the
63
+ # grammar flow graph.
64
+ # @return [String]
65
+ def inspect()
66
+ result = "#<#{self.class.name}:#{self.object_id}"
67
+ result << ' @vertices=['
68
+ list = vertices.map { |v| "#<#{v.selfie}>" }
69
+ result << list.join(', ')
70
+ result << '] '
71
+ edges = []
72
+ vertices.each do |v|
73
+ edges << v.edges do |e|
74
+ result << "#{v.object_id} #{e.inspect}"
75
+ end
76
+ end
77
+ result << "edges=[#{edges.join(",\n ")}]>"
78
+ return result
79
+ end
60
80
 
61
81
  # Retrieve the vertex with given vertex label.
62
82
  # @param aVertexLabel [String] the label of a vertex from the graph
@@ -74,7 +74,7 @@ module Rley # This module is used as a namespace
74
74
  # @return [Syntax::GrmSymbol] The non-terminal symbol at left side of production.
75
75
  def lhs()
76
76
  return dotted_item.lhs
77
- end
77
+ end
78
78
  end # class
79
79
  end # module
80
80
  end # module
@@ -26,7 +26,7 @@ module Rley # This module is used as a namespace
26
26
  # A start vertex may accept an indegree and outdegree greater than one
27
27
  def check_add_edge(anEdge)
28
28
  return anEdge
29
- end
29
+ end
30
30
  end # class
31
31
  end # module
32
32
  end # module
@@ -12,6 +12,7 @@ module Rley # This module is used as a namespace
12
12
  super(aNonTerminal)
13
13
  end
14
14
 
15
+ # @return [String]
15
16
  def label()
16
17
  return ".#{non_terminal}"
17
18
  end
@@ -28,6 +28,28 @@ module Rley # This module is used as a namespace
28
28
  return false # Default implementation
29
29
  end
30
30
 
31
+ # Returns a string containing a human-readable representation of the
32
+ # vertex.
33
+ # @return [String]
34
+ def inspect()
35
+ result = '#<'
36
+ result << selfie
37
+ edges.each { |e| result << e.inspect }
38
+ result << specific_inspect()
39
+ result << '>'
40
+
41
+ return result
42
+ end
43
+
44
+ # Returns a string containing a human-readable representation of the
45
+ # vertex without the edges.
46
+ # @return [String]
47
+ def selfie()
48
+ result = "#{self.class.name}:#{self.object_id}"
49
+ result << %Q[ label="#{self.label}"]
50
+ return result
51
+ end
52
+
31
53
  # Retrieve the grammar symbol before the dot.
32
54
  # @return [GrmSymbol, NilClass] The symbol or otherwise nil.
33
55
  def prev_symbol()
@@ -49,6 +71,11 @@ module Rley # This module is used as a namespace
49
71
  raise StandardError, 'At most one edge accepted' unless edges.empty?
50
72
  return anEdge
51
73
  end
74
+
75
+ def specific_inspect()
76
+ return ''
77
+ end
78
+
52
79
  end # class
53
80
  end # module
54
81
  end # module
@@ -22,6 +22,7 @@ module Rley # This module is used as a namespace
22
22
  # @param theLexeme [String] the lexeme (= piece of text from input)
23
23
  # @param aTerminal [Syntax::Terminal] The terminal symbol corresponding to the lexeme.
24
24
  def initialize(theLexeme, aTerminal)
25
+ raise 'Internal error: nil terminal symbol detected' if aTerminal.nil?
25
26
  @lexeme = theLexeme
26
27
  @terminal = aTerminal
27
28
  end
@@ -16,7 +16,7 @@ module Rley # Module used as a namespace
16
16
 
17
17
  # Returns the result of invoking reason.to_s.
18
18
  def message()
19
- return to_s
19
+ return self.to_s
20
20
  end
21
21
 
22
22
  # Return this reason's class name and message
@@ -51,6 +51,7 @@ module Rley # Module used as a namespace
51
51
 
52
52
  def initialize(aPosition, lastToken, expectedTerminals)
53
53
  super(aPosition)
54
+ raise StandardError, 'Internal error: nil token' if lastToken.nil?
54
55
  @last_token = lastToken.dup
55
56
  @expected_terminals = expectedTerminals.dup
56
57
  end
@@ -44,7 +44,14 @@ module Rley # This module is used as a namespace
44
44
  end
45
45
 
46
46
  # Push a parse entry for the chart entry with given index
47
+ # @param anIndex [Integer] The rank of the token in the input stream.
48
+ # @return [ParseEntry] the passed parse entry if it is pushed
47
49
  def push_entry(aVertex, anOrigin, anIndex, _reason)
50
+ # puts "push_entry:"
51
+ # puts " aVertex #{aVertex.inspect}"
52
+ # puts " anOrigin: #{anOrigin}"
53
+ # puts " anIndex: #{anIndex}"
54
+ # puts " _reason: #{_reason}"
48
55
  new_entry = ParseEntry.new(aVertex, anOrigin)
49
56
  pushed = self[anIndex].push_entry(new_entry)
50
57
 
@@ -66,6 +73,13 @@ module Rley # This module is used as a namespace
66
73
 
67
74
  # Retrieve all the end entries (i.e. of the form
68
75
  last_entries = sets[last_index].entries.select(&:end_entry?)
76
+ # last_entries.each_with_index do |entry, index|
77
+ # if entry.nil?
78
+ # puts "Nil entry at index #{index}"
79
+ # else
80
+ # puts entry
81
+ # end
82
+ # end
69
83
 
70
84
  # ... now find the end vertex for start symbol and with origin at zero.
71
85
  success_entries = last_entries.select do |entry|
@@ -46,7 +46,6 @@ module Rley # This module is used as a namespace
46
46
  break unless scan_success
47
47
  end
48
48
  end
49
-
50
49
  result.done # End of parsing process
51
50
  return result
52
51
  end
@@ -28,7 +28,7 @@ module Rley # This module is used as a namespace
28
28
  # @return [Hash{ParseEntry => Array<ParseEntry>}]
29
29
  attr_reader(:antecedence)
30
30
 
31
- # The reason of a parse failure
31
+ # @return [ErrorReason] The reason of a parse failure
32
32
  attr_reader(:failure_reason)
33
33
 
34
34
  # Constructor
@@ -163,8 +163,9 @@ module Rley # This module is used as a namespace
163
163
  # Return true if the parse was successful (= input tokens
164
164
  # followed the syntax specified by the grammar)
165
165
  def success?()
166
+ return false if @failure_reason
166
167
  return chart.accepting_entry ? true : false
167
- end
168
+ end
168
169
 
169
170
  # Return true if there is more than one complete state
170
171
  # for the same lhs and same origin in any state set.
@@ -210,7 +211,7 @@ module Rley # This module is used as a namespace
210
211
  end
211
212
 
212
213
  # A notification that the parsing reached an end
213
- def done
214
+ def done()
214
215
  # Parse not successful and no reason identified
215
216
  # Assuming that parse failed because of a premature end
216
217
  premature_end unless success? || failure_reason
@@ -8,22 +8,39 @@ module Rley # This module is used as a namespace
8
8
  # - To know whether the vertex is a start, end or item vertex
9
9
  # - To know the next symbol to expect
10
10
  class ParseEntry
11
- # Link to a vertex of the GFG
11
+ # @return [GFG::Vertex] Link to a vertex of the GFG
12
12
  attr_reader(:vertex)
13
13
 
14
- # Links to preceding parse entries
14
+ # @return [Array<ParseEntry>] Links to preceding parse entries
15
15
  attr_reader(:antecedents)
16
16
 
17
17
  # the position in the input that matches the beginning of the rhs
18
18
  # of the production.
19
+ # @return [Integer]
19
20
  attr_reader(:origin)
20
21
 
22
+ # @param aVertex [GFG::Vertex]
23
+ # @param theOrigin [Integer]
21
24
  def initialize(aVertex, theOrigin)
22
25
  @vertex = valid_vertex(aVertex)
23
26
  @origin = theOrigin
24
27
  @antecedents = []
25
28
  end
26
29
 
30
+ # Returns a string containing a human-readable representation of the
31
+ # parse entry.
32
+ # @return [String]
33
+ def inspect()
34
+ result = selfie()
35
+ result << " @antecedents=["
36
+ antecedents.each do |antec|
37
+ result << antec.selfie
38
+ end
39
+ result << ']>'
40
+
41
+ return result
42
+ end
43
+
27
44
  # Add a link to an antecedent parse entry
28
45
  def add_antecedent(anAntecedent)
29
46
  antecedents << anAntecedent
@@ -75,7 +92,7 @@ module Rley # This module is used as a namespace
75
92
  def next_symbol()
76
93
  return vertex.next_symbol
77
94
  end
78
-
95
+
79
96
  # Return true if the entry has no antecedent entry
80
97
  def orphan?()
81
98
  return antecedents.empty?
@@ -125,9 +142,22 @@ module Rley # This module is used as a namespace
125
142
  return vertex.label + " | #{origin}"
126
143
  end
127
144
 
145
+ protected
146
+
147
+ # Returns a human-readable and partial representation of itself.
148
+ # @return [String]
149
+ def selfie()
150
+ result = "#<#{self.class.name}:#{self.object_id}"
151
+ result << " @vertex=<#{vertex.class.name}:#{vertex.object_id}"
152
+ result << " label=#{vertex.label}>"
153
+ result << " @origin=#{origin}"
154
+
155
+ return result
156
+ end
128
157
 
129
158
  private
130
159
 
160
+
131
161
  # Return the validated GFG vertex
132
162
  def valid_vertex(aVertex)
133
163
  raise StandardError, 'GFG vertex cannot be nil' if aVertex.nil?
@@ -1,4 +1,4 @@
1
- require 'forwardable' # Delegation
1
+ require 'forwardable' # For the Delegation pattern
2
2
 
3
3
  require_relative '../syntax/terminal'
4
4
  require_relative '../syntax/non_terminal'
@@ -11,14 +11,26 @@ module Rley # This module is used as a namespace
11
11
  extend Forwardable
12
12
  def_delegators :entries, :empty?, :size, :first, :last, :pop, :each
13
13
 
14
- # The set of parse entries
14
+ # @return [Array<ParseEntry>] The set of parse entries
15
15
  attr_reader :entries
16
16
 
17
+ # Constructor.
17
18
  def initialize()
18
19
  @entries = []
19
20
  @entries4term = Hash.new { |hash, key| hash[key] = [] }
20
21
  @entries4n_term = Hash.new { |hash, key| hash[key] = [] }
21
22
  end
23
+
24
+ # Returns a string containing a human-readable representation of the
25
+ # set of parse entries.
26
+ # @return [String]
27
+ def inspect()
28
+ result = "#<#{self.class.name}:#{self.object_id}"
29
+ result << ' @entries=['
30
+ entries.each { |e| result << e.inspect }
31
+ result << ']>'
32
+ return result
33
+ end
22
34
 
23
35
  # Access the entry at given position
24
36
  def [](index)
@@ -135,7 +135,7 @@ module Rley # This module is used as a namespace
135
135
  process_middle_entry(anEntry, anIndex)
136
136
  end
137
137
  else
138
- $stderr.puts "waiko '#{anEvent}'"
138
+ $stderr.puts "Internal Errore '#{anEvent}'"
139
139
  raise NotImplementedError
140
140
  end
141
141
  end