rley 0.6.07 → 0.6.08

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 25c9a2612411bdb3767901c9b21d1e38f5279c94
4
- data.tar.gz: 9c3ffe74bda63f31d40421b74a6f976ac3db49ad
3
+ metadata.gz: 552a2a05bcb871b84b8d86b7c0bab8de2d776754
4
+ data.tar.gz: 0f80fc6e91cbe752393e04d878afeb6646ea0d27
5
5
  SHA512:
6
- metadata.gz: d289304c2a693fb1eb14747751e205bc23cccb9941c5f416af1966e6b3fcf3d2ff58f52ae6d56a2cae40752bcccbc2cd9a2c9015cc555a5ad3ef1890306f15e9
7
- data.tar.gz: c59a9d80ff3b941efabed869de7e5d16b6ebc597c3a63d914de3976aa3c709a6cfde0630c8c931e14ae559e4c3fcbc5dddb935fa2d58c12c71fafad2fc817f4e
6
+ metadata.gz: 9cc73045f3e36363b201b61262ba9e8bac5154585033d8a1a48b0a45cdb259050bebdb42df747f0254276930e58488d740ab7a98d0659da485deb0ee22898798
7
+ data.tar.gz: 129611092776086798e54391535b50c9bdb673a8a4342fca81a3a8c66628113cf83aae1a872a1b1969845502347707e6994ed99e0a7ebeb25942acf8b0425eb4
@@ -1,12 +1,13 @@
1
1
  require 'rley' # Load Rley library
2
2
 
3
3
  ########################################
4
- # Step 0. Instantiate facade object of Rley library.
4
+ # Step 1. Instantiate facade object of Rley library.
5
5
  # It provides a unified, higher-level interface
6
6
  engine = Rley::Engine.new
7
7
 
8
+
8
9
  ########################################
9
- # Step 1. Define a grammar for a nano English-like language
10
+ # Step 2. Define a grammar for a nano English-like language
10
11
  # based on example from Jurafsky & Martin book (chapter 8 of the book).
11
12
  # Bird, Steven, Edward Loper and Ewan Klein: "Speech and Language Processing";
12
13
  # 2009, Pearson Education, Inc., ISBN 978-0135041963
@@ -37,7 +38,7 @@ engine.build_grammar do
37
38
  end
38
39
 
39
40
  ########################################
40
- # Step 2. Creating a lexicon
41
+ # Step 3. Creating a lexicon
41
42
  # To simplify things, lexicon is implemented as a Hash with pairs of the form:
42
43
  # word => terminal symbol name
43
44
  Lexicon = {
@@ -63,7 +64,7 @@ Lexicon = {
63
64
  }.freeze
64
65
 
65
66
  ########################################
66
- # Step 3. Creating a tokenizer
67
+ # Step 4. Creating a tokenizer
67
68
  # A tokenizer reads the input string and converts it into a sequence of tokens
68
69
  # Highly simplified tokenizer implementation.
69
70
  def tokenizer(aTextToParse)
@@ -79,13 +80,11 @@ end
79
80
  ########################################
80
81
  # Step 5. Parsing the input
81
82
  input_to_parse = 'John saw Mary'
82
- # input_to_parse = 'John saw Mary with a telescope'
83
- # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
84
83
  # Convert input text into a sequence of token objects...
85
84
  tokens = tokenizer(input_to_parse)
86
85
  result = engine.parse(tokens)
87
86
 
88
- puts "Parsing successful? #{result.success?}"
87
+ puts "Parsing '#{input_to_parse}' successful? #{result.success?}"
89
88
  unless result.success?
90
89
  puts result.failure_reason.message
91
90
  exit(1)
@@ -94,20 +93,38 @@ end
94
93
  ########################################
95
94
  # Step 6. Generating a parse tree from parse result
96
95
  ptree = engine.convert(result)
97
-
98
- # Let's create a parse tree visitor
99
96
  visitor = engine.ptree_visitor(ptree)
100
-
101
- # Let's create a formatter (i.e. visit event listener)
102
- # renderer = Rley::Formatter::Debug.new($stdout)
103
-
97
+ renderer = Rley::Formatter::Debug.new($stdout)
104
98
  # Let's create a formatter that will render the parse tree with characters
105
- renderer = Rley::Formatter::Asciitree.new($stdout)
99
+ # renderer = Rley::Formatter::Asciitree.new($stdout)
106
100
 
107
101
  # Let's create a formatter that will render the parse tree in labelled
108
102
  # bracket notation
109
103
  # renderer = Rley::Formatter::BracketNotation.new($stdout)
110
104
 
111
- # Subscribe the formatter to the visitor's event and launch the visit
112
105
  renderer.render(visitor)
106
+
107
+
108
+ ########################################
109
+ # Redoing Steps 5 and 6 with an ambiguous sentence
110
+ input_to_parse = 'John saw Mary with a telescope'
111
+ # input_to_parse = 'the dog saw a man in the park' # This one is also ambiguous
112
+ # Convert input text into a sequence of token objects...
113
+ tokens = tokenizer(input_to_parse)
114
+ result = engine.parse(tokens)
115
+
116
+ puts ''
117
+ puts "Parsing '#{input_to_parse}' successful? #{result.success?}"
118
+ unless result.success?
119
+ puts result.failure_reason.message
120
+ exit(1)
121
+ end
122
+
123
+ ########################################
124
+ # Step 6. Generating a parse forest from parse result
125
+ pforest = engine.to_pforest(result)
126
+ visitor = engine.pforest_visitor(pforest)
127
+ renderer = Rley::Formatter::Debug.new($stdout)
128
+ renderer.render(visitor)
129
+
113
130
  # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.6.07'.freeze
6
+ Version = '0.6.08'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -1,13 +1,22 @@
1
1
  require_relative './syntax/grammar_builder'
2
2
  require_relative './parser/gfg_earley_parser'
3
+ require_relative './parse_tree_visitor'
4
+ require_relative './parse_forest_visitor'
3
5
  require_relative './parse_rep/parse_tree_factory'
6
+ require_relative './parse_rep/parse_forest_factory'
4
7
 
5
8
  module Rley # This module is used as a namespace
9
+ # Configuration of an Rley::Engine instance.
6
10
  EngineConfig = Struct.new(
11
+ # @!attribute [r] parse_repr
12
+ # Indicates how the parse result must be represented
13
+ # @return [Symbol] allowed values are: :parse_tree, :parse_forest
7
14
  :parse_repr,
8
15
  :repr_builder,
9
16
  :diagnose
10
17
  ) do
18
+
19
+ # Constructor with default initialization.
11
20
  def initialize()
12
21
  super()
13
22
  self.parse_repr = :parse_tree
@@ -17,18 +26,22 @@ module Rley # This module is used as a namespace
17
26
  end
18
27
 
19
28
  # Implementation of the GoF Facade design pattern.
20
- # an Engine object provides a higher-level interface that shields
29
+ # An Engine object provides a higher-level interface that shields
21
30
  # Rley client code from the lower-level classes.
22
31
  class Engine
23
32
  # @!attribute [r] configuration
24
- # @return [EngineConfig] the engine's configuration
33
+ # @return [EngineConfig] the engine's configuration
25
34
  attr_reader :configuration
26
35
 
27
36
  # @!attribute [r] grammar
28
- # @return [Rley::Syntax::Grammar] the grammar of the language to parse
37
+ # @return [Rley::Syntax::Grammar] the grammar of the language to parse
29
38
  attr_reader :grammar
30
39
 
31
40
  # Constructor.
41
+ # @example Produce a parse forest
42
+ # Engine.new do |config|
43
+ # config.parse_repr = :parse_forest
44
+ # end
32
45
  def initialize()
33
46
  @configuration = EngineConfig.new
34
47
  yield configuration if block_given?
@@ -36,6 +49,17 @@ module Rley # This module is used as a namespace
36
49
 
37
50
  # Factory method.
38
51
  # @param aBlock [Proc, Lambda] Code block for creating the grammar.
52
+ # @return [Rley::Syntax::Grammar] the grammar of the language to parse.
53
+ # @example Grammar for array of integers
54
+ # instance = Engine.new
55
+ # instance.build_grammar do
56
+ # add_terminals('LBRACKET', 'RBRACKET', 'COMMA', 'INTEGER')
57
+ # add_production('start' => 'array')
58
+ # add_production('array' => 'LBRACKET elements RBRACKET')
59
+ # add_production('array' => 'LBRACKET RBRACKET')
60
+ # add_production('elements' => 'elements COMMA INTEGER')
61
+ # add_production('elements' => 'INTEGER')
62
+ # end
39
63
  def build_grammar(&aBlock)
40
64
  builder = Rley::Syntax::GrammarBuilder.new(&aBlock)
41
65
  @grammar = builder.grammar
@@ -43,6 +67,7 @@ module Rley # This module is used as a namespace
43
67
 
44
68
  # Use the given grammar.
45
69
  # @param aGrammar [Rley::Syntax::Grammar]
70
+ # @return [Rley::Syntax::Grammar] the grammar of the language to parse.
46
71
  def use_grammar(aGrammar)
47
72
  @grammar = aGrammar
48
73
  end
@@ -70,6 +95,7 @@ module Rley # This module is used as a namespace
70
95
  # Convert raw parse result into a more convenient representation
71
96
  # (parse tree or parse forest) as specified by the configuration.
72
97
  # @param aRawParse [Parser::GFGParsing]
98
+ # @return [Rley::PTree::ParseTree, Rley::SPPF::ParseForest]
73
99
  def convert(aRawParse)
74
100
  result = case configuration.parse_repr
75
101
  when :parse_tree
@@ -83,6 +109,7 @@ module Rley # This module is used as a namespace
83
109
 
84
110
  # Convert raw parse result into a parse tree representation
85
111
  # @param aRawParse [Parser::GFGParsing]
112
+ # @return [Rley::PTree::ParseTree]
86
113
  def to_ptree(aRawParse)
87
114
  factory = ParseRep::ParseTreeFactory.new(aRawParse)
88
115
  if configuration.repr_builder == :default
@@ -96,29 +123,31 @@ module Rley # This module is used as a namespace
96
123
 
97
124
  # Convert raw parse result into a parse forest representation
98
125
  # @param aRawParse [Parser::GFGParsing]
99
- # def to_pforest(aRawParse)
100
- # factory = ParseRep::ParseForestFactory.new(aRawParse)
101
- # if configuration.repr_builder == :default
102
- # result = factory.create(nil)
103
- # else
104
- # result = factory.create(configuration.repr_builder)
105
- # end
126
+ # @return [Rley::SPPF::ParseForest]
127
+ def to_pforest(aRawParse)
128
+ factory = ParseRep::ParseForestFactory.new(aRawParse)
129
+ if configuration.repr_builder == :default
130
+ result = factory.create(nil)
131
+ else
132
+ result = factory.create(configuration.repr_builder)
133
+ end
106
134
 
107
- # return result
108
- # end
135
+ return result
136
+ end
109
137
 
110
138
  # Build a visitor for the given parse tree
111
- # @param aPTree[PTree::ParseTree]
139
+ # @param aPTree [PTree::ParseTree]
112
140
  # @return [ParseTreeVisitor]
113
141
  def ptree_visitor(aPTree)
114
- return Rley::ParseTreeVisitor.new(aPTree)
142
+ return ParseTreeVisitor.new(aPTree)
115
143
  end
116
144
 
117
- # @param aPTree[SPPF::ParseForest]
145
+ # Build a visitor for the given parse forest
146
+ # @param aPForest [SPPF::ParseForest]
118
147
  # @return [ParseForestVisitor]
119
- # def pforest_visitor(aPForest)
120
- # return Rley::ParseForestVisitor.new(aPForest)
121
- # end
148
+ def pforest_visitor(aPForest)
149
+ return ParseForestVisitor.new(aPForest)
150
+ end
122
151
 
123
152
  protected
124
153
 
@@ -17,77 +17,27 @@ module Rley # This module is used as a namespace
17
17
  super(anIO)
18
18
  @indentation = 0
19
19
  end
20
-
21
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
22
- # Notification of a visit event: the visitor is about to visit the given
23
- # parse tree
24
- # @param _ptree [ParseTree]
25
- def before_ptree(_ptree)
26
- output_event(__method__, indentation)
27
- indent
28
- end
29
-
30
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
31
- # Notification of a visit event: the visitor is about to visit
32
- # a non-terminal node
33
- # @param _nonterm [NonTerminalNode]
34
- def before_non_terminal(_nonterm)
35
- output_event(__method__, indentation)
36
- indent
37
- end
38
-
39
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
40
- # Notification of a visit event: the visitor is about to visit
41
- # the children of a non-terminal node
42
- # @param _parent [NonTerminalNode]
43
- # @param _children [Array] array of children nodes
44
- def before_subnodes(_parent, _children)
45
- output_event(__method__, indentation)
46
- indent
47
- end
48
-
49
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
50
- # Notification of a visit event: the visitor is about to visit
51
- # a terminal node
52
- # @param _term [TerminalNode]
53
- def before_terminal(_term)
54
- output_event(__method__, indentation)
55
- end
56
-
57
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
58
- # Notification of a visit event: the visitor completed the visit of
59
- # a terminal node.
60
- # @param _term [TerminalNode]
61
- def after_terminal(_term)
62
- output_event(__method__, indentation)
63
- end
64
-
65
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
66
- # Notification of a visit event: the visitor completed the visit of
67
- # a non-terminal node
68
- # @param _nonterm [NonTerminalNode]
69
- def after_non_terminal(_nonterm)
70
- dedent
71
- output_event(__method__, indentation)
72
- end
73
-
74
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
75
- # Notification of a visit event: the visitor completed the visit of
76
- # the children of a non-terminal node.
77
- # @param _parent [NonTerminalNode]
78
- # @param _children [Array] array of children nodes
79
- def after_subnodes(_parent, _children)
80
- dedent
81
- output_event(__method__, indentation)
82
- end
83
-
84
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
85
- # Notification of a visit event: the visitor completed the visit
86
- # of the given parse tree
87
- # @param _ptree [ParseTree]
88
- def after_ptree(_ptree)
89
- dedent
90
- output_event(__method__, indentation)
20
+
21
+ # Indicates that this formatter accepts all visit events
22
+ # provided their names start with 'before_' or 'after_'
23
+ # @return [Boolean]
24
+ def accept_all
25
+ return true
26
+ end
27
+
28
+ # Ghost method pattern.
29
+ def method_missing(mth, *args)
30
+ mth_name = mth.to_s
31
+ case mth_name
32
+ when /^before_/
33
+ output_event(mth_name, indentation)
34
+ indent unless mth_name == 'before_terminal'
35
+ when /^after_/
36
+ dedent unless mth_name == 'after_terminal'
37
+ output_event(mth_name, indentation)
38
+ else
39
+ super(mth, args)
40
+ end
91
41
  end
92
42
 
93
43
  private
@@ -1,25 +1,56 @@
1
+ # require 'pry'
2
+ require 'prime'
3
+
1
4
  module Rley # This module is used as a namespace
5
+ module SPPF # This module is used as a namespace
6
+ # Monkey-patching
7
+ class CompositeNode
8
+ attr_reader(:signatures)
9
+
10
+ # Associate with each edge between this node and each subnode
11
+ # a unique prime number (called a signature).
12
+ def add_edge_signatures(prime_enumerator)
13
+ @signatures = subnodes.map { |_| prime_enumerator.next }
14
+ end
15
+
16
+ def signature_exist?()
17
+ @signatures.nil? ? false : true
18
+ end
19
+ end # class
20
+ end # module
21
+
2
22
  # A visitor class dedicated to the visit of a parse forest.
3
23
  # It combines the Visitor and Observer patterns.
4
24
  class ParseForestVisitor
5
- # Link to the parse forest to visit
25
+ # @return [SPPF::ParseForest] Link to the parse forest to visit
6
26
  attr_reader(:pforest)
7
27
 
8
- # List of objects that subscribed to the visit event notification.
28
+ # @return [Array<Object>]
29
+ # List of objects that subscribed to the visit event notification.
9
30
  attr_reader(:subscribers)
10
31
 
11
- # A Hash with pairs of the form: Node => node visit data
12
- attr_reader(:agenda)
32
+ # @return [Enumerator]
33
+ # Enumerator that generates a sequence of prime numbers
34
+ attr_reader(:prime_enum)
35
+
36
+ # @return [Array<SPPF::CompositeNode, Integer>]
37
+ # Stack of [node, path signature]
38
+ # path signature: an integer value that represents the set of edges used
39
+ # in traversal
40
+ attr_reader(:legs)
13
41
 
14
- # Indicates the kind of forest traversal to perform: :post_order, :pre-order
15
- attr_reader(:traversal)
42
+ # @return [Hash{SPPF::CompositeNode, Array<Integer>}]
43
+ # Keeps track of the path(s) from which a given node was accessed
44
+ attr_reader(:node_accesses)
16
45
 
17
46
  # Build a visitor for the given pforest.
18
- # @param aParseForest [ParseForest] the parse tree to visit.
19
- def initialize(aParseForest, aTraversalStrategy = :post_order)
47
+ # @param aParseForest [SPPF::ParseForest] the parse forest to visit.
48
+ def initialize(aParseForest)
20
49
  @pforest = aParseForest
21
50
  @subscribers = []
22
- @traversal = aTraversalStrategy
51
+ @prime_enum = Prime.instance.each
52
+ @legs = []
53
+ @node_accesses = Hash.new { |h, key| h[key] = Array.new }
23
54
  end
24
55
 
25
56
  # Add a subscriber for the visit event notifications.
@@ -47,16 +78,28 @@ module Rley # This module is used as a namespace
47
78
  end
48
79
 
49
80
  # Visit event. The visitor is about to visit the given non terminal node.
50
- # @param aNonTerminalNode [NonTerminalNode] the node to visit.
51
- def visit_nonterminal(aNonTerminalNode)
52
- if @traversal == :post_order
53
- broadcast(:before_non_terminal, aNonTerminalNode)
54
- traverse_children(aNonTerminalNode)
55
- else
56
- traverse_children(aNonTerminalNode)
57
- broadcast(:before_non_terminal, aNonTerminalNode)
81
+ # @param nonTerminalNd [NonTerminalNode] the node to visit.
82
+ def visit_nonterminal(nonTerminalNd)
83
+ broadcast(:before_non_terminal, nonTerminalNd)
84
+ unless nonTerminalNd.signature_exist?
85
+ nonTerminalNd.add_edge_signatures(prime_enum)
86
+ end
87
+ traverse_children(nonTerminalNd)
88
+ broadcast(:after_non_terminal, nonTerminalNd)
89
+ end
90
+
91
+ # TODO: control the logic of this method.
92
+ # Visit event. The visitor is visiting the
93
+ # given alternative node.
94
+ # @param alternativeNd [AlternativeNode] the alternative node to visit.
95
+ def visit_alternative(alternativeNd)
96
+ broadcast(:before_alternative, alternativeNd)
97
+ unless alternativeNd.signature_exist?
98
+ alternativeNd.add_edge_signatures(prime_enum)
58
99
  end
59
- broadcast(:after_non_terminal, aNonTerminalNode)
100
+
101
+ traverse_children(alternativeNd)
102
+ broadcast(:after_alternative, alternativeNd)
60
103
  end
61
104
 
62
105
  # Visit event. The visitor is visiting the
@@ -67,12 +110,20 @@ module Rley # This module is used as a namespace
67
110
  broadcast(:after_terminal, aTerminalNode)
68
111
  end
69
112
 
113
+ # Visit event. The visitor is visiting the
114
+ # given epsilon node.
115
+ # @param anEpsilonNode [EpsilonNode] the epsilon node to visit.
116
+ def visit_epsilon(anEpsilonNode)
117
+ broadcast(:before_epsilon, anEpsilonNode)
118
+ broadcast(:after_epsilon, anEpsilonNode)
119
+ end
120
+
70
121
  # Visit event. The visitor has completed its visit of the given
71
122
  # non-terminal node.
72
123
  # @param aNonTerminalNode [NonTerminalNode] the node to visit.
73
- def end_visit_nonterminal(aNonTerminalNode)
74
- broadcast(:after_non_terminal, aNonTerminalNode)
75
- end
124
+ # def end_visit_nonterminal(aNonTerminalNode)
125
+ # broadcast(:after_non_terminal, aNonTerminalNode)
126
+ # end
76
127
 
77
128
  # Visit event. The visitor has completed the visit of the pforest.
78
129
  # @param aParseForest [ParseForest] the pforest to visit.
@@ -87,24 +138,57 @@ module Rley # This module is used as a namespace
87
138
  # @param aParentNode [NonTerminalNode] the (non-terminal) parent node.
88
139
  def traverse_children(aParentNode)
89
140
  children = aParentNode.children
90
- broadcast(:before_children, aParentNode, children)
141
+ broadcast(:before_subnodes, aParentNode, children)
91
142
 
92
143
  # Let's proceed with the visit of children
93
- children.each { |a_node| a_node.accept(self) }
144
+ children.each_with_index do |a_node, i|
145
+ edge_sign = aParentNode.signatures[i]
146
+ if a_node.kind_of?(SPPF::CompositeNode)
147
+ push_node(a_node, edge_sign)
148
+ access_paths = node_accesses[a_node]
149
+ last_path = legs.last[-1]
150
+ path_reused = access_paths.include?(last_path)
151
+ unless path_reused
152
+ node_accesses[a_node].push(last_path)
153
+ a_node.accept(self)
154
+ end
155
+ pop_node
156
+ else
157
+ a_node.accept(self)
158
+ end
159
+ end
94
160
 
95
- broadcast(:after_children, aParentNode, children)
161
+ broadcast(:after_subnodes, aParentNode, children)
96
162
  end
97
163
 
98
164
  # Send a notification to all subscribers.
99
165
  # @param msg [Symbol] event to notify
100
166
  # @param args [Array] arguments of the notification.
101
167
  def broadcast(msg, *args)
102
- subscribers.each do |a_subscriber|
103
- next unless a_subscriber.respond_to?(msg)
104
- a_subscriber.send(msg, *args)
168
+ subscribers.each do |subscr|
169
+ next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)
170
+ subscr.send(msg, *args)
171
+ end
172
+ end
173
+
174
+ def push_node(aCompositeNode, anEdgeSignature)
175
+ if legs.empty?
176
+ legs << [aCompositeNode, anEdgeSignature]
177
+ else
178
+ path_signature = legs.last[-1]
179
+ # binding.pry if anEdgeSignature == 37 && path_signature != 230
180
+ if (path_signature % anEdgeSignature).zero?
181
+ legs << [aCompositeNode, path_signature]
182
+ else
183
+ legs << [aCompositeNode, path_signature * anEdgeSignature]
184
+ end
105
185
  end
106
186
  end
187
+
188
+ def pop_node
189
+ return if legs.empty?
190
+ legs.pop
191
+ end
107
192
  end # class
108
193
  end # module
109
-
110
194
  # End of file