rley 0.6.07 → 0.6.08

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 25c9a2612411bdb3767901c9b21d1e38f5279c94
4
- data.tar.gz: 9c3ffe74bda63f31d40421b74a6f976ac3db49ad
3
+ metadata.gz: 552a2a05bcb871b84b8d86b7c0bab8de2d776754
4
+ data.tar.gz: 0f80fc6e91cbe752393e04d878afeb6646ea0d27
5
5
  SHA512:
6
- metadata.gz: d289304c2a693fb1eb14747751e205bc23cccb9941c5f416af1966e6b3fcf3d2ff58f52ae6d56a2cae40752bcccbc2cd9a2c9015cc555a5ad3ef1890306f15e9
7
- data.tar.gz: c59a9d80ff3b941efabed869de7e5d16b6ebc597c3a63d914de3976aa3c709a6cfde0630c8c931e14ae559e4c3fcbc5dddb935fa2d58c12c71fafad2fc817f4e
6
+ metadata.gz: 9cc73045f3e36363b201b61262ba9e8bac5154585033d8a1a48b0a45cdb259050bebdb42df747f0254276930e58488d740ab7a98d0659da485deb0ee22898798
7
+ data.tar.gz: 129611092776086798e54391535b50c9bdb673a8a4342fca81a3a8c66628113cf83aae1a872a1b1969845502347707e6994ed99e0a7ebeb25942acf8b0425eb4
@@ -1,12 +1,13 @@
1
1
  require 'rley' # Load Rley library
2
2
 
3
3
  ########################################
4
- # Step 0. Instantiate facade object of Rley library.
4
+ # Step 1. Instantiate facade object of Rley library.
5
5
  # It provides a unified, higher-level interface
6
6
  engine = Rley::Engine.new
7
7
 
8
+
8
9
  ########################################
9
- # Step 1. Define a grammar for a nano English-like language
10
+ # Step 2. Define a grammar for a nano English-like language
10
11
  # based on an example from the Jurafsky & Martin book (chapter 8 of the book).
11
12
  # Jurafsky, Daniel and James H. Martin: "Speech and Language Processing";
12
13
  # 2009, Pearson Education, Inc., ISBN 978-0135041963
@@ -37,7 +38,7 @@ engine.build_grammar do
37
38
  end
38
39
 
39
40
  ########################################
40
- # Step 2. Creating a lexicon
41
+ # Step 3. Creating a lexicon
41
42
  # To simplify things, lexicon is implemented as a Hash with pairs of the form:
42
43
  # word => terminal symbol name
43
44
  Lexicon = {
@@ -63,7 +64,7 @@ Lexicon = {
63
64
  }.freeze
64
65
 
65
66
  ########################################
66
- # Step 3. Creating a tokenizer
67
+ # Step 4. Creating a tokenizer
67
68
  # A tokenizer reads the input string and converts it into a sequence of tokens
68
69
  # Highly simplified tokenizer implementation.
69
70
  def tokenizer(aTextToParse)
@@ -79,13 +80,11 @@ end
79
80
  ########################################
80
81
  # Step 5. Parsing the input
81
82
  input_to_parse = 'John saw Mary'
82
- # input_to_parse = 'John saw Mary with a telescope'
83
- # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
84
83
  # Convert input text into a sequence of token objects...
85
84
  tokens = tokenizer(input_to_parse)
86
85
  result = engine.parse(tokens)
87
86
 
88
- puts "Parsing successful? #{result.success?}"
87
+ puts "Parsing '#{input_to_parse}' successful? #{result.success?}"
89
88
  unless result.success?
90
89
  puts result.failure_reason.message
91
90
  exit(1)
@@ -94,20 +93,38 @@ end
94
93
  ########################################
95
94
  # Step 6. Generating a parse tree from parse result
96
95
  ptree = engine.convert(result)
97
-
98
- # Let's create a parse tree visitor
99
96
  visitor = engine.ptree_visitor(ptree)
100
-
101
- # Let's create a formatter (i.e. visit event listener)
102
- # renderer = Rley::Formatter::Debug.new($stdout)
103
-
97
+ renderer = Rley::Formatter::Debug.new($stdout)
104
98
  # Let's create a formatter that will render the parse tree with characters
105
- renderer = Rley::Formatter::Asciitree.new($stdout)
99
+ # renderer = Rley::Formatter::Asciitree.new($stdout)
106
100
 
107
101
  # Let's create a formatter that will render the parse tree in labelled
108
102
  # bracket notation
109
103
  # renderer = Rley::Formatter::BracketNotation.new($stdout)
110
104
 
111
- # Subscribe the formatter to the visitor's event and launch the visit
112
105
  renderer.render(visitor)
106
+
107
+
108
+ ########################################
109
+ # Redoing Steps 5 and 6 with an ambiguous sentence
110
+ input_to_parse = 'John saw Mary with a telescope'
111
+ # input_to_parse = 'the dog saw a man in the park' # This one is also ambiguous
112
+ # Convert input text into a sequence of token objects...
113
+ tokens = tokenizer(input_to_parse)
114
+ result = engine.parse(tokens)
115
+
116
+ puts ''
117
+ puts "Parsing '#{input_to_parse}' successful? #{result.success?}"
118
+ unless result.success?
119
+ puts result.failure_reason.message
120
+ exit(1)
121
+ end
122
+
123
+ ########################################
124
+ # Step 6. Generating a parse forest from parse result
125
+ pforest = engine.to_pforest(result)
126
+ visitor = engine.pforest_visitor(pforest)
127
+ renderer = Rley::Formatter::Debug.new($stdout)
128
+ renderer.render(visitor)
129
+
113
130
  # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.6.07'.freeze
6
+ Version = '0.6.08'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -1,13 +1,22 @@
1
1
  require_relative './syntax/grammar_builder'
2
2
  require_relative './parser/gfg_earley_parser'
3
+ require_relative './parse_tree_visitor'
4
+ require_relative './parse_forest_visitor'
3
5
  require_relative './parse_rep/parse_tree_factory'
6
+ require_relative './parse_rep/parse_forest_factory'
4
7
 
5
8
  module Rley # This module is used as a namespace
9
+ # Configuration of an Rley::Engine instance.
6
10
  EngineConfig = Struct.new(
11
+ # @!attribute [r] parse_repr
12
+ # Indicates how the parse result must be represented
13
+ # @return [Symbol] allowed values are: :parse_tree, :parse_forest
7
14
  :parse_repr,
8
15
  :repr_builder,
9
16
  :diagnose
10
17
  ) do
18
+
19
+ # Constructor with default initialization.
11
20
  def initialize()
12
21
  super()
13
22
  self.parse_repr = :parse_tree
@@ -17,18 +26,22 @@ module Rley # This module is used as a namespace
17
26
  end
18
27
 
19
28
  # Implementation of the GoF Facade design pattern.
20
- # an Engine object provides a higher-level interface that shields
29
+ # An Engine object provides a higher-level interface that shields
21
30
  # Rley client code from the lower-level classes.
22
31
  class Engine
23
32
  # @!attribute [r] configuration
24
- # @return [EngineConfig] the engine's configuration
33
+ # @return [EngineConfig] the engine's configuration
25
34
  attr_reader :configuration
26
35
 
27
36
  # @!attribute [r] grammar
28
- # @return [Rley::Syntax::Grammar] the grammar of the language to parse
37
+ # @return [Rley::Syntax::Grammar] the grammar of the language to parse
29
38
  attr_reader :grammar
30
39
 
31
40
  # Constructor.
41
+ # @example Produce a parse forest
42
+ # Engine.new do |config|
43
+ # config.parse_repr = :parse_forest
44
+ # end
32
45
  def initialize()
33
46
  @configuration = EngineConfig.new
34
47
  yield configuration if block_given?
@@ -36,6 +49,17 @@ module Rley # This module is used as a namespace
36
49
 
37
50
  # Factory method.
38
51
  # @param aBlock [Proc, Lambda] Code block for creating the grammar.
52
+ # @return [Rley::Syntax::Grammar] the grammar of the language to parse.
53
+ # @example Grammar for array of integers
54
+ # instance = Engine.new
55
+ # instance.build_grammar do
56
+ # add_terminals('LBRACKET', 'RBRACKET', 'COMMA', 'INTEGER')
57
+ # add_production('start' => 'array')
58
+ # add_production('array' => 'LBRACKET elements RBRACKET')
59
+ # add_production('array' => 'LBRACKET RBRACKET')
60
+ # add_production('elements' => 'elements COMMA INTEGER')
61
+ # add_production('elements' => 'INTEGER')
62
+ # end
39
63
  def build_grammar(&aBlock)
40
64
  builder = Rley::Syntax::GrammarBuilder.new(&aBlock)
41
65
  @grammar = builder.grammar
@@ -43,6 +67,7 @@ module Rley # This module is used as a namespace
43
67
 
44
68
  # Use the given grammar.
45
69
  # @param aGrammar [Rley::Syntax::Grammar]
70
+ # @return [Rley::Syntax::Grammar] the grammar of the language to parse.
46
71
  def use_grammar(aGrammar)
47
72
  @grammar = aGrammar
48
73
  end
@@ -70,6 +95,7 @@ module Rley # This module is used as a namespace
70
95
  # Convert raw parse result into a more convenient representation
71
96
  # (parse tree or parse forest) as specified by the configuration.
72
97
  # @param aRawParse [Parser::GFGParsing]
98
+ # @return [Rley::PTree::ParseTree, Rley::SPPF::ParseForest]
73
99
  def convert(aRawParse)
74
100
  result = case configuration.parse_repr
75
101
  when :parse_tree
@@ -83,6 +109,7 @@ module Rley # This module is used as a namespace
83
109
 
84
110
  # Convert raw parse result into a parse tree representation
85
111
  # @param aRawParse [Parser::GFGParsing]
112
+ # @return [Rley::PTree::ParseTree]
86
113
  def to_ptree(aRawParse)
87
114
  factory = ParseRep::ParseTreeFactory.new(aRawParse)
88
115
  if configuration.repr_builder == :default
@@ -96,29 +123,31 @@ module Rley # This module is used as a namespace
96
123
 
97
124
  # Convert raw parse result into a parse forest representation
98
125
  # @param aRawParse [Parser::GFGParsing]
99
- # def to_pforest(aRawParse)
100
- # factory = ParseRep::ParseForestFactory.new(aRawParse)
101
- # if configuration.repr_builder == :default
102
- # result = factory.create(nil)
103
- # else
104
- # result = factory.create(configuration.repr_builder)
105
- # end
126
+ # @return [Rley::SPPF::ParseForest]
127
+ def to_pforest(aRawParse)
128
+ factory = ParseRep::ParseForestFactory.new(aRawParse)
129
+ if configuration.repr_builder == :default
130
+ result = factory.create(nil)
131
+ else
132
+ result = factory.create(configuration.repr_builder)
133
+ end
106
134
 
107
- # return result
108
- # end
135
+ return result
136
+ end
109
137
 
110
138
  # Build a visitor for the given parse tree
111
- # @param aPTree[PTree::ParseTree]
139
+ # @param aPTree [PTree::ParseTree]
112
140
  # @return [ParseTreeVisitor]
113
141
  def ptree_visitor(aPTree)
114
- return Rley::ParseTreeVisitor.new(aPTree)
142
+ return ParseTreeVisitor.new(aPTree)
115
143
  end
116
144
 
117
- # @param aPTree[SPPF::ParseForest]
145
+ # Build a visitor for the given parse forest
146
+ # @param aPForest [SPPF::ParseForest]
118
147
  # @return [ParseForestVisitor]
119
- # def pforest_visitor(aPForest)
120
- # return Rley::ParseForestVisitor.new(aPForest)
121
- # end
148
+ def pforest_visitor(aPForest)
149
+ return ParseForestVisitor.new(aPForest)
150
+ end
122
151
 
123
152
  protected
124
153
 
@@ -17,77 +17,27 @@ module Rley # This module is used as a namespace
17
17
  super(anIO)
18
18
  @indentation = 0
19
19
  end
20
-
21
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
22
- # Notification of a visit event: the visitor is about to visit the given
23
- # parse tree
24
- # @param _ptree [ParseTree]
25
- def before_ptree(_ptree)
26
- output_event(__method__, indentation)
27
- indent
28
- end
29
-
30
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
31
- # Notification of a visit event: the visitor is about to visit
32
- # a non-terminal node
33
- # @param _nonterm [NonTerminalNode]
34
- def before_non_terminal(_nonterm)
35
- output_event(__method__, indentation)
36
- indent
37
- end
38
-
39
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
40
- # Notification of a visit event: the visitor is about to visit
41
- # the children of a non-terminal node
42
- # @param _parent [NonTerminalNode]
43
- # @param _children [Array] array of children nodes
44
- def before_subnodes(_parent, _children)
45
- output_event(__method__, indentation)
46
- indent
47
- end
48
-
49
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
50
- # Notification of a visit event: the visitor is about to visit
51
- # a terminal node
52
- # @param _term [TerminalNode]
53
- def before_terminal(_term)
54
- output_event(__method__, indentation)
55
- end
56
-
57
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
58
- # Notification of a visit event: the visitor completed the visit of
59
- # a terminal node.
60
- # @param _term [TerminalNode]
61
- def after_terminal(_term)
62
- output_event(__method__, indentation)
63
- end
64
-
65
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
66
- # Notification of a visit event: the visitor completed the visit of
67
- # a non-terminal node
68
- # @param _nonterm [NonTerminalNode]
69
- def after_non_terminal(_nonterm)
70
- dedent
71
- output_event(__method__, indentation)
72
- end
73
-
74
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
75
- # Notification of a visit event: the visitor completed the visit of
76
- # the children of a non-terminal node.
77
- # @param _parent [NonTerminalNode]
78
- # @param _children [Array] array of children nodes
79
- def after_subnodes(_parent, _children)
80
- dedent
81
- output_event(__method__, indentation)
82
- end
83
-
84
- # Method called by a ParseTreeVisitor to which the formatter subscribed.
85
- # Notification of a visit event: the visitor completed the visit
86
- # of the given parse tree
87
- # @param _ptree [ParseTree]
88
- def after_ptree(_ptree)
89
- dedent
90
- output_event(__method__, indentation)
20
+
21
+ # Indicates that this formatter accepts all visit events
22
+ # provided their names start with 'before_' or 'after_'
23
+ # @return [Boolean]
24
+ def accept_all
25
+ return true
26
+ end
27
+
28
+ # Ghost method pattern.
29
+ def method_missing(mth, *args)
30
+ mth_name = mth.to_s
31
+ case mth_name
32
+ when /^before_/
33
+ output_event(mth_name, indentation)
34
+ indent unless mth_name == 'before_terminal'
35
+ when /^after_/
36
+ dedent unless mth_name == 'after_terminal'
37
+ output_event(mth_name, indentation)
38
+ else
39
+ super(mth, args)
40
+ end
91
41
  end
92
42
 
93
43
  private
@@ -1,25 +1,56 @@
1
+ # require 'pry'
2
+ require 'prime'
3
+
1
4
  module Rley # This module is used as a namespace
5
+ module SPPF # This module is used as a namespace
6
+ # Monkey-patching
7
+ class CompositeNode
8
+ attr_reader(:signatures)
9
+
10
+ # Associate with each edge between this node and each subnode
11
+ # a unique prime number (called a signature).
12
+ def add_edge_signatures(prime_enumerator)
13
+ @signatures = subnodes.map { |_| prime_enumerator.next }
14
+ end
15
+
16
+ def signature_exist?()
17
+ @signatures.nil? ? false : true
18
+ end
19
+ end # class
20
+ end # module
21
+
2
22
  # A visitor class dedicated to the visit of a parse forest.
3
23
  # It combines the Visitor and Observer patterns.
4
24
  class ParseForestVisitor
5
- # Link to the parse forest to visit
25
+ # @return [SPPF::ParseForest] Link to the parse forest to visit
6
26
  attr_reader(:pforest)
7
27
 
8
- # List of objects that subscribed to the visit event notification.
28
+ # @return [Array<Object>]
29
+ # List of objects that subscribed to the visit event notification.
9
30
  attr_reader(:subscribers)
10
31
 
11
- # A Hash with pairs of the form: Node => node visit data
12
- attr_reader(:agenda)
32
+ # @return [Enumerator]
33
+ # Enumerator that generates a sequence of prime numbers
34
+ attr_reader(:prime_enum)
35
+
36
+ # @return [Array<SPPF::CompositeNode, Integer>]
37
+ # Stack of [node, path signature]
38
+ # path signature: an integer value that represents the set of edges used
39
+ # in traversal
40
+ attr_reader(:legs)
13
41
 
14
- # Indicates the kind of forest traversal to perform: :post_order, :pre-order
15
- attr_reader(:traversal)
42
+ # @return [Hash{SPPF::CompositeNode, Array<Integer>}]
43
+ # Keep track of the path(s) from which a given node was accessed
44
+ attr_reader(:node_accesses)
16
45
 
17
46
  # Build a visitor for the given pforest.
18
- # @param aParseForest [ParseForest] the parse tree to visit.
19
- def initialize(aParseForest, aTraversalStrategy = :post_order)
47
+ # @param aParseForest [SPPF::ParseForest] the parse forest to visit.
48
+ def initialize(aParseForest)
20
49
  @pforest = aParseForest
21
50
  @subscribers = []
22
- @traversal = aTraversalStrategy
51
+ @prime_enum = Prime.instance.each
52
+ @legs = []
53
+ @node_accesses = Hash.new { |h, key| h[key] = Array.new }
23
54
  end
24
55
 
25
56
  # Add a subscriber for the visit event notifications.
@@ -47,16 +78,28 @@ module Rley # This module is used as a namespace
47
78
  end
48
79
 
49
80
  # Visit event. The visitor is about to visit the given non terminal node.
50
- # @param aNonTerminalNode [NonTerminalNode] the node to visit.
51
- def visit_nonterminal(aNonTerminalNode)
52
- if @traversal == :post_order
53
- broadcast(:before_non_terminal, aNonTerminalNode)
54
- traverse_children(aNonTerminalNode)
55
- else
56
- traverse_children(aNonTerminalNode)
57
- broadcast(:before_non_terminal, aNonTerminalNode)
81
+ # @param nonTerminalNd [NonTerminalNode] the node to visit.
82
+ def visit_nonterminal(nonTerminalNd)
83
+ broadcast(:before_non_terminal, nonTerminalNd)
84
+ unless nonTerminalNd.signature_exist?
85
+ nonTerminalNd.add_edge_signatures(prime_enum)
86
+ end
87
+ traverse_children(nonTerminalNd)
88
+ broadcast(:after_non_terminal, nonTerminalNd)
89
+ end
90
+
91
+ # TODO: verify the logic of this method.
92
+ # Visit event. The visitor is visiting the
93
+ # given alternative node.
94
+ # @param alternativeNd [AlternativeNode] the alternative node to visit.
95
+ def visit_alternative(alternativeNd)
96
+ broadcast(:before_alternative, alternativeNd)
97
+ unless alternativeNd.signature_exist?
98
+ alternativeNd.add_edge_signatures(prime_enum)
58
99
  end
59
- broadcast(:after_non_terminal, aNonTerminalNode)
100
+
101
+ traverse_children(alternativeNd)
102
+ broadcast(:after_alternative, alternativeNd)
60
103
  end
61
104
 
62
105
  # Visit event. The visitor is visiting the
@@ -67,12 +110,20 @@ module Rley # This module is used as a namespace
67
110
  broadcast(:after_terminal, aTerminalNode)
68
111
  end
69
112
 
113
+ # Visit event. The visitor is visiting the
114
+ # given epsilon node.
115
+ # @param anEpsilonNode [EpsilonNode] the epsilon node to visit.
116
+ def visit_epsilon(anEpsilonNode)
117
+ broadcast(:before_epsilon, anEpsilonNode)
118
+ broadcast(:after_epsilon, anEpsilonNode)
119
+ end
120
+
70
121
  # Visit event. The visitor has completed its visit of the given
71
122
  # non-terminal node.
72
123
  # @param aNonTerminalNode [NonTerminalNode] the node to visit.
73
- def end_visit_nonterminal(aNonTerminalNode)
74
- broadcast(:after_non_terminal, aNonTerminalNode)
75
- end
124
+ # def end_visit_nonterminal(aNonTerminalNode)
125
+ # broadcast(:after_non_terminal, aNonTerminalNode)
126
+ # end
76
127
 
77
128
  # Visit event. The visitor has completed the visit of the pforest.
78
129
  # @param aParseForest [ParseForest] the pforest to visit.
@@ -87,24 +138,57 @@ module Rley # This module is used as a namespace
87
138
  # @param aParentNode [NonTerminalNode] the (non-terminal) parent node.
88
139
  def traverse_children(aParentNode)
89
140
  children = aParentNode.children
90
- broadcast(:before_children, aParentNode, children)
141
+ broadcast(:before_subnodes, aParentNode, children)
91
142
 
92
143
  # Let's proceed with the visit of children
93
- children.each { |a_node| a_node.accept(self) }
144
+ children.each_with_index do |a_node, i|
145
+ edge_sign = aParentNode.signatures[i]
146
+ if a_node.kind_of?(SPPF::CompositeNode)
147
+ push_node(a_node, edge_sign)
148
+ access_paths = node_accesses[a_node]
149
+ last_path = legs.last[-1]
150
+ path_reused = access_paths.include?(last_path)
151
+ unless path_reused
152
+ node_accesses[a_node].push(last_path)
153
+ a_node.accept(self)
154
+ end
155
+ pop_node
156
+ else
157
+ a_node.accept(self)
158
+ end
159
+ end
94
160
 
95
- broadcast(:after_children, aParentNode, children)
161
+ broadcast(:after_subnodes, aParentNode, children)
96
162
  end
97
163
 
98
164
  # Send a notification to all subscribers.
99
165
  # @param msg [Symbol] event to notify
100
166
  # @param args [Array] arguments of the notification.
101
167
  def broadcast(msg, *args)
102
- subscribers.each do |a_subscriber|
103
- next unless a_subscriber.respond_to?(msg)
104
- a_subscriber.send(msg, *args)
168
+ subscribers.each do |subscr|
169
+ next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)
170
+ subscr.send(msg, *args)
171
+ end
172
+ end
173
+
174
+ def push_node(aCompositeNode, anEdgeSignature)
175
+ if legs.empty?
176
+ legs << [aCompositeNode, anEdgeSignature]
177
+ else
178
+ path_signature = legs.last[-1]
179
+ # binding.pry if anEdgeSignature == 37 && path_signature != 230
180
+ if (path_signature % anEdgeSignature).zero?
181
+ legs << [aCompositeNode, path_signature]
182
+ else
183
+ legs << [aCompositeNode, path_signature * anEdgeSignature]
184
+ end
105
185
  end
106
186
  end
187
+
188
+ def pop_node
189
+ return if legs.empty?
190
+ legs.pop
191
+ end
107
192
  end # class
108
193
  end # module
109
-
110
194
  # End of file