rley 0.3.09 → 0.3.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d22d11b51c4c72d3d8230c775b797afae04c4e8f
4
- data.tar.gz: e87ba3a3beeadd40a4447281c904f8dba4c8dc57
3
+ metadata.gz: 20ceec50ffa7359dbf635a00184c604654cdf9bb
4
+ data.tar.gz: 76e9432f62d88127257c66277e21be37fd1b00bd
5
5
  SHA512:
6
- metadata.gz: 446344010aafea29682d90bd71bdb73f1f189e694fb91907136e00cce015b098e32fb2679af3a0b6ae772d41892478b0a9270c43ebe77cee2c47cb319b7fd0aa
7
- data.tar.gz: 9509ea071e10c002e70af379089565b3b28269fb09c1f1d04ee9a04ec78dab7ac916e4f90602bbef5bc3eff8ea0952143faa5ec1d6c54277b60e7db810dab45d
6
+ metadata.gz: a80c8e38ede7dd3908a52b73fd8fa13fc7087537d8272c8d648764902cb29f0d5db23c157a2b704eb66e2c8322d7e2ee4822dfd36fadb25ac22e6ec9f517f3eb
7
+ data.tar.gz: 9d23a806c510790c6fcc19566520e11c9292d642b2debe20784c4a884d646c173a789ae3060345ad659698554fce6748078ef7d5cc2353f7e1c3495ecd156104
@@ -1,3 +1,12 @@
1
+ ### 0.3.10 / 2016-12-04
2
+ * [NEW] Method `ParseForest#ambiguous?`. Indicates whether the parse is ambiguous.
3
+ * [CHANGE] File `README.md` updated with new grammar builder syntax & typo fixes.
4
+ * [CHANGE] Method `GrammarBuilder#initialize`: Accepts a block argument that allows lighter construction.
5
+
6
+ ### 0.3.09 / 2016-11-27
7
+ * [CHANGE] File `README.md` fully rewritten and added an example.
8
+ * [CHANGE] Directory `examples` completely reorganized.
9
+
1
10
  ### 0.3.09 / 2016-11-27
2
11
  * [CHANGE] File `README.md` fully rewritten and added an example.
3
12
  * [CHANGE] Directory `examples` completely reorganized.
data/README.md CHANGED
@@ -9,7 +9,7 @@
9
9
  [Rley](https://github.com/famished-tiger/Rley)
10
10
  ======
11
11
 
12
- A Ruby library for constructing general parsers for _any_ context-free languages.
12
+ A Ruby library for constructing general parsers for _any_ context-free language.
13
13
 
14
14
 
15
15
  What is Rley?
@@ -69,7 +69,7 @@ The tour is organized into the following steps:
69
69
  1. [Defining the language grammar](#defining-the-language-grammar)
70
70
  2. [Creating a lexicon](#creating-a-lexicon)
71
71
  3. [Creating a tokenizer](#creating-a-tokenizer)
72
- 4. [Building the parser](building-the-parser)
72
+ 4. [Building the parser](#building-the-parser)
73
73
  5. [Parsing some input](#parsing-some-input)
74
74
  6. [Generating the parse forest](#generating-the-parse-forest)
75
75
 
@@ -84,21 +84,20 @@ The subset of English grammar is based on an example from the NLTK book.
84
84
  require 'rley' # Load Rley library
85
85
 
86
86
  # Instantiate a builder object that will build the grammar for us
87
- builder = Rley::Syntax::GrammarBuilder.new
88
-
89
- # Next 2 lines we define the terminal symbols (=word categories in the lexicon)
90
- builder.add_terminals('Noun', 'Proper-Noun', 'Verb')
91
- builder.add_terminals('Determiner', 'Preposition')
92
-
93
- # Here we define the productions (= grammar rules)
94
- builder.add_production('S' => %w[NP VP])
95
- builder.add_production('NP' => 'Proper-Noun')
96
- builder.add_production('NP' => %w[Determiner Noun])
97
- builder.add_production('NP' => %w[Determiner Noun PP])
98
- builder.add_production('VP' => %w[Verb NP])
99
- builder.add_production('VP' => %w[Verb NP PP])
100
- builder.add_production('PP' => %w[Preposition NP])
101
-
87
+ builder = Rley::Syntax::GrammarBuilder.new do
88
+ # Terminal symbols (= word categories in lexicon)
89
+ add_terminals('Noun', 'Proper-Noun', 'Verb')
90
+ add_terminals('Determiner', 'Preposition')
91
+
92
+ # Here we define the productions (= grammar rules)
93
+ rule 'S' => %w[NP VP]
94
+ rule 'NP' => 'Proper-Noun'
95
+ rule 'NP' => %w[Determiner Noun]
96
+ rule 'NP' => %w[Determiner Noun PP]
97
+ rule 'VP' => %w[Verb NP]
98
+ rule 'VP' => %w[Verb NP PP]
99
+ rule 'PP' => %w[Preposition NP]
100
+ end
102
101
  # And now, let's build the grammar...
103
102
  grammar = builder.grammar
104
103
  ```
@@ -208,8 +207,6 @@ Here are a few other ones:
208
207
  ## Thanks to:
209
208
  * Professor Keshav Pingali, one of the creators of the Grammar Flow Graph parsing approach, for his encouraging e-mail exchanges.
210
209
 
211
- ---
212
-
213
210
  Copyright
214
211
  ---------
215
212
  Copyright (c) 2014-2016, Dimitri Geshef.
@@ -9,20 +9,20 @@ require 'rley' # Load Rley library
9
9
  # with a very simplified syntax.
10
10
 
11
11
  # Instantiate a builder object that will build the grammar for us
12
- builder = Rley::Syntax::GrammarBuilder.new
12
+ builder = Rley::Syntax::GrammarBuilder.new do
13
+ # Next 2 lines we define the terminal symbols (=word categories in the lexicon)
14
+ add_terminals('Noun', 'Proper-Noun', 'Verb')
15
+ add_terminals('Determiner', 'Preposition')
13
16
 
14
- # Next 2 lines we define the terminal symbols (=word categories in the lexicon)
15
- builder.add_terminals('Noun', 'Proper-Noun', 'Verb')
16
- builder.add_terminals('Determiner', 'Preposition')
17
-
18
- # Here we define the productions (= grammar rules)
19
- builder.add_production('S' => %w[NP VP])
20
- builder.add_production('NP' => 'Proper-Noun')
21
- builder.add_production('NP' => %w[Determiner Noun])
22
- builder.add_production('NP' => %w[Determiner Noun PP])
23
- builder.add_production('VP' => %w[Verb NP])
24
- builder.add_production('VP' => %w[Verb NP PP])
25
- builder.add_production('PP' => %w[Preposition NP])
17
+ # Here we define the productions (= grammar rules)
18
+ rule 'S' => %w[NP VP]
19
+ rule 'NP' => 'Proper-Noun'
20
+ rule 'NP' => %w[Determiner Noun]
21
+ rule 'NP' => %w[Determiner Noun PP]
22
+ rule 'VP' => %w[Verb NP]
23
+ rule 'VP' => %w[Verb NP PP]
24
+ rule 'PP' => %w[Preposition NP]
25
+ end
26
26
 
27
27
  # And now, let's build the grammar...
28
28
  grammar = builder.grammar
@@ -70,8 +70,6 @@ def tokenizer(aTextToParse, aGrammar)
70
70
  return tokens
71
71
  end
72
72
 
73
- More realistic NLP will will most probably
74
-
75
73
  ########################################
76
74
  # Step 4. Create a parser for that grammar
77
75
  # Easy with Rley...
@@ -80,6 +78,7 @@ parser = Rley::Parser::GFGEarleyParser.new(grammar)
80
78
  ########################################
81
79
  # Step 5. Parsing the input
82
80
  input_to_parse = 'John saw Mary with a telescope'
81
+ # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
83
82
  # Convert input text into a sequence of token objects...
84
83
  tokens = tokenizer(input_to_parse, grammar)
85
84
  result = parser.parse(tokens)
@@ -90,3 +89,4 @@ puts "Parsing successful? #{result.success?}" # => Parsing successful? true
90
89
  # Step 6. Generating the parse forest
91
90
  pforest = result.parse_forest
92
91
 
92
+ puts "Parsing ambiguous? #{pforest.ambiguous?}" # => Parsing ambiguous? false
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.3.09'.freeze
6
+ Version = '0.3.10'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -29,7 +29,7 @@ module Rley # This module is used as a namespace
29
29
  # A hash with pairs of the form: visited parse entry => forest node
30
30
  attr_reader(:entry2node)
31
31
 
32
- # A hash with pairs of the form:
32
+ # A hash with pairs of the form:
33
33
  # parent end entry => path to alternative node
34
34
  # This is needed for synchronizing backtracking
35
35
  attr_reader(:entry2path_to_alt)
@@ -89,12 +89,12 @@ module Rley # This module is used as a namespace
89
89
 
90
90
 
91
91
  when :revisit
92
- # Retrieve the already existing node corresponding
92
+ # Retrieve the already existing node corresponding
93
93
  # to re-visited entry
94
94
  popular = @entry2node[anEntry]
95
-
95
+
96
96
  # Share with parent (if needed)...
97
- children = curr_parent.subnodes
97
+ children = curr_parent.subnodes
98
98
  curr_parent.add_subnode(popular) unless children.include? popular
99
99
 
100
100
  else
@@ -105,7 +105,7 @@ module Rley # This module is used as a namespace
105
105
 
106
106
  def process_item_entry(anEvent, anEntry, anIndex)
107
107
  case anEvent
108
- when :visit
108
+ when :visit
109
109
  if anEntry.exit_entry?
110
110
  # Previous entry was an end entry (X. pattern)
111
111
  # Does the previous entry have multiple antecedents?
@@ -119,7 +119,7 @@ module Rley # This module is used as a namespace
119
119
  create_alternative_node(anEntry)
120
120
  end
121
121
  end
122
-
122
+
123
123
  # Does this entry have multiple antecedents?
124
124
  if anEntry.antecedents.size > 1
125
125
  # Store current path for later backtracking
@@ -129,7 +129,7 @@ module Rley # This module is used as a namespace
129
129
  # curr_parent.refinement = :or
130
130
 
131
131
  create_alternative_node(anEntry)
132
- end
132
+ end
133
133
 
134
134
  # Retrieve the grammar symbol before the dot (if any)
135
135
  prev_symbol = anEntry.prev_symbol
@@ -146,7 +146,7 @@ module Rley # This module is used as a namespace
146
146
  end
147
147
  curr_path.pop if curr_parent.kind_of?(SPPF::AlternativeNode)
148
148
  end
149
-
149
+
150
150
  when :backtrack
151
151
  # Restore path
152
152
  @curr_path = entry2path_to_alt[anEntry].dup
@@ -154,9 +154,9 @@ module Rley # This module is used as a namespace
154
154
  antecedent_index = curr_parent.subnodes.size
155
155
  # puts "Current parent #{curr_parent.to_string(0)}"
156
156
  # puts "Antecedent index #{antecedent_index}"
157
-
158
- create_alternative_node(anEntry)
159
-
157
+
158
+ create_alternative_node(anEntry)
159
+
160
160
  when :revisit
161
161
  # Retrieve the grammar symbol before the dot (if any)
162
162
  prev_symbol = anEntry.prev_symbol
@@ -172,7 +172,7 @@ module Rley # This module is used as a namespace
172
172
  create_epsilon_node(anEntry, anIndex)
173
173
  end
174
174
  curr_path.pop if curr_parent.kind_of?(SPPF::AlternativeNode)
175
- end
175
+ end
176
176
  end
177
177
  end
178
178
 
@@ -187,8 +187,8 @@ module Rley # This module is used as a namespace
187
187
  non_terminal = nonTSymb.nil? ? anEntry.vertex.non_terminal : nonTSymb
188
188
  new_node = Rley::SPPF::NonTerminalNode.new(non_terminal, aRange)
189
189
  entry2node[anEntry] = new_node
190
- # puts "FOREST ADD #{curr_parent.key if curr_parent}/#{new_node.key}"
191
190
  add_subnode(new_node)
191
+ # puts "FOREST ADD #{curr_parent.key if curr_parent}/#{new_node.key}"
192
192
 
193
193
  return new_node
194
194
  end
@@ -200,6 +200,7 @@ module Rley # This module is used as a namespace
200
200
  range = curr_parent.range
201
201
  alternative = Rley::SPPF::AlternativeNode.new(vertex, range)
202
202
  add_subnode(alternative)
203
+ forest.is_ambiguous = true
203
204
  # puts "FOREST ADD #{alternative.key}"
204
205
 
205
206
  return alternative
@@ -231,13 +232,13 @@ module Rley # This module is used as a namespace
231
232
  # Add the given node if not yet present in parse forest
232
233
  def add_node_to_forest(aNode)
233
234
  key_node = aNode.key
234
- if forest.include?(key_node)
235
- new_node = forest.key2node[key_node]
236
- else
237
- new_node = aNode
238
- forest.key2node[key_node] = new_node
239
- # puts "FOREST ADD #{key_node}"
240
- end
235
+ if forest.include?(key_node)
236
+ new_node = forest.key2node[key_node]
237
+ else
238
+ new_node = aNode
239
+ forest.key2node[key_node] = new_node
240
+ # puts "FOREST ADD #{key_node}"
241
+ end
241
242
  add_subnode(new_node, false)
242
243
 
243
244
  return new_node
@@ -1,5 +1,6 @@
1
1
  require_relative 'token_node'
2
2
  require_relative 'non_terminal_node'
3
+ require_relative 'alternative_node'
3
4
 
4
5
  module Rley # This module is used as a namespace
5
6
  module SPPF # This module is used as a namespace
@@ -18,17 +19,28 @@ module Rley # This module is used as a namespace
18
19
 
19
20
  # A Hash with pairs of the kind node key => node
20
21
  attr_reader(:key2node)
22
+
23
+ # A setter that tells that the parse is ambiguous.
24
+ attr_writer(:is_ambiguous)
25
+
21
26
 
22
27
  # @param theRootNode [ParseForestNode] The root node of the parse tree.
23
28
  def initialize(theRootNode)
24
29
  @root = theRootNode
25
30
  @key2node = {}
31
+ @is_ambiguous = false
26
32
  end
27
33
 
28
34
  # Returns true if the given node is present in the forest.
29
35
  def include?(aNode)
30
36
  return key2node.include?(aNode)
31
37
  end
38
+
39
+ # Returns true if the parse encountered a structural ambiguity
40
+ # (i.e. more than one parse tree for the given input)
41
+ def ambiguous?()
42
+ return @is_ambiguous
43
+ end
32
44
 
33
45
 
34
46
  # Part of the 'visitee' role in the Visitor design pattern.
@@ -18,9 +18,12 @@ module Rley # This module is used as a namespace
18
18
  # The list of production rules for the grammar to build
19
19
  attr_reader(:productions)
20
20
 
21
- def initialize()
21
+
22
+ def initialize(&aBlock)
22
23
  @symbols = {}
23
24
  @productions = []
25
+
26
+ instance_exec(&aBlock) if block_given?
24
27
  end
25
28
 
26
29
  # Retrieve a grammar symbol from its name.
@@ -87,6 +90,8 @@ module Rley # This module is used as a namespace
87
90
 
88
91
  return @grammar
89
92
  end
93
+
94
+ alias rule add_production
90
95
 
91
96
  private
92
97
 
@@ -17,7 +17,7 @@ module Rley # Open this namespace to avoid module qualifier prefixes
17
17
 
18
18
  let(:sample_grammar) do
19
19
  # Grammar based on paper from Elizabeth Scott
20
- # "SPPF=Style Parsing From Earley Recognizers" in
20
+ # "SPPF-Style Parsing From Earley Recognizers" in
21
21
  # Electronic Notes in Theoretical Computer Science 203, (2008), pp. 53-67
22
22
  # contains a hidden left recursion and a cycle
23
23
  builder = Syntax::GrammarBuilder.new
@@ -6,10 +6,10 @@ require_relative '../../../lib/rley/syntax/grammar_builder'
6
6
  module Rley # Open this namespace to avoid module qualifier prefixes
7
7
  module Syntax # Open this namespace to avoid module qualifier prefixes
8
8
  describe GrammarBuilder do
9
- context 'Initialization:' do
10
- it 'should be created without argument' do
9
+ context 'Initialization without argument:' do
10
+ it 'could be created without argument' do
11
11
  expect { GrammarBuilder.new }.not_to raise_error
12
- end
12
+ end
13
13
 
14
14
  it 'should have no grammar symbols at start' do
15
15
  expect(subject.symbols).to be_empty
@@ -19,6 +19,24 @@ module Rley # Open this namespace to avoid module qualifier prefixes
19
19
  expect(subject.productions).to be_empty
20
20
  end
21
21
  end # context
22
+
23
+ context 'Initialization with argument:' do
24
+ it 'could be created with a block argument' do
25
+ expect do GrammarBuilder.new { nil }
26
+ end.not_to raise_error
27
+ end
28
+
29
+ it 'could have grammar symbols from block argument' do
30
+ instance = GrammarBuilder.new do
31
+ add_terminals('a', 'b', 'c')
32
+ end
33
+ expect(instance.symbols.size).to eq(3)
34
+ end
35
+
36
+ it 'should have no productions at start' do
37
+ expect(subject.productions).to be_empty
38
+ end
39
+ end # context
22
40
 
23
41
  context 'Adding symbols:' do
24
42
  it 'should build terminals from their names' do
@@ -79,7 +97,8 @@ module Rley # Open this namespace to avoid module qualifier prefixes
79
97
  expect_rhs = [ subject['a'], subject['A'], subject['c'] ]
80
98
  expect(new_prod.rhs.members).to eq(expect_rhs)
81
99
 
82
- subject.add_production('A' => ['b'])
100
+ # GrammarBuilder#rule is an alias of add_production
101
+ subject.rule('A' => ['b'])
83
102
  expect(subject.productions.size).to eq(3)
84
103
  new_prod = subject.productions.last
85
104
  expect(new_prod.lhs).to eq(subject['A'])
@@ -89,11 +108,13 @@ module Rley # Open this namespace to avoid module qualifier prefixes
89
108
 
90
109
  context 'Building grammar:' do
91
110
  subject do
92
- instance = GrammarBuilder.new
93
- instance.add_terminals('a', 'b', 'c')
94
- instance.add_production('S' => ['A'])
95
- instance.add_production('A' => %w(a A c))
96
- instance.add_production('A' => ['b'])
111
+ instance = GrammarBuilder.new do
112
+ add_terminals('a', 'b', 'c')
113
+ add_production('S' => ['A'])
114
+ add_production('A' => %w(a A c))
115
+ add_production('A' => ['b'])
116
+ end
117
+
97
118
  instance
98
119
  end
99
120
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.09
4
+ version: 0.3.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-11-27 00:00:00.000000000 Z
11
+ date: 2016-12-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake