rley 0.5.14 → 0.6.00
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +12 -2
- data/README.md +29 -31
- data/examples/NLP/benchmark_pico_en.rb +34 -34
- data/examples/NLP/engtagger.rb +1 -1
- data/examples/NLP/nano_eng/nano_en_demo.rb +23 -28
- data/examples/NLP/nano_eng/nano_grammar.rb +1 -1
- data/examples/NLP/pico_en_demo.rb +28 -31
- data/examples/data_formats/JSON/json_ast_builder.rb +11 -70
- data/examples/data_formats/JSON/json_demo.rb +32 -14
- data/examples/data_formats/JSON/json_grammar.rb +1 -1
- data/examples/data_formats/JSON/json_lexer.rb +5 -11
- data/examples/general/SRL/lib/ast_builder.rb +5 -28
- data/examples/general/SRL/lib/tokenizer.rb +2 -5
- data/examples/general/SRL/spec/integration_spec.rb +12 -5
- data/examples/general/SRL/spec/tokenizer_spec.rb +13 -14
- data/examples/general/SRL/srl_demo.rb +16 -9
- data/examples/general/calc_iter1/calc_ast_builder.rb +29 -85
- data/examples/general/calc_iter1/calc_demo.rb +15 -6
- data/examples/general/calc_iter1/calc_lexer.rb +2 -5
- data/examples/general/calc_iter1/spec/calculator_spec.rb +18 -19
- data/examples/general/calc_iter2/calc_ast_builder.rb +9 -107
- data/examples/general/calc_iter2/calc_demo.rb +15 -8
- data/examples/general/calc_iter2/calc_lexer.rb +3 -5
- data/examples/general/calc_iter2/spec/calculator_spec.rb +18 -31
- data/lib/rley.rb +2 -1
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +122 -0
- data/lib/rley/parse_rep/ast_base_builder.rb +128 -0
- data/lib/rley/{parser → parse_rep}/cst_builder.rb +1 -1
- data/lib/rley/{parser → parse_rep}/parse_forest_builder.rb +1 -1
- data/lib/rley/{parser → parse_rep}/parse_forest_factory.rb +2 -2
- data/lib/rley/{parser → parse_rep}/parse_rep_creator.rb +3 -3
- data/lib/rley/{parser → parse_rep}/parse_tree_builder.rb +4 -4
- data/lib/rley/{parser → parse_rep}/parse_tree_factory.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +16 -4
- data/spec/rley/engine_spec.rb +127 -0
- data/spec/rley/formatter/asciitree_spec.rb +11 -13
- data/spec/rley/formatter/bracket_notation_spec.rb +11 -13
- data/spec/rley/formatter/debug_spec.rb +13 -15
- data/spec/rley/formatter/json_spec.rb +10 -14
- data/spec/rley/{parser → parse_rep}/ambiguous_parse_spec.rb +3 -3
- data/spec/rley/{parser → parse_rep}/ast_builder_spec.rb +34 -83
- data/spec/rley/{parser → parse_rep}/cst_builder_spec.rb +3 -3
- data/spec/rley/{parser → parse_rep}/groucho_spec.rb +3 -3
- data/spec/rley/{parser → parse_rep}/parse_forest_builder_spec.rb +4 -4
- data/spec/rley/{parser → parse_rep}/parse_forest_factory_spec.rb +2 -2
- data/spec/rley/{parser → parse_rep}/parse_tree_factory_spec.rb +2 -2
- data/spec/rley/parse_tree_visitor_spec.rb +12 -15
- data/spec/rley/support/ast_builder.rb +403 -0
- data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
- metadata +27 -28
- data/examples/data_formats/JSON/json_parser.rb +0 -46
- data/examples/general/SRL/lib/ast_building.rb +0 -20
- data/examples/general/SRL/lib/parser.rb +0 -26
- data/examples/general/calc_iter1/calc_parser.rb +0 -24
- data/examples/general/calc_iter2/ast_building.rb +0 -20
- data/examples/general/calc_iter2/calc_parser.rb +0 -24
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: a7fa26ec5c47c28dd6d0cc68f32e8f023f92ce75
+  data.tar.gz: 73717a9a1c2daa9886952c502020b2fcf5e4bb33
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 055963ec1637d43b33703e87b454fd9a1590a3993609f2df89f0d3ca94a1b8eba7b39cf102d318e0c1da6fc3e29c51a84bd3929f3000b7c800ee4585260d9b78
+  data.tar.gz: 2b0bc71c159ecac03789644497af1ce789b7975efe2fb6fecc399e41fb346aec26ad67a9d1015ebed1d43a9e9fdd09321d7dfef5f86afa3ce5402b4f04b8f062
data/CHANGELOG.md
CHANGED
@@ -1,4 +1,14 @@
-### 0.5.14 / 2018-02-10
+### 0.6.00 / 2018-02-25
+Version bump. Highlights: new programming interface through facade object, improved AST generation.
+* [NEW] Class `Rley::Engine`: Implementation of Facade design pattern to reach more convenient interface.
+* [NEW] Class `Rley::ParseRep::ASTBaseBuilder` Abstract class that simplifies the creation of custom AST (Abstract Syntax Tree)
+* [NEW] Module `Rley::ParseRep` hosts the classes for building parse representations (parse trees and forests)
+* [CHANGE] File `README.md` updated to reflect the new facade interface.
+* [CHANGE] Almost all the examples have been updated to use the `Rley::Engine` facade and the new AST building.
+* [CHANGE] All the specs have been updated to use the `Rley::Engine` facade and the new AST building.
+* [DEPRECATED] Method `Rley::Parser::GFGParsing#parse_tree`.
+
+### 0.5.14 / 2018-02-10
 * [NEW] Files spec/rley/sppf/token_node_spec.rb` Added RSpec file for testing `SPPF::TokenNode` class.
 * [CHANGE] Files `lib/rley/sppf` Minor update in the YARD documentation of the SPPF node classes.
 * [FIX] Method `Parser::CSTRawNode#initialize`. Yard warning because of duplicate parameter names in documentation.
@@ -9,7 +19,7 @@
 * [CHANGE] File `examples/general/SRL/ast_builder.rb Code refactoring to take profit of rule naming.
 
 ### 0.5.12 / 2018-02-03
-* [CHANGE] Simple Regex Language is fully supported!...
+* [CHANGE] Simple Regex Language is almost fully supported!...
 * [CHANGE] File `examples/general/SRL/grammar.rb added missing rule productions for Simple Regex Language.
 * [CHANGE] File `examples/general/SRL/ast_builder.rb Added transformation rules missing regular expressions features.
 * [CHANGE] File `examples/general/SRL/spac/integration_spec.rb Added tests for covering SRL.
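Taken together, the entries above describe one migration: grammar construction, parsing, and parse-tree generation all move behind the new `Rley::Engine` facade. The sketch below assembles that new workflow from the `+` lines of the README and example diffs that follow; the two-word grammar, the `Lexicon` hash and the final `renderer.render(visitor)` call are illustrative assumptions rather than lines taken from this diff.

```ruby
require 'rley' # Load Rley library

# New in 0.6.00: one facade object replaces the separate builder/parser setup
engine = Rley::Engine.new

# Grammar building now goes through the facade
engine.build_grammar do
  add_terminals('Noun', 'Verb')
  rule 'S' => %w[Noun Verb]
end

# Rley still leaves tokenization to the caller: wrap each word in a
# Rley::Lexical::Token tagged with its terminal name.
Lexicon = { 'John' => 'Noun', 'sleeps' => 'Verb' }.freeze
tokens = 'John sleeps'.split.map { |word| Rley::Lexical::Token.new(word, Lexicon.fetch(word)) }

# Parsing and tree generation via the facade
result = engine.parse(tokens)
puts "Parsing successful? #{result.success?}"

ptree = engine.convert(result)    # replaces the deprecated GFGParsing#parse_tree route
visitor = engine.ptree_visitor(ptree)
renderer = Rley::Formatter::Asciitree.new($stdout)
renderer.render(visitor)          # assumed render call; not part of the hunks shown in this diff
```

The old flow (`GrammarBuilder` plus `GFGEarleyParser`) is still visible in the removed lines of the README hunks below, for comparison.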
data/README.md
CHANGED
@@ -74,10 +74,10 @@ Installing the latest stable version is simple:
 The purpose of this section is show how to create a parser for a minimalistic
 English language subset.
 The tour is organized as follows:
-1. [
-2. [
-3. [Creating a
-4. [
+1. [Creating facade object of Rley library](#creating-facade-object-of-rley-library)
+2. [Defining the language grammar](#defining-the-language-grammar)
+3. [Creating a lexicon](#creating-a-lexicon)
+4. [Creating a tokenizer](#creating-a-tokenizer)
 5. [Parsing some input](#parsing-some-input)
 6. [Generating the parse tree](#generating-the-parse-tree)
 
@@ -85,14 +85,22 @@ The complete source code of the example used in this tour can be found in the
 [examples](https://github.com/famished-tiger/Rley/tree/master/examples/NLP/mini_en_demo.rb)
 directory
 
-### Defining the language grammar
-The subset of English grammar is based on an example from the NLTK book.
 
+### Creating facade object of Rley library
 ```ruby
 require 'rley' # Load Rley library
 
-#
-
+# Let's create a facade object called 'engine'
+# It provides a unified, higher-level interface
+engine = Rley::Engine.new
+```
+
+
+### Defining the language grammar
+The subset of English grammar is based on an example from the NLTK book.
+
+```ruby
+engine.build_grammar do
   # Terminal symbols (= word categories in lexicon)
   add_terminals('Noun', 'Proper-Noun', 'Verb')
   add_terminals('Determiner', 'Preposition')
@@ -106,8 +114,6 @@ The subset of English grammar is based on an example from the NLTK book.
   rule 'VP' => %w[Verb NP PP]
   rule 'PP' => %w[Preposition NP]
 end
-# And now, let's build the grammar...
-grammar = builder.grammar
 ```
 
 ### Creating a lexicon
@@ -141,14 +147,14 @@
 
 ### Creating a tokenizer
 ```ruby
-# A tokenizer reads the input string and converts it into a sequence of tokens
-#
-
+# A tokenizer reads the input string and converts it into a sequence of tokens.
+# Remark: Rley doesn't provide tokenizer functionality.
+# Highly simplified tokenizer implementation
+def tokenizer(aTextToParse)
   tokens = aTextToParse.scan(/\S+/).map do |word|
     term_name = Lexicon[word]
     raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
-
-    Rley::Lexical::Token.new(word, terminal)
+    Rley::Lexical::Token.new(word, term_name)
   end
 
   return tokens
@@ -161,20 +167,12 @@ creating a lexicon and tokenizer from scratch. Here are a few Ruby Part-of-Speec
 * [rbtagger](https://rubygems.org/gems/rbtagger)
 
 
-
-### Building the parser
-```ruby
-# Easy with Rley...
-parser = Rley::Parser::GFGEarleyParser.new(grammar)
-```
-
-
 ### Parsing some input
 ```ruby
 input_to_parse = 'John saw Mary with a telescope'
 # Convert input text into a sequence of token objects...
-tokens = tokenizer(input_to_parse
-result =
+tokens = tokenizer(input_to_parse)
+result = engine.parse(tokens)
 
 puts "Parsing successful? #{result.success?}" # => Parsing successful? true
 ```
@@ -194,7 +192,7 @@ For our whirlwind tour, we will opt for parse trees.
 ### Generating the parse tree
 
 ```ruby
-ptree = result
+ptree = engine.convert(result)
 ```
 OK. Now that we have the parse tree, what we can do with it?
 One option is to manipulate the parse tree and its node directly. For instance,
@@ -216,7 +214,7 @@ an one-liner:
 
 ```ruby
 # Let's create a parse tree visitor
-visitor =
+visitor = engine.ptree_visitor(ptree)
 ```
 
 #### Visiting the parse tree
@@ -359,8 +357,8 @@ above and, as an error, we delete the verb `saw` in the sentence to parse.
 # Verb has been removed from the sentence on next line
 input_to_parse = 'John Mary with a telescope'
 # Convert input text into a sequence of token objects...
-tokens = tokenizer(input_to_parse
-result =
+tokens = tokenizer(input_to_parse)
+result = engine.parse(tokens)
 
 puts "Parsing successful? #{result.success?}" # => Parsing successful? false
 exit(1)
@@ -390,8 +388,8 @@ Let's experiment again with the original sentence but without the word
 # Last word has been removed from the sentence on next line
 input_to_parse = 'John saw Mary with a '
 # Convert input text into a sequence of token objects...
-tokens = tokenizer(input_to_parse
-result =
+tokens = tokenizer(input_to_parse)
+result = engine.parse(tokens)
 
 puts "Parsing successful? #{result.success?}" # => Parsing successful? false
 unless result.success?
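Both error-case hunks above keep the same diagnostic pattern: check `result.success?` and report why the parse failed before exiting. A minimal sketch of that pattern, reusing the `engine` and `tokenizer` defined in the README snippets above (the `failure_reason.message` call is taken from the commented-out lines of the benchmark example later in this diff):

```ruby
tokens = tokenizer('John Mary with a telescope') # the verb 'saw' is deliberately missing
result = engine.parse(tokens)

unless result.success?
  # Explain why the parse failed, then bail out
  puts result.failure_reason.message
  exit(1)
end
```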
data/examples/NLP/benchmark_pico_en.rb
CHANGED
@@ -1,12 +1,24 @@
-# File:
+# File: benchmark_pico_en.rb
 # Purpose: benchmark the parse speed
 require 'benchmark'
 require 'rley' # Load Rley library
 
-
-
+########################################
+# Step 0. Instantiate facade object of Rley library.
+# It provides a unified, higher-level interface
+engine = Rley::Engine.new
 
-
+########################################
+# Step 1. Define a grammar for a pico English-like language
+# based on example from NLTK book (chapter 8 of the book).
+# Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
+# with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
+# It defines the syntax of a sentence in a mini English-like language
+# with a very simplified syntax and vocabulary
+engine.build_grammar do
+  # Next 2 lines we define the terminal symbols
+  # (= word categories in the lexicon)
+  add_terminals('Noun', 'Proper-Noun', 'Verb')
   add_terminals('Determiner', 'Preposition')
 
   # Here we define the productions (= grammar rules)
@@ -17,10 +29,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
   rule 'VP' => %w[Verb NP]
   rule 'VP' => %w[Verb NP PP]
   rule 'PP' => %w[Preposition NP]
-end
-
-# And now, let's build the grammar...
-grammar = builder.grammar
+end
 
 ########################################
 # Step 2. Creating a lexicon
@@ -31,7 +40,7 @@ Lexicon = {
   'dog' => 'Noun',
   'cat' => 'Noun',
   'telescope' => 'Noun',
-  'park' => 'Noun',
+  'park' => 'Noun',
   'saw' => 'Verb',
   'ate' => 'Verb',
   'walked' => 'Verb',
@@ -49,44 +58,35 @@ Lexicon = {
 }.freeze
 
 ########################################
-# Step 3.
-# A tokenizer reads the input string and converts it into a sequence of tokens
-#
-
+# Step 3. Create a tokenizer
+# A tokenizer reads the input string and converts it into a sequence of tokens.
+# Rley doesn't provide tokenizer functionality.
+# (Highly simplified tokenizer implementation).
+def tokenizer(aTextToParse)
   tokens = aTextToParse.scan(/\S+/).map do |word|
     term_name = Lexicon[word]
    raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
-
-    Rley::Lexical::Token.new(word, terminal)
+    Rley::Lexical::Token.new(word, term_name)
  end
-
+
   return tokens
 end
 
-########################################
-# Step 4. Create a parser for that grammar
-# Easy with Rley...
-parser = Rley::Parser::GFGEarleyParser.new(grammar)
 
 ########################################
-# Step
+# Step 4. Parse the input
 input_to_parse = 'John saw Mary with a telescope'
-
+# input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
 # Convert input text into a sequence of token objects...
-tokens = tokenizer(input_to_parse
+tokens = tokenizer(input_to_parse)
+result = engine.parse(tokens)
 
 # Use Benchmark mix-in
 include Benchmark
 
 bm(6) do |meter|
-  meter.report("Parse 100 times") { 100.times {
-  meter.report("Parse 1000 times") { 1000.times {
-  meter.report("Parse 10000 times") { 10000.times {
-  meter.report("Parse 1000000 times") { 100000.times {
-end
-
-# puts "Parsing successful? #{result.success?}"
-# unless result.success?
-# puts result.failure_reason.message
-# exit(1)
-# end
+  meter.report("Parse 100 times") { 100.times { engine.parse(tokens) } }
+  meter.report("Parse 1000 times") { 1000.times { engine.parse(tokens) } }
+  meter.report("Parse 10000 times") { 10000.times { engine.parse(tokens) } }
+  meter.report("Parse 1000000 times") { 100000.times { engine.parse(tokens) } }
+end
data/examples/NLP/engtagger.rb
CHANGED
@@ -153,7 +153,7 @@ def tokenizer(lexicon, grammar, tokens)
   lexicon.each_with_index do |word, i|
     term_name = tokens[i].last
     terminal = grammar.name2symbol[term_name]
-    rley_tokens << Rley::
+    rley_tokens << Rley::Lexical::Token.new(word, terminal)
   end
   return rley_tokens
 end
data/examples/NLP/nano_eng/nano_en_demo.rb
CHANGED
@@ -1,19 +1,22 @@
 require 'rley' # Load Rley library
 
+########################################
+# Step 0. Instantiate facade object of Rley library.
+# It provides a unified, higher-level interface
+engine = Rley::Engine.new
+
 ########################################
 # Step 1. Define a grammar for a nano English-like language
 # based on example from Jurafski & Martin book (chapter 8 of the book).
 # Bird, Steven, Edward Loper and Ewan Klein: "Speech and Language Processing";
 # 2009, Pearson Education, Inc., ISBN 978-0135041963
-# It defines the syntax of a sentence in a mini English-like language
+# It defines the syntax of a sentence in a mini English-like language
 # with a very simplified syntax and vocabulary
-
-#
-builder = Rley::Syntax::GrammarBuilder.new do
-  # Next 2 lines we define the terminal symbols
+engine.build_grammar do
+  # Next 2 lines we define the terminal symbols
   # (= word categories in the lexicon)
-  add_terminals('Noun', 'Proper-Noun', 'Pronoun', 'Verb')
-  add_terminals('Aux', '
+  add_terminals('Noun', 'Proper-Noun', 'Pronoun', 'Verb')
+  add_terminals('Aux', 'Determiner', 'Preposition')
 
   # Here we define the productions (= grammar rules)
   rule 'Start' => 'S'
@@ -22,7 +25,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
   rule 'S' => 'VP'
   rule 'NP' => 'Pronoun'
   rule 'NP' => 'Proper-Noun'
-  rule 'NP' => %w[
+  rule 'NP' => %w[Determiner Nominal]
   rule 'Nominal' => %[Noun]
   rule 'Nominal' => %[Nominal Noun]
   rule 'VP' => 'Verb'
@@ -31,10 +34,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
   rule 'VP' => %w[Verb PP]
   rule 'VP' => %w[VP PP]
   rule 'PP' => %w[Preposition NP]
-end
-
-# And now, let's build the grammar...
-grammar = builder.grammar
+end
 
 ########################################
 # Step 2. Creating a lexicon
@@ -45,7 +45,7 @@ Lexicon = {
   'dog' => 'Noun',
   'cat' => 'Noun',
   'telescope' => 'Noun',
-  'park' => 'Noun',
+  'park' => 'Noun',
   'saw' => 'Verb',
   'ate' => 'Verb',
   'walked' => 'Verb',
@@ -66,29 +66,24 @@ Lexicon = {
 # Step 3. Creating a tokenizer
 # A tokenizer reads the input string and converts it into a sequence of tokens
 # Highly simplified tokenizer implementation.
-def tokenizer(aTextToParse
+def tokenizer(aTextToParse)
   tokens = aTextToParse.scan(/\S+/).map do |word|
     term_name = Lexicon[word]
     raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
-
-    Rley::Lexical::Token.new(word, terminal)
+    Rley::Lexical::Token.new(word, term_name)
   end
-
+
   return tokens
 end
 
-########################################
-# Step 4. Create a parser for that grammar
-# Easy with Rley...
-parser = Rley::Parser::GFGEarleyParser.new(grammar)
-
 ########################################
 # Step 5. Parsing the input
-input_to_parse = 'John saw Mary
+input_to_parse = 'John saw Mary'
+# input_to_parse = 'John saw Mary with a telescope'
 # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
 # Convert input text into a sequence of token objects...
-tokens = tokenizer(input_to_parse
-result =
+tokens = tokenizer(input_to_parse)
+result = engine.parse(tokens)
 
 puts "Parsing successful? #{result.success?}"
 unless result.success?
@@ -98,10 +93,10 @@ end
 
 ########################################
 # Step 6. Generating a parse tree from parse result
-ptree = result
+ptree = engine.convert(result)
 
 # Let's create a parse tree visitor
-visitor =
+visitor = engine.ptree_visitor(ptree)
 
 # Let's create a formatter (i.e. visit event listener)
 # renderer = Rley::Formatter::Debug.new($stdout)
@@ -109,7 +104,7 @@ visitor = Rley::ParseTreeVisitor.new(ptree)
 # Let's create a formatter that will render the parse tree with characters
 renderer = Rley::Formatter::Asciitree.new($stdout)
 
-# Let's create a formatter that will render the parse tree in labelled
+# Let's create a formatter that will render the parse tree in labelled
 # bracket notation
 # renderer = Rley::Formatter::BracketNotation.new($stdout)
 
data/examples/NLP/nano_eng/nano_grammar.rb
CHANGED
@@ -28,7 +28,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
   rule 'yes_no_question' => %w[Aux NP VP]
   rule 'wh_subject_question' => %w[Wh_NP NP VP]
   rule 'wh_non_subject_question' => %w[Wh_NP Aux NP VP]
-  rule 'NP' => %[Predeterminer NP]
+  rule 'NP' => %w[Predeterminer NP]
   rule 'NP' => 'Pronoun'
   rule 'NP' => 'Proper-Noun'
   rule 'NP' => %w[Det Card Ord Quant Nominal]
data/examples/NLP/pico_en_demo.rb
CHANGED
@@ -1,18 +1,22 @@
 require 'rley' # Load Rley library
 
+
+########################################
+# Step 1. Creating facade object of Rley library
+# It provides a unified, higher-level interface
+engine = Rley::Engine.new
+
 ########################################
-# Step
+# Step 2. Define a grammar for a pico English-like language
 # based on example from NLTK book (chapter 8 of the book).
-# Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
+# Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
 # with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
-# It defines the syntax of a sentence in a mini English-like language
+# It defines the syntax of a sentence in a mini English-like language
 # with a very simplified syntax and vocabulary
-
-#
-builder = Rley::Syntax::GrammarBuilder.new do
-  # Next 2 lines we define the terminal symbols
+engine.build_grammar do
+  # Next 2 lines we define the terminal symbols
   # (= word categories in the lexicon)
-  add_terminals('Noun', 'Proper-Noun', 'Verb')
+  add_terminals('Noun', 'Proper-Noun', 'Verb')
   add_terminals('Determiner', 'Preposition')
 
   # Here we define the productions (= grammar rules)
@@ -23,13 +27,10 @@ builder = Rley::Syntax::GrammarBuilder.new do
   rule 'VP' => %w[Verb NP]
   rule 'VP' => %w[Verb NP PP]
   rule 'PP' => %w[Preposition NP]
-end
-
-# And now, let's build the grammar...
-grammar = builder.grammar
+end
 
 ########################################
-# Step
+# Step 3. Creating a lexicon
 # To simplify things, lexicon is implemented as a Hash with pairs of the form:
 # word => terminal symbol name
 Lexicon = {
@@ -37,7 +38,7 @@ Lexicon = {
   'dog' => 'Noun',
   'cat' => 'Noun',
   'telescope' => 'Noun',
-  'park' => 'Noun',
+  'park' => 'Noun',
   'saw' => 'Verb',
   'ate' => 'Verb',
   'walked' => 'Verb',
@@ -55,32 +56,28 @@ Lexicon = {
 }.freeze
 
 ########################################
-# Step
-# A tokenizer reads the input string and converts it into a sequence of tokens
-#
-
+# Step 4. Create a tokenizer
+# A tokenizer reads the input string and converts it into a sequence of tokens.
+# Remark: Rley doesn't provide tokenizer functionality.
+# Highly simplified tokenizer implementation
+def tokenizer(aTextToParse)
   tokens = aTextToParse.scan(/\S+/).map do |word|
     term_name = Lexicon[word]
     raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
-
-    Rley::Lexical::Token.new(word, terminal)
+    Rley::Lexical::Token.new(word, term_name)
   end
-
+
   return tokens
 end
 
-########################################
-# Step 4. Create a parser for that grammar
-# Easy with Rley...
-parser = Rley::Parser::GFGEarleyParser.new(grammar)
 
 ########################################
-# Step 5.
+# Step 5. Parse the input
 input_to_parse = 'John saw Mary with a telescope'
 # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
 # Convert input text into a sequence of token objects...
-tokens = tokenizer(input_to_parse
-result =
+tokens = tokenizer(input_to_parse)
+result = engine.parse(tokens)
 
 puts "Parsing successful? #{result.success?}"
 unless result.success?
@@ -90,10 +87,10 @@ end
 
 ########################################
 # Step 6. Generating a parse tree from parse result
-ptree = result
+ptree = engine.to_ptree(result)
 
 # Let's create a parse tree visitor
-visitor =
+visitor = engine.ptree_visitor(ptree)
 
 # Let's create a formatter (i.e. visit event listener)
 # renderer = Rley::Formatter::Debug.new($stdout)
@@ -101,7 +98,7 @@ visitor = Rley::ParseTreeVisitor.new(ptree)
 # Let's create a formatter that will render the parse tree with characters
 renderer = Rley::Formatter::Asciitree.new($stdout)
 
-# Let's create a formatter that will render the parse tree in labelled
+# Let's create a formatter that will render the parse tree in labelled
 # bracket notation
 # renderer = Rley::Formatter::BracketNotation.new($stdout)
 
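One last detail worth noting across the example diffs above: the updated scripts obtain the parse tree through two different facade calls, `engine.convert(result)` in README.md and nano_en_demo.rb versus `engine.to_ptree(result)` in pico_en_demo.rb. A short sketch of the shared pattern, assuming (as the examples suggest) that either call yields a tree accepted by `engine.ptree_visitor`:

```ruby
ptree = engine.convert(result)     # spelling used in README.md and nano_en_demo.rb
# ptree = engine.to_ptree(result)  # spelling used in pico_en_demo.rb

visitor = engine.ptree_visitor(ptree)
renderer = Rley::Formatter::Asciitree.new($stdout)
renderer.render(visitor)           # assumed render call, as in the sketch after the CHANGELOG
```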