rley 0.5.14 → 0.6.00

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -2
  3. data/README.md +29 -31
  4. data/examples/NLP/benchmark_pico_en.rb +34 -34
  5. data/examples/NLP/engtagger.rb +1 -1
  6. data/examples/NLP/nano_eng/nano_en_demo.rb +23 -28
  7. data/examples/NLP/nano_eng/nano_grammar.rb +1 -1
  8. data/examples/NLP/pico_en_demo.rb +28 -31
  9. data/examples/data_formats/JSON/json_ast_builder.rb +11 -70
  10. data/examples/data_formats/JSON/json_demo.rb +32 -14
  11. data/examples/data_formats/JSON/json_grammar.rb +1 -1
  12. data/examples/data_formats/JSON/json_lexer.rb +5 -11
  13. data/examples/general/SRL/lib/ast_builder.rb +5 -28
  14. data/examples/general/SRL/lib/tokenizer.rb +2 -5
  15. data/examples/general/SRL/spec/integration_spec.rb +12 -5
  16. data/examples/general/SRL/spec/tokenizer_spec.rb +13 -14
  17. data/examples/general/SRL/srl_demo.rb +16 -9
  18. data/examples/general/calc_iter1/calc_ast_builder.rb +29 -85
  19. data/examples/general/calc_iter1/calc_demo.rb +15 -6
  20. data/examples/general/calc_iter1/calc_lexer.rb +2 -5
  21. data/examples/general/calc_iter1/spec/calculator_spec.rb +18 -19
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +9 -107
  23. data/examples/general/calc_iter2/calc_demo.rb +15 -8
  24. data/examples/general/calc_iter2/calc_lexer.rb +3 -5
  25. data/examples/general/calc_iter2/spec/calculator_spec.rb +18 -31
  26. data/lib/rley.rb +2 -1
  27. data/lib/rley/constants.rb +1 -1
  28. data/lib/rley/engine.rb +122 -0
  29. data/lib/rley/parse_rep/ast_base_builder.rb +128 -0
  30. data/lib/rley/{parser → parse_rep}/cst_builder.rb +1 -1
  31. data/lib/rley/{parser → parse_rep}/parse_forest_builder.rb +1 -1
  32. data/lib/rley/{parser → parse_rep}/parse_forest_factory.rb +2 -2
  33. data/lib/rley/{parser → parse_rep}/parse_rep_creator.rb +3 -3
  34. data/lib/rley/{parser → parse_rep}/parse_tree_builder.rb +4 -4
  35. data/lib/rley/{parser → parse_rep}/parse_tree_factory.rb +1 -1
  36. data/lib/rley/parser/gfg_parsing.rb +16 -4
  37. data/spec/rley/engine_spec.rb +127 -0
  38. data/spec/rley/formatter/asciitree_spec.rb +11 -13
  39. data/spec/rley/formatter/bracket_notation_spec.rb +11 -13
  40. data/spec/rley/formatter/debug_spec.rb +13 -15
  41. data/spec/rley/formatter/json_spec.rb +10 -14
  42. data/spec/rley/{parser → parse_rep}/ambiguous_parse_spec.rb +3 -3
  43. data/spec/rley/{parser → parse_rep}/ast_builder_spec.rb +34 -83
  44. data/spec/rley/{parser → parse_rep}/cst_builder_spec.rb +3 -3
  45. data/spec/rley/{parser → parse_rep}/groucho_spec.rb +3 -3
  46. data/spec/rley/{parser → parse_rep}/parse_forest_builder_spec.rb +4 -4
  47. data/spec/rley/{parser → parse_rep}/parse_forest_factory_spec.rb +2 -2
  48. data/spec/rley/{parser → parse_rep}/parse_tree_factory_spec.rb +2 -2
  49. data/spec/rley/parse_tree_visitor_spec.rb +12 -15
  50. data/spec/rley/support/ast_builder.rb +403 -0
  51. data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
  52. metadata +27 -28
  53. data/examples/data_formats/JSON/json_parser.rb +0 -46
  54. data/examples/general/SRL/lib/ast_building.rb +0 -20
  55. data/examples/general/SRL/lib/parser.rb +0 -26
  56. data/examples/general/calc_iter1/calc_parser.rb +0 -24
  57. data/examples/general/calc_iter2/ast_building.rb +0 -20
  58. data/examples/general/calc_iter2/calc_parser.rb +0 -24
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 331a505963faf201ef03664616805ce831a591e0
4
- data.tar.gz: 1e80b03ca806612f72fa97db13dd49b1cc55e218
3
+ metadata.gz: a7fa26ec5c47c28dd6d0cc68f32e8f023f92ce75
4
+ data.tar.gz: 73717a9a1c2daa9886952c502020b2fcf5e4bb33
5
5
  SHA512:
6
- metadata.gz: e070d60d75abd4beace97c65d938a7bc4e81e1c9c4bc0c4d3f0b1bba4acd13662e3196f83113bfd5def31992d2df85367dc6a3fb6ab367480e6e1d51d9d6d98d
7
- data.tar.gz: 3816e3675513be5608d731fba3b37e9c08b41ced276403182790dcb8d38235cd88c7908b8a3540bf2c5afc87fec84e3340a1b61f9caf55fa095e9b4f6092774d
6
+ metadata.gz: 055963ec1637d43b33703e87b454fd9a1590a3993609f2df89f0d3ca94a1b8eba7b39cf102d318e0c1da6fc3e29c51a84bd3929f3000b7c800ee4585260d9b78
7
+ data.tar.gz: 2b0bc71c159ecac03789644497af1ce789b7975efe2fb6fecc399e41fb346aec26ad67a9d1015ebed1d43a9e9fdd09321d7dfef5f86afa3ce5402b4f04b8f062
@@ -1,4 +1,14 @@
1
- ### 0.5.14 / 2018-XX-XX
1
+ ### 0.6.00 / 2018-02-25
2
+ Version bump. Highlights: new programming interface through facade object, improved AST generation.
3
+ * [NEW] Class `Rley::Engine`: Implementation of the Facade design pattern to provide a more convenient interface.
4
+ * [NEW] Class `Rley::ParseRep::ASTBaseBuilder` Abstract class that simplifies the creation of custom AST (Abstract Syntax Tree)
5
+ * [NEW] Module `Rley::ParseRep` hosts the classes for building parse representations (parse trees and forests)
6
+ * [CHANGE] File `README.md` updated to reflect the new facade interface.
7
+ * [CHANGE] Almost all the examples have been updated to use the `Rley::Engine` facade and the new AST building.
8
+ * [CHANGE] All the specs have been updated to use the `Rley::Engine` facade and the new AST building.
9
+ * [DEPRECATED] Method `Rley::Parser::GFGParsing#parse_tree`.
10
+
11
+ ### 0.5.14 / 2018-02-10
2
12
  * [NEW] File `spec/rley/sppf/token_node_spec.rb` Added RSpec file for testing `SPPF::TokenNode` class.
3
13
  * [CHANGE] Files `lib/rley/sppf` Minor update in the YARD documentation of the SPPF node classes.
4
14
  * [FIX] Method `Parser::CSTRawNode#initialize`. Yard warning because of duplicate parameter names in documentation.
@@ -9,7 +19,7 @@
9
19
  * [CHANGE] File `examples/general/SRL/ast_builder.rb Code refactoring to take profit of rule naming.
10
20
 
11
21
  ### 0.5.12 / 2018-02-03
12
- * [CHANGE] Simple Regex Language is fully supported!...
22
+ * [CHANGE] Simple Regex Language is almost fully supported!...
13
23
  * [CHANGE] File `examples/general/SRL/grammar.rb` added missing rule productions for Simple Regex Language.
14
24
  * [CHANGE] File `examples/general/SRL/ast_builder.rb` Added transformation rules for missing regular expression features.
15
25
  * [CHANGE] File `examples/general/SRL/spec/integration_spec.rb` Added tests covering SRL.
data/README.md CHANGED
@@ -74,10 +74,10 @@ Installing the latest stable version is simple:
74
74
  The purpose of this section is to show how to create a parser for a minimalistic
75
75
  English language subset.
76
76
  The tour is organized as follows:
77
- 1. [Defining the language grammar](#defining-the-language-grammar)
78
- 2. [Creating a lexicon](#creating-a-lexicon)
79
- 3. [Creating a tokenizer](#creating-a-tokenizer)
80
- 4. [Building the parser](#building-the-parser)
77
+ 1. [Creating facade object of Rley library](#creating-facade-object-of-rley-library)
78
+ 2. [Defining the language grammar](#defining-the-language-grammar)
79
+ 3. [Creating a lexicon](#creating-a-lexicon)
80
+ 4. [Creating a tokenizer](#creating-a-tokenizer)
81
81
  5. [Parsing some input](#parsing-some-input)
82
82
  6. [Generating the parse tree](#generating-the-parse-tree)
83
83
 
@@ -85,14 +85,22 @@ The complete source code of the example used in this tour can be found in the
85
85
  [examples](https://github.com/famished-tiger/Rley/tree/master/examples/NLP/mini_en_demo.rb)
86
86
  directory
87
87
 
88
- ### Defining the language grammar
89
- The subset of English grammar is based on an example from the NLTK book.
90
88
 
89
+ ### Creating facade object of Rley library
91
90
  ```ruby
92
91
  require 'rley' # Load Rley library
93
92
 
94
- # Instantiate a builder object that will build the grammar for us
95
- builder = Rley::Syntax::GrammarBuilder.new do
93
+ # Let's create a facade object called 'engine'
94
+ # It provides a unified, higher-level interface
95
+ engine = Rley::Engine.new
96
+ ```
97
+
98
+
99
+ ### Defining the language grammar
100
+ The subset of English grammar is based on an example from the NLTK book.
101
+
102
+ ```ruby
103
+ engine.build_grammar do
96
104
  # Terminal symbols (= word categories in lexicon)
97
105
  add_terminals('Noun', 'Proper-Noun', 'Verb')
98
106
  add_terminals('Determiner', 'Preposition')
@@ -106,8 +114,6 @@ The subset of English grammar is based on an example from the NLTK book.
106
114
  rule 'VP' => %w[Verb NP PP]
107
115
  rule 'PP' => %w[Preposition NP]
108
116
  end
109
- # And now, let's build the grammar...
110
- grammar = builder.grammar
111
117
  ```
112
118
 
113
119
  ### Creating a lexicon
@@ -141,14 +147,14 @@ The subset of English grammar is based on an example from the NLTK book.
141
147
 
142
148
  ### Creating a tokenizer
143
149
  ```ruby
144
- # A tokenizer reads the input string and converts it into a sequence of tokens
145
- # Highly simplified tokenizer implementation.
146
- def tokenizer(aTextToParse, aGrammar)
150
+ # A tokenizer reads the input string and converts it into a sequence of tokens.
151
+ # Remark: Rley doesn't provide tokenizer functionality.
152
+ # Highly simplified tokenizer implementation
153
+ def tokenizer(aTextToParse)
147
154
  tokens = aTextToParse.scan(/\S+/).map do |word|
148
155
  term_name = Lexicon[word]
149
156
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
150
- terminal = aGrammar.name2symbol[term_name]
151
- Rley::Lexical::Token.new(word, terminal)
157
+ Rley::Lexical::Token.new(word, term_name)
152
158
  end
153
159
 
154
160
  return tokens
@@ -161,20 +167,12 @@ creating a lexicon and tokenizer from scratch. Here are a few Ruby Part-of-Speec
161
167
  * [rbtagger](https://rubygems.org/gems/rbtagger)
162
168
 
163
169
 
164
-
165
- ### Building the parser
166
- ```ruby
167
- # Easy with Rley...
168
- parser = Rley::Parser::GFGEarleyParser.new(grammar)
169
- ```
170
-
171
-
172
170
  ### Parsing some input
173
171
  ```ruby
174
172
  input_to_parse = 'John saw Mary with a telescope'
175
173
  # Convert input text into a sequence of token objects...
176
- tokens = tokenizer(input_to_parse, grammar)
177
- result = parser.parse(tokens)
174
+ tokens = tokenizer(input_to_parse)
175
+ result = engine.parse(tokens)
178
176
 
179
177
  puts "Parsing successful? #{result.success?}" # => Parsing successful? true
180
178
  ```
@@ -194,7 +192,7 @@ For our whirlwind tour, we will opt for parse trees.
194
192
  ### Generating the parse tree
195
193
 
196
194
  ```ruby
197
- ptree = result.parse_tree
195
+ ptree = engine.convert(result)
198
196
  ```
199
197
  OK. Now that we have the parse tree, what we can do with it?
200
198
  One option is to manipulate the parse tree and its node directly. For instance,
@@ -216,7 +214,7 @@ an one-liner:
216
214
 
217
215
  ```ruby
218
216
  # Let's create a parse tree visitor
219
- visitor = Rley::ParseTreeVisitor.new(ptree)
217
+ visitor = engine.ptree_visitor(ptree)
220
218
  ```
221
219
 
222
220
  #### Visiting the parse tree
@@ -359,8 +357,8 @@ above and, as an error, we delete the verb `saw` in the sentence to parse.
359
357
  # Verb has been removed from the sentence on next line
360
358
  input_to_parse = 'John Mary with a telescope'
361
359
  # Convert input text into a sequence of token objects...
362
- tokens = tokenizer(input_to_parse, grammar)
363
- result = parser.parse(tokens)
360
+ tokens = tokenizer(input_to_parse)
361
+ result = engine.parse(tokens)
364
362
 
365
363
  puts "Parsing successful? #{result.success?}" # => Parsing successful? false
366
364
  exit(1)
@@ -390,8 +388,8 @@ Let's experiment again with the original sentence but without the word
390
388
  # Last word has been removed from the sentence on next line
391
389
  input_to_parse = 'John saw Mary with a '
392
390
  # Convert input text into a sequence of token objects...
393
- tokens = tokenizer(input_to_parse, grammar)
394
- result = parser.parse(tokens)
391
+ tokens = tokenizer(input_to_parse)
392
+ result = engine.parse(tokens)
395
393
 
396
394
  puts "Parsing successful? #{result.success?}" # => Parsing successful? false
397
395
  unless result.success?
@@ -1,12 +1,24 @@
1
- # File: benchmark_mini_en.rb
1
+ # File: benchmark_pico_en.rb
2
2
  # Purpose: benchmark the parse speed
3
3
  require 'benchmark'
4
4
  require 'rley' # Load Rley library
5
5
 
6
- # Instantiate a builder object that will build the grammar for us
7
- builder = Rley::Syntax::GrammarBuilder.new do
6
+ ########################################
7
+ # Step 0. Instantiate facade object of Rley library.
8
+ # It provides a unified, higher-level interface
9
+ engine = Rley::Engine.new
8
10
 
9
- add_terminals('Noun', 'Proper-Noun', 'Verb')
11
+ ########################################
12
+ # Step 1. Define a grammar for a pico English-like language
13
+ # based on example from NLTK book (chapter 8 of the book).
14
+ # Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
15
+ # with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
16
+ # It defines the syntax of a sentence in a mini English-like language
17
+ # with a very simplified syntax and vocabulary
18
+ engine.build_grammar do
19
+ # Next 2 lines we define the terminal symbols
20
+ # (= word categories in the lexicon)
21
+ add_terminals('Noun', 'Proper-Noun', 'Verb')
10
22
  add_terminals('Determiner', 'Preposition')
11
23
 
12
24
  # Here we define the productions (= grammar rules)
@@ -17,10 +29,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
17
29
  rule 'VP' => %w[Verb NP]
18
30
  rule 'VP' => %w[Verb NP PP]
19
31
  rule 'PP' => %w[Preposition NP]
20
- end
21
-
22
- # And now, let's build the grammar...
23
- grammar = builder.grammar
32
+ end
24
33
 
25
34
  ########################################
26
35
  # Step 2. Creating a lexicon
@@ -31,7 +40,7 @@ Lexicon = {
31
40
  'dog' => 'Noun',
32
41
  'cat' => 'Noun',
33
42
  'telescope' => 'Noun',
34
- 'park' => 'Noun',
43
+ 'park' => 'Noun',
35
44
  'saw' => 'Verb',
36
45
  'ate' => 'Verb',
37
46
  'walked' => 'Verb',
@@ -49,44 +58,35 @@ Lexicon = {
49
58
  }.freeze
50
59
 
51
60
  ########################################
52
- # Step 3. Creating a tokenizer
53
- # A tokenizer reads the input string and converts it into a sequence of tokens
54
- # Highly simplified tokenizer implementation.
55
- def tokenizer(aTextToParse, aGrammar)
61
+ # Step 3. Create a tokenizer
62
+ # A tokenizer reads the input string and converts it into a sequence of tokens.
63
+ # Rley doesn't provide tokenizer functionality.
64
+ # (Highly simplified tokenizer implementation).
65
+ def tokenizer(aTextToParse)
56
66
  tokens = aTextToParse.scan(/\S+/).map do |word|
57
67
  term_name = Lexicon[word]
58
68
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
59
- terminal = aGrammar.name2symbol[term_name]
60
- Rley::Lexical::Token.new(word, terminal)
69
+ Rley::Lexical::Token.new(word, term_name)
61
70
  end
62
-
71
+
63
72
  return tokens
64
73
  end
65
74
 
66
- ########################################
67
- # Step 4. Create a parser for that grammar
68
- # Easy with Rley...
69
- parser = Rley::Parser::GFGEarleyParser.new(grammar)
70
75
 
71
76
  ########################################
72
- # Step 5. Parsing the input
77
+ # Step 4. Parse the input
73
78
  input_to_parse = 'John saw Mary with a telescope'
74
-
79
+ # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
75
80
  # Convert input text into a sequence of token objects...
76
- tokens = tokenizer(input_to_parse, grammar)
81
+ tokens = tokenizer(input_to_parse)
82
+ result = engine.parse(tokens)
77
83
 
78
84
  # Use Benchmark mix-in
79
85
  include Benchmark
80
86
 
81
87
  bm(6) do |meter|
82
- meter.report("Parse 100 times") { 100.times { parser.parse(tokens) } }
83
- meter.report("Parse 1000 times") { 1000.times { parser.parse(tokens) } }
84
- meter.report("Parse 10000 times") { 10000.times { parser.parse(tokens) } }
85
- meter.report("Parse 1000000 times") { 100000.times { parser.parse(tokens) } }
86
- end
87
-
88
- # puts "Parsing successful? #{result.success?}"
89
- # unless result.success?
90
- # puts result.failure_reason.message
91
- # exit(1)
92
- # end
88
+ meter.report("Parse 100 times") { 100.times { engine.parse(tokens) } }
89
+ meter.report("Parse 1000 times") { 1000.times { engine.parse(tokens) } }
90
+ meter.report("Parse 10000 times") { 10000.times { engine.parse(tokens) } }
91
+ meter.report("Parse 1000000 times") { 100000.times { engine.parse(tokens) } }
92
+ end
@@ -153,7 +153,7 @@ def tokenizer(lexicon, grammar, tokens)
153
153
  lexicon.each_with_index do |word, i|
154
154
  term_name = tokens[i].last
155
155
  terminal = grammar.name2symbol[term_name]
156
- rley_tokens << Rley::Tokens::Token.new(word, terminal)
156
+ rley_tokens << Rley::Lexical::Token.new(word, terminal)
157
157
  end
158
158
  return rley_tokens
159
159
  end
@@ -1,19 +1,22 @@
1
1
  require 'rley' # Load Rley library
2
2
 
3
+ ########################################
4
+ # Step 0. Instantiate facade object of Rley library.
5
+ # It provides a unified, higher-level interface
6
+ engine = Rley::Engine.new
7
+
3
8
  ########################################
4
9
  # Step 1. Define a grammar for a nano English-like language
5
10
  # based on example from Jurafski & Martin book (chapter 8 of the book).
6
11
  # Bird, Steven, Edward Loper and Ewan Klein: "Speech and Language Processing";
7
12
  # 2009, Pearson Education, Inc., ISBN 978-0135041963
8
- # It defines the syntax of a sentence in a mini English-like language
13
+ # It defines the syntax of a sentence in a mini English-like language
9
14
  # with a very simplified syntax and vocabulary
10
-
11
- # Instantiate a builder object that will build the grammar for us
12
- builder = Rley::Syntax::GrammarBuilder.new do
13
- # Next 2 lines we define the terminal symbols
15
+ engine.build_grammar do
16
+ # Next 2 lines we define the terminal symbols
14
17
  # (= word categories in the lexicon)
15
- add_terminals('Noun', 'Proper-Noun', 'Pronoun', 'Verb')
16
- add_terminals('Aux', 'Det', 'Preposition')
18
+ add_terminals('Noun', 'Proper-Noun', 'Pronoun', 'Verb')
19
+ add_terminals('Aux', 'Determiner', 'Preposition')
17
20
 
18
21
  # Here we define the productions (= grammar rules)
19
22
  rule 'Start' => 'S'
@@ -22,7 +25,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
22
25
  rule 'S' => 'VP'
23
26
  rule 'NP' => 'Pronoun'
24
27
  rule 'NP' => 'Proper-Noun'
25
- rule 'NP' => %w[Det Nominal]
28
+ rule 'NP' => %w[Determiner Nominal]
26
29
  rule 'Nominal' => %[Noun]
27
30
  rule 'Nominal' => %[Nominal Noun]
28
31
  rule 'VP' => 'Verb'
@@ -31,10 +34,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
31
34
  rule 'VP' => %w[Verb PP]
32
35
  rule 'VP' => %w[VP PP]
33
36
  rule 'PP' => %w[Preposition NP]
34
- end
35
-
36
- # And now, let's build the grammar...
37
- grammar = builder.grammar
37
+ end
38
38
 
39
39
  ########################################
40
40
  # Step 2. Creating a lexicon
@@ -45,7 +45,7 @@ Lexicon = {
45
45
  'dog' => 'Noun',
46
46
  'cat' => 'Noun',
47
47
  'telescope' => 'Noun',
48
- 'park' => 'Noun',
48
+ 'park' => 'Noun',
49
49
  'saw' => 'Verb',
50
50
  'ate' => 'Verb',
51
51
  'walked' => 'Verb',
@@ -66,29 +66,24 @@ Lexicon = {
66
66
  # Step 3. Creating a tokenizer
67
67
  # A tokenizer reads the input string and converts it into a sequence of tokens
68
68
  # Highly simplified tokenizer implementation.
69
- def tokenizer(aTextToParse, aGrammar)
69
+ def tokenizer(aTextToParse)
70
70
  tokens = aTextToParse.scan(/\S+/).map do |word|
71
71
  term_name = Lexicon[word]
72
72
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
73
- terminal = aGrammar.name2symbol[term_name]
74
- Rley::Lexical::Token.new(word, terminal)
73
+ Rley::Lexical::Token.new(word, term_name)
75
74
  end
76
-
75
+
77
76
  return tokens
78
77
  end
79
78
 
80
- ########################################
81
- # Step 4. Create a parser for that grammar
82
- # Easy with Rley...
83
- parser = Rley::Parser::GFGEarleyParser.new(grammar)
84
-
85
79
  ########################################
86
80
  # Step 5. Parsing the input
87
- input_to_parse = 'John saw Mary with a telescope'
81
+ input_to_parse = 'John saw Mary'
82
+ # input_to_parse = 'John saw Mary with a telescope'
88
83
  # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
89
84
  # Convert input text into a sequence of token objects...
90
- tokens = tokenizer(input_to_parse, grammar)
91
- result = parser.parse(tokens)
85
+ tokens = tokenizer(input_to_parse)
86
+ result = engine.parse(tokens)
92
87
 
93
88
  puts "Parsing successful? #{result.success?}"
94
89
  unless result.success?
@@ -98,10 +93,10 @@ end
98
93
 
99
94
  ########################################
100
95
  # Step 6. Generating a parse tree from parse result
101
- ptree = result.parse_tree
96
+ ptree = engine.convert(result)
102
97
 
103
98
  # Let's create a parse tree visitor
104
- visitor = Rley::ParseTreeVisitor.new(ptree)
99
+ visitor = engine.ptree_visitor(ptree)
105
100
 
106
101
  # Let's create a formatter (i.e. visit event listener)
107
102
  # renderer = Rley::Formatter::Debug.new($stdout)
@@ -109,7 +104,7 @@ visitor = Rley::ParseTreeVisitor.new(ptree)
109
104
  # Let's create a formatter that will render the parse tree with characters
110
105
  renderer = Rley::Formatter::Asciitree.new($stdout)
111
106
 
112
- # Let's create a formatter that will render the parse tree in labelled
107
+ # Let's create a formatter that will render the parse tree in labelled
113
108
  # bracket notation
114
109
  # renderer = Rley::Formatter::BracketNotation.new($stdout)
115
110
 
@@ -28,7 +28,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
28
28
  rule 'yes_no_question' => %w[Aux NP VP]
29
29
  rule 'wh_subject_question' => %w[Wh_NP NP VP]
30
30
  rule 'wh_non_subject_question' => %w[Wh_NP Aux NP VP]
31
- rule 'NP' => %[Predeterminer NP]
31
+ rule 'NP' => %w[Predeterminer NP]
32
32
  rule 'NP' => 'Pronoun'
33
33
  rule 'NP' => 'Proper-Noun'
34
34
  rule 'NP' => %w[Det Card Ord Quant Nominal]
@@ -1,18 +1,22 @@
1
1
  require 'rley' # Load Rley library
2
2
 
3
+
4
+ ########################################
5
+ # Step 1. Creating facade object of Rley library
6
+ # It provides a unified, higher-level interface
7
+ engine = Rley::Engine.new
8
+
3
9
  ########################################
4
- # Step 1. Define a grammar for a pico English-like language
10
+ # Step 2. Define a grammar for a pico English-like language
5
11
  # based on example from NLTK book (chapter 8 of the book).
6
- # Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
12
+ # Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
7
13
  # with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
8
- # It defines the syntax of a sentence in a mini English-like language
14
+ # It defines the syntax of a sentence in a mini English-like language
9
15
  # with a very simplified syntax and vocabulary
10
-
11
- # Instantiate a builder object that will build the grammar for us
12
- builder = Rley::Syntax::GrammarBuilder.new do
13
- # Next 2 lines we define the terminal symbols
16
+ engine.build_grammar do
17
+ # Next 2 lines we define the terminal symbols
14
18
  # (= word categories in the lexicon)
15
- add_terminals('Noun', 'Proper-Noun', 'Verb')
19
+ add_terminals('Noun', 'Proper-Noun', 'Verb')
16
20
  add_terminals('Determiner', 'Preposition')
17
21
 
18
22
  # Here we define the productions (= grammar rules)
@@ -23,13 +27,10 @@ builder = Rley::Syntax::GrammarBuilder.new do
23
27
  rule 'VP' => %w[Verb NP]
24
28
  rule 'VP' => %w[Verb NP PP]
25
29
  rule 'PP' => %w[Preposition NP]
26
- end
27
-
28
- # And now, let's build the grammar...
29
- grammar = builder.grammar
30
+ end
30
31
 
31
32
  ########################################
32
- # Step 2. Creating a lexicon
33
+ # Step 3. Creating a lexicon
33
34
  # To simplify things, lexicon is implemented as a Hash with pairs of the form:
34
35
  # word => terminal symbol name
35
36
  Lexicon = {
@@ -37,7 +38,7 @@ Lexicon = {
37
38
  'dog' => 'Noun',
38
39
  'cat' => 'Noun',
39
40
  'telescope' => 'Noun',
40
- 'park' => 'Noun',
41
+ 'park' => 'Noun',
41
42
  'saw' => 'Verb',
42
43
  'ate' => 'Verb',
43
44
  'walked' => 'Verb',
@@ -55,32 +56,28 @@ Lexicon = {
55
56
  }.freeze
56
57
 
57
58
  ########################################
58
- # Step 3. Creating a tokenizer
59
- # A tokenizer reads the input string and converts it into a sequence of tokens
60
- # Highly simplified tokenizer implementation.
61
- def tokenizer(aTextToParse, aGrammar)
59
+ # Step 4. Create a tokenizer
60
+ # A tokenizer reads the input string and converts it into a sequence of tokens.
61
+ # Remark: Rley doesn't provide tokenizer functionality.
62
+ # Highly simplified tokenizer implementation
63
+ def tokenizer(aTextToParse)
62
64
  tokens = aTextToParse.scan(/\S+/).map do |word|
63
65
  term_name = Lexicon[word]
64
66
  raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?
65
- terminal = aGrammar.name2symbol[term_name]
66
- Rley::Lexical::Token.new(word, terminal)
67
+ Rley::Lexical::Token.new(word, term_name)
67
68
  end
68
-
69
+
69
70
  return tokens
70
71
  end
71
72
 
72
- ########################################
73
- # Step 4. Create a parser for that grammar
74
- # Easy with Rley...
75
- parser = Rley::Parser::GFGEarleyParser.new(grammar)
76
73
 
77
74
  ########################################
78
- # Step 5. Parsing the input
75
+ # Step 5. Parse the input
79
76
  input_to_parse = 'John saw Mary with a telescope'
80
77
  # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
81
78
  # Convert input text into a sequence of token objects...
82
- tokens = tokenizer(input_to_parse, grammar)
83
- result = parser.parse(tokens)
79
+ tokens = tokenizer(input_to_parse)
80
+ result = engine.parse(tokens)
84
81
 
85
82
  puts "Parsing successful? #{result.success?}"
86
83
  unless result.success?
@@ -90,10 +87,10 @@ end
90
87
 
91
88
  ########################################
92
89
  # Step 6. Generating a parse tree from parse result
93
- ptree = result.parse_tree
90
+ ptree = engine.to_ptree(result)
94
91
 
95
92
  # Let's create a parse tree visitor
96
- visitor = Rley::ParseTreeVisitor.new(ptree)
93
+ visitor = engine.ptree_visitor(ptree)
97
94
 
98
95
  # Let's create a formatter (i.e. visit event listener)
99
96
  # renderer = Rley::Formatter::Debug.new($stdout)
@@ -101,7 +98,7 @@ visitor = Rley::ParseTreeVisitor.new(ptree)
101
98
  # Let's create a formatter that will render the parse tree with characters
102
99
  renderer = Rley::Formatter::Asciitree.new($stdout)
103
100
 
104
- # Let's create a formatter that will render the parse tree in labelled
101
+ # Let's create a formatter that will render the parse tree in labelled
105
102
  # bracket notation
106
103
  # renderer = Rley::Formatter::BracketNotation.new($stdout)
107
104