rley 0.5.07 → 0.5.08

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/examples/NLP/{benchmark_mini_en.rb → benchmark_pico_en.rb} +0 -0
  4. data/examples/NLP/nano_eng/nano_en_demo.rb +118 -0
  5. data/examples/NLP/nano_eng/nano_grammar.rb +59 -0
  6. data/examples/NLP/{mini_en_demo.rb → pico_en_demo.rb} +2 -2
  7. data/examples/general/SRL/lib/ast_builder.rb +176 -0
  8. data/examples/general/SRL/lib/ast_building.rb +20 -0
  9. data/examples/general/SRL/lib/grammar.rb +32 -0
  10. data/examples/general/SRL/lib/parser.rb +26 -0
  11. data/examples/general/SRL/lib/regex/multiplicity.rb +94 -0
  12. data/examples/general/SRL/lib/regex_repr.rb +1 -0
  13. data/examples/general/SRL/lib/srl_demo.rb +67 -0
  14. data/examples/general/SRL/lib/tokenizer.rb +101 -0
  15. data/examples/general/SRL/spec/integration_spec.rb +103 -0
  16. data/examples/general/SRL/spec/regex/multiplicity_spec.rb +83 -0
  17. data/examples/general/SRL/spec/spec_helper.rb +25 -0
  18. data/examples/general/SRL/spec/tokenizer_spec.rb +125 -0
  19. data/examples/general/SRL/srl_demo.rb +57 -0
  20. data/examples/general/calc_iter1/calc_demo.rb +1 -1
  21. data/examples/general/calc_iter2/ast_building.rb +20 -0
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +3 -23
  23. data/examples/general/calc_iter2/calc_demo.rb +1 -1
  24. data/lib/rley/base/base_parser.rb +1 -1
  25. data/lib/rley/base/grm_items_builder.rb +1 -1
  26. data/lib/rley/constants.rb +1 -1
  27. data/lib/rley/gfg/non_terminal_vertex.rb +1 -1
  28. data/lib/rley/parser/gfg_chart.rb +8 -3
  29. data/lib/rley/parser/gfg_earley_parser.rb +5 -2
  30. data/lib/rley/parser/gfg_parsing.rb +5 -1
  31. data/lib/rley/parser/parse_tree_builder.rb +16 -5
  32. data/lib/rley/ptree/terminal_node.rb +3 -2
  33. data/spec/rley/parser/ast_builder_spec.rb +2 -2
  34. data/spec/rley/parser/cst_builder_spec.rb +2 -3
  35. metadata +20 -4
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4ce368c99ffa556898d9a89788786e47b5e8b115
4
- data.tar.gz: 97a691b869089989f601556ed13dd24d729b7ad0
3
+ metadata.gz: 3c616b691fb51ba2eb00a25fee75ff4a80093990
4
+ data.tar.gz: 1039cfe8f29c8d1ec7c88fa83c18f9173763b8f2
5
5
  SHA512:
6
- metadata.gz: b50595273bf5f75e25b6e609de197be31a4f8583f1c1f93ef8494a8e5367facdbafd13326bd2097c9da657096c2f15e9dc8d439d16212f4e578134ce538984b2
7
- data.tar.gz: 8be6b6bdef48de1cbd19b2785530a5bc17e43804a43ff01f287a89a129a83897f8ea05ecb951018c99f888bbfefbbc33e89ee9cfb0adb2e86eba2e3971703c1d
6
+ metadata.gz: df7412344421bd421fb459fe5cf8053618dea1212c4da27e83cf41225dbaf664d9b143499978e6bcef2ae293a7bf9378d3ecb4867f989553f798e9723ba8344b
7
+ data.tar.gz: 436474ceafd2689137fab890b19ca24715ebe72dd1311b3ad64313bc130cf8bbce12fe35049008d20a89634309cbac882da70bc891522d45a58e8ce310b466a7
@@ -1,3 +1,9 @@
1
+ ### 0.5.08 / 2017-11-xx
2
+ * [FIX] Method `BaseParser::initialize` missing parameter name in doc caused a YARD warning.
3
+ * [FIX] Method `GrmItemsBuilder::build_dotted_items` missing parameter name in doc caused a YARD warning.
4
+ * [FIX] Method `NonTerminalVertex::initialize` missing parameter name in doc caused a YARD warning.
5
+
6
+
1
7
  ### 0.5.07 / 2017-11-11
2
8
  * [NEW] File `benchmark_mini_en.rb` added in `examples/NLP` folder for parsing performance measurements.
3
9
  * [CHANGE] Demo calculator in `examples/general/calc_iter2`: added support for log10 and cbrt functions. README.md slightly reworked.
@@ -0,0 +1,118 @@
1
+ require 'rley' # Load Rley library
2
+
3
+ ########################################
4
+ # Step 1. Define a grammar for a nano English-like language
5
+ # based on example from Jurafsky & Martin book (chapter 8 of the book).
6
+ # Daniel Jurafsky, James H. Martin: "Speech and Language Processing";
7
+ # 2009, Pearson Education, Inc., ISBN 978-0135041963
8
+ # It defines the syntax of a sentence in a mini English-like language
9
+ # with a very simplified syntax and vocabulary
10
+
11
+ # Instantiate a builder object that will build the grammar for us
12
# Grammar builder for the nano English-like language.
# The block is evaluated by the builder, so `add_terminals` and `rule`
# are builder methods.
builder = Rley::Syntax::GrammarBuilder.new do
  # In the next 2 lines we define the terminal symbols
  # (= word categories in the lexicon).
  # The determiner category is named 'Determiner' so that it matches the
  # category name used by the Lexicon entries ('a', 'an', 'the', 'my').
  add_terminals('Noun', 'Proper-Noun', 'Pronoun', 'Verb')
  add_terminals('Aux', 'Determiner', 'Preposition')

  # Here we define the productions (= grammar rules)
  rule 'Start' => 'S'
  rule 'S' => %w[NP VP]
  rule 'S' => %w[Aux NP VP]
  rule 'S' => 'VP'
  rule 'NP' => 'Pronoun'
  rule 'NP' => 'Proper-Noun'
  rule 'NP' => %w[Determiner Nominal]
  # %w[...] (word array) is required here: a bare %[...] is a String
  # literal, which would yield the single symbol name "Nominal Noun".
  rule 'Nominal' => %w[Noun]
  rule 'Nominal' => %w[Nominal Noun]
  rule 'VP' => 'Verb'
  rule 'VP' => %w[Verb NP]
  rule 'VP' => %w[Verb NP PP]
  rule 'VP' => %w[Verb PP]
  rule 'VP' => %w[VP PP]
  rule 'PP' => %w[Preposition NP]
end
35
+
36
+ # And now, let's build the grammar...
37
+ grammar = builder.grammar
38
+
39
+ ########################################
40
+ # Step 2. Creating a lexicon
41
+ # To simplify things, lexicon is implemented as a Hash with pairs of the form:
42
+ # word => terminal symbol name
43
+ Lexicon = {
44
+ 'man' => 'Noun',
45
+ 'dog' => 'Noun',
46
+ 'cat' => 'Noun',
47
+ 'telescope' => 'Noun',
48
+ 'park' => 'Noun',
49
+ 'saw' => 'Verb',
50
+ 'ate' => 'Verb',
51
+ 'walked' => 'Verb',
52
+ 'John' => 'Proper-Noun',
53
+ 'Mary' => 'Proper-Noun',
54
+ 'Bob' => 'Proper-Noun',
55
+ 'a' => 'Determiner',
56
+ 'an' => 'Determiner',
57
+ 'the' => 'Determiner',
58
+ 'my' => 'Determiner',
59
+ 'in' => 'Preposition',
60
+ 'on' => 'Preposition',
61
+ 'by' => 'Preposition',
62
+ 'with' => 'Preposition'
63
+ }.freeze
64
+
65
+ ########################################
66
+ # Step 3. Creating a tokenizer
67
+ # A tokenizer reads the input string and converts it into a sequence of tokens
68
+ # Highly simplified tokenizer implementation.
69
# Convert a text into a sequence of tokens, one per whitespace-delimited word.
# Each word is looked up in `Lexicon` to find its category name, which is
# then resolved to a grammar symbol through `aGrammar.name2symbol`.
# @param aTextToParse [String] The text to tokenize
# @param aGrammar [#name2symbol] Grammar object giving access to a
#   name => symbol mapping
# @return [Array<Rley::Lexical::Token>] One token per word (empty for blank text)
# @raise [StandardError] When a word is missing from the lexicon, or when
#   its lexicon category is unknown to the grammar
def tokenizer(aTextToParse, aGrammar)
  aTextToParse.scan(/\S+/).map do |word|
    term_name = Lexicon[word]
    raise StandardError, "Word '#{word}' not found in lexicon" if term_name.nil?

    terminal = aGrammar.name2symbol[term_name]
    # Fail early: a token without a terminal symbol cannot be matched
    # by the parser and would only cause an obscure error later on.
    if terminal.nil?
      raise StandardError,
            "Category '#{term_name}' of word '#{word}' is not a symbol of the grammar"
    end

    Rley::Lexical::Token.new(word, terminal)
  end
end
79
+
80
+ ########################################
81
+ # Step 4. Create a parser for that grammar
82
+ # Easy with Rley...
83
+ parser = Rley::Parser::GFGEarleyParser.new(grammar)
84
+
85
+ ########################################
86
+ # Step 5. Parsing the input
87
+ input_to_parse = 'John saw Mary with a telescope'
88
+ # input_to_parse = 'the dog saw a man in the park' # This one is ambiguous
89
+ # Convert input text into a sequence of token objects...
90
+ tokens = tokenizer(input_to_parse, grammar)
91
+ result = parser.parse(tokens)
92
+
93
+ puts "Parsing successful? #{result.success?}"
94
+ unless result.success?
95
+ puts result.failure_reason.message
96
+ exit(1)
97
+ end
98
+
99
+ ########################################
100
+ # Step 6. Generating a parse tree from parse result
101
+ ptree = result.parse_tree
102
+
103
+ # Let's create a parse tree visitor
104
+ visitor = Rley::ParseTreeVisitor.new(ptree)
105
+
106
+ # Let's create a formatter (i.e. visit event listener)
107
+ # renderer = Rley::Formatter::Debug.new($stdout)
108
+
109
+ # Let's create a formatter that will render the parse tree with characters
110
+ renderer = Rley::Formatter::Asciitree.new($stdout)
111
+
112
+ # Let's create a formatter that will render the parse tree in labelled
113
+ # bracket notation
114
+ # renderer = Rley::Formatter::BracketNotation.new($stdout)
115
+
116
+ # Subscribe the formatter to the visitor's event and launch the visit
117
+ renderer.render(visitor)
118
+ # End of file
@@ -0,0 +1,59 @@
1
+ # Grammar for a simple subset of English language
2
+ # It is called nano-English because it has a more elaborate
3
+ # grammar than pico-English but still remains tiny compared to "real" English
4
+ require 'rley' # Load the gem
5
+
6
+
7
+ ########################################
8
+ # Define a grammar for a nano English-like language
9
+ # based on chapter 12 from Jurafsky & Martin book.
10
+ # Daniel Jurafsky, James H. Martin: "Speech and Language Processing";
11
+ # 2009, Pearson Education, Inc., ISBN 978-0135041963
12
+ # It defines the syntax of a sentence in a mini English-like language
13
# Grammar builder for the nano English-like language.
# The block is evaluated by the builder, so `add_terminals` and `rule`
# are builder methods.
# NOTE(review): `Wh_NP` and `Relative_pronoun` are used in right-hand sides
# below, but no terminal declaration or production for them is visible
# in this file -- confirm where they are meant to be defined.
# NOTE(review): `Quant` is mandatory in the long NP rule whereas Det, Card
# and Ord each have an empty alternative -- confirm this is intended.
builder = Rley::Syntax::GrammarBuilder.new do
  add_terminals('Pronoun', 'Proper-Noun')
  add_terminals('Determiner', 'Noun')
  add_terminals('Cardinal_number', 'Ordinal_number', 'Quant')
  add_terminals('Verb', 'GerundV', 'Aux')
  add_terminals('Predeterminer', 'Preposition')

  rule 'language' => 'sentence'
  rule 'sentence' => 'declarative'
  rule 'sentence' => 'imperative'
  rule 'sentence' => 'yes_no_question'
  rule 'sentence' => 'wh_subject_question'
  rule 'sentence' => 'wh_non_subject_question'
  rule 'declarative' => %w[NP VP]
  rule 'imperative' => 'VP'
  rule 'yes_no_question' => %w[Aux NP VP]
  rule 'wh_subject_question' => %w[Wh_NP NP VP]
  rule 'wh_non_subject_question' => %w[Wh_NP Aux NP VP]
  # %w[...] (word array) is required for multi-symbol right-hand sides:
  # a bare %[...] is a String literal and would name one single symbol.
  rule 'NP' => %w[Predeterminer NP]
  rule 'NP' => 'Pronoun'
  rule 'NP' => 'Proper-Noun'
  rule 'NP' => %w[Det Card Ord Quant Nominal]
  rule 'VP' => 'Verb'
  rule 'VP' => %w[Verb NP]
  rule 'VP' => %w[Verb NP PP]
  rule 'VP' => %w[Verb PP]
  # Det, Card and Ord are optional: each has an empty alternative
  rule 'Det' => 'Determiner'
  rule 'Det' => []
  rule 'Card' => 'Cardinal_number'
  rule 'Card' => []
  rule 'Ord' => 'Ordinal_number'
  rule 'Ord' => []
  rule 'Nominal' => 'Noun'
  rule 'Nominal' => %w[Nominal Noun]
  rule 'Nominal' => %w[Nominal GerundVP]
  rule 'Nominal' => %w[Nominal RelClause]
  rule 'PP' => %w[Preposition NP]
  rule 'GerundVP' => 'GerundV'
  rule 'GerundVP' => %w[GerundV NP]
  rule 'GerundVP' => %w[GerundV NP PP]
  rule 'GerundVP' => %w[GerundV PP]
  rule 'RelClause' => %w[Relative_pronoun VP]
end
57
+
58
+ # And now build the grammar...
59
+ NanoGrammar = builder.grammar
@@ -1,12 +1,12 @@
1
1
  require 'rley' # Load Rley library
2
2
 
3
3
  ########################################
4
- # Step 1. Define a grammar for a micro English-like language
4
+ # Step 1. Define a grammar for a pico English-like language
5
5
  # based on example from NLTK book (chapter 8 of the book).
6
6
  # Bird, Steven, Edward Loper and Ewan Klein: "Natural Language Processing
7
7
  # with Python"; 2009, O’Reilly Media Inc., ISBN 978-0596516499
8
8
  # It defines the syntax of a sentence in a mini English-like language
9
- # with a very simplified syntax.
9
+ # with a very simplified syntax and vocabulary
10
10
 
11
11
  # Instantiate a builder object that will build the grammar for us
12
12
  builder = Rley::Syntax::GrammarBuilder.new do
@@ -0,0 +1,176 @@
1
+ require_relative 'ast_building'
2
+ require_relative 'regex_repr'
3
+
4
+ # The purpose of an ASTBuilder is to build piece by piece an AST
5
+ # (Abstract Syntax Tree) from a sequence of input tokens and
6
+ # visit events produced by walking over a GFGParsing object.
7
+ # Uses the Builder GoF pattern.
8
+ # The Builder pattern creates a complex object
9
+ # (say, a parse tree) from simpler objects (terminal and non-terminal
10
+ # nodes) and using a step by step approach.
11
+ class ASTBuilder < Rley::Parser::ParseTreeBuilder
12
+ include ASTBuilding
13
+
14
+ Terminal2NodeClass = { }.freeze
15
+
16
+ protected
17
+
18
+ # Overriding method.
19
+ # Factory method for creating a node object for the given
20
+ # input token.
21
+ # @param aTerminal [Terminal] Terminal symbol associated with the token
22
+ # @param aTokenPosition [Integer] Position of token in the input stream
23
+ # @param aToken [Token] The input token
24
+ def new_leaf_node(aProduction, aTerminal, aTokenPosition, aToken)
25
+ node = Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
26
+
27
+ return node
28
+ end
29
+
30
+ # Overriding method.
31
+ # Factory method for creating a parent node object.
32
+ # @param aProduction [Production] Production rule
33
+ # @param aRange [Range] Range of tokens matched by the rule
34
+ # @param theTokens [Array] The input tokens
35
+ # @param theChildren [Array] Children nodes (one per rhs symbol)
36
+ def new_parent_node(aProduction, aRange, theTokens, theChildren)
37
+ node = case aProduction.name
38
+ when 'srl_0' # rule 'srl' => 'quantifier'
39
+ return_first_child(aRange, theTokens, theChildren)
40
+
41
+ when 'quantifier_0' # rule 'quantifier' => 'ONCE'
42
+ multiplicity(1, 1)
43
+
44
+ when 'quantifier_1' # rule 'quantifier' => 'TWICE'
45
+ multiplicity(2, 2)
46
+
47
+ when 'quantifier_2' # rule 'quantifier' => %w[EXACTLY count TIMES]
48
+ reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
49
+
50
+ when 'quantifier_3' # rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
51
+ reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
52
+
53
+ when 'quantifier_4' # rule 'quantifier' => 'OPTIONAL'
54
+ multiplicity(0, 1)
55
+
56
+ when 'quantifier_5' # rule 'quantifier' => %w[ONCE OR MORE]
57
+ multiplicity(1, :more)
58
+
59
+ when 'quantifier_6' # rule 'quantifier' => %w[NEVER OR MORE]
60
+ multiplicity(0, :more)
61
+
62
+ when 'quantifier_7' # rule 'quantifier' => %w[AT LEAST count TIMES]
63
+ reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
64
+
65
+ when 'count_0', 'count_1'
66
+ return_first_child(aRange, theTokens, theChildren)
67
+
68
+ when 'times_suffix_0', 'times_suffix_1'
69
+ nil
70
+ else
71
+ raise StandardError, "Don't know production #{aProduction.name}"
72
+ end
73
+
74
+ return node
75
+ end
76
+
77
+ def multiplicity(lowerBound, upperBound)
78
+ return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
79
+ end
80
+
81
+ # rule 'quantifier' => %w[EXACTLY count TIMES]
82
+ def reduce_quantifier_2(aProduction, aRange, theTokens, theChildren)
83
+ count = theChildren[1].token.lexeme.to_i
84
+ multiplicity(count, count)
85
+ end
86
+
87
# rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
# Build the multiplicity for a "between LOWER and UPPER" quantifier.
# Both bounds come from the lexemes of the two `count` children
# (positions 1 and 3 in the rule's right-hand side).
# @param aProduction [Production] Production rule (unused)
# @param aRange [Range] Range of tokens matched by the rule (unused)
# @param theTokens [Array] The input tokens (unused)
# @param theChildren [Array] Children nodes (one per rhs symbol)
# @return The object returned by `multiplicity(lower, upper)`
def reduce_quantifier_3(aProduction, aRange, theTokens, theChildren)
  lower = theChildren[1].token.lexeme.to_i
  upper = theChildren[3].token.lexeme.to_i
  multiplicity(lower, upper)
end
93
+
94
+ # rule 'quantifier' => %w[AT LEAST count TIMES]
95
+ def reduce_quantifier_7(aProduction, aRange, theTokens, theChildren)
96
+ count = theChildren[2].token.lexeme.to_i
97
+ multiplicity(count, :more)
98
+ end
99
+
100
+
101
+ =begin
102
+ def reduce_binary_operator(theChildren)
103
+ operator_node = theChildren[1]
104
+ operator_node.children << theChildren[0]
105
+ operator_node.children << theChildren[2]
106
+ return operator_node
107
+ end
108
+
109
+ # rule 'simple_expression' => %w[simple_expression add_operator term]
110
+ def reduce_simple_expression_1(_production, _range, _tokens, theChildren)
111
+ reduce_binary_operator(theChildren)
112
+ end
113
+
114
+ # rule 'term' => %w[term mul_operator factor]
115
+ def reduce_term_1(_production, _range, _tokens, theChildren)
116
+ reduce_binary_operator(theChildren)
117
+ end
118
+
119
+ # rule 'factor' => %w[simple_factor POWER simple_factor]]
120
+ def reduce_factor_1(aProduction, aRange, theTokens, theChildren)
121
+ result = PowerNode.new(theChildren[1].symbol, aRange)
122
+ result.children << theChildren[0]
123
+ result.children << theChildren[2]
124
+
125
+ return result
126
+ end
127
+
128
+ # rule 'simple_factor' => %[sign scalar]
129
+ def reduce_simple_factor_0(aProduction, aRange, theTokens, theChildren)
130
+ first_child = theChildren[0]
131
+ result = if first_child.kind_of?(CalcNegateNode)
132
+ -theChildren[1]
133
+ else
134
+ theChildren[1]
135
+ end
136
+
137
+ return result
138
+ end
139
+
140
+ # rule 'simple_factor' => %w[unary_function in_parenthesis]
141
+ def reduce_simple_factor_1(aProduction, aRange, theTokens, theChildren)
142
+ func = CalcUnaryFunction.new(theChildren[0].symbol, aRange.low)
143
+ func.func_name = theChildren[0].value
144
+ func.children << theChildren[1]
145
+ return func
146
+ end
147
+
148
+ # rule 'simple_factor' => %w[MINUS in_parenthesis]
149
+ def reduce_simple_factor_2(aProduction, aRange, theTokens, theChildren)
150
+ negation = CalcNegateNode.new(theChildren[0].symbol, aRange.low)
151
+ negation.children << theChildren[1]
152
+ return negation
153
+ end
154
+
155
+ # rule 'add_operator' => 'PLUS'
156
+ def reduce_add_operator_0(_production, aRange, _tokens, theChildren)
157
+ return CalcAddNode.new(theChildren[0].symbol, aRange)
158
+ end
159
+
160
+ # rule 'add_operator' => 'MINUS'
161
+ def reduce_add_operator_1(_production, aRange, _tokens, theChildren)
162
+ return CalcSubtractNode.new(theChildren[0].symbol, aRange)
163
+ end
164
+
165
+ # rule 'mul_operator' => 'STAR'
166
+ def reduce_mul_operator_0(_production, aRange, _tokens, theChildren)
167
+ return CalcMultiplyNode.new(theChildren[0].symbol, aRange)
168
+ end
169
+
170
+ # rule 'mul_operator' => 'DIVIDE'
171
+ def reduce_mul_operator_1(_production, aRange, _tokens, theChildren)
172
+ return CalcDivideNode.new(theChildren[0].symbol, aRange)
173
+ end
174
+ =end
175
+ end # class
176
+ # End of file
@@ -0,0 +1,20 @@
1
+ # Mix-in module that provides convenience methods for
2
+ # constructing an AST (Abstract Syntax Tree).
3
+ module ASTBuilding
4
+ def return_first_child(_range, _tokens, theChildren)
5
+ return theChildren[0]
6
+ end
7
+
8
+ def return_second_child(_range, _tokens, theChildren)
9
+ return theChildren[1]
10
+ end
11
+
12
+ def return_last_child(_range, _tokens, theChildren)
13
+ return theChildren[-1]
14
+ end
15
+
16
+ def return_epsilon(_range, _tokens, _children)
17
+ return nil
18
+ end
19
+ end # module
20
+ # End of file
@@ -0,0 +1,32 @@
1
+ # Grammar for SRL (Simple Regex Language)
2
+ require 'rley' # Load the gem
3
+ module SRL
4
+ ########################################
5
+ # Work in progress.
6
+ # This is a very partial grammar of SRL.
7
+ # It will be expanded with the coming versions of Rley
8
+ builder = Rley::Syntax::GrammarBuilder.new do
9
+ add_terminals('DIGIT', 'INTEGER')
10
+ add_terminals('EXACTLY', 'TIMES', 'ONCE', 'TWICE')
11
+ add_terminals('BETWEEN', 'AND', 'OPTIONAL', 'OR')
12
+ add_terminals('MORE', 'NEVER', 'AT', 'LEAST')
13
+
14
+ # For the moment one focuses on quantifier syntax only...
15
+ rule 'srl' => 'quantifier'
16
+ rule 'quantifier' => 'ONCE'
17
+ rule 'quantifier' => 'TWICE'
18
+ rule 'quantifier' => %w[EXACTLY count TIMES]
19
+ rule 'quantifier' => %w[BETWEEN count AND count times_suffix]
20
+ rule 'quantifier' => 'OPTIONAL'
21
+ rule 'quantifier' => %w[ONCE OR MORE]
22
+ rule 'quantifier' => %w[NEVER OR MORE]
23
+ rule 'quantifier' => %w[AT LEAST count TIMES]
24
+ rule 'count' => 'DIGIT'
25
+ rule 'count' => 'INTEGER'
26
+ rule 'times_suffix' => 'TIMES'
27
+ rule 'times_suffix' => []
28
+ end
29
+
30
+ # And now build the grammar and make it accessible via a global constant
31
+ Grammar = builder.grammar
32
+ end # module