rley 0.3.08 → 0.3.09
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +190 -35
- data/examples/NLP/mini_en_demo.rb +92 -0
- data/lib/rley/constants.rb +1 -1
- metadata +3 -20
- data/examples/grammars/grammar_L0.rb +0 -32
- data/examples/grammars/grammar_abc.rb +0 -26
- data/examples/parsers/demo-JSON/JSON_grammar.rb +0 -31
- data/examples/parsers/demo-JSON/JSON_lexer.rb +0 -114
- data/examples/parsers/demo-JSON/JSON_parser.rb +0 -89
- data/examples/parsers/demo-JSON/demo_json.rb +0 -42
- data/examples/parsers/parsing_L0.rb +0 -124
- data/examples/parsers/parsing_L1.rb +0 -137
- data/examples/parsers/parsing_abc.rb +0 -71
- data/examples/parsers/parsing_ambig.rb +0 -92
- data/examples/parsers/parsing_another.rb +0 -70
- data/examples/parsers/parsing_b_expr.rb +0 -85
- data/examples/parsers/parsing_err_expr.rb +0 -74
- data/examples/parsers/parsing_groucho.rb +0 -97
- data/examples/parsers/parsing_right_recursive.rb +0 -70
- data/examples/parsers/parsing_tricky.rb +0 -91
- data/examples/parsers/tracing_parser.rb +0 -54
- data/examples/recognizers/recognizer_abc.rb +0 -71
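Note: every example script removed in this release follows the same Rley 0.3.x workflow — build a grammar with Rley::Syntax::GrammarBuilder, wrap the input in Rley::Parser::Token objects, run Rley::Parser::EarleyParser, then walk the resulting parse tree with Rley::ParseTreeVisitor and a formatter. The new data/examples/NLP/mini_en_demo.rb (whose content is not shown in this section) presumably consolidates the removed demos along the same lines. The sketch below is not code from the gem; it merely restates that shared pattern, using only calls that appear in the removed files and the toy 'abc' grammar from grammar_abc.rb. The per-file deletion hunks follow.

# Hedged sketch of the workflow shared by the removed examples (assumes the 0.3.x API shown below).
require 'rley'

builder = Rley::Syntax::GrammarBuilder.new
builder.add_terminals('a', 'b', 'c')
builder.add_production('S' => 'A')
builder.add_production('A' => %w(a A c))
builder.add_production('A' => 'b')
grammar = builder.grammar

# Hand-built token sequence for the input 'abc'
tokens = %w(a b c).map do |lexeme|
  Rley::Parser::Token.new(lexeme, grammar.name2symbol[lexeme])
end

parser = Rley::Parser::EarleyParser.new(grammar)
result = parser.parse(tokens)
puts "Parsing success? #{result.success?}"

# Render the parse tree as JSON on the console output
visitor = Rley::ParseTreeVisitor.new(result.parse_tree)
Rley::Formatter::Json.new(STDOUT).render(visitor)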
data/examples/grammars/grammar_abc.rb
@@ -1,26 +0,0 @@
-# Purpose: to demonstrate how to build a very simple grammar
-require 'rley' # Load the gem
-
-# A very simple language
-# It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
-# (based on example in N. Wirth's book "Compiler Construction", p. 6)
-# S ::= A.
-# A ::= "a" A "c".
-# A ::= "b".
-
-
-# Let's create the grammar step-by-step with the grammar builder:
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('a', 'b', 'c')
-builder.add_production('S' => 'A')
-builder.add_production('A' => %w(a A c))
-builder.add_production('A' => 'b')
-
-# And now build the grammar...
-grammar_abc = builder.grammar
-
-# Prove that it is a grammar
-puts grammar_abc.class.name
-
-# End of file
-
data/examples/parsers/demo-JSON/JSON_grammar.rb
@@ -1,31 +0,0 @@
-# Purpose: to demonstrate how to build and render a parse tree for JSON
-# language
-require 'rley' # Load the gem
-
-
-########################################
-# Define a grammar for JSON
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('KEYWORD') # For true, false, null keywords
-builder.add_terminals('JSON_STRING', 'JSON_NUMBER')
-builder.add_terminals('LACCOL', 'RACCOL') # For '{', '}' delimiters
-builder.add_terminals('LBRACKET', 'RBRACKET') # For '[', ']' delimiters
-builder.add_terminals('COLON', 'COMMA') # For ':', ',' separators
-builder.add_production('json_text' => 'json_value')
-builder.add_production('json_value' => 'json_object')
-builder.add_production('json_value' => 'json_array')
-builder.add_production('json_value' => 'JSON_STRING')
-builder.add_production('json_value' => 'JSON_NUMBER')
-builder.add_production('json_value' => 'KEYWORD')
-builder.add_production('json_object' => %w[LACCOL json_pairs RACCOL])
-builder.add_production('json_object' => ['LACCOL', 'RACCOL'])
-builder.add_production('json_pairs' => %w[json_pairs COMMA single_pair])
-builder.add_production('json_pairs' => 'single_pair')
-builder.add_production('single_pair' => %w[JSON_STRING COLON json_value])
-builder.add_production('json_array' => %w[LBRACKET array_items RBRACKET])
-builder.add_production('json_array' => ['RBRACKET', 'RBRACKET'])
-builder.add_production('array_items' => %w[array_items COMMA json_value])
-builder.add_production('array_items' => %w[json_value])
-
-# And now build the grammar...
-GrammarJSON = builder.grammar
data/examples/parsers/demo-JSON/JSON_lexer.rb
@@ -1,114 +0,0 @@
-# File: JSON_lexer.rb
-# Lexer for the JSON data format
-require 'rley' # Load the gem
-require 'strscan'
-
-# Lexer for JSON.
-class JSONLexer
-  attr_reader(:scanner)
-  attr_reader(:lineno)
-  attr_reader(:line_start)
-  attr_reader(:name2symbol)
-
-  @@lexeme2name = {
-    '{' => 'LACCOL',
-    '}' => 'RACCOL',
-    '[' => 'LBRACKET',
-    ']' => 'RBRACKET',
-    ',' => 'COMMA',
-    ':' => 'COLON'
-  }
-
-  class ScanError < StandardError ; end
-
-  public
-  def initialize(source, aGrammar)
-    @scanner = StringScanner.new(source)
-    @name2symbol = aGrammar.name2symbol
-    @lineno = 1
-  end
-
-  def tokens()
-    tok_sequence = []
-    until @scanner.eos? do
-      token = _next_token
-      tok_sequence << token unless token.nil?
-    end
-
-    return tok_sequence
-  end
-
-  private
-  def _next_token()
-    token = nil
-    skip_whitespaces
-    curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
-
-    begin
-      break if curr_ch.nil?
-
-      case curr_ch
-        when '{', '}', '[', ']', ',', ':'
-          type_name = @@lexeme2name[curr_ch]
-          token_type = name2symbol[type_name]
-          token = Rley::Parser::Token.new(curr_ch, token_type)
-
-        # LITERALS
-        when '"' # Start string delimiter found
-          value = scanner.scan(/([^"\\]|\\.)*/)
-          end_delimiter = scanner.getch()
-          raise ScanError.new('No closing quotes (") found') if end_delimiter.nil?
-          token_type = name2symbol['JSON_STRING']
-          token = Rley::Parser::Token.new(value, token_type)
-
-        when /[ftn]/ # First letter of keywords
-          @scanner.pos = scanner.pos - 1 # Simulate putback
-          keyw = scanner.scan(/false|true|null/)
-          if keyw.nil?
-            invalid_keyw = scanner.scan(/\w+/)
-            raise ScanError.new("Invalid keyword: #{invalid_keyw}")
-          else
-            token_type = name2symbol['KEYWORD']
-            token = Rley::Parser::Token.new(keyw, token_type)
-          end
-
-
-        when /[-0-9]/ # Start character of number literal found
-          @scanner.pos = scanner.pos - 1 # Simulate putback
-          value = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9])?/)
-          token_type = name2symbol['JSON_NUMBER']
-          token = Rley::Parser::Token.new(value, token_type)
-
-
-        else # Unknown token
-          erroneous = curr_ch.nil? ? '' : curr_ch
-          sequel = scanner.scan(/.{1,20}/)
-          erroneous += sequel unless sequel.nil?
-          raise ScanError.new("Unknown token #{erroneous}")
-      end #case
-
-
-    end while (token.nil? && curr_ch = scanner.getch())
-
-    return token
-  end
-
-
-  def skip_whitespaces()
-    matched = scanner.scan(/[ \t\f\n\r]+/)
-    return if matched.nil?
-
-    newline_count = 0
-    matched.scan(/\n\r?|\r/) { |_| newline_count += 1 }
-    newline_detected(newline_count)
-  end
-
-
-  def newline_detected(count)
-    @lineno += count
-    @line_start = scanner.pos()
-  end
-
-end # class
-
-
data/examples/parsers/demo-JSON/JSON_parser.rb
@@ -1,89 +0,0 @@
-# Purpose: to demonstrate how to build and render a parse tree for JSON
-# language
-require 'pp'
-require 'rley' # Load the gem
-require_relative 'json_lexer'
-
-# Steps to render a parse tree (of a valid parsed input):
-# 1. Define a grammar
-# 2. Create a parser for that grammar
-# 3. Tokenize the input
-# 4. Let the parser process the input
-# 5. Generate a parse tree from the parse result
-# 6. Render the parse tree (in JSON)
-
-########################################
-# Step 1. Load a grammar for JSON
-require_relative 'JSON_grammar'
-
-# A JSON parser derived from our general Earley parser.
-class JSONParser < Rley::Parser::EarleyParser
-  attr_reader(:source_file)
-
-  # Constructor
-  def initialize()
-    # Builder the Earley parser with the JSON grammar
-    super(GrammarJSON)
-  end
-
-  def parse_file(aFilename)
-    tokens = tokenize_file(aFilename)
-    result = parse(tokens)
-
-    return result
-  end
-
-  private
-
-  def tokenize_file(aFilename)
-    input_source = nil
-    File.open(aFilename, 'r') { |f| input_source = f.read }
-
-    lexer = JSONLexer.new(input_source, GrammarJSON)
-    return lexer.tokens
-  end
-end # class
-
-=begin
-########################################
-# Step 3. Create a parser for that grammar
-# parser = Rley::Parser::EarleyParser.new(GrammarJSON)
-parser = JSONParser.new
-
-
-########################################
-# Step 4. Tokenize the input file
-file_name = 'sample02.json'
-=begin
-input_source = nil
-File.open(file_name, 'r') { |f| input_source = f.read }
-
-lexer = JSONLexer.new(input_source, GrammarJSON)
-tokens = lexer.tokens
-#=end
-
-########################################
-# Step 5. Let the parser process the input
-result = parser.parse_file(file_name) # parser.parse(tokens)
-unless result.success?
-  puts "Parsing of '#{file_name}' failed"
-  exit(1)
-end
-
-########################################
-# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
-
-########################################
-# Step 7. Render the parse tree (in JSON)
-# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
-
-#Here we create a renderer object...
-renderer = Rley::Formatter::Json.new(STDOUT)
-
-# Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{file_name}' input:"
-renderer.render(visitor)
-=end
-# End of file
data/examples/parsers/demo-JSON/demo_json.rb
@@ -1,42 +0,0 @@
-require_relative 'JSON_parser'
-
-# Create a JSON parser object
-parser = JSONParser.new
-
-# Parse the input file with name given in command-line
-if ARGV.empty?
-  msg = <<-END_MSG
-  Command-line symtax:
-  ruby #{__FILE__} filename
-  where:
-    filename is the name of a JSON file
-
-  Example:
-  ruby #{__FILE__} sample01.json
-  END_MSG
-  puts msg
-  exit(1)
-end
-file_name = ARGV[0]
-result = parser.parse_file(file_name) # result object contains parse details
-
-unless result.success?
-  # Stop if the parse failed...
-  puts "Parsing of '#{file_name}' failed"
-  exit(1)
-end
-
-# Generate a parse tree from the parse result
-ptree = result.parse_tree
-
-# Do something with the parse tree: render it on the output console.
-# Step a: Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
-
-# Step b: Select the rendition format to be JSON
-renderer = Rley::Formatter::Json.new(STDOUT)
-
-# Step c: Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{file_name}' input:"
-renderer.render(visitor)
-# End of file
data/examples/parsers/parsing_L0.rb
@@ -1,124 +0,0 @@
-# Purpose: to demonstrate how to build and render a parse tree for the L0
-# language
-require 'pp'
-require 'rley' # Load the gem
-
-# Steps to render a parse tree (of a valid parsed input):
-# 1. Define a grammar
-# 2. Create a tokenizer for the language
-# 3. Create a parser for that grammar
-# 4. Tokenize the input
-# 5. Let the parser process the input
-# 6. Generate a parse tree from the parse result
-# 7. Render the parse tree (in JSON)
-
-########################################
-# Step 1. Define a grammar for a micro English-like language
-# based on Jurafky & Martin L0 language (chapter 12 of the book).
-# It defines the syntax of a sentence in a language with a
-# very limited syntax and lexicon in the context of airline reservation.
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
-builder.add_terminals('Determiner', 'Preposition', )
-builder.add_production('S' => %w[NP VP])
-builder.add_production('NP' => 'Pronoun')
-builder.add_production('NP' => 'Proper-Noun')
-builder.add_production('NP' => %w[Determiner Nominal])
-builder.add_production('Nominal' => %w[Nominal Noun])
-builder.add_production('Nominal' => 'Noun')
-builder.add_production('VP' => 'Verb')
-builder.add_production('VP' => %w[Verb NP])
-builder.add_production('VP' => %w[Verb NP PP])
-builder.add_production('VP' => %w[Verb PP])
-builder.add_production('PP' => %w[Preposition PP])
-
-# And now build the grammar...
-grammar_l0 = builder.grammar
-
-
-########################################
-# 2. Create a tokenizer for the language
-# The tokenizer transforms the input into an array of tokens
-# This is a very simplistic implementation for demo purposes.
-
-# The lexicon is just a Hash with pairs of the form:
-# word =>terminal symbol name
-L0_lexicon = {
-  'flight' => 'Noun',
-  'breeze' => 'Noun',
-  'trip' => 'Noun',
-  'morning' => 'Noun',
-  'is' => 'Verb',
-  'prefer' => 'Verb',
-  'like' => 'Verb',
-  'need' => 'Verb',
-  'want' => 'Verb',
-  'fly' => 'Verb',
-  'me' => 'Pronoun',
-  'I' => 'Pronoun',
-  'you' => 'Pronoun',
-  'it' => 'Pronoun',
-  'Alaska' => 'Proper-Noun',
-  'Baltimore' => 'Proper-Noun',
-  'Chicago' => 'Proper-Noun',
-  'United' => 'Proper-Noun',
-  'American' => 'Proper-Noun',
-  'the' => 'Determiner',
-  'a' => 'Determiner',
-  'an' => 'Determiner',
-  'this' => 'Determiner',
-  'these' => 'Determiner',
-  'that' => 'Determiner',
-  'from' => 'Preposition',
-  'to' => 'Preposition',
-  'on' => 'Preposition',
-  'near' => 'Preposition'
-}
-
-# Highly simplified tokenizer implementation.
-def tokenizer(aText, aGrammar)
-  tokens = aText.scan(/\S+/).map do |word|
-    term_name = L0_lexicon[word]
-    if term_name.nil?
-      raise StandardError, "Word '#{word}' not found in lexicon"
-    end
-    terminal = aGrammar.name2symbol[term_name]
-    Rley::Parser::Token.new(word, terminal)
-  end
-
-  return tokens
-end
-
-########################################
-# Step 3. Create a parser for that grammar
-parser = Rley::Parser::EarleyParser.new(grammar_l0)
-
-########################################
-# Step 3. Tokenize the input
-valid_input = 'I prefer a morning flight'
-# Another sentence: it is a flight from Chicago
-tokens = tokenizer(valid_input, grammar_l0)
-
-########################################
-# Step 5. Let the parser process the input
-result = parser.parse(tokens)
-
-puts "Parsing success? #{result.success?}"
-
-
-########################################
-# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
-
-########################################
-# Step 7. Render the parse tree (in JSON)
-# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
-
-#Here we create a renderer object...
-renderer = Rley::Formatter::Json.new(STDOUT)
-
-# Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{valid_input}' input:"
-renderer.render(visitor)
-# End of file
data/examples/parsers/parsing_L1.rb
@@ -1,137 +0,0 @@
-# Purpose: to demonstrate how to build and render a parse tree for the L1
-# language
-require 'pp'
-require 'rley' # Load the gem
-
-# Steps to render a parse tree (of a valid parsed input):
-# 1. Define a grammar
-# 2. Create a tokenizer for the language
-# 3. Create a parser for that grammar
-# 4. Tokenize the input
-# 5. Let the parser process the input
-# 6. Generate a parse tree from the parse result
-# 7. Render the parse tree (in JSON)
-
-########################################
-# Step 1. Define a grammar for a miniature English-like language
-# based on Jurafky & Martin L1 language (chapter 13).
-# It defines the syntax of a sentence in a language with a
-# very limited syntax and lexicon in the context of airline reservation.
-builder = Rley::Syntax::GrammarBuilder.new
-builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
-builder.add_terminals('Aux', 'Determiner', 'Preposition')
-builder.add_production('Sentence' => 'S')
-builder.add_production('S' => %w[Aux NP VP])
-builder.add_production('S' => %w[NP VP])
-builder.add_production('S' => %w[VP])
-builder.add_production('NP' => 'Pronoun')
-builder.add_production('NP' => 'Proper-Noun')
-builder.add_production('NP' => %w[Determiner Nominal])
-builder.add_production('Nominal' => %w[Nominal Noun])
-builder.add_production('Nominal' => %w[Nominal PP])
-builder.add_production('Nominal' => 'Noun')
-builder.add_production('VP' => 'Verb')
-builder.add_production('VP' => %w[Verb NP])
-builder.add_production('VP' => %w[Verb NP PP])
-builder.add_production('VP' => %w[Verb PP])
-builder.add_production('VP' => %w[VP PP])
-builder.add_production('PP' => %w[Preposition NP])
-
-# And now build the grammar...
-grammar_l1 = builder.grammar
-
-
-########################################
-# 2. Create a tokenizer for the language
-# The tokenizer transforms the input into an array of tokens
-# This is a very simplistic implementation for demo purposes.
-
-# The lexicon is just a Hash with pairs of the form:
-# word =>terminal symbol name
-L1_lexicon = {
-  'does' => 'Aux',
-  'flight' => 'Noun',
-  'trip' => 'Noun',
-  'meal' => 'Noun',
-  'money' => 'Noun',
-  'morning' => 'Noun',
-  'is' => 'Verb',
-  'book' => 'Verb',
-  'prefer' => 'Verb',
-  'like' => 'Verb',
-  'need' => 'Verb',
-  'want' => 'Verb',
-  'fly' => 'Verb',
-  'show' => 'Verb',
-  'me' => 'Pronoun',
-  'I' => 'Pronoun',
-  'she' => 'Pronoun',
-  'you' => 'Pronoun',
-  'it' => 'Pronoun',
-  'Alaska' => 'Proper-Noun',
-  'Baltimore' => 'Proper-Noun',
-  'Chicago' => 'Proper-Noun',
-  'Houston' => 'Proper-Noun',
-  'NWA' => 'Proper-Noun',
-  'United' => 'Proper-Noun',
-  'American' => 'Proper-Noun',
-  'the' => 'Determiner',
-  'a' => 'Determiner',
-  'an' => 'Determiner',
-  'this' => 'Determiner',
-  'these' => 'Determiner',
-  'that' => 'Determiner',
-  'from' => 'Preposition',
-  'to' => 'Preposition',
-  'on' => 'Preposition',
-  'near' => 'Preposition',
-  'through' => 'Preposition'
-}
-
-# Highly simplified tokenizer implementation.
-def tokenizer(aText, aGrammar)
-  tokens = aText.scan(/\S+/).map do |word|
-    term_name = L1_lexicon[word]
-    if term_name.nil?
-      raise StandardError, "Word '#{word}' not found in lexicon"
-    end
-    terminal = aGrammar.name2symbol[term_name]
-    Rley::Parser::Token.new(word, terminal)
-  end
-
-  return tokens
-end
-
-########################################
-# Step 3. Tokenize the input
-valid_input = 'I want the flight from Alaska through Chicago to Houston'
-
-# Another sentence: it is a flight from Chicago
-tokens = tokenizer(valid_input, grammar_l1)
-
-########################################
-# Step 4. Create a parser for that grammar
-parser = Rley::Parser::EarleyParser.new(grammar_l1)
-
-########################################
-# Step 5. Let the parser process the input
-result = parser.parse(tokens)
-puts "Parsing success? #{result.success?}"
-
-
-########################################
-# Step 6. Generate a parse tree from the parse result
-ptree = result.parse_tree
-
-########################################
-# Step 7. Render the parse tree (in JSON)
-# Let's create a parse tree visitor
-visitor = Rley::ParseTreeVisitor.new(ptree)
-
-#Here we create a renderer object...
-renderer = Rley::Formatter::Json.new(STDOUT)
-
-# Now emit the parse tree as JSON on the console output
-puts "JSON rendering of the parse tree for '#{valid_input}' input:"
-renderer.render(visitor)
-# End of file