rley 0.3.08 → 0.3.09
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/README.md +190 -35
- data/examples/NLP/mini_en_demo.rb +92 -0
- data/lib/rley/constants.rb +1 -1
- metadata +3 -20
- data/examples/grammars/grammar_L0.rb +0 -32
- data/examples/grammars/grammar_abc.rb +0 -26
- data/examples/parsers/demo-JSON/JSON_grammar.rb +0 -31
- data/examples/parsers/demo-JSON/JSON_lexer.rb +0 -114
- data/examples/parsers/demo-JSON/JSON_parser.rb +0 -89
- data/examples/parsers/demo-JSON/demo_json.rb +0 -42
- data/examples/parsers/parsing_L0.rb +0 -124
- data/examples/parsers/parsing_L1.rb +0 -137
- data/examples/parsers/parsing_abc.rb +0 -71
- data/examples/parsers/parsing_ambig.rb +0 -92
- data/examples/parsers/parsing_another.rb +0 -70
- data/examples/parsers/parsing_b_expr.rb +0 -85
- data/examples/parsers/parsing_err_expr.rb +0 -74
- data/examples/parsers/parsing_groucho.rb +0 -97
- data/examples/parsers/parsing_right_recursive.rb +0 -70
- data/examples/parsers/parsing_tricky.rb +0 -91
- data/examples/parsers/tracing_parser.rb +0 -54
- data/examples/recognizers/recognizer_abc.rb +0 -71
@@ -1,26 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build a very simple grammar
|
2
|
-
require 'rley' # Load the gem
|
3
|
-
|
4
|
-
# A very simple language
|
5
|
-
# It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
|
6
|
-
# (based on example in N. Wirth's book "Compiler Construction", p. 6)
|
7
|
-
# S ::= A.
|
8
|
-
# A ::= "a" A "c".
|
9
|
-
# A ::= "b".
|
10
|
-
|
11
|
-
|
12
|
-
# Let's create the grammar step-by-step with the grammar builder:
|
13
|
-
# The builder is configured inside one block: the terminals are declared
# first, then each production rule is registered with its own call.
builder = Rley::Syntax::GrammarBuilder.new.tap do |bld|
  bld.add_terminals('a', 'b', 'c')
  bld.add_production('S' => 'A')
  bld.add_production('A' => %w[a A c])
  bld.add_production('A' => 'b')
end

# Ask the builder for the resulting grammar object...
grammar_abc = builder.grammar

# ...and print its class name as evidence that a grammar was produced
puts(grammar_abc.class.name)
|
24
|
-
|
25
|
-
# End of file
|
26
|
-
|
@@ -1,31 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build and render a parse tree for JSON
|
2
|
-
# language
|
3
|
-
require 'rley' # Load the gem
|
4
|
-
|
5
|
-
|
6
|
-
########################################
|
7
|
-
# Define a grammar for JSON
|
8
|
-
builder = Rley::Syntax::GrammarBuilder.new

# Terminal symbols: these are the token types the lexer must produce.
builder.add_terminals('KEYWORD') # For true, false, null keywords
builder.add_terminals('JSON_STRING', 'JSON_NUMBER')
builder.add_terminals('LACCOL', 'RACCOL') # For '{', '}' delimiters
builder.add_terminals('LBRACKET', 'RBRACKET') # For '[', ']' delimiters
builder.add_terminals('COLON', 'COMMA') # For ':', ',' separators

# A JSON text is a single value; a value is an object, an array or a literal.
builder.add_production('json_text' => 'json_value')
builder.add_production('json_value' => 'json_object')
builder.add_production('json_value' => 'json_array')
builder.add_production('json_value' => 'JSON_STRING')
builder.add_production('json_value' => 'JSON_NUMBER')
builder.add_production('json_value' => 'KEYWORD')

# Objects: '{' pairs '}' or the empty object '{' '}'.
builder.add_production('json_object' => %w[LACCOL json_pairs RACCOL])
builder.add_production('json_object' => %w[LACCOL RACCOL])
builder.add_production('json_pairs' => %w[json_pairs COMMA single_pair])
builder.add_production('json_pairs' => 'single_pair')
builder.add_production('single_pair' => %w[JSON_STRING COLON json_value])

# Arrays: '[' items ']' or the empty array '[' ']'.
builder.add_production('json_array' => %w[LBRACKET array_items RBRACKET])
# The empty array opens with LBRACKET; using RBRACKET twice here would make
# '[]' unparsable and accept ']]' instead.
builder.add_production('json_array' => %w[LBRACKET RBRACKET])
builder.add_production('array_items' => %w[array_items COMMA json_value])
builder.add_production('array_items' => %w[json_value])

# And now build the grammar...
GrammarJSON = builder.grammar
|
@@ -1,114 +0,0 @@
|
|
1
|
-
# File: JSON_lexer.rb
|
2
|
-
# Lexer for the JSON data format
|
3
|
-
require 'rley' # Load the gem
|
4
|
-
require 'strscan'
|
5
|
-
|
6
|
-
# Lexer for JSON.
|
7
|
-
# Lexer for JSON: splits a JSON source text into a sequence of
# Rley::Parser::Token objects. The terminal attached to each token is
# looked up by name in the name2symbol mapping supplied by the grammar.
class JSONLexer
  # Raised when the input cannot be split into valid JSON tokens.
  class ScanError < StandardError; end

  # Single-character delimiters and separators => terminal symbol name.
  LEXEME2NAME = {
    '{' => 'LACCOL',
    '}' => 'RACCOL',
    '[' => 'LBRACKET',
    ']' => 'RBRACKET',
    ',' => 'COMMA',
    ':' => 'COLON'
  }.freeze

  # Recognized line breaks. The two-character forms come first so that
  # CR LF (and LF CR) count as ONE line break instead of two.
  NEWLINE_PATTERN = /\r\n|\n\r?|\r/

  # @return [StringScanner] scanner positioned in the source text
  attr_reader :scanner

  # @return [Integer] current line number (first line is 1); only line
  #   breaks found in whitespace between tokens are counted
  attr_reader :lineno

  # @return [Integer] scanner offset of the first character of the
  #   current line
  attr_reader :line_start

  # @return the terminal-name => grammar-symbol lookup taken from the grammar
  attr_reader :name2symbol

  # @param source [String] the JSON text to tokenize
  # @param aGrammar [#name2symbol] grammar that supplies the terminal symbols
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
    @line_start = 0
  end

  # Tokenize the text remaining in the scanner.
  # @return [Array<Rley::Parser::Token>] tokens in source order
  # @raise [ScanError] when an invalid or unknown token is met
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Read the next token, skipping any leading whitespace.
  # @return [Rley::Parser::Token, nil] nil when only whitespace was left
  # @raise [ScanError] when no valid token starts at the current position
  def _next_token
    skip_whitespaces
    curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
    return nil if curr_ch.nil?

    case curr_ch
    when '{', '}', '[', ']', ',', ':'
      build_token(curr_ch, LEXEME2NAME[curr_ch])
    when '"' # Start string delimiter found
      string_token
    when /[ftn]/ # First letter of keywords
      keyword_token
    when /[-0-9]/ # Start character of number literal found
      number_token
    else # Unknown token
      sequel = scanner.scan(/.{1,20}/)
      raise ScanError, "Unknown token #{curr_ch}#{sequel}"
    end
  end

  # Create a token whose terminal is the grammar symbol with the given name.
  def build_token(lexeme, terminal_name)
    Rley::Parser::Token.new(lexeme, name2symbol[terminal_name])
  end

  # Scan a string literal; the opening quote has already been consumed.
  # The lexeme is the text between the quotes (escapes are left as-is).
  def string_token
    value = scanner.scan(/([^"\\]|\\.)*/)
    # The next character must be the closing quote: end of input or a
    # dangling backslash both mean that the literal is unterminated.
    raise ScanError, 'No closing quotes (") found' unless scanner.getch == '"'

    build_token(value, 'JSON_STRING')
  end

  # Scan one of the keywords false, true, null.
  def keyword_token
    scanner.pos -= 1 # Simulate putback of the first letter
    keyw = scanner.scan(/false|true|null/)
    if keyw.nil?
      invalid_keyw = scanner.scan(/\w+/)
      raise ScanError, "Invalid keyword: #{invalid_keyw}"
    end

    build_token(keyw, 'KEYWORD')
  end

  # Scan a number literal (integer part, optional fraction and exponent).
  def number_token
    scanner.pos -= 1 # Simulate putback of the first character
    # The exponent may have several digits (e.g. 1e10)
    value = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/)
    if value.nil?
      # A minus sign without digits: consume the offending text and fail.
      # Returning a token here would leave the scanner position unchanged
      # and make #tokens loop forever.
      invalid = scanner.scan(/\S{1,20}/)
      raise ScanError, "Invalid number: #{invalid}"
    end

    build_token(value, 'JSON_NUMBER')
  end

  # Skip a run of whitespace and update the line bookkeeping when the run
  # contains at least one line break.
  def skip_whitespaces
    matched = scanner.scan(/[ \t\f\n\r]+/)
    return if matched.nil?

    newline_count = matched.scan(NEWLINE_PATTERN).size
    return if newline_count.zero?

    # Whitespace found after the last line break is already part of the
    # new line, so the line starts that many characters before scanner.pos.
    trailing = matched.length - (matched.rindex(/[\n\r]/) + 1)
    newline_detected(newline_count, scanner.pos - trailing)
  end

  # Record that `count` line breaks were passed.
  # @param count [Integer] number of line breaks skipped
  # @param start_pos [Integer] scanner offset where the current line starts
  def newline_detected(count, start_pos = scanner.pos)
    @lineno += count
    @line_start = start_pos
  end
end # class
|
113
|
-
|
114
|
-
|
@@ -1,89 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build and render a parse tree for JSON
|
2
|
-
# language
|
3
|
-
require 'pp'
|
4
|
-
require 'rley' # Load the gem
|
5
|
-
require_relative 'json_lexer'
|
6
|
-
|
7
|
-
# Steps to render a parse tree (of a valid parsed input):
|
8
|
-
# 1. Define a grammar
|
9
|
-
# 2. Create a parser for that grammar
|
10
|
-
# 3. Tokenize the input
|
11
|
-
# 4. Let the parser process the input
|
12
|
-
# 5. Generate a parse tree from the parse result
|
13
|
-
# 6. Render the parse tree (in JSON)
|
14
|
-
|
15
|
-
########################################
|
16
|
-
# Step 1. Load a grammar for JSON
|
17
|
-
require_relative 'JSON_grammar'
|
18
|
-
|
19
|
-
# A JSON parser derived from our general Earley parser.
|
20
|
-
# Earley parser specialised for JSON: it is always built on GrammarJSON
# and is able to parse the content of a file directly.
class JSONParser < Rley::Parser::EarleyParser
  attr_reader :source_file

  # Set up the underlying Earley parser with the JSON grammar.
  def initialize
    super(GrammarJSON)
  end

  # Tokenize the given file, then run the parser over the token sequence.
  # @param aFilename [String] path of the file to parse
  # @return the value returned by `parse` for these tokens
  def parse_file(aFilename)
    parse(tokenize_file(aFilename))
  end

  private

  # Read the whole file and convert its text into tokens with a
  # JSONLexer bound to GrammarJSON.
  # @param aFilename [String] path of the file to read
  # @return the token sequence returned by the lexer
  def tokenize_file(aFilename)
    JSONLexer.new(File.read(aFilename), GrammarJSON).tokens
  end
end # class
|
46
|
-
|
47
|
-
=begin
|
48
|
-
########################################
|
49
|
-
# Step 3. Create a parser for that grammar
|
50
|
-
# parser = Rley::Parser::EarleyParser.new(GrammarJSON)
|
51
|
-
parser = JSONParser.new
|
52
|
-
|
53
|
-
|
54
|
-
########################################
|
55
|
-
# Step 4. Tokenize the input file
|
56
|
-
file_name = 'sample02.json'
|
57
|
-
=begin
|
58
|
-
input_source = nil
|
59
|
-
File.open(file_name, 'r') { |f| input_source = f.read }
|
60
|
-
|
61
|
-
lexer = JSONLexer.new(input_source, GrammarJSON)
|
62
|
-
tokens = lexer.tokens
|
63
|
-
#=end
|
64
|
-
|
65
|
-
########################################
|
66
|
-
# Step 5. Let the parser process the input
|
67
|
-
result = parser.parse_file(file_name) # parser.parse(tokens)
|
68
|
-
unless result.success?
|
69
|
-
puts "Parsing of '#{file_name}' failed"
|
70
|
-
exit(1)
|
71
|
-
end
|
72
|
-
|
73
|
-
########################################
|
74
|
-
# Step 6. Generate a parse tree from the parse result
|
75
|
-
ptree = result.parse_tree
|
76
|
-
|
77
|
-
########################################
|
78
|
-
# Step 7. Render the parse tree (in JSON)
|
79
|
-
# Let's create a parse tree visitor
|
80
|
-
visitor = Rley::ParseTreeVisitor.new(ptree)
|
81
|
-
|
82
|
-
#Here we create a renderer object...
|
83
|
-
renderer = Rley::Formatter::Json.new(STDOUT)
|
84
|
-
|
85
|
-
# Now emit the parse tree as JSON on the console output
|
86
|
-
puts "JSON rendering of the parse tree for '#{file_name}' input:"
|
87
|
-
renderer.render(visitor)
|
88
|
-
=end
|
89
|
-
# End of file
|
@@ -1,42 +0,0 @@
|
|
1
|
-
require_relative 'JSON_parser'
|
2
|
-
|
3
|
-
# Create a JSON parser object
|
4
|
-
parser = JSONParser.new

# Parse the input file with name given in command-line.
# Without any argument, print the usage message and stop with status 1.
if ARGV.empty?
  msg = <<-END_MSG
Command-line syntax:
ruby #{__FILE__} filename
where:
filename is the name of a JSON file

Example:
ruby #{__FILE__} sample01.json
  END_MSG
  puts msg
  exit(1)
end
file_name = ARGV[0]
result = parser.parse_file(file_name) # result object contains parse details

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{file_name}' failed"
  exit(1)
end

# Generate a parse tree from the parse result
ptree = result.parse_tree

# Do something with the parse tree: render it on the output console.
# Step a: Let's create a parse tree visitor
visitor = Rley::ParseTreeVisitor.new(ptree)

# Step b: Select the rendition format to be JSON
renderer = Rley::Formatter::Json.new(STDOUT)

# Step c: Now emit the parse tree as JSON on the console output
puts "JSON rendering of the parse tree for '#{file_name}' input:"
renderer.render(visitor)
|
42
|
-
# End of file
|
@@ -1,124 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build and render a parse tree for the L0
|
2
|
-
# language
|
3
|
-
require 'pp'
|
4
|
-
require 'rley' # Load the gem
|
5
|
-
|
6
|
-
# Steps to render a parse tree (of a valid parsed input):
|
7
|
-
# 1. Define a grammar
|
8
|
-
# 2. Create a tokenizer for the language
|
9
|
-
# 3. Create a parser for that grammar
|
10
|
-
# 4. Tokenize the input
|
11
|
-
# 5. Let the parser process the input
|
12
|
-
# 6. Generate a parse tree from the parse result
|
13
|
-
# 7. Render the parse tree (in JSON)
|
14
|
-
|
15
|
-
########################################
|
16
|
-
# Step 1. Define a grammar for a micro English-like language
|
17
|
-
# based on Jurafky & Martin L0 language (chapter 12 of the book).
|
18
|
-
# It defines the syntax of a sentence in a language with a
|
19
|
-
# very limited syntax and lexicon in the context of airline reservation.
|
20
|
-
builder = Rley::Syntax::GrammarBuilder.new

# Terminal symbols: the word categories assigned by the lexicon.
builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
builder.add_terminals('Determiner', 'Preposition')

# Sentence and noun phrases
builder.add_production('S' => %w[NP VP])
builder.add_production('NP' => 'Pronoun')
builder.add_production('NP' => 'Proper-Noun')
builder.add_production('NP' => %w[Determiner Nominal])
builder.add_production('Nominal' => %w[Nominal Noun])
builder.add_production('Nominal' => 'Noun')

# Verb phrases
builder.add_production('VP' => 'Verb')
builder.add_production('VP' => %w[Verb NP])
builder.add_production('VP' => %w[Verb NP PP])
builder.add_production('VP' => %w[Verb PP])

# A prepositional phrase is a preposition followed by a noun phrase.
# (Defining PP in terms of PP only would give a rule without base case:
# no finite input could ever match it.)
builder.add_production('PP' => %w[Preposition NP])

# And now build the grammar...
grammar_l0 = builder.grammar
|
37
|
-
|
38
|
-
|
39
|
-
########################################
|
40
|
-
# 2. Create a tokenizer for the language
|
41
|
-
# The tokenizer transforms the input into an array of tokens
|
42
|
-
# This is a very simplistic implementation for demo purposes.
|
43
|
-
|
44
|
-
# The lexicon is just a Hash with pairs of the form:
|
45
|
-
# word =>terminal symbol name
|
46
|
-
# Frozen so that the shared lexicon cannot be altered by accident.
L0_lexicon = {
  'flight' => 'Noun',
  'breeze' => 'Noun',
  'trip' => 'Noun',
  'morning' => 'Noun',
  'is' => 'Verb',
  'prefer' => 'Verb',
  'like' => 'Verb',
  'need' => 'Verb',
  'want' => 'Verb',
  'fly' => 'Verb',
  'me' => 'Pronoun',
  'I' => 'Pronoun',
  'you' => 'Pronoun',
  'it' => 'Pronoun',
  'Alaska' => 'Proper-Noun',
  'Baltimore' => 'Proper-Noun',
  'Chicago' => 'Proper-Noun',
  'United' => 'Proper-Noun',
  'American' => 'Proper-Noun',
  'the' => 'Determiner',
  'a' => 'Determiner',
  'an' => 'Determiner',
  'this' => 'Determiner',
  'these' => 'Determiner',
  'that' => 'Determiner',
  'from' => 'Preposition',
  'to' => 'Preposition',
  'on' => 'Preposition',
  'near' => 'Preposition'
}.freeze

# Highly simplified tokenizer implementation.
# The text is split on whitespace; each word is looked up (case-sensitively)
# in L0_lexicon to find its terminal name, which is then resolved to a
# grammar symbol through aGrammar.name2symbol.
# @param aText [String] the sentence to tokenize
# @param aGrammar [#name2symbol] grammar supplying the terminal symbols
# @return [Array<Rley::Parser::Token>] one token per word, in text order
# @raise [StandardError] when a word is absent from the lexicon
def tokenizer(aText, aGrammar)
  aText.scan(/\S+/).map do |word|
    term_name = L0_lexicon.fetch(word) do
      raise StandardError, "Word '#{word}' not found in lexicon"
    end
    Rley::Parser::Token.new(word, aGrammar.name2symbol[term_name])
  end
end
|
91
|
-
|
92
|
-
########################################
|
93
|
-
# Step 3. Create a parser for that grammar
|
94
|
-
parser = Rley::Parser::EarleyParser.new(grammar_l0)

########################################
# Step 4. Tokenize the input
valid_input = 'I prefer a morning flight'
# Another sentence: it is a flight from Chicago
tokens = tokenizer(valid_input, grammar_l0)

########################################
# Step 5. Let the parser process the input
result = parser.parse(tokens)

puts "Parsing success? #{result.success?}"
# Stop before building a parse tree when the input was rejected
exit(1) unless result.success?

########################################
# Step 6. Generate a parse tree from the parse result
ptree = result.parse_tree

########################################
# Step 7. Render the parse tree (in JSON)
# Let's create a parse tree visitor
visitor = Rley::ParseTreeVisitor.new(ptree)

# Here we create a renderer object...
renderer = Rley::Formatter::Json.new(STDOUT)

# Now emit the parse tree as JSON on the console output
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
renderer.render(visitor)
|
124
|
-
# End of file
|
@@ -1,137 +0,0 @@
|
|
1
|
-
# Purpose: to demonstrate how to build and render a parse tree for the L1
|
2
|
-
# language
|
3
|
-
require 'pp'
|
4
|
-
require 'rley' # Load the gem
|
5
|
-
|
6
|
-
# Steps to render a parse tree (of a valid parsed input):
|
7
|
-
# 1. Define a grammar
|
8
|
-
# 2. Create a tokenizer for the language
|
9
|
-
# 3. Create a parser for that grammar
|
10
|
-
# 4. Tokenize the input
|
11
|
-
# 5. Let the parser process the input
|
12
|
-
# 6. Generate a parse tree from the parse result
|
13
|
-
# 7. Render the parse tree (in JSON)
|
14
|
-
|
15
|
-
########################################
|
16
|
-
# Step 1. Define a grammar for a miniature English-like language
|
17
|
-
# based on Jurafky & Martin L1 language (chapter 13).
|
18
|
-
# It defines the syntax of a sentence in a language with a
|
19
|
-
# very limited syntax and lexicon in the context of airline reservation.
|
20
|
-
# The builder is configured inside one block: terminals are declared first,
# then every production rule is registered in turn.
builder = Rley::Syntax::GrammarBuilder.new.tap do |bld|
  bld.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
  bld.add_terminals('Aux', 'Determiner', 'Preposition')

  # Sentence forms
  bld.add_production('Sentence' => 'S')
  bld.add_production('S' => %w[Aux NP VP])
  bld.add_production('S' => %w[NP VP])
  bld.add_production('S' => %w[VP])

  # Noun phrases and nominals
  bld.add_production('NP' => 'Pronoun')
  bld.add_production('NP' => 'Proper-Noun')
  bld.add_production('NP' => %w[Determiner Nominal])
  bld.add_production('Nominal' => %w[Nominal Noun])
  bld.add_production('Nominal' => %w[Nominal PP])
  bld.add_production('Nominal' => 'Noun')

  # Verb phrases
  bld.add_production('VP' => 'Verb')
  bld.add_production('VP' => %w[Verb NP])
  bld.add_production('VP' => %w[Verb NP PP])
  bld.add_production('VP' => %w[Verb PP])
  bld.add_production('VP' => %w[VP PP])

  # Prepositional phrases
  bld.add_production('PP' => %w[Preposition NP])
end

# Retrieve the grammar assembled by the builder
grammar_l1 = builder.grammar
|
42
|
-
|
43
|
-
|
44
|
-
########################################
|
45
|
-
# 2. Create a tokenizer for the language
|
46
|
-
# The tokenizer transforms the input into an array of tokens
|
47
|
-
# This is a very simplistic implementation for demo purposes.
|
48
|
-
|
49
|
-
# The lexicon is just a Hash with pairs of the form:
|
50
|
-
# word =>terminal symbol name
|
51
|
-
# Frozen so that the shared lexicon cannot be altered by accident.
L1_lexicon = {
  'does' => 'Aux',
  'flight' => 'Noun',
  'trip' => 'Noun',
  'meal' => 'Noun',
  'money' => 'Noun',
  'morning' => 'Noun',
  'is' => 'Verb',
  'book' => 'Verb',
  'prefer' => 'Verb',
  'like' => 'Verb',
  'need' => 'Verb',
  'want' => 'Verb',
  'fly' => 'Verb',
  'show' => 'Verb',
  'me' => 'Pronoun',
  'I' => 'Pronoun',
  'she' => 'Pronoun',
  'you' => 'Pronoun',
  'it' => 'Pronoun',
  'Alaska' => 'Proper-Noun',
  'Baltimore' => 'Proper-Noun',
  'Chicago' => 'Proper-Noun',
  'Houston' => 'Proper-Noun',
  'NWA' => 'Proper-Noun',
  'United' => 'Proper-Noun',
  'American' => 'Proper-Noun',
  'the' => 'Determiner',
  'a' => 'Determiner',
  'an' => 'Determiner',
  'this' => 'Determiner',
  'these' => 'Determiner',
  'that' => 'Determiner',
  'from' => 'Preposition',
  'to' => 'Preposition',
  'on' => 'Preposition',
  'near' => 'Preposition',
  'through' => 'Preposition'
}.freeze

# Highly simplified tokenizer implementation.
# The text is split on whitespace; each word is looked up (case-sensitively)
# in L1_lexicon to find its terminal name, which is then resolved to a
# grammar symbol through aGrammar.name2symbol.
# @param aText [String] the sentence to tokenize
# @param aGrammar [#name2symbol] grammar supplying the terminal symbols
# @return [Array<Rley::Parser::Token>] one token per word, in text order
# @raise [StandardError] when a word is absent from the lexicon
def tokenizer(aText, aGrammar)
  aText.scan(/\S+/).map do |word|
    term_name = L1_lexicon.fetch(word) do
      raise StandardError, "Word '#{word}' not found in lexicon"
    end
    Rley::Parser::Token.new(word, aGrammar.name2symbol[term_name])
  end
end
|
104
|
-
|
105
|
-
########################################
|
106
|
-
# Step 3. Create a parser for that grammar
parser = Rley::Parser::EarleyParser.new(grammar_l1)

########################################
# Step 4. Tokenize the input
valid_input = 'I want the flight from Alaska through Chicago to Houston'

# Another sentence: it is a flight from Chicago
tokens = tokenizer(valid_input, grammar_l1)

########################################
# Step 5. Let the parser process the input
result = parser.parse(tokens)
puts "Parsing success? #{result.success?}"
# Stop before building a parse tree when the input was rejected
exit(1) unless result.success?

########################################
# Step 6. Generate a parse tree from the parse result
ptree = result.parse_tree

########################################
# Step 7. Render the parse tree (in JSON)
# Let's create a parse tree visitor
visitor = Rley::ParseTreeVisitor.new(ptree)

# Here we create a renderer object...
renderer = Rley::Formatter::Json.new(STDOUT)

# Now emit the parse tree as JSON on the console output
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
renderer.render(visitor)
|
137
|
-
# End of file
|