rley 0.3.08 → 0.3.09

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,26 +0,0 @@
1
- # Purpose: to demonstrate how to build a very simple grammar
2
- require 'rley' # Load the gem
3
-
4
- # A very simple language
5
- # It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
6
- # (based on example in N. Wirth's book "Compiler Construction", p. 6)
7
- # S ::= A.
8
- # A ::= "a" A "c".
9
- # A ::= "b".
10
-
11
-
12
# Assemble the grammar incrementally with a grammar builder:
# first declare the terminals, then add the production rules one by one.
builder = Rley::Syntax::GrammarBuilder.new
builder.add_terminals('a', 'b', 'c')
[
  { 'S' => 'A' },
  { 'A' => %w(a A c) },
  { 'A' => 'b' }
].each { |rule| builder.add_production(rule) }

# Ask the builder for the resulting grammar object...
grammar_abc = builder.grammar

# ...and display its class name as evidence that a grammar was built
puts grammar_abc.class.name
24
-
25
- # End of file
26
-
@@ -1,31 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree for JSON
2
- # language
3
- require 'rley' # Load the gem
4
-
5
-
6
- ########################################
7
# Define a grammar for JSON
builder = Rley::Syntax::GrammarBuilder.new

# Terminal symbols (i.e. the token types delivered by the lexer)
builder.add_terminals('KEYWORD') # For true, false, null keywords
builder.add_terminals('JSON_STRING', 'JSON_NUMBER')
builder.add_terminals('LACCOL', 'RACCOL') # For '{', '}' delimiters
builder.add_terminals('LBRACKET', 'RBRACKET') # For '[', ']' delimiters
builder.add_terminals('COLON', 'COMMA') # For ':', ',' separators

# A JSON text is a single value
builder.add_production('json_text' => 'json_value')
builder.add_production('json_value' => 'json_object')
builder.add_production('json_value' => 'json_array')
builder.add_production('json_value' => 'JSON_STRING')
builder.add_production('json_value' => 'JSON_NUMBER')
builder.add_production('json_value' => 'KEYWORD')

# Objects: '{' pairs '}' or the empty object '{' '}'
builder.add_production('json_object' => %w[LACCOL json_pairs RACCOL])
builder.add_production('json_object' => %w[LACCOL RACCOL])
builder.add_production('json_pairs' => %w[json_pairs COMMA single_pair])
builder.add_production('json_pairs' => 'single_pair')
builder.add_production('single_pair' => %w[JSON_STRING COLON json_value])

# Arrays: '[' items ']' or the empty array '[' ']'
builder.add_production('json_array' => %w[LBRACKET array_items RBRACKET])
# The empty array opens with LBRACKET (it was RBRACKET RBRACKET, i.e. ']]',
# which made '[]' impossible to derive)
builder.add_production('json_array' => %w[LBRACKET RBRACKET])
builder.add_production('array_items' => %w[array_items COMMA json_value])
builder.add_production('array_items' => %w[json_value])

# And now build the grammar...
GrammarJSON = builder.grammar
@@ -1,114 +0,0 @@
1
- # File: JSON_lexer.rb
2
- # Lexer for the JSON data format
3
- require 'rley' # Load the gem
4
- require 'strscan'
5
-
6
# Lexer for JSON.
# Splits a JSON text into Rley::Parser::Token objects. Each token pairs a
# lexeme with the symbol found in the grammar's name2symbol mapping under
# one of these terminal names: LACCOL, RACCOL, LBRACKET, RBRACKET, COMMA,
# COLON, JSON_STRING, JSON_NUMBER or KEYWORD.
class JSONLexer
  attr_reader(:scanner)
  attr_reader(:lineno)
  attr_reader(:line_start)
  attr_reader(:name2symbol)

  # Terminal name of every single-character (punctuation) lexeme
  LEXEME2NAME = {
    '{' => 'LACCOL',
    '}' => 'RACCOL',
    '[' => 'LBRACKET',
    ']' => 'RBRACKET',
    ',' => 'COMMA',
    ':' => 'COLON'
  }.freeze

  # Raised when the input cannot be split into valid tokens
  class ScanError < StandardError; end

  # @param source [String] the JSON text to tokenize
  # @param aGrammar [#name2symbol] object answering a mapping from
  #   terminal name to grammar symbol
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
    @line_start = 0 # The first line starts at offset 0
  end

  # Tokenize the remaining input.
  # @return [Array] the tokens, in their order of appearance
  # @raise [ScanError] when an invalid or unknown token is met
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Skip leading whitespace, then recognize one token.
  # @return [Object, nil] the token, or nil when only whitespace was left
  def _next_token
    skip_whitespaces
    curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
    return nil if curr_ch.nil?

    case curr_ch
    when '{', '}', '[', ']', ',', ':'
      build_token(curr_ch, LEXEME2NAME[curr_ch])
    when '"' # Start string delimiter found
      scan_string
    when /[ftn]/ # First letter of keywords
      scan_keyword
    when /[-0-9]/ # Start character of number literal found
      scan_number
    else # Unknown token
      sequel = scanner.scan(/.{1,20}/) # A bit of context for the message
      raise ScanError, "Unknown token #{curr_ch}#{sequel}"
    end
  end

  # Scan a string literal; the opening quote is already consumed.
  # The lexeme is the text between the quotes, escapes left untouched.
  def scan_string
    value = scanner.scan(/([^"\\]|\\.)*/)
    # The scan above also stops on a backslash that cannot be paired with a
    # following character, so the next character must be checked to be the
    # closing quote, not merely to exist.
    raise ScanError, 'No closing quotes (") found' unless scanner.getch == '"'

    build_token(value, 'JSON_STRING')
  end

  # Scan one of the keywords false, true, null.
  def scan_keyword
    putback
    keyw = scanner.scan(/false|true|null/)
    if keyw.nil?
      invalid_keyw = scanner.scan(/\w+/)
      raise ScanError, "Invalid keyword: #{invalid_keyw}"
    end

    build_token(keyw, 'KEYWORD')
  end

  # Scan a number literal, with optional fraction and exponent parts.
  def scan_number
    putback
    # The exponent takes one or more digits (a single digit only would
    # split a literal such as 1.5e10 into two tokens).
    value = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/)
    if value.nil?
      # Happens for a '-' without digits. Raising also prevents an endless
      # loop, since nothing would be consumed otherwise.
      invalid = scanner.scan(/\S{1,20}/)
      raise ScanError, "Invalid number: #{invalid}"
    end

    build_token(value, 'JSON_NUMBER')
  end

  # Move the scanner one position back, so that the character fetched by
  # getch is scanned again. Only used for single-byte start characters.
  def putback
    scanner.pos = scanner.pos - 1
  end

  # Create a token for the given lexeme and terminal name.
  def build_token(aLexeme, aTerminalName)
    Rley::Parser::Token.new(aLexeme, name2symbol[aTerminalName])
  end

  # Consume a run of whitespace and update the line bookkeeping.
  def skip_whitespaces
    matched = scanner.scan(/[ \t\f\n\r]+/)
    return if matched.nil?

    # CR LF, lone CR and lone LF each count as exactly one line break
    newline_count = matched.scan(/\r\n?|\n/).size
    return if newline_count.zero? # Still on the same line

    # Whitespace after the last line break belongs to the new line
    indentation = matched[/[^\r\n]*\z/]
    newline_detected(newline_count, indentation.bytesize)
  end

  # Register line breaks that were just consumed.
  # @param count [Integer] number of line breaks
  # @param indent [Integer] number of bytes already consumed on the new line
  def newline_detected(count, indent = 0)
    @lineno += count
    @line_start = scanner.pos - indent
  end
end # class
113
-
114
-
@@ -1,89 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree for JSON
2
- # language
3
- require 'pp'
4
- require 'rley' # Load the gem
5
- require_relative 'json_lexer'
6
-
7
- # Steps to render a parse tree (of a valid parsed input):
8
- # 1. Define a grammar
9
- # 2. Create a parser for that grammar
10
- # 3. Tokenize the input
11
- # 4. Let the parser process the input
12
- # 5. Generate a parse tree from the parse result
13
- # 6. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Load a grammar for JSON
17
- require_relative 'JSON_grammar'
18
-
19
# Earley parser specialized for the JSON grammar (GrammarJSON).
class JSONParser < Rley::Parser::EarleyParser
  attr_reader(:source_file)

  # Constructor.
  # Hands the JSON grammar over to the generic Earley parser.
  def initialize
    super(GrammarJSON)
  end

  # Tokenize the content of the given file, then parse the tokens.
  # Returns the value answered by the inherited parse method.
  def parse_file(aFilename)
    parse(tokenize_file(aFilename))
  end

  private

  # Read the whole file and let a JSONLexer split its text into tokens.
  def tokenize_file(aFilename)
    JSONLexer.new(File.read(aFilename), GrammarJSON).tokens
  end
end # class
46
-
47
- =begin
48
- ########################################
49
- # Step 3. Create a parser for that grammar
50
- # parser = Rley::Parser::EarleyParser.new(GrammarJSON)
51
- parser = JSONParser.new
52
-
53
-
54
- ########################################
55
- # Step 4. Tokenize the input file
56
- file_name = 'sample02.json'
57
- =begin
58
- input_source = nil
59
- File.open(file_name, 'r') { |f| input_source = f.read }
60
-
61
- lexer = JSONLexer.new(input_source, GrammarJSON)
62
- tokens = lexer.tokens
63
- #=end
64
-
65
- ########################################
66
- # Step 5. Let the parser process the input
67
- result = parser.parse_file(file_name) # parser.parse(tokens)
68
- unless result.success?
69
- puts "Parsing of '#{file_name}' failed"
70
- exit(1)
71
- end
72
-
73
- ########################################
74
- # Step 6. Generate a parse tree from the parse result
75
- ptree = result.parse_tree
76
-
77
- ########################################
78
- # Step 7. Render the parse tree (in JSON)
79
- # Let's create a parse tree visitor
80
- visitor = Rley::ParseTreeVisitor.new(ptree)
81
-
82
- #Here we create a renderer object...
83
- renderer = Rley::Formatter::Json.new(STDOUT)
84
-
85
- # Now emit the parse tree as JSON on the console output
86
- puts "JSON rendering of the parse tree for '#{file_name}' input:"
87
- renderer.render(visitor)
88
- =end
89
- # End of file
@@ -1,42 +0,0 @@
1
- require_relative 'JSON_parser'
2
-
3
# Create a JSON parser object
parser = JSONParser.new

# The name of the input file comes from the command line.
# Without it: display the usage, then stop with a non-zero exit status.
if ARGV.empty?
  msg = <<-END_MSG
Command-line syntax:
  ruby #{__FILE__} filename
  where:
    filename is the name of a JSON file

Example:
  ruby #{__FILE__} sample01.json
  END_MSG
  puts msg
  exit(1)
end
file_name = ARGV[0]
result = parser.parse_file(file_name) # result object contains parse details

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{file_name}' failed"
  exit(1)
end

# Generate a parse tree from the parse result
ptree = result.parse_tree

# Do something with the parse tree: render it on the output console.
# Step a: Let's create a parse tree visitor
visitor = Rley::ParseTreeVisitor.new(ptree)

# Step b: Select the rendition format to be JSON
renderer = Rley::Formatter::Json.new(STDOUT)

# Step c: Now emit the parse tree as JSON on the console output
puts "JSON rendering of the parse tree for '#{file_name}' input:"
renderer.render(visitor)
42
- # End of file
@@ -1,124 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree for the L0
2
- # language
3
- require 'pp'
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
# Step 1. Define a grammar for a micro English-like language
# based on Jurafsky & Martin L0 language (chapter 12 of the book).
# It defines the syntax of a sentence in a language with a
# very limited syntax and lexicon in the context of airline reservation.
builder = Rley::Syntax::GrammarBuilder.new
builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
builder.add_terminals('Determiner', 'Preposition')
builder.add_production('S' => %w[NP VP])
builder.add_production('NP' => 'Pronoun')
builder.add_production('NP' => 'Proper-Noun')
builder.add_production('NP' => %w[Determiner Nominal])
builder.add_production('Nominal' => %w[Nominal Noun])
builder.add_production('Nominal' => 'Noun')
builder.add_production('VP' => 'Verb')
builder.add_production('VP' => %w[Verb NP])
builder.add_production('VP' => %w[Verb NP PP])
builder.add_production('VP' => %w[Verb PP])
# A prepositional phrase is a preposition followed by a noun phrase
# (e.g. 'from Chicago'). The rule used to be 'PP' => Preposition PP, a
# recursion without any base case, so that no PP could ever be derived.
builder.add_production('PP' => %w[Preposition NP])

# And now build the grammar...
grammar_l0 = builder.grammar
37
-
38
-
39
- ########################################
40
- # 2. Create a tokenizer for the language
41
- # The tokenizer transforms the input into an array of tokens
42
- # This is a very simplistic implementation for demo purposes.
43
-
44
# The lexicon is just a Hash with pairs of the form:
# word => terminal symbol name
# It is frozen since it only serves as a read-only lookup table.
L0_lexicon = {
  'flight' => 'Noun',
  'breeze' => 'Noun',
  'trip' => 'Noun',
  'morning' => 'Noun',
  'is' => 'Verb',
  'prefer' => 'Verb',
  'like' => 'Verb',
  'need' => 'Verb',
  'want' => 'Verb',
  'fly' => 'Verb',
  'me' => 'Pronoun',
  'I' => 'Pronoun',
  'you' => 'Pronoun',
  'it' => 'Pronoun',
  'Alaska' => 'Proper-Noun',
  'Baltimore' => 'Proper-Noun',
  'Chicago' => 'Proper-Noun',
  'United' => 'Proper-Noun',
  'American' => 'Proper-Noun',
  'the' => 'Determiner',
  'a' => 'Determiner',
  'an' => 'Determiner',
  'this' => 'Determiner',
  'these' => 'Determiner',
  'that' => 'Determiner',
  'from' => 'Preposition',
  'to' => 'Preposition',
  'on' => 'Preposition',
  'near' => 'Preposition'
}.freeze
77
-
78
# Highly simplified tokenizer implementation.
# Cuts the text at whitespace and turns every word into a token that pairs
# the word with the grammar symbol of its lexicon category.
# Raises a StandardError for a word that is absent from L0_lexicon.
def tokenizer(aText, aGrammar)
  aText.scan(/\S+/).map do |a_word|
    category = L0_lexicon.fetch(a_word) do
      raise StandardError, "Word '#{a_word}' not found in lexicon"
    end
    Rley::Parser::Token.new(a_word, aGrammar.name2symbol[category])
  end
end
91
-
92
########################################
# Step 3. Create a parser for that grammar
parser = Rley::Parser::EarleyParser.new(grammar_l0)

########################################
# Step 4. Tokenize the input
valid_input = 'I prefer a morning flight'
# Another sentence: it is a flight from Chicago
tokens = tokenizer(valid_input, grammar_l0)

########################################
# Step 5. Let the parser process the input
result = parser.parse(tokens)

puts "Parsing success? #{result.success?}"
# Stop here when the parse failed: the next steps assume a successful parse
exit(1) unless result.success?

########################################
# Step 6. Generate a parse tree from the parse result
ptree = result.parse_tree

########################################
# Step 7. Render the parse tree (in JSON)
# Let's create a parse tree visitor
visitor = Rley::ParseTreeVisitor.new(ptree)

# Here we create a renderer object...
renderer = Rley::Formatter::Json.new(STDOUT)

# Now emit the parse tree as JSON on the console output
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
renderer.render(visitor)
124
- # End of file
@@ -1,137 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree for the L1
2
- # language
3
- require 'pp'
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
# Step 1. Define a grammar for a miniature English-like language
# based on Jurafsky & Martin L1 language (chapter 13).
# The sentences of that language follow a very limited syntax and use a
# small lexicon from the airline reservation domain.
builder = Rley::Syntax::GrammarBuilder.new
builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
builder.add_terminals('Aux', 'Determiner', 'Preposition')

# The production rules as [left-hand side, right-hand side] pairs,
# listed in the order in which they are added to the builder.
[
  ['Sentence', 'S'],
  ['S', %w[Aux NP VP]],
  ['S', %w[NP VP]],
  ['S', %w[VP]],
  ['NP', 'Pronoun'],
  ['NP', 'Proper-Noun'],
  ['NP', %w[Determiner Nominal]],
  ['Nominal', %w[Nominal Noun]],
  ['Nominal', %w[Nominal PP]],
  ['Nominal', 'Noun'],
  ['VP', 'Verb'],
  ['VP', %w[Verb NP]],
  ['VP', %w[Verb NP PP]],
  ['VP', %w[Verb PP]],
  ['VP', %w[VP PP]],
  ['PP', %w[Preposition NP]]
].each { |(lhs, rhs)| builder.add_production(lhs => rhs) }

# Time to get the grammar out of the builder...
grammar_l1 = builder.grammar
42
-
43
-
44
- ########################################
45
- # 2. Create a tokenizer for the language
46
- # The tokenizer transforms the input into an array of tokens
47
- # This is a very simplistic implementation for demo purposes.
48
-
49
# The lexicon is just a Hash with pairs of the form:
# word => terminal symbol name
# It is frozen since it only serves as a read-only lookup table.
L1_lexicon = {
  'does' => 'Aux',
  'flight' => 'Noun',
  'trip' => 'Noun',
  'meal' => 'Noun',
  'money' => 'Noun',
  'morning' => 'Noun',
  'is' => 'Verb',
  'book' => 'Verb',
  'prefer' => 'Verb',
  'like' => 'Verb',
  'need' => 'Verb',
  'want' => 'Verb',
  'fly' => 'Verb',
  'show' => 'Verb',
  'me' => 'Pronoun',
  'I' => 'Pronoun',
  'she' => 'Pronoun',
  'you' => 'Pronoun',
  'it' => 'Pronoun',
  'Alaska' => 'Proper-Noun',
  'Baltimore' => 'Proper-Noun',
  'Chicago' => 'Proper-Noun',
  'Houston' => 'Proper-Noun',
  'NWA' => 'Proper-Noun',
  'United' => 'Proper-Noun',
  'American' => 'Proper-Noun',
  'the' => 'Determiner',
  'a' => 'Determiner',
  'an' => 'Determiner',
  'this' => 'Determiner',
  'these' => 'Determiner',
  'that' => 'Determiner',
  'from' => 'Preposition',
  'to' => 'Preposition',
  'on' => 'Preposition',
  'near' => 'Preposition',
  'through' => 'Preposition'
}.freeze
90
-
91
# Highly simplified tokenizer implementation.
# Cuts the text at whitespace and turns every word into a token that pairs
# the word with the grammar symbol of its lexicon category.
# Raises a StandardError for a word that is absent from L1_lexicon.
def tokenizer(aText, aGrammar)
  aText.scan(/\S+/).map do |a_word|
    category = L1_lexicon.fetch(a_word) do
      raise StandardError, "Word '#{a_word}' not found in lexicon"
    end
    Rley::Parser::Token.new(a_word, aGrammar.name2symbol[category])
  end
end
104
-
105
########################################
# Step 3. Create a parser for that grammar
parser = Rley::Parser::EarleyParser.new(grammar_l1)

########################################
# Step 4. Tokenize the input
valid_input = 'I want the flight from Alaska through Chicago to Houston'

# Another sentence: it is a flight from Chicago
tokens = tokenizer(valid_input, grammar_l1)

########################################
# Step 5. Let the parser process the input
result = parser.parse(tokens)
puts "Parsing success? #{result.success?}"
# Stop here when the parse failed: the next steps assume a successful parse
exit(1) unless result.success?

########################################
# Step 6. Generate a parse tree from the parse result
ptree = result.parse_tree

########################################
# Step 7. Render the parse tree (in JSON)
# Let's create a parse tree visitor
visitor = Rley::ParseTreeVisitor.new(ptree)

# Here we create a renderer object...
renderer = Rley::Formatter::Json.new(STDOUT)

# Now emit the parse tree as JSON on the console output
puts "JSON rendering of the parse tree for '#{valid_input}' input:"
renderer.render(visitor)
137
- # End of file