rley 0.3.08 → 0.3.09

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,26 +0,0 @@
1
- # Purpose: to demonstrate how to build a very simple grammar
2
- require 'rley' # Load the gem
3
-
4
- # A very simple language
5
- # It recognizes/generates strings like 'b', 'abc', 'aabcc', 'aaabccc',...
6
- # (based on example in N. Wirth's book "Compiler Construction", p. 6)
7
- # S ::= A.
8
- # A ::= "a" A "c".
9
- # A ::= "b".
10
-
11
-
12
- # Let's create the grammar step-by-step with the grammar builder:
13
- builder = Rley::Syntax::GrammarBuilder.new
14
- builder.add_terminals('a', 'b', 'c')
15
- builder.add_production('S' => 'A')
16
- builder.add_production('A' => %w(a A c))
17
- builder.add_production('A' => 'b')
18
-
19
- # And now build the grammar...
20
- grammar_abc = builder.grammar
21
-
22
- # Prove that it is a grammar
23
- puts grammar_abc.class.name
24
-
25
- # End of file
26
-
@@ -1,31 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree for JSON
2
- # language
3
- require 'rley' # Load the gem
4
-
5
-
6
- ########################################
7
- # Define a grammar for JSON
8
- builder = Rley::Syntax::GrammarBuilder.new
9
- builder.add_terminals('KEYWORD') # For true, false, null keywords
10
- builder.add_terminals('JSON_STRING', 'JSON_NUMBER')
11
- builder.add_terminals('LACCOL', 'RACCOL') # For '{', '}' delimiters
12
- builder.add_terminals('LBRACKET', 'RBRACKET') # For '[', ']' delimiters
13
- builder.add_terminals('COLON', 'COMMA') # For ':', ',' separators
14
- builder.add_production('json_text' => 'json_value')
15
- builder.add_production('json_value' => 'json_object')
16
- builder.add_production('json_value' => 'json_array')
17
- builder.add_production('json_value' => 'JSON_STRING')
18
- builder.add_production('json_value' => 'JSON_NUMBER')
19
- builder.add_production('json_value' => 'KEYWORD')
20
- builder.add_production('json_object' => %w[LACCOL json_pairs RACCOL])
21
- builder.add_production('json_object' => ['LACCOL', 'RACCOL'])
22
- builder.add_production('json_pairs' => %w[json_pairs COMMA single_pair])
23
- builder.add_production('json_pairs' => 'single_pair')
24
- builder.add_production('single_pair' => %w[JSON_STRING COLON json_value])
25
- builder.add_production('json_array' => %w[LBRACKET array_items RBRACKET])
26
- builder.add_production('json_array' => ['LBRACKET', 'RBRACKET']) # Empty array: '[' then ']'
27
- builder.add_production('array_items' => %w[array_items COMMA json_value])
28
- builder.add_production('array_items' => %w[json_value])
29
-
30
- # And now build the grammar...
31
- GrammarJSON = builder.grammar
@@ -1,114 +0,0 @@
1
- # File: JSON_lexer.rb
2
- # Lexer for the JSON data format
3
- require 'rley' # Load the gem
4
- require 'strscan'
5
-
6
- # Lexer for JSON.
7
- class JSONLexer
8
- attr_reader(:scanner)
9
- attr_reader(:lineno)
10
- attr_reader(:line_start)
11
- attr_reader(:name2symbol)
12
-
13
- @@lexeme2name = {
14
- '{' => 'LACCOL',
15
- '}' => 'RACCOL',
16
- '[' => 'LBRACKET',
17
- ']' => 'RBRACKET',
18
- ',' => 'COMMA',
19
- ':' => 'COLON'
20
- }
21
-
22
- class ScanError < StandardError ; end
23
-
24
- public
25
- def initialize(source, aGrammar)
26
- @scanner = StringScanner.new(source)
27
- @name2symbol = aGrammar.name2symbol
28
- @lineno = 1
29
- end
30
-
31
- def tokens()
32
- tok_sequence = []
33
- until @scanner.eos? do
34
- token = _next_token
35
- tok_sequence << token unless token.nil?
36
- end
37
-
38
- return tok_sequence
39
- end
40
-
41
- private
42
- def _next_token()
43
- token = nil
44
- skip_whitespaces
45
- curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
46
-
47
- begin
48
- break if curr_ch.nil?
49
-
50
- case curr_ch
51
- when '{', '}', '[', ']', ',', ':'
52
- type_name = @@lexeme2name[curr_ch]
53
- token_type = name2symbol[type_name]
54
- token = Rley::Parser::Token.new(curr_ch, token_type)
55
-
56
- # LITERALS
57
- when '"' # Start string delimiter found
58
- value = scanner.scan(/([^"\\]|\\.)*/)
59
- end_delimiter = scanner.getch()
60
- raise ScanError.new('No closing quotes (") found') if end_delimiter.nil?
61
- token_type = name2symbol['JSON_STRING']
62
- token = Rley::Parser::Token.new(value, token_type)
63
-
64
- when /[ftn]/ # First letter of keywords
65
- @scanner.pos = scanner.pos - 1 # Simulate putback
66
- keyw = scanner.scan(/false|true|null/)
67
- if keyw.nil?
68
- invalid_keyw = scanner.scan(/\w+/)
69
- raise ScanError.new("Invalid keyword: #{invalid_keyw}")
70
- else
71
- token_type = name2symbol['KEYWORD']
72
- token = Rley::Parser::Token.new(keyw, token_type)
73
- end
74
-
75
-
76
- when /[-0-9]/ # Start character of number literal found
77
- @scanner.pos = scanner.pos - 1 # Simulate putback
78
- value = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9])?/)
79
- token_type = name2symbol['JSON_NUMBER']
80
- token = Rley::Parser::Token.new(value, token_type)
81
-
82
-
83
- else # Unknown token
84
- erroneous = curr_ch.nil? ? '' : curr_ch
85
- sequel = scanner.scan(/.{1,20}/)
86
- erroneous += sequel unless sequel.nil?
87
- raise ScanError.new("Unknown token #{erroneous}")
88
- end #case
89
-
90
-
91
- end while (token.nil? && curr_ch = scanner.getch())
92
-
93
- return token
94
- end
95
-
96
-
97
- def skip_whitespaces()
98
- matched = scanner.scan(/[ \t\f\n\r]+/)
99
- return if matched.nil?
100
-
101
- newline_count = 0
102
- matched.scan(/\n\r?|\r/) { |_| newline_count += 1 }
103
- newline_detected(newline_count)
104
- end
105
-
106
-
107
- def newline_detected(count)
108
- @lineno += count
109
- @line_start = scanner.pos()
110
- end
111
-
112
- end # class
113
-
114
-
@@ -1,89 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree for JSON
2
- # language
3
- require 'pp'
4
- require 'rley' # Load the gem
5
- require_relative 'json_lexer'
6
-
7
- # Steps to render a parse tree (of a valid parsed input):
8
- # 1. Define a grammar
9
- # 2. Create a parser for that grammar
10
- # 3. Tokenize the input
11
- # 4. Let the parser process the input
12
- # 5. Generate a parse tree from the parse result
13
- # 6. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Load a grammar for JSON
17
- require_relative 'JSON_grammar'
18
-
19
- # A JSON parser derived from our general Earley parser.
20
- class JSONParser < Rley::Parser::EarleyParser
21
- attr_reader(:source_file)
22
-
23
- # Constructor. Takes no argument: the parser is always created for GrammarJSON.
24
- def initialize()
25
- # Build the Earley parser with the JSON grammar
26
- super(GrammarJSON)
27
- end
28
-
29
- def parse_file(aFilename)
30
- tokens = tokenize_file(aFilename)
31
- result = parse(tokens)
32
-
33
- return result
34
- end
35
-
36
- private
37
-
38
- def tokenize_file(aFilename)
39
- input_source = nil
40
- File.open(aFilename, 'r') { |f| input_source = f.read }
41
-
42
- lexer = JSONLexer.new(input_source, GrammarJSON)
43
- return lexer.tokens
44
- end
45
- end # class
46
-
47
- =begin
48
- ########################################
49
- # Step 3. Create a parser for that grammar
50
- # parser = Rley::Parser::EarleyParser.new(GrammarJSON)
51
- parser = JSONParser.new
52
-
53
-
54
- ########################################
55
- # Step 4. Tokenize the input file
56
- file_name = 'sample02.json'
57
- =begin
58
- input_source = nil
59
- File.open(file_name, 'r') { |f| input_source = f.read }
60
-
61
- lexer = JSONLexer.new(input_source, GrammarJSON)
62
- tokens = lexer.tokens
63
- #=end
64
-
65
- ########################################
66
- # Step 5. Let the parser process the input
67
- result = parser.parse_file(file_name) # parser.parse(tokens)
68
- unless result.success?
69
- puts "Parsing of '#{file_name}' failed"
70
- exit(1)
71
- end
72
-
73
- ########################################
74
- # Step 6. Generate a parse tree from the parse result
75
- ptree = result.parse_tree
76
-
77
- ########################################
78
- # Step 7. Render the parse tree (in JSON)
79
- # Let's create a parse tree visitor
80
- visitor = Rley::ParseTreeVisitor.new(ptree)
81
-
82
- #Here we create a renderer object...
83
- renderer = Rley::Formatter::Json.new(STDOUT)
84
-
85
- # Now emit the parse tree as JSON on the console output
86
- puts "JSON rendering of the parse tree for '#{file_name}' input:"
87
- renderer.render(visitor)
88
- =end
89
- # End of file
@@ -1,42 +0,0 @@
1
- require_relative 'JSON_parser'
2
-
3
- # Create a JSON parser object
4
- parser = JSONParser.new
5
-
6
- # Parse the input file with name given in command-line
7
- if ARGV.empty?
8
- msg = <<-END_MSG
9
- Command-line symtax:
10
- ruby #{__FILE__} filename
11
- where:
12
- filename is the name of a JSON file
13
-
14
- Example:
15
- ruby #{__FILE__} sample01.json
16
- END_MSG
17
- puts msg
18
- exit(1)
19
- end
20
- file_name = ARGV[0]
21
- result = parser.parse_file(file_name) # result object contains parse details
22
-
23
- unless result.success?
24
- # Stop if the parse failed...
25
- puts "Parsing of '#{file_name}' failed"
26
- exit(1)
27
- end
28
-
29
- # Generate a parse tree from the parse result
30
- ptree = result.parse_tree
31
-
32
- # Do something with the parse tree: render it on the output console.
33
- # Step a: Let's create a parse tree visitor
34
- visitor = Rley::ParseTreeVisitor.new(ptree)
35
-
36
- # Step b: Select the rendition format to be JSON
37
- renderer = Rley::Formatter::Json.new(STDOUT)
38
-
39
- # Step c: Now emit the parse tree as JSON on the console output
40
- puts "JSON rendering of the parse tree for '#{file_name}' input:"
41
- renderer.render(visitor)
42
- # End of file
@@ -1,124 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree for the L0
2
- # language
3
- require 'pp'
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Define a grammar for a micro English-like language
17
- # based on Jurafsky & Martin L0 language (chapter 12 of the book).
18
- # It defines the syntax of a sentence in a language with a
19
- # very limited syntax and lexicon in the context of airline reservation.
20
- builder = Rley::Syntax::GrammarBuilder.new
21
- builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
22
- builder.add_terminals('Determiner', 'Preposition', )
23
- builder.add_production('S' => %w[NP VP])
24
- builder.add_production('NP' => 'Pronoun')
25
- builder.add_production('NP' => 'Proper-Noun')
26
- builder.add_production('NP' => %w[Determiner Nominal])
27
- builder.add_production('Nominal' => %w[Nominal Noun])
28
- builder.add_production('Nominal' => 'Noun')
29
- builder.add_production('VP' => 'Verb')
30
- builder.add_production('VP' => %w[Verb NP])
31
- builder.add_production('VP' => %w[Verb NP PP])
32
- builder.add_production('VP' => %w[Verb PP])
33
- builder.add_production('PP' => %w[Preposition NP]) # Prepositional phrase = preposition + noun phrase
34
-
35
- # And now build the grammar...
36
- grammar_l0 = builder.grammar
37
-
38
-
39
- ########################################
40
- # 2. Create a tokenizer for the language
41
- # The tokenizer transforms the input into an array of tokens
42
- # This is a very simplistic implementation for demo purposes.
43
-
44
- # The lexicon is just a Hash with pairs of the form:
45
- # word =>terminal symbol name
46
- L0_lexicon = {
47
- 'flight' => 'Noun',
48
- 'breeze' => 'Noun',
49
- 'trip' => 'Noun',
50
- 'morning' => 'Noun',
51
- 'is' => 'Verb',
52
- 'prefer' => 'Verb',
53
- 'like' => 'Verb',
54
- 'need' => 'Verb',
55
- 'want' => 'Verb',
56
- 'fly' => 'Verb',
57
- 'me' => 'Pronoun',
58
- 'I' => 'Pronoun',
59
- 'you' => 'Pronoun',
60
- 'it' => 'Pronoun',
61
- 'Alaska' => 'Proper-Noun',
62
- 'Baltimore' => 'Proper-Noun',
63
- 'Chicago' => 'Proper-Noun',
64
- 'United' => 'Proper-Noun',
65
- 'American' => 'Proper-Noun',
66
- 'the' => 'Determiner',
67
- 'a' => 'Determiner',
68
- 'an' => 'Determiner',
69
- 'this' => 'Determiner',
70
- 'these' => 'Determiner',
71
- 'that' => 'Determiner',
72
- 'from' => 'Preposition',
73
- 'to' => 'Preposition',
74
- 'on' => 'Preposition',
75
- 'near' => 'Preposition'
76
- }
77
-
78
- # Highly simplified tokenizer implementation.
79
- def tokenizer(aText, aGrammar)
80
- tokens = aText.scan(/\S+/).map do |word|
81
- term_name = L0_lexicon[word]
82
- if term_name.nil?
83
- raise StandardError, "Word '#{word}' not found in lexicon"
84
- end
85
- terminal = aGrammar.name2symbol[term_name]
86
- Rley::Parser::Token.new(word, terminal)
87
- end
88
-
89
- return tokens
90
- end
91
-
92
- ########################################
93
- # Step 3. Create a parser for that grammar
94
- parser = Rley::Parser::EarleyParser.new(grammar_l0)
95
-
96
- ########################################
97
- # Step 3. Tokenize the input
98
- valid_input = 'I prefer a morning flight'
99
- # Another sentence: it is a flight from Chicago
100
- tokens = tokenizer(valid_input, grammar_l0)
101
-
102
- ########################################
103
- # Step 5. Let the parser process the input
104
- result = parser.parse(tokens)
105
-
106
- puts "Parsing success? #{result.success?}"
107
-
108
-
109
- ########################################
110
- # Step 6. Generate a parse tree from the parse result
111
- ptree = result.parse_tree
112
-
113
- ########################################
114
- # Step 7. Render the parse tree (in JSON)
115
- # Let's create a parse tree visitor
116
- visitor = Rley::ParseTreeVisitor.new(ptree)
117
-
118
- #Here we create a renderer object...
119
- renderer = Rley::Formatter::Json.new(STDOUT)
120
-
121
- # Now emit the parse tree as JSON on the console output
122
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
123
- renderer.render(visitor)
124
- # End of file
@@ -1,137 +0,0 @@
1
- # Purpose: to demonstrate how to build and render a parse tree for the L1
2
- # language
3
- require 'pp'
4
- require 'rley' # Load the gem
5
-
6
- # Steps to render a parse tree (of a valid parsed input):
7
- # 1. Define a grammar
8
- # 2. Create a tokenizer for the language
9
- # 3. Create a parser for that grammar
10
- # 4. Tokenize the input
11
- # 5. Let the parser process the input
12
- # 6. Generate a parse tree from the parse result
13
- # 7. Render the parse tree (in JSON)
14
-
15
- ########################################
16
- # Step 1. Define a grammar for a miniature English-like language
17
- # based on Jurafsky & Martin L1 language (chapter 13).
18
- # It defines the syntax of a sentence in a language with a
19
- # very limited syntax and lexicon in the context of airline reservation.
20
- builder = Rley::Syntax::GrammarBuilder.new
21
- builder.add_terminals('Noun', 'Verb', 'Pronoun', 'Proper-Noun')
22
- builder.add_terminals('Aux', 'Determiner', 'Preposition')
23
- builder.add_production('Sentence' => 'S')
24
- builder.add_production('S' => %w[Aux NP VP])
25
- builder.add_production('S' => %w[NP VP])
26
- builder.add_production('S' => %w[VP])
27
- builder.add_production('NP' => 'Pronoun')
28
- builder.add_production('NP' => 'Proper-Noun')
29
- builder.add_production('NP' => %w[Determiner Nominal])
30
- builder.add_production('Nominal' => %w[Nominal Noun])
31
- builder.add_production('Nominal' => %w[Nominal PP])
32
- builder.add_production('Nominal' => 'Noun')
33
- builder.add_production('VP' => 'Verb')
34
- builder.add_production('VP' => %w[Verb NP])
35
- builder.add_production('VP' => %w[Verb NP PP])
36
- builder.add_production('VP' => %w[Verb PP])
37
- builder.add_production('VP' => %w[VP PP])
38
- builder.add_production('PP' => %w[Preposition NP])
39
-
40
- # And now build the grammar...
41
- grammar_l1 = builder.grammar
42
-
43
-
44
- ########################################
45
- # 2. Create a tokenizer for the language
46
- # The tokenizer transforms the input into an array of tokens
47
- # This is a very simplistic implementation for demo purposes.
48
-
49
- # The lexicon is just a Hash with pairs of the form:
50
- # word =>terminal symbol name
51
- L1_lexicon = {
52
- 'does' => 'Aux',
53
- 'flight' => 'Noun',
54
- 'trip' => 'Noun',
55
- 'meal' => 'Noun',
56
- 'money' => 'Noun',
57
- 'morning' => 'Noun',
58
- 'is' => 'Verb',
59
- 'book' => 'Verb',
60
- 'prefer' => 'Verb',
61
- 'like' => 'Verb',
62
- 'need' => 'Verb',
63
- 'want' => 'Verb',
64
- 'fly' => 'Verb',
65
- 'show' => 'Verb',
66
- 'me' => 'Pronoun',
67
- 'I' => 'Pronoun',
68
- 'she' => 'Pronoun',
69
- 'you' => 'Pronoun',
70
- 'it' => 'Pronoun',
71
- 'Alaska' => 'Proper-Noun',
72
- 'Baltimore' => 'Proper-Noun',
73
- 'Chicago' => 'Proper-Noun',
74
- 'Houston' => 'Proper-Noun',
75
- 'NWA' => 'Proper-Noun',
76
- 'United' => 'Proper-Noun',
77
- 'American' => 'Proper-Noun',
78
- 'the' => 'Determiner',
79
- 'a' => 'Determiner',
80
- 'an' => 'Determiner',
81
- 'this' => 'Determiner',
82
- 'these' => 'Determiner',
83
- 'that' => 'Determiner',
84
- 'from' => 'Preposition',
85
- 'to' => 'Preposition',
86
- 'on' => 'Preposition',
87
- 'near' => 'Preposition',
88
- 'through' => 'Preposition'
89
- }
90
-
91
- # Highly simplified tokenizer implementation.
92
- def tokenizer(aText, aGrammar)
93
- tokens = aText.scan(/\S+/).map do |word|
94
- term_name = L1_lexicon[word]
95
- if term_name.nil?
96
- raise StandardError, "Word '#{word}' not found in lexicon"
97
- end
98
- terminal = aGrammar.name2symbol[term_name]
99
- Rley::Parser::Token.new(word, terminal)
100
- end
101
-
102
- return tokens
103
- end
104
-
105
- ########################################
106
- # Step 3. Tokenize the input
107
- valid_input = 'I want the flight from Alaska through Chicago to Houston'
108
-
109
- # Another sentence: it is a flight from Chicago
110
- tokens = tokenizer(valid_input, grammar_l1)
111
-
112
- ########################################
113
- # Step 4. Create a parser for that grammar
114
- parser = Rley::Parser::EarleyParser.new(grammar_l1)
115
-
116
- ########################################
117
- # Step 5. Let the parser process the input
118
- result = parser.parse(tokens)
119
- puts "Parsing success? #{result.success?}"
120
-
121
-
122
- ########################################
123
- # Step 6. Generate a parse tree from the parse result
124
- ptree = result.parse_tree
125
-
126
- ########################################
127
- # Step 7. Render the parse tree (in JSON)
128
- # Let's create a parse tree visitor
129
- visitor = Rley::ParseTreeVisitor.new(ptree)
130
-
131
- #Here we create a renderer object...
132
- renderer = Rley::Formatter::Json.new(STDOUT)
133
-
134
- # Now emit the parse tree as JSON on the console output
135
- puts "JSON rendering of the parse tree for '#{valid_input}' input:"
136
- renderer.render(visitor)
137
- # End of file