rley 0.2.08 → 0.2.09
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/CHANGELOG.md +3 -0
- data/README.md +16 -0
- data/examples/parsers/demo-JSON/JSON_grammar.rb +31 -0
- data/examples/parsers/demo-JSON/JSON_lexer.rb +114 -0
- data/examples/parsers/demo-JSON/JSON_parser.rb +89 -0
- data/examples/parsers/demo-JSON/demo_json.rb +42 -0
- data/examples/parsers/parsing_L1.rb +4 -4
- data/lib/rley/constants.rb +1 -1
- metadata +6 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MTgzNDQzMWNlYTAwMWNiZjIxZjYxYzBiY2YyZjYyMDE2MjA4NjJkZg==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YTFjMDBmNWYzYmUwNjI4ZDNiNjczMjNhMTlhZjRjYzhlODUwMWM5Nw==
|
7
7
|
!binary "U0hBNTEy":
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
MjAwZjQ4YzdmMTMxOTAxY2IxMTQ0N2QzYzI0MWM1ZWIyOGJmNGZjMWE0YzVk
|
10
|
+
MTZlOGZmZWU2YzhiZmFhN2I5YzU5NGFkNDk0MTY2NjFmNzk4MzBhZWM5NjUz
|
11
|
+
NDgyODQxNTkzMWFkMGM2OTJhMDYyYWMxMjY1ZjQ0NjZlMjdhNTU=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
ZmZlMGNlY2Q1M2E1MzkwMThlNmI0NTEwOTEzNTcxNzZjYWU2MGE0MDQ3ODUz
|
14
|
+
MjhjYjZjMjQxMGQzZjJkMTYwZDNhNWViMjhmZDk4ZGM1MjFkYjIxNTJkYjBm
|
15
|
+
MjZkMTE2YzdhYjg0NTRiMzhkZjMzOWVlY2VlMGFlMWU4ZDAxOGM=
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -40,6 +40,22 @@ This project is in "earley" stage.
|
|
40
40
|
- Provide documentation and examples
|
41
41
|
|
42
42
|
|
43
|
+
### Other similar Ruby projects ###
|
44
|
+
__Rley__ isn't the sole Ruby implementation of the Earley parser algorithm.
|
45
|
+
Here are a few other ones:
|
46
|
+
- [Kanocc gem](https://rubygems.org/gems/kanocc) -- Advertised as a Ruby based parsing and translation framework.
|
47
|
+
Although the gem dates from 2009, the author still maintains it in a public repository on [GitHub](https://github.com/surlykke/Kanocc).
|
48
|
+
The grammar symbols (tokens and non-terminals) must be represented as (sub)classes.
|
49
|
+
Grammar rules are methods of the non-terminal classes. A rule can have a block code argument
|
50
|
+
that specifies the semantic action when that rule is applied.
|
51
|
+
- [lc1 project](https://github.com/kp0v/lc1) -- Advertised as a combination of Earley and Viterbi algorithms for [Probabilistic] Context Free Grammars
|
52
|
+
Aimed at parsing Brazilian Portuguese.
|
53
|
+
- [earley project](https://github.com/joshingly/earley) -- An Earley parser (grammar rules are specified in JSON format).
|
54
|
+
The code doesn't seem to be maintained: latest commit dates from Nov. 2011.
|
55
|
+
- [linguist project](https://github.com/davidkellis/linguist) -- Advertised as a library for parsing context-free languages.
|
56
|
+
It is a recognizer not a parser. In other words it can only tell whether a given input
|
57
|
+
conforms to the grammar rules or not. As such it cannot build parse trees.
|
58
|
+
The code doesn't seem to be maintained: latest commit dates from Oct. 2011.
|
43
59
|
|
44
60
|
Copyright
|
45
61
|
---------
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree for JSON
|
2
|
+
# language
|
3
|
+
require 'rley' # Load the gem
|
4
|
+
|
5
|
+
|
6
|
+
########################################
|
7
|
+
# Define a grammar for JSON
|
8
|
+
builder = Rley::Syntax::GrammarBuilder.new

# Terminal symbols (token types produced by the lexer)
builder.add_terminals('KEYWORD') # For true, false, null keywords
builder.add_terminals('JSON_STRING', 'JSON_NUMBER')
builder.add_terminals('LACCOL', 'RACCOL') # For '{', '}' delimiters
builder.add_terminals('LBRACKET', 'RBRACKET') # For '[', ']' delimiters
builder.add_terminals('COLON', 'COMMA') # For ':', ',' separators

# A JSON text consists of exactly one value
builder.add_production('json_text' => 'json_value')
builder.add_production('json_value' => 'json_object')
builder.add_production('json_value' => 'json_array')
builder.add_production('json_value' => 'JSON_STRING')
builder.add_production('json_value' => 'JSON_NUMBER')
builder.add_production('json_value' => 'KEYWORD')

# Objects: '{' pairs '}' or the empty object '{' '}'
builder.add_production('json_object' => %w[LACCOL json_pairs RACCOL])
builder.add_production('json_object' => %w[LACCOL RACCOL])
builder.add_production('json_pairs' => %w[json_pairs COMMA single_pair])
builder.add_production('json_pairs' => 'single_pair')
builder.add_production('single_pair' => %w[JSON_STRING COLON json_value])

# Arrays: '[' items ']' or the empty array '[' ']'
builder.add_production('json_array' => %w[LBRACKET array_items RBRACKET])
# The empty array must open with LBRACKET (it was 'RBRACKET', 'RBRACKET',
# which rejected "[]" and accepted "]]").
builder.add_production('json_array' => %w[LBRACKET RBRACKET])
builder.add_production('array_items' => %w[array_items COMMA json_value])
builder.add_production('array_items' => %w[json_value])

# And now build the grammar...
GrammarJSON = builder.grammar
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# File: JSON_lexer.rb
|
2
|
+
# Lexer for the JSON data format
|
3
|
+
require 'rley' # Load the gem
|
4
|
+
require 'strscan'
|
5
|
+
|
6
|
+
# Lexer for JSON.
|
7
|
+
# Lexer for the JSON data format.
#
# Splits a JSON text into a sequence of Rley::Parser::Token objects. Each token
# is built from the matched text (lexeme) and the terminal symbol found under
# the corresponding name in the name-to-symbol mapping of the given grammar.
class JSONLexer
  # Raised when the input text cannot be split into valid JSON tokens.
  class ScanError < StandardError; end

  # Maps every one-character delimiter to the name of its terminal symbol.
  LEXEME2NAME = {
    '{' => 'LACCOL',
    '}' => 'RACCOL',
    '[' => 'LBRACKET',
    ']' => 'RBRACKET',
    ',' => 'COMMA',
    ':' => 'COLON'
  }.freeze

  # The StringScanner that walks over the input text.
  attr_reader(:scanner)

  # Current line number (starts at 1).
  attr_reader(:lineno)

  # Scanner position recorded right after the most recent whitespace run
  # that contained a line break (0 as long as no line break was skipped).
  attr_reader(:line_start)

  # Mapping from terminal name to grammar symbol (taken from the grammar).
  attr_reader(:name2symbol)

  # @param source [String] the JSON text to tokenize
  # @param aGrammar [#name2symbol] grammar giving access to its symbols by name
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
    @line_start = 0
  end

  # Scan the whole input text.
  # @return [Array] the tokens in order of occurrence
  # @raise [ScanError] when an invalid or unknown token is met
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Return the next token, or nil when only whitespace was left in the input.
  def _next_token
    skip_whitespaces
    curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
    return nil if curr_ch.nil?

    case curr_ch
    when *LEXEME2NAME.keys
      build_token(curr_ch, LEXEME2NAME[curr_ch])
    when '"' # Start string delimiter found
      string_token
    when /[ftn]/ # First letter of keywords
      keyword_token
    when /[-0-9]/ # Start character of number literal found
      number_token
    else # Unknown token
      # Show up to 20 following characters to help locating the error
      raise ScanError, "Unknown token #{curr_ch}#{scanner.scan(/.{1,20}/)}"
    end
  end

  # Create a token for the given text and terminal symbol name.
  def build_token(lexeme, terminal_name)
    Rley::Parser::Token.new(lexeme, name2symbol[terminal_name])
  end

  # Move the scanner one character back (the characters put back here are
  # single-byte ones: letters, digits or the minus sign).
  def putback
    scanner.pos = scanner.pos - 1
  end

  # Scan a string literal; the opening quote is already consumed.
  # The lexeme is the text between the quotes, escapes left untouched.
  def string_token
    value = scanner.scan(/([^"\\]|\\.)*/)
    # The next character must be the closing quote. Checking for nil only
    # would accept a dangling backslash as a terminator.
    raise ScanError, 'No closing quotes (") found' unless scanner.getch == '"'

    build_token(value, 'JSON_STRING')
  end

  # Scan one of the keywords: false, true, null.
  def keyword_token
    putback
    # \b prevents matching a mere prefix of a longer word (e.g. 'trueish')
    keyw = scanner.scan(/(?:false|true|null)\b/)
    if keyw.nil?
      invalid_keyw = scanner.scan(/\w+/)
      raise ScanError, "Invalid keyword: #{invalid_keyw}"
    end

    build_token(keyw, 'KEYWORD')
  end

  # Scan a number literal (integer, fraction and exponent parts).
  def number_token
    putback
    # The exponent may have several digits ('1e10')
    value = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/)
    if value.nil?
      # E.g. a '-' without digits. Raising here also avoids an endless loop:
      # the scanner wouldn't advance otherwise.
      raise ScanError, "Invalid number: #{scanner.scan(/\S{1,20}/)}"
    end

    build_token(value, 'JSON_NUMBER')
  end

  # Skip any whitespace run and keep the line bookkeeping up to date.
  def skip_whitespaces
    matched = scanner.scan(/[ \t\f\n\r]+/)
    return if matched.nil?

    # CR LF, lone CR and lone LF each count as one line break
    newline_count = matched.scan(/\r\n?|\n/).size
    newline_detected(newline_count)
  end

  # Update line number and line start after 'count' line breaks were skipped.
  def newline_detected(count)
    return if count.zero? # Blanks inside a line don't start a new line

    @lineno += count
    @line_start = scanner.pos
  end
end # class
|
113
|
+
|
114
|
+
|
@@ -0,0 +1,89 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree for JSON
|
2
|
+
# language
|
3
|
+
require 'pp'
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
require_relative 'JSON_lexer' # Same letter case as the file name: matters on case-sensitive file systems
|
6
|
+
|
7
|
+
# Steps to render a parse tree (of a valid parsed input):
|
8
|
+
# 1. Define a grammar
|
9
|
+
# 2. Create a parser for that grammar
|
10
|
+
# 3. Tokenize the input
|
11
|
+
# 4. Let the parser process the input
|
12
|
+
# 5. Generate a parse tree from the parse result
|
13
|
+
# 6. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Load a grammar for JSON
|
17
|
+
require_relative 'JSON_grammar'
|
18
|
+
|
19
|
+
# A JSON parser derived from our general Earley parser.
|
20
|
+
# An Earley parser specialized for the JSON grammar.
class JSONParser < Rley::Parser::EarleyParser
  attr_reader(:source_file)

  # Constructor.
  # Initialize the underlying Earley parser with the JSON grammar.
  def initialize
    super(GrammarJSON)
  end

  # Tokenize the content of the given file, then parse the resulting tokens.
  # @param aFilename [String] name of the file to parse
  # @return the result object returned by the parse method
  def parse_file(aFilename)
    parse(tokenize_file(aFilename))
  end

  private

  # Read the whole file and let a JSONLexer split its text into tokens.
  def tokenize_file(aFilename)
    input_source = File.open(aFilename, 'r') { |f| f.read }

    JSONLexer.new(input_source, GrammarJSON).tokens
  end
end # class
|
46
|
+
|
47
|
+
=begin
|
48
|
+
########################################
|
49
|
+
# Step 3. Create a parser for that grammar
|
50
|
+
# parser = Rley::Parser::EarleyParser.new(GrammarJSON)
|
51
|
+
parser = JSONParser.new
|
52
|
+
|
53
|
+
|
54
|
+
########################################
|
55
|
+
# Step 4. Tokenize the input file
|
56
|
+
file_name = 'sample02.json'
|
57
|
+
=begin
|
58
|
+
input_source = nil
|
59
|
+
File.open(file_name, 'r') { |f| input_source = f.read }
|
60
|
+
|
61
|
+
lexer = JSONLexer.new(input_source, GrammarJSON)
|
62
|
+
tokens = lexer.tokens
|
63
|
+
#=end
|
64
|
+
|
65
|
+
########################################
|
66
|
+
# Step 5. Let the parser process the input
|
67
|
+
result = parser.parse_file(file_name) # parser.parse(tokens)
|
68
|
+
unless result.success?
|
69
|
+
puts "Parsing of '#{file_name}' failed"
|
70
|
+
exit(1)
|
71
|
+
end
|
72
|
+
|
73
|
+
########################################
|
74
|
+
# Step 6. Generate a parse tree from the parse result
|
75
|
+
ptree = result.parse_tree
|
76
|
+
|
77
|
+
########################################
|
78
|
+
# Step 7. Render the parse tree (in JSON)
|
79
|
+
# Let's create a parse tree visitor
|
80
|
+
visitor = Rley::ParseTreeVisitor.new(ptree)
|
81
|
+
|
82
|
+
#Here we create a renderer object...
|
83
|
+
renderer = Rley::Formatter::Json.new(STDOUT)
|
84
|
+
|
85
|
+
# Now emit the parse tree as JSON on the console output
|
86
|
+
puts "JSON rendering of the parse tree for '#{file_name}' input:"
|
87
|
+
renderer.render(visitor)
|
88
|
+
=end
|
89
|
+
# End of file
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require_relative 'JSON_parser'
|
2
|
+
|
3
|
+
# Create a JSON parser object
parser = JSONParser.new

# Parse the input file with name given in command-line.
# Without argument: display the usage text and stop.
if ARGV.empty?
  msg = <<-END_MSG
Command-line syntax:
ruby #{__FILE__} filename
where:
filename is the name of a JSON file

Example:
ruby #{__FILE__} sample01.json
  END_MSG
  puts msg
  exit(1)
end
file_name = ARGV[0]
result = parser.parse_file(file_name) # result object contains parse details

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{file_name}' failed"
  exit(1)
end

# Generate a parse tree from the parse result
ptree = result.parse_tree

# Do something with the parse tree: render it on the output console.
# Step a: Let's create a parse tree visitor
visitor = Rley::ParseTreeVisitor.new(ptree)

# Step b: Select the rendition format to be JSON
renderer = Rley::Formatter::Json.new(STDOUT)

# Step c: Now emit the parse tree as JSON on the console output
puts "JSON rendering of the parse tree for '#{file_name}' input:"
renderer.render(visitor)
|
42
|
+
# End of file
|
@@ -102,10 +102,6 @@ def tokenizer(aText, aGrammar)
|
|
102
102
|
return tokens
|
103
103
|
end
|
104
104
|
|
105
|
-
########################################
|
106
|
-
# Step 3. Create a parser for that grammar
|
107
|
-
parser = Rley::Parser::EarleyParser.new(grammar_l1)
|
108
|
-
|
109
105
|
########################################
|
110
106
|
# Step 3. Tokenize the input
|
111
107
|
valid_input = 'I want the flight from Alaska through Chicago to Houston'
|
@@ -113,6 +109,10 @@ valid_input = 'I want the flight from Alaska through Chicago to Houston'
|
|
113
109
|
# Another sentence: it is a flight from Chicago
|
114
110
|
tokens = tokenizer(valid_input, grammar_l1)
|
115
111
|
|
112
|
+
########################################
|
113
|
+
# Step 4. Create a parser for that grammar
|
114
|
+
parser = Rley::Parser::EarleyParser.new(grammar_l1)
|
115
|
+
|
116
116
|
########################################
|
117
117
|
# Step 5. Let the parser process the input
|
118
118
|
result = parser.parse(tokens)
|
data/lib/rley/constants.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.09
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rake
|
@@ -101,6 +101,10 @@ files:
|
|
101
101
|
- README.md
|
102
102
|
- examples/grammars/grammar_abc.rb
|
103
103
|
- examples/grammars/grammar_L0.rb
|
104
|
+
- examples/parsers/demo-JSON/demo_json.rb
|
105
|
+
- examples/parsers/demo-JSON/JSON_grammar.rb
|
106
|
+
- examples/parsers/demo-JSON/JSON_lexer.rb
|
107
|
+
- examples/parsers/demo-JSON/JSON_parser.rb
|
104
108
|
- examples/parsers/parsing_abc.rb
|
105
109
|
- examples/parsers/parsing_ambig.rb
|
106
110
|
- examples/parsers/parsing_b_expr.rb
|