rley 0.3.10 → 0.3.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +3 -0
- data/examples/data_formats/JSON/JSON_grammar.rb +31 -0
- data/examples/data_formats/JSON/JSON_lexer.rb +114 -0
- data/examples/data_formats/JSON/JSON_parser.rb +47 -0
- data/examples/data_formats/JSON/demo_json.rb +31 -0
- data/lib/rley/constants.rb +1 -1
- metadata +5 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce491e5c66dbf214e515d360d74b17215783ba1b
|
4
|
+
data.tar.gz: 6527418c9d5070bf69b0d0f90d9463fb3d05354b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d057009d0dccc8b7365021c6e1e1f93c1c34fac3b0b1bee16a8a0502950b5436715d0f1e7ded9f79ebcb484a43e3b1d2bd7b22b2dfe0dc64a52d47bc18745cd3
|
7
|
+
data.tar.gz: 92d1b41cb0ac96f65626b198920d60ab76bab3cf4925e74466083e286d9915503143424a9e63cf1593cd089ffd69ac5a25fe7ae95b8ddbbb2f9934c005a106bb
|
data/CHANGELOG.md
CHANGED
@@ -1,3 +1,6 @@
|
|
1
|
+
### 0.3.11 / 2016-12-04
|
2
|
+
* [NEW] Directory `examples\data_formats\JSON`. A JSON demo parser.
|
3
|
+
|
1
4
|
### 0.3.10 / 2016-12-04
|
2
5
|
* [NEW] Method `ParseForest#ambiguous?`. Indicates whether the parse is ambiguous.
|
3
6
|
* [CHANGE] File `README.md` updated with new grammar builder syntax & typo fixes.
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# Grammar for JSON data representation
|
2
|
+
require 'rley' # Load the gem
|
3
|
+
|
4
|
+
|
5
|
+
########################################
|
6
|
+
# Define a grammar for JSON.
# Terminals are declared first, then the production rules; the first rule
# ('json_text') is listed first exactly as in the original definition.
json_builder = Rley::Syntax::GrammarBuilder.new do
  # Terminal symbols, i.e. the token types produced by the lexer
  add_terminals('KEYWORD') # For true, false, null keywords
  add_terminals('JSON_STRING', 'JSON_NUMBER')
  add_terminals('LACCOL', 'RACCOL') # For '{', '}' delimiters
  add_terminals('LBRACKET', 'RBRACKET') # For '[', ']' delimiters
  add_terminals('COLON', 'COMMA') # For ':', ',' separators

  rule 'json_text' => 'json_value'

  # A value is one of five alternatives (one production per alternative)
  %w[json_object json_array JSON_STRING JSON_NUMBER KEYWORD].each do |alternative|
    rule 'json_value' => alternative
  end

  # Objects: '{' pairs '}' or the empty object '{' '}'
  rule 'json_object' => %w[LACCOL json_pairs RACCOL]
  rule 'json_object' => %w[LACCOL RACCOL]
  rule 'json_pairs' => %w[json_pairs COMMA single_pair]
  rule 'json_pairs' => 'single_pair'
  rule 'single_pair' => %w[JSON_STRING COLON json_value]

  # Arrays: '[' items ']' or the empty array '[' ']'
  rule 'json_array' => %w[LBRACKET array_items RBRACKET]
  rule 'json_array' => %w[LBRACKET RBRACKET]
  rule 'array_items' => %w[array_items COMMA json_value]
  rule 'array_items' => %w[json_value]
end

# And now build the grammar...
GrammarJSON = json_builder.grammar
|
@@ -0,0 +1,114 @@
|
|
1
|
+
# File: JSON_lexer.rb
|
2
|
+
# Lexer for the JSON data format
|
3
|
+
require 'rley' # Load the gem
|
4
|
+
require 'strscan'
|
5
|
+
|
6
|
+
# Lexer for JSON.
# Splits a JSON text into a sequence of Rley::Parser::Token objects whose
# terminal symbols are looked up, by name, in the given grammar.
class JSONLexer
  # Raised when the input contains text that cannot be turned into a token.
  class ScanError < StandardError; end

  # Maps each single-character delimiter to the name of its terminal symbol.
  LEXEME2NAME = {
    '{' => 'LACCOL',
    '}' => 'RACCOL',
    '[' => 'LBRACKET',
    ']' => 'RBRACKET',
    ',' => 'COMMA',
    ':' => 'COLON'
  }.freeze

  attr_reader(:scanner)     # StringScanner over the source text
  attr_reader(:lineno)      # Current line number (starts at 1)
  attr_reader(:line_start)  # Byte offset at which the current line begins
  attr_reader(:name2symbol) # Terminal name => grammar symbol

  # source:   the JSON text to tokenize (a String)
  # aGrammar: an object responding to name2symbol (terminal name => symbol)
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
    @line_start = 0
  end

  # Tokenize the whole input.
  # Returns an Array of tokens (empty when the input holds only whitespace).
  # Raises ScanError when a malformed or unknown token is met.
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Return the next token, or nil when only whitespace remained.
  def _next_token
    skip_whitespaces
    curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
    return nil if curr_ch.nil?

    case curr_ch
    when '{', '}', '[', ']', ',', ':'
      build_token(curr_ch, LEXEME2NAME[curr_ch])
    when '"' # Start string delimiter found
      string_token
    when /[ftn]/ # First letter of keywords
      putback
      keyword_token
    when /[-0-9]/ # Start character of number literal found
      putback
      number_token
    else # Unknown token
      sequel = scanner.scan(/.{1,20}/)
      raise ScanError, "Unknown token #{curr_ch}#{sequel}"
    end
  end

  # Move the scan pointer one byte back. Only used right after reading a
  # single-byte (ASCII) character, so a byte offset is sufficient.
  def putback
    scanner.pos = scanner.pos - 1
  end

  # Create a token for the lexeme, with the terminal of the given name.
  def build_token(lexeme, type_name)
    Rley::Parser::Token.new(lexeme, name2symbol[type_name])
  end

  # Scan the remainder of a string literal; the opening quote was consumed.
  # The lexeme is the raw text between the quotes (escapes are not decoded).
  def string_token
    value = scanner.scan(/([^"\\]|\\.)*/)
    end_delimiter = scanner.getch
    # The scan also stops on a dangling backslash, so a nil check alone
    # would accept an unterminated string: require a real closing quote.
    raise ScanError, 'No closing quotes (") found' unless end_delimiter == '"'

    build_token(value, 'JSON_STRING')
  end

  # Scan one of the keywords true, false, null.
  def keyword_token
    keyw = scanner.scan(/false|true|null/)
    if keyw.nil?
      invalid_keyw = scanner.scan(/\w+/)
      raise ScanError, "Invalid keyword: #{invalid_keyw}"
    end

    build_token(keyw, 'KEYWORD')
  end

  # Scan a number literal: integer part, optional fraction, optional
  # exponent with one or more digits.
  def number_token
    value = scanner.scan(/-?[0-9]+(?:\.[0-9]+)?(?:[eE][-+]?[0-9]+)?/)
    if value.nil?
      # Only possible for a '-' that isn't followed by a digit. Without this
      # check a token with a nil lexeme would be built and the scanner would
      # not advance.
      invalid = scanner.scan(/\S{1,20}/)
      raise ScanError, "Invalid number: #{invalid}"
    end

    build_token(value, 'JSON_NUMBER')
  end

  # Skip blanks and line breaks, keeping lineno and line_start up to date.
  def skip_whitespaces
    matched = scanner.scan(/[ \t\f\n\r]+/)
    return if matched.nil?

    # CR LF counts as ONE line break, as do a lone LF and a lone CR.
    newline_count = matched.scan(/\r\n?|\n/).size
    return if newline_count.zero?

    # Blanks that follow the last line break belong to the new line.
    indentation = matched[/[^\r\n]*\z/]
    newline_detected(newline_count, indentation.bytesize)
  end

  # Record that count line breaks were passed. offset is the number of
  # bytes already consumed on the new current line.
  def newline_detected(count, offset = 0)
    @lineno += count
    @line_start = scanner.pos - offset
  end
end # class
|
113
|
+
|
114
|
+
|
@@ -0,0 +1,47 @@
|
|
1
|
+
# Purpose: to demonstrate how to build and render a parse tree for JSON
|
2
|
+
# language
|
3
|
+
require 'pp'
|
4
|
+
require 'rley' # Load the gem
|
5
|
+
require_relative 'json_lexer'
|
6
|
+
|
7
|
+
# Steps to render a parse tree (of a valid parsed input):
|
8
|
+
# 1. Define a grammar
|
9
|
+
# 2. Create a parser for that grammar
|
10
|
+
# 3. Tokenize the input
|
11
|
+
# 4. Let the parser process the input
|
12
|
+
# 5. Generate a parse tree from the parse result
|
13
|
+
# 6. Render the parse tree (in JSON)
|
14
|
+
|
15
|
+
########################################
|
16
|
+
# Step 1. Load a grammar for JSON
|
17
|
+
require_relative 'JSON_grammar'
|
18
|
+
|
19
|
+
# A JSON parser derived from our general Earley parser.
# It is always built with the GrammarJSON grammar and reads its input
# from a file that is tokenized with JSONLexer.
# NOTE(review): this file loads the lexer with require_relative 'json_lexer'
# while the gem ships the file as JSON_lexer.rb; that lookup presumably
# fails on case-sensitive file systems -- confirm and align the name.
class JSONParser < Rley::Parser::GFGEarleyParser
  # Name of the file most recently given to parse_file (nil before that).
  attr_reader(:source_file)

  # Constructor
  def initialize
    # Build the Earley parser with the JSON grammar
    super(GrammarJSON)
    @source_file = nil
  end

  # Tokenize and parse the JSON file with the given name.
  # Returns whatever the inherited parse method returns for the tokens.
  def parse_file(aFilename)
    @source_file = aFilename
    parse(tokenize_file(aFilename))
  end

  private

  # Read the whole file and return its token sequence.
  def tokenize_file(aFilename)
    input_source = File.read(aFilename)
    JSONLexer.new(input_source, GrammarJSON).tokens
  end
end # class
|
46
|
+
|
47
|
+
# End of file
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require_relative 'JSON_parser'
|
2
|
+
|
3
|
+
# Create a JSON parser object
parser = JSONParser.new

# The name of the file to parse must be given on the command line:
# without it, show the expected syntax and stop with a failure status.
if ARGV.empty?
  msg = <<-END_MSG
Command-line syntax:
ruby #{__FILE__} filename
where:
filename is the name of a JSON file

Example:
ruby #{__FILE__} sample01.json
  END_MSG
  puts msg
  exit(1)
end

file_name = ARGV[0]
result = parser.parse_file(file_name) # result object contains parse details

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{file_name}' failed"
  exit(1)
end

# Generate a parse forest from the parse result
pforest = result.parse_forest
|
31
|
+
# End of file
|
data/lib/rley/constants.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rley
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.10
|
4
|
+
version: 0.3.11
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Dimitri Geshef
|
@@ -130,6 +130,10 @@ files:
|
|
130
130
|
- README.md
|
131
131
|
- Rakefile
|
132
132
|
- examples/NLP/mini_en_demo.rb
|
133
|
+
- examples/data_formats/JSON/JSON_grammar.rb
|
134
|
+
- examples/data_formats/JSON/JSON_lexer.rb
|
135
|
+
- examples/data_formats/JSON/JSON_parser.rb
|
136
|
+
- examples/data_formats/JSON/demo_json.rb
|
133
137
|
- lib/rley.rb
|
134
138
|
- lib/rley/constants.rb
|
135
139
|
- lib/rley/formatter/base_formatter.rb
|