rley 0.3.10 → 0.3.11

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 20ceec50ffa7359dbf635a00184c604654cdf9bb
4
- data.tar.gz: 76e9432f62d88127257c66277e21be37fd1b00bd
3
+ metadata.gz: ce491e5c66dbf214e515d360d74b17215783ba1b
4
+ data.tar.gz: 6527418c9d5070bf69b0d0f90d9463fb3d05354b
5
5
  SHA512:
6
- metadata.gz: a80c8e38ede7dd3908a52b73fd8fa13fc7087537d8272c8d648764902cb29f0d5db23c157a2b704eb66e2c8322d7e2ee4822dfd36fadb25ac22e6ec9f517f3eb
7
- data.tar.gz: 9d23a806c510790c6fcc19566520e11c9292d642b2debe20784c4a884d646c173a789ae3060345ad659698554fce6748078ef7d5cc2353f7e1c3495ecd156104
6
+ metadata.gz: d057009d0dccc8b7365021c6e1e1f93c1c34fac3b0b1bee16a8a0502950b5436715d0f1e7ded9f79ebcb484a43e3b1d2bd7b22b2dfe0dc64a52d47bc18745cd3
7
+ data.tar.gz: 92d1b41cb0ac96f65626b198920d60ab76bab3cf4925e74466083e286d9915503143424a9e63cf1593cd089ffd69ac5a25fe7ae95b8ddbbb2f9934c005a106bb
data/CHANGELOG.md CHANGED
@@ -1,3 +1,6 @@
1
+ ### 0.3.11 / 2016-12-04
2
+ * [NEW] Directory `examples/data_formats/JSON`. A JSON demo parser.
3
+
1
4
  ### 0.3.10 / 2016-12-04
2
5
  * [NEW] Method `ParseForest#ambiguous?`. Indicates whether the parse is ambiguous.
3
6
  * [CHANGE] File `README.md` updated with new grammar builder syntax & typo fixes.
@@ -0,0 +1,31 @@
1
# Grammar for the JSON data format, expressed with Rley's grammar builder.
require 'rley' # Load the gem

########################################
# Declare the terminals first, then the production rules.
# Each `rule` call maps one left-hand side to one right-hand side; a
# non-terminal with several alternatives gets one `rule` call per alternative.
builder = Rley::Syntax::GrammarBuilder.new do
  # Terminal symbols
  add_terminals('KEYWORD')                   # true, false, null
  add_terminals('JSON_STRING', 'JSON_NUMBER')
  add_terminals('LACCOL', 'RACCOL')          # '{' and '}'
  add_terminals('LBRACKET', 'RBRACKET')      # '[' and ']'
  add_terminals('COLON', 'COMMA')            # ':' and ','

  # A JSON text is a single value
  rule('json_text' => 'json_value')

  # Values
  rule('json_value' => 'json_object')
  rule('json_value' => 'json_array')
  rule('json_value' => 'JSON_STRING')
  rule('json_value' => 'JSON_NUMBER')
  rule('json_value' => 'KEYWORD')

  # Objects: possibly empty, comma-separated list of "string : value" pairs
  rule('json_object' => %w[LACCOL json_pairs RACCOL])
  rule('json_object' => %w[LACCOL RACCOL])
  rule('json_pairs' => %w[json_pairs COMMA single_pair])
  rule('json_pairs' => 'single_pair')
  rule('single_pair' => %w[JSON_STRING COLON json_value])

  # Arrays: possibly empty, comma-separated list of values
  rule('json_array' => %w[LBRACKET array_items RBRACKET])
  rule('json_array' => %w[LBRACKET RBRACKET])
  rule('array_items' => %w[array_items COMMA json_value])
  rule('array_items' => %w[json_value])
end

# Ask the builder for the resulting grammar object.
GrammarJSON = builder.grammar
@@ -0,0 +1,114 @@
1
+ # File: JSON_lexer.rb
2
+ # Lexer for the JSON data format
3
+ require 'rley' # Load the gem
4
+ require 'strscan'
5
+
6
+ # Lexer for JSON.
7
# Lexer for JSON.
# Turns a JSON text into an array of Rley::Parser::Token objects whose
# terminal symbols are looked up, by name, in the grammar given at
# construction time.
class JSONLexer
  # The StringScanner that walks through the source text.
  attr_reader(:scanner)
  # Current line number (starts at 1).
  attr_reader(:lineno)
  # Scanner offset recorded right after the latest whitespace run that
  # contained at least one line break (0 until a line break is seen).
  attr_reader(:line_start)
  # Mapping: terminal name => terminal symbol (taken from the grammar).
  attr_reader(:name2symbol)

  # Single-character lexemes and the name of their terminal symbol.
  LEXEME2NAME = {
    '{' => 'LACCOL',
    '}' => 'RACCOL',
    '[' => 'LBRACKET',
    ']' => 'RBRACKET',
    ',' => 'COMMA',
    ':' => 'COLON'
  }.freeze

  # Number literal: optional sign, integer part, optional fraction,
  # optional exponent with one or more digits.
  NUMBER_PATTERN = /-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?/

  # Error raised when the input text cannot be split into valid tokens.
  class ScanError < StandardError; end

  public

  # @param source [String] the JSON text to tokenize.
  # @param aGrammar [#name2symbol] grammar giving access to the terminal
  #   symbols by name.
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
    @line_start = 0
  end

  # Scan the whole input.
  # @return [Array] the tokens, in order of occurrence.
  # @raise [ScanError] when an invalid lexeme is met.
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      # nil means that only whitespace was left before the end of input
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Skip leading whitespace, then recognize exactly one token.
  # @return [Rley::Parser::Token, nil] nil when the end of input is reached.
  # @raise [ScanError] when no valid token starts at the current position.
  def _next_token
    skip_whitespaces
    curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
    return nil if curr_ch.nil?

    case curr_ch
    when '{', '}', '[', ']', ',', ':'
      build_token(curr_ch, LEXEME2NAME[curr_ch])

    when '"' # Start string delimiter found
      value = scanner.scan(/([^"\\]|\\.)*/)
      end_delimiter = scanner.getch
      # The scan above may also stop on a dangling backslash:
      # only a double quote is a valid terminator.
      raise ScanError, 'No closing quotes (") found' unless end_delimiter == '"'

      build_token(value, 'JSON_STRING')

    when /[ftn]/ # First letter of keywords
      scanner.pos = scanner.pos - 1 # Simulate putback
      keyw = scanner.scan(/false|true|null/)
      if keyw.nil?
        invalid_keyw = scanner.scan(/\w+/)
        raise ScanError, "Invalid keyword: #{invalid_keyw}"
      end

      build_token(keyw, 'KEYWORD')

    when /[-0-9]/ # Start character of number literal found
      scanner.pos = scanner.pos - 1 # Simulate putback
      value = scanner.scan(NUMBER_PATTERN)
      # A lone '-' matches nothing: fail instead of producing a token without
      # lexeme and leaving the scanner stuck at the same position.
      raise ScanError, "Invalid number: #{scanner.peek(20)}" if value.nil?

      build_token(value, 'JSON_NUMBER')

    else # Unknown token
      erroneous = curr_ch + scanner.scan(/.{1,20}/).to_s
      raise ScanError, "Unknown token #{erroneous}"
    end
  end

  # Create a token for the given lexeme and terminal name.
  def build_token(lexeme, type_name)
    Rley::Parser::Token.new(lexeme, name2symbol[type_name])
  end

  # Consume a run of whitespace characters and update the line bookkeeping
  # when that run contains line breaks.
  def skip_whitespaces
    matched = scanner.scan(/[ \t\f\n\r]+/)
    return if matched.nil?

    # CRLF and LFCR pairs count as one line break each
    newline_count = matched.scan(/\r\n|\n\r|\r|\n/).size
    newline_detected(newline_count) if newline_count > 0
  end

  # Register that `count` line breaks were just consumed.
  def newline_detected(count)
    @lineno += count
    @line_start = scanner.pos
  end
end # class
113
+
114
+
@@ -0,0 +1,47 @@
1
# Purpose: to demonstrate how to build and render a parse tree for JSON
# language
require 'pp'
require 'rley' # Load the gem
# The file name is case-sensitive on many file systems: it must match
# the name of the lexer file exactly (JSON_lexer.rb).
require_relative 'JSON_lexer'

# Steps to render a parse tree (of a valid parsed input):
# 1. Define a grammar
# 2. Create a parser for that grammar
# 3. Tokenize the input
# 4. Let the parser process the input
# 5. Generate a parse tree from the parse result
# 6. Render the parse tree (in JSON)

########################################
# Step 1. Load a grammar for JSON
require_relative 'JSON_grammar'

# A JSON parser derived from our general Earley parser.
class JSONParser < Rley::Parser::GFGEarleyParser
  # NOTE(review): nothing in this class assigns @source_file, so this
  # reader returns nil -- confirm whether parse_file should record the name.
  attr_reader(:source_file)

  # Constructor
  def initialize
    # Build the Earley parser with the JSON grammar
    super(GrammarJSON)
  end

  # Tokenize the content of the given file and parse the resulting tokens.
  # @param aFilename [String] path of the JSON file to parse.
  # @return the object returned by the inherited `parse` method.
  def parse_file(aFilename)
    parse(tokenize_file(aFilename))
  end

  private

  # Read the whole file and split its content into tokens.
  # @param aFilename [String] path of the JSON file to read.
  # @return [Array] the tokens produced by JSONLexer#tokens.
  def tokenize_file(aFilename)
    JSONLexer.new(File.read(aFilename), GrammarJSON).tokens
  end
end # class

# End of file
@@ -0,0 +1,31 @@
1
# Demo script: parse the JSON file named on the command line.
require_relative 'JSON_parser'

# Create a JSON parser object
parser = JSONParser.new

# Without a file name there is nothing to parse:
# print the usage message and stop with a non-zero exit status.
if ARGV.empty?
  msg = <<-END_MSG
Command-line syntax:
  ruby #{__FILE__} filename
  where:
    filename is the name of a JSON file

  Example:
    ruby #{__FILE__} sample01.json
  END_MSG
  puts msg
  exit(1)
end

# Parse the input file with name given in command-line
file_name = ARGV[0]
result = parser.parse_file(file_name) # result object contains parse details

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{file_name}' failed"
  exit(1)
end

# Generate a parse forest from the parse result
# (the demo stops here: the forest is built but not rendered)
pforest = result.parse_forest
# End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.3.10'.freeze
6
+ Version = '0.3.11'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.10
4
+ version: 0.3.11
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
@@ -130,6 +130,10 @@ files:
130
130
  - README.md
131
131
  - Rakefile
132
132
  - examples/NLP/mini_en_demo.rb
133
+ - examples/data_formats/JSON/JSON_grammar.rb
134
+ - examples/data_formats/JSON/JSON_lexer.rb
135
+ - examples/data_formats/JSON/JSON_parser.rb
136
+ - examples/data_formats/JSON/demo_json.rb
133
137
  - lib/rley.rb
134
138
  - lib/rley/constants.rb
135
139
  - lib/rley/formatter/base_formatter.rb