rley 0.2.08 → 0.2.09

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- ZmY0MDhhM2RlNzcxYzk3ZDJkMGM5YWVjY2Q0NWRlNGZhZmYyMjdmOQ==
4
+ MTgzNDQzMWNlYTAwMWNiZjIxZjYxYzBiY2YyZjYyMDE2MjA4NjJkZg==
5
5
  data.tar.gz: !binary |-
6
- NGNmZDI2MmYyNDBhZWZiNzdhZDhjNmNjMjQxMDRkMDI0NTZkMDA3YQ==
6
+ YTFjMDBmNWYzYmUwNjI4ZDNiNjczMjNhMTlhZjRjYzhlODUwMWM5Nw==
7
7
  !binary "U0hBNTEy":
8
8
  metadata.gz: !binary |-
9
- YTliNWVmM2NkN2ZjZjkyZjJhZGVhODE3M2M5MGQ5ZGI5NWM1MTI1ZGJjZWVl
10
- ZTg4ZTFhODNjNzA3MmZlMmY4YTRjNDc0YzQ0M2FlNzE2NTU0YjQzMDBjYjUy
11
- MmZjY2E5NzcyYzFiNTA2ZTg3Yjc1ZmUwNGJlNjUwOWM1YzY3ZWY=
9
+ MjAwZjQ4YzdmMTMxOTAxY2IxMTQ0N2QzYzI0MWM1ZWIyOGJmNGZjMWE0YzVk
10
+ MTZlOGZmZWU2YzhiZmFhN2I5YzU5NGFkNDk0MTY2NjFmNzk4MzBhZWM5NjUz
11
+ NDgyODQxNTkzMWFkMGM2OTJhMDYyYWMxMjY1ZjQ0NjZlMjdhNTU=
12
12
  data.tar.gz: !binary |-
13
- ZTM0M2IwODkwNjJlMDZkYzQzNTM4YmRkNDY5ZDI1YjQ1M2E5Y2IzZTA1OWIz
14
- MGM4MDAxZTNlNDM2N2Q1ZTI3NzcxOGU0NTBmMGFmYTU2MDBhNDMxNjdlNzI5
15
- ZTRlYTJkMzczNWZkZjk1NjkxZTVmYzc3N2ZjY2FlMDVhMTAyYjg=
13
+ ZmZlMGNlY2Q1M2E1MzkwMThlNmI0NTEwOTEzNTcxNzZjYWU2MGE0MDQ3ODUz
14
+ MjhjYjZjMjQxMGQzZjJkMTYwZDNhNWViMjhmZDk4ZGM1MjFkYjIxNTJkYjBm
15
+ MjZkMTE2YzdhYjg0NTRiMzhkZjMzOWVlY2VlMGFlMWU4ZDAxOGM=
@@ -1,3 +1,6 @@
1
+ ### 0.2.08 / 2015-04-28
2
+ * [NEW] Added folder with JSON demo parser under `examples\parsers\demo-JSON`.
3
+
1
4
  ### 0.2.07 / 2015-04-22
2
5
  * [NEW] Rake file added in `examples` folder. It allows running all the examples at once.
3
6
 
data/README.md CHANGED
@@ -40,6 +40,22 @@ This project is in "earley" stage.
40
40
  - Provide documentation and examples
41
41
 
42
42
 
43
+ ### Other similar Ruby projects ###
44
+ __Rley__ isn't the sole Ruby implementation of the Earley parser algorithm.
45
+ Here are a few other ones:
46
+ - [Kanocc gem](https://rubygems.org/gems/kanocc) -- Advertised as a Ruby based parsing and translation framework.
47
+ Although the gem dates from 2009, the author still maintains it in a public repository on [GitHub](https://github.com/surlykke/Kanocc).
48
+ The grammar symbols (tokens and non-terminals) must be represented as (sub)classes.
49
+ Grammar rules are methods of the non-terminal classes. A rule can have a block code argument
50
+ that specifies the semantic action when that rule is applied.
51
+ - [lc1 project](https://github.com/kp0v/lc1) -- Advertised as a combination of Earley and Viterbi algorithms for [Probabilistic] Context Free Grammars
52
+ Aimed at parsing Brazilian Portuguese.
53
+ - [earley project](https://github.com/joshingly/earley) -- An Earley parser (grammar rules are specified in JSON format).
54
+ The code doesn't seem to be maintained: latest commit dates from Nov. 2011.
55
+ - [linguist project](https://github.com/davidkellis/linguist) -- Advertised as a library for parsing context-free languages.
56
+ It is a recognizer not a parser. In other words it can only tell whether a given input
57
+ conforms to the grammar rules or not. As such it cannot build parse trees.
58
+ The code doesn't seem to be maintained: latest commit dates from Oct. 2011.
43
59
 
44
60
  Copyright
45
61
  ---------
@@ -0,0 +1,31 @@
1
# Purpose: define the grammar of the JSON data format that is used by the
# demo JSON parser. Loading this file defines the constant GrammarJSON.
require 'rley' # Load the gem

########################################
# Define a grammar for JSON
builder = Rley::Syntax::GrammarBuilder.new

# Terminal symbols (token types produced by the lexer)
builder.add_terminals('KEYWORD') # For true, false, null keywords
builder.add_terminals('JSON_STRING', 'JSON_NUMBER')
builder.add_terminals('LACCOL', 'RACCOL') # For '{', '}' delimiters
builder.add_terminals('LBRACKET', 'RBRACKET') # For '[', ']' delimiters
builder.add_terminals('COLON', 'COMMA') # For ':', ',' separators

# A JSON text consists of one single value
builder.add_production('json_text' => 'json_value')

# A value is an object, an array or a literal
builder.add_production('json_value' => 'json_object')
builder.add_production('json_value' => 'json_array')
builder.add_production('json_value' => 'JSON_STRING')
builder.add_production('json_value' => 'JSON_NUMBER')
builder.add_production('json_value' => 'KEYWORD')

# Objects: '{' pair (',' pair)* '}' or the empty object '{' '}'
builder.add_production('json_object' => %w[LACCOL json_pairs RACCOL])
builder.add_production('json_object' => %w[LACCOL RACCOL])
builder.add_production('json_pairs' => %w[json_pairs COMMA single_pair])
builder.add_production('json_pairs' => 'single_pair')
builder.add_production('single_pair' => %w[JSON_STRING COLON json_value])

# Arrays: '[' value (',' value)* ']' or the empty array '[' ']'
builder.add_production('json_array' => %w[LBRACKET array_items RBRACKET])
# The empty array starts with an opening bracket. The previous rule
# (RBRACKET RBRACKET) rejected '[]' and accepted ']]' instead.
builder.add_production('json_array' => %w[LBRACKET RBRACKET])
builder.add_production('array_items' => %w[array_items COMMA json_value])
builder.add_production('array_items' => %w[json_value])

# And now build the grammar...
GrammarJSON = builder.grammar
@@ -0,0 +1,114 @@
1
+ # File: JSON_lexer.rb
2
+ # Lexer for the JSON data format
3
+ require 'rley' # Load the gem
4
+ require 'strscan'
5
+
6
# Lexer for JSON.
# Splits a JSON text into a sequence of Rley::Parser::Token objects.
# Each token pairs a lexeme (the matched text) with the grammar symbol
# retrieved from the grammar's name2symbol mapping.
class JSONLexer
  # Error raised when the input text cannot be split into valid tokens.
  class ScanError < StandardError; end

  # Mapping from single-character lexemes to the name of their terminal.
  LEXEME2NAME = {
    '{' => 'LACCOL',
    '}' => 'RACCOL',
    '[' => 'LBRACKET',
    ']' => 'RBRACKET',
    ',' => 'COMMA',
    ':' => 'COLON'
  }.freeze

  # The StringScanner that walks through the input text.
  attr_reader(:scanner)

  # Current line number (1-based).
  attr_reader(:lineno)

  # Offset in the input text where the current line begins.
  attr_reader(:line_start)

  # Mapping: terminal name => grammar symbol (taken from the grammar).
  attr_reader(:name2symbol)

  # @param source [String] the JSON text to tokenize
  # @param aGrammar [#name2symbol] the grammar that supplies the terminal
  #   symbols; it must respond to name2symbol
  def initialize(source, aGrammar)
    @scanner = StringScanner.new(source)
    @name2symbol = aGrammar.name2symbol
    @lineno = 1
    @line_start = 0
  end

  # Consume the remaining input and return the tokens found in it.
  # @return [Array] the tokens in their order of occurrence
  # @raise [ScanError] when the input contains an invalid token
  def tokens
    tok_sequence = []
    until scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Skip leading whitespace, then recognize one token.
  # @return [Object, nil] the next token or nil when only whitespace was left
  # @raise [ScanError] when no valid token starts at the current position
  def _next_token
    skip_whitespaces
    curr_ch = scanner.getch # curr_ch is at start of token or eof reached...
    return nil if curr_ch.nil?

    case curr_ch
    when '{', '}', '[', ']', ',', ':'
      build_token(curr_ch, LEXEME2NAME[curr_ch])
    when '"' # Start string delimiter found
      scan_string
    when /[ftn]/ # First letter of keywords
      scan_keyword
    when /[-0-9]/ # Start character of number literal found
      scan_number
    else # Unknown token
      erroneous = curr_ch + scanner.scan(/.{1,20}/).to_s
      raise ScanError, "Unknown token #{erroneous}"
    end
  end

  # Create a token for the given lexeme and terminal name.
  def build_token(aLexeme, aTypeName)
    Rley::Parser::Token.new(aLexeme, name2symbol[aTypeName])
  end

  # Scan the text of a string literal; the opening quote is already consumed.
  # The lexeme is the raw text between the quotes (escapes are not decoded).
  def scan_string
    value = scanner.scan(/(?:[^"\\]|\\.)*/)
    # The character after the string body must be the closing quote.
    # Checking for nil only would let a dangling backslash slip through.
    end_delimiter = scanner.getch
    raise ScanError, 'No closing quotes (") found' unless end_delimiter == '"'

    build_token(value, 'JSON_STRING')
  end

  # Scan one of the keywords false, true, null.
  def scan_keyword
    scanner.pos = scanner.pos - 1 # Simulate putback
    keyw = scanner.scan(/false|true|null/)
    if keyw.nil?
      invalid_keyw = scanner.scan(/\w+/)
      raise ScanError, "Invalid keyword: #{invalid_keyw}"
    end

    build_token(keyw, 'KEYWORD')
  end

  # Scan a number literal (optional sign, fraction and exponent).
  def scan_number
    scanner.pos = scanner.pos - 1 # Simulate putback
    # The exponent may have several digits (e.g. 1e10).
    value = scanner.scan(/-?[0-9]+(?:\.[0-9]+)?(?:[eE][-+]?[0-9]+)?/)
    if value.nil?
      # Happens for a minus sign without digits. Failing here prevents
      # an endless loop, since no character would be consumed otherwise.
      invalid_num = scanner.scan(/\S{1,20}/)
      raise ScanError, "Invalid number: #{invalid_num}"
    end

    build_token(value, 'JSON_NUMBER')
  end

  # Move the scanner past a run of whitespace characters and keep
  # the line bookkeeping up to date.
  def skip_whitespaces
    matched = scanner.scan(/[ \t\f\n\r]+/)
    return if matched.nil?

    # A CR-LF pair (or LF-CR pair) counts as one single line break.
    newline_count = matched.scan(/\r\n|\n\r?|\r/).size
    return if newline_count.zero?

    # Number of whitespace characters that follow the last line break.
    # Whitespace characters are all single-byte, so sizes are byte counts.
    trailing = matched.size - matched.rindex(/[\n\r]/) - 1
    newline_detected(newline_count, scanner.pos - trailing)
  end

  # Register that line breaks were met.
  # @param count [Integer] number of line breaks
  # @param new_line_start [Integer] offset where the current line begins
  def newline_detected(count, new_line_start = scanner.pos)
    @lineno += count
    @line_start = new_line_start
  end
end # class
113
+
114
+
@@ -0,0 +1,89 @@
1
# Purpose: to demonstrate how to build and render a parse tree for JSON
# language
require 'pp'
require 'rley' # Load the gem
# The file name must match the case of the shipped file (JSON_lexer.rb),
# otherwise loading fails on case-sensitive file systems.
require_relative 'JSON_lexer'

# Steps to render a parse tree (of a valid parsed input):
# 1. Define a grammar
# 2. Create a parser for that grammar
# 3. Tokenize the input
# 4. Let the parser process the input
# 5. Generate a parse tree from the parse result
# 6. Render the parse tree (in JSON)

########################################
# Step 1. Load a grammar for JSON
require_relative 'JSON_grammar'

# A JSON parser derived from our general Earley parser.
class JSONParser < Rley::Parser::EarleyParser
  # Name of the file passed to the most recent parse_file call
  # (nil as long as parse_file was not invoked).
  attr_reader(:source_file)

  # Constructor
  def initialize
    # Build the Earley parser with the JSON grammar
    super(GrammarJSON)
  end

  # Tokenize the content of the given file and parse the resulting tokens.
  # @param aFilename [String] path to the JSON file to parse
  # @return the object returned by the inherited parse method
  def parse_file(aFilename)
    @source_file = aFilename
    tokens = tokenize_file(aFilename)
    parse(tokens)
  end

  private

  # Read the whole file and convert its text into a sequence of tokens.
  # @param aFilename [String] path to the file to read
  # @return [Array] the tokens delivered by the JSON lexer
  def tokenize_file(aFilename)
    input_source = File.read(aFilename)
    lexer = JSONLexer.new(input_source, GrammarJSON)
    lexer.tokens
  end
end # class
46
+
47
+ =begin
48
+ ########################################
49
+ # Step 3. Create a parser for that grammar
50
+ # parser = Rley::Parser::EarleyParser.new(GrammarJSON)
51
+ parser = JSONParser.new
52
+
53
+
54
+ ########################################
55
+ # Step 4. Tokenize the input file
56
+ file_name = 'sample02.json'
57
+ =begin
58
+ input_source = nil
59
+ File.open(file_name, 'r') { |f| input_source = f.read }
60
+
61
+ lexer = JSONLexer.new(input_source, GrammarJSON)
62
+ tokens = lexer.tokens
63
+ #=end
64
+
65
+ ########################################
66
+ # Step 5. Let the parser process the input
67
+ result = parser.parse_file(file_name) # parser.parse(tokens)
68
+ unless result.success?
69
+ puts "Parsing of '#{file_name}' failed"
70
+ exit(1)
71
+ end
72
+
73
+ ########################################
74
+ # Step 6. Generate a parse tree from the parse result
75
+ ptree = result.parse_tree
76
+
77
+ ########################################
78
+ # Step 7. Render the parse tree (in JSON)
79
+ # Let's create a parse tree visitor
80
+ visitor = Rley::ParseTreeVisitor.new(ptree)
81
+
82
+ #Here we create a renderer object...
83
+ renderer = Rley::Formatter::Json.new(STDOUT)
84
+
85
+ # Now emit the parse tree as JSON on the console output
86
+ puts "JSON rendering of the parse tree for '#{file_name}' input:"
87
+ renderer.render(visitor)
88
+ =end
89
+ # End of file
@@ -0,0 +1,42 @@
1
# Purpose: command-line demo that parses a JSON file and renders
# the resulting parse tree in JSON format on the console.
require_relative 'JSON_parser'

# The name of the input file must be given in command-line.
# Check it first: no need to build a parser just to display the usage text.
if ARGV.empty?
  msg = <<-END_MSG
Command-line syntax:
  ruby #{__FILE__} filename
  where:
    filename is the name of a JSON file

  Example:
  ruby #{__FILE__} sample01.json
  END_MSG
  puts msg
  exit(1)
end
file_name = ARGV[0]

# Create a JSON parser object
parser = JSONParser.new

# Parse the input file
result = parser.parse_file(file_name) # result object contains parse details

unless result.success?
  # Stop if the parse failed...
  puts "Parsing of '#{file_name}' failed"
  exit(1)
end

# Generate a parse tree from the parse result
ptree = result.parse_tree

# Do something with the parse tree: render it on the output console.
# Step a: Let's create a parse tree visitor
visitor = Rley::ParseTreeVisitor.new(ptree)

# Step b: Select the rendition format to be JSON
renderer = Rley::Formatter::Json.new(STDOUT)

# Step c: Now emit the parse tree as JSON on the console output
puts "JSON rendering of the parse tree for '#{file_name}' input:"
renderer.render(visitor)
# End of file
@@ -102,10 +102,6 @@ def tokenizer(aText, aGrammar)
102
102
  return tokens
103
103
  end
104
104
 
105
- ########################################
106
- # Step 3. Create a parser for that grammar
107
- parser = Rley::Parser::EarleyParser.new(grammar_l1)
108
-
109
105
  ########################################
110
106
  # Step 3. Tokenize the input
111
107
  valid_input = 'I want the flight from Alaska through Chicago to Houston'
@@ -113,6 +109,10 @@ valid_input = 'I want the flight from Alaska through Chicago to Houston'
113
109
  # Another sentence: it is a flight from Chicago
114
110
  tokens = tokenizer(valid_input, grammar_l1)
115
111
 
112
+ ########################################
113
+ # Step 4. Create a parser for that grammar
114
+ parser = Rley::Parser::EarleyParser.new(grammar_l1)
115
+
116
116
  ########################################
117
117
  # Step 5. Let the parser process the input
118
118
  result = parser.parse(tokens)
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.2.08'
6
+ Version = '0.2.09'
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rley
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.08
4
+ version: 0.2.09
5
5
  platform: ruby
6
6
  authors:
7
7
  - Dimitri Geshef
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-22 00:00:00.000000000 Z
11
+ date: 2015-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake
@@ -101,6 +101,10 @@ files:
101
101
  - README.md
102
102
  - examples/grammars/grammar_abc.rb
103
103
  - examples/grammars/grammar_L0.rb
104
+ - examples/parsers/demo-JSON/demo_json.rb
105
+ - examples/parsers/demo-JSON/JSON_grammar.rb
106
+ - examples/parsers/demo-JSON/JSON_lexer.rb
107
+ - examples/parsers/demo-JSON/JSON_parser.rb
104
108
  - examples/parsers/parsing_abc.rb
105
109
  - examples/parsers/parsing_ambig.rb
106
110
  - examples/parsers/parsing_b_expr.rb