rley 0.5.14 → 0.6.00

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -2
  3. data/README.md +29 -31
  4. data/examples/NLP/benchmark_pico_en.rb +34 -34
  5. data/examples/NLP/engtagger.rb +1 -1
  6. data/examples/NLP/nano_eng/nano_en_demo.rb +23 -28
  7. data/examples/NLP/nano_eng/nano_grammar.rb +1 -1
  8. data/examples/NLP/pico_en_demo.rb +28 -31
  9. data/examples/data_formats/JSON/json_ast_builder.rb +11 -70
  10. data/examples/data_formats/JSON/json_demo.rb +32 -14
  11. data/examples/data_formats/JSON/json_grammar.rb +1 -1
  12. data/examples/data_formats/JSON/json_lexer.rb +5 -11
  13. data/examples/general/SRL/lib/ast_builder.rb +5 -28
  14. data/examples/general/SRL/lib/tokenizer.rb +2 -5
  15. data/examples/general/SRL/spec/integration_spec.rb +12 -5
  16. data/examples/general/SRL/spec/tokenizer_spec.rb +13 -14
  17. data/examples/general/SRL/srl_demo.rb +16 -9
  18. data/examples/general/calc_iter1/calc_ast_builder.rb +29 -85
  19. data/examples/general/calc_iter1/calc_demo.rb +15 -6
  20. data/examples/general/calc_iter1/calc_lexer.rb +2 -5
  21. data/examples/general/calc_iter1/spec/calculator_spec.rb +18 -19
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +9 -107
  23. data/examples/general/calc_iter2/calc_demo.rb +15 -8
  24. data/examples/general/calc_iter2/calc_lexer.rb +3 -5
  25. data/examples/general/calc_iter2/spec/calculator_spec.rb +18 -31
  26. data/lib/rley.rb +2 -1
  27. data/lib/rley/constants.rb +1 -1
  28. data/lib/rley/engine.rb +122 -0
  29. data/lib/rley/parse_rep/ast_base_builder.rb +128 -0
  30. data/lib/rley/{parser → parse_rep}/cst_builder.rb +1 -1
  31. data/lib/rley/{parser → parse_rep}/parse_forest_builder.rb +1 -1
  32. data/lib/rley/{parser → parse_rep}/parse_forest_factory.rb +2 -2
  33. data/lib/rley/{parser → parse_rep}/parse_rep_creator.rb +3 -3
  34. data/lib/rley/{parser → parse_rep}/parse_tree_builder.rb +4 -4
  35. data/lib/rley/{parser → parse_rep}/parse_tree_factory.rb +1 -1
  36. data/lib/rley/parser/gfg_parsing.rb +16 -4
  37. data/spec/rley/engine_spec.rb +127 -0
  38. data/spec/rley/formatter/asciitree_spec.rb +11 -13
  39. data/spec/rley/formatter/bracket_notation_spec.rb +11 -13
  40. data/spec/rley/formatter/debug_spec.rb +13 -15
  41. data/spec/rley/formatter/json_spec.rb +10 -14
  42. data/spec/rley/{parser → parse_rep}/ambiguous_parse_spec.rb +3 -3
  43. data/spec/rley/{parser → parse_rep}/ast_builder_spec.rb +34 -83
  44. data/spec/rley/{parser → parse_rep}/cst_builder_spec.rb +3 -3
  45. data/spec/rley/{parser → parse_rep}/groucho_spec.rb +3 -3
  46. data/spec/rley/{parser → parse_rep}/parse_forest_builder_spec.rb +4 -4
  47. data/spec/rley/{parser → parse_rep}/parse_forest_factory_spec.rb +2 -2
  48. data/spec/rley/{parser → parse_rep}/parse_tree_factory_spec.rb +2 -2
  49. data/spec/rley/parse_tree_visitor_spec.rb +12 -15
  50. data/spec/rley/support/ast_builder.rb +403 -0
  51. data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
  52. metadata +27 -28
  53. data/examples/data_formats/JSON/json_parser.rb +0 -46
  54. data/examples/general/SRL/lib/ast_building.rb +0 -20
  55. data/examples/general/SRL/lib/parser.rb +0 -26
  56. data/examples/general/calc_iter1/calc_parser.rb +0 -24
  57. data/examples/general/calc_iter2/ast_building.rb +0 -20
  58. data/examples/general/calc_iter2/calc_parser.rb +0 -24
@@ -7,7 +7,7 @@ require_relative 'json_ast_nodes'
7
7
  # The Builder pattern creates a complex object
8
8
  # (say, a parse tree) from simpler objects (terminal and non-terminal
9
9
  # nodes) and using a step by step approach.
10
- class JSONASTBuilder < Rley::Parser::ParseTreeBuilder
10
+ class JSONASTBuilder < Rley::ParseRep::ParseTreeBuilder
11
11
  Terminal2NodeClass = {
12
12
  'false' => JSONBooleanNode,
13
13
  'true' => JSONBooleanNode,
@@ -18,77 +18,18 @@ class JSONASTBuilder < Rley::Parser::ParseTreeBuilder
18
18
 
19
19
  protected
20
20
 
21
- def return_first_child(_range, _tokens, theChildren)
22
- return theChildren[0]
21
+ def terminal2node()
22
+ Terminal2NodeClass
23
23
  end
24
-
25
- def return_second_child(_range, _tokens, theChildren)
26
- return theChildren[1]
27
- end
28
-
29
- # Overriding method.
30
- # Create a parse tree object with given
31
- # node as root node.
32
- def create_tree(aRootNode)
33
- return Rley::PTree::ParseTree.new(aRootNode)
34
- end
35
-
36
- # Overriding method.
37
- # Factory method for creating a node object for the given
38
- # input token.
39
- # @param terminal [Terminal] Terminal symbol associated with the token
40
- # @param aTokenPosition [Integer] Position of token in the input stream
41
- # @param aToken [Token] The input token
42
- def new_leaf_node(_production, terminal, aTokenPosition, aToken)
43
- klass = Terminal2NodeClass.fetch(terminal.name, JSONTerminalNode)
44
- return klass.new(aToken, aTokenPosition)
24
+
25
+ # Default class for representing terminal nodes.
26
+ # @return [Class]
27
+ def terminalnode_class()
28
+ JSONTerminalNode
45
29
  end
46
-
47
- # Method to override.
48
- # Factory method for creating a parent node object.
49
- # @param aProduction [Production] Production rule
50
- # @param aRange [Range] Range of tokens matched by the rule
51
- # @param theTokens [Array] The input tokens
52
- # @param theChildren [Array] Children nodes (one per rhs symbol)
53
- def new_parent_node(aProduction, aRange, theTokens, theChildren)
54
- node = case aProduction.name
55
- when 'JSON-text_0' # rule 'JSON-text' => 'value'
56
- return_first_child(aRange, theTokens, theChildren)
57
-
58
- when /value_\d/
59
- return_first_child(aRange, theTokens, theChildren)
60
-
61
- when 'object_0'
62
- reduce_object_0(aProduction, aRange, theTokens, theChildren)
63
-
64
- when 'object_1'
65
- reduce_object_1(aRange, theTokens, theChildren)
66
-
67
- when 'member-list_0'
68
- reduce_member_list_0(aRange, theTokens, theChildren)
69
-
70
- when 'member-list_1'
71
- reduce_member_list_1(aProduction, aRange, theTokens, theChildren)
72
-
73
- when 'member_0'
74
- reduce_member_0(aProduction, aRange, theTokens, theChildren)
75
-
76
- when 'array_0'
77
- reduce_array_0(aProduction, aRange, theTokens, theChildren)
78
-
79
- when 'array_1'
80
- reduce_array_1(aRange, theTokens, theChildren)
81
-
82
- when 'array-items_0'
83
- reduce_array_items_0(aRange, theTokens, theChildren)
84
-
85
- when 'array-items_1'
86
- reduce_array_items_1(aProduction, aRange, theTokens, theChildren)
87
- else
88
- raise StandardError, "Don't know production #{aProduction.name}"
89
- end
90
-
91
- return node
30
+
31
+ def reduce_JSON_text_0(_aProd, _range, _tokens, theChildren)
32
+ return_first_child(_range, _tokens, theChildren)
92
33
  end
93
34
 
94
35
  # rule 'object' => %w[begin-object member-list end-object]
@@ -1,8 +1,9 @@
1
1
  require_relative 'cli_options'
2
- require_relative 'json_parser'
2
+ require_relative 'json_lexer'
3
3
  require_relative 'json_minifier'
4
4
  require_relative 'json_ast_builder'
5
5
 
6
+
6
7
  prog_name = 'json_demo'
7
8
  prog_version = '0.3.0'
8
9
 
@@ -14,16 +15,6 @@ if ARGV.empty?
14
15
  end
15
16
 
16
17
  file_name = ARGV[0]
17
- # Create a JSON parser object
18
- parser = JSONParser.new
19
- result = parser.parse_file(file_name) # result object contains parse details
20
-
21
- unless result.success?
22
- # Stop if parse failed...
23
- puts "Parsing of '#{file_name}' failed"
24
- puts result.failure_reason.message
25
- exit(1)
26
- end
27
18
 
28
19
  tree_rep = cli_options[:rep]
29
20
  renderer = nil
@@ -43,14 +34,41 @@ case cli_options[:format]
43
34
  raise StandardError, msg if tree_rep == :cst
44
35
  end
45
36
 
46
- tree_builder = tree_rep == :ast ? JSONASTBuilder : nil
37
+
38
+ # Create a Rley facade object
39
+ # If necessary, select AST representation
40
+ engine = Rley::Engine.new do |cfg|
41
+ builder = tree_rep == :ast ? JSONASTBuilder : nil
42
+ cfg.repr_builder = builder
43
+ end
44
+
45
+ ########################################
46
+ # Step 1. Load a grammar for JSON
47
+ require_relative 'json_grammar'
48
+ engine.use_grammar(GrammarJSON)
49
+
50
+
51
+ input_source = nil
52
+ File.open(file_name, 'r') { |f| input_source = f.read }
53
+ lexer = JSONLexer.new(input_source)
54
+
55
+ result = engine.parse(lexer.tokens)
56
+
57
+ unless result.success?
58
+ # Stop if parse failed...
59
+ puts "Parsing of '#{file_name}' failed"
60
+ puts result.failure_reason.message
61
+ exit(1)
62
+ end
63
+
64
+
47
65
 
48
66
  # Generate a parse tree from the parse result
49
- ptree = result.parse_tree(tree_builder)
67
+ ptree = engine.convert(result)
50
68
 
51
69
  if renderer
52
70
  # Let's create a parse tree visitor
53
- visitor = Rley::ParseTreeVisitor.new(ptree)
71
+ visitor = engine.ptree_visitor(ptree)
54
72
 
55
73
  # Now output formatted parse tree
56
74
  renderer.render(visitor)
@@ -13,7 +13,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
13
13
  add_terminals('begin-object', 'end-object') # For '{', '}' delimiters
14
14
  add_terminals('begin-array', 'end-array') # For '[', ']' delimiters
15
15
  add_terminals('name-separator', 'value-separator') # For ':', ',' separators
16
- rule 'JSON-text' => 'value'
16
+ rule 'JSON_text' => 'value'
17
17
  rule 'value' => 'false'
18
18
  rule 'value' => 'null'
19
19
  rule 'value' => 'true'
@@ -8,7 +8,6 @@ class JSONLexer
8
8
  attr_reader(:scanner)
9
9
  attr_reader(:lineno)
10
10
  attr_reader(:line_start)
11
- attr_reader(:name2symbol)
12
11
 
13
12
  @@lexeme2name = {
14
13
  '{' => 'begin-object',
@@ -21,9 +20,8 @@ class JSONLexer
21
20
 
22
21
  class ScanError < StandardError; end
23
22
 
24
- def initialize(source, aGrammar)
23
+ def initialize(source)
25
24
  @scanner = StringScanner.new(source)
26
- @name2symbol = aGrammar.name2symbol
27
25
  @lineno = 1
28
26
  end
29
27
 
@@ -49,8 +47,7 @@ class JSONLexer
49
47
 
50
48
  case curr_ch
51
49
  when '{', '}', '[', ']', ',', ':'
52
- type_name = @@lexeme2name[curr_ch]
53
- token_type = name2symbol[type_name]
50
+ token_type = @@lexeme2name[curr_ch]
54
51
  token = Rley::Lexical::Token.new(curr_ch, token_type)
55
52
 
56
53
  when /[ftn]/ # First letter of keywords
@@ -60,8 +57,7 @@ class JSONLexer
60
57
  invalid_keyw = scanner.scan(/\w+/)
61
58
  raise ScanError.new("Invalid keyword: #{invalid_keyw}")
62
59
  else
63
- token_type = name2symbol[keyw]
64
- token = Rley::Lexical::Token.new(keyw, token_type)
60
+ token = Rley::Lexical::Token.new(keyw, keyw)
65
61
  end
66
62
 
67
63
  # LITERALS
@@ -70,14 +66,12 @@ class JSONLexer
70
66
  end_delimiter = scanner.getch
71
67
  err_msg = 'No closing quotes (") found'
72
68
  raise ScanError.new(err_msg) if end_delimiter.nil?
73
- token_type = name2symbol['string']
74
- token = Rley::Lexical::Token.new(value, token_type)
69
+ token = Rley::Lexical::Token.new(value, 'string')
75
70
 
76
71
  when /[-0-9]/ # Start character of number literal found
77
72
  @scanner.pos = scanner.pos - 1 # Simulate putback
78
73
  value = scanner.scan(/-?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9])?/)
79
- token_type = name2symbol['number']
80
- token = Rley::Lexical::Token.new(value, token_type)
74
+ token = Rley::Lexical::Token.new(value, 'number')
81
75
 
82
76
  else # Unknown token
83
77
  erroneous = curr_ch.nil? ? '' : curr_ch
@@ -1,5 +1,4 @@
1
1
  require 'stringio'
2
- require_relative 'ast_building'
3
2
  require_relative 'regex_repr'
4
3
 
5
4
  # The purpose of an ASTBuilder is to build piece by piece an AST
@@ -9,14 +8,17 @@ require_relative 'regex_repr'
9
8
  # The Builder pattern creates a complex object
10
9
  # (say, a parse tree) from simpler objects (terminal and non-terminal
11
10
  # nodes) and using a step by step approach.
12
- class ASTBuilder < Rley::Parser::ParseTreeBuilder
13
- include ASTBuilding
11
+ class ASTBuilder < Rley::ParseRep::ASTBaseBuilder
14
12
 
15
13
  Terminal2NodeClass = { }.freeze
16
14
 
17
15
  attr_reader :options
18
16
 
19
17
  protected
18
+
19
+ def terminal2node()
20
+ Terminal2NodeClass
21
+ end
20
22
 
21
23
  # Overriding method.
22
24
  # Factory method for creating a node object for the given
@@ -30,31 +32,6 @@ class ASTBuilder < Rley::Parser::ParseTreeBuilder
30
32
  return node
31
33
  end
32
34
 
33
- # Method to override.
34
- # Factory method for creating a parent node object.
35
- # @param aProduction [Production] Production rule
36
- # @param aRange [Range] Range of tokens matched by the rule
37
- # @param theTokens [Array] The input tokens
38
- # @param theChildren [Array] Children nodes (one per rhs symbol)
39
- def new_parent_node(aProduction, aRange, theTokens, theChildren)
40
- short_name = aProduction.name
41
- method_name = 'reduce_' + short_name
42
- if self.respond_to?(method_name, true)
43
- node = send(method_name, aProduction, aRange, theTokens, theChildren)
44
- else
45
- # Default action...
46
- node = case aProduction.rhs.size
47
- when 0
48
- nil
49
- when 1
50
- return_first_child(aRange, theTokens, theChildren)
51
- else
52
- raise StandardError, "Don't know production '#{aProduction.name}'"
53
- end
54
- end
55
- return node
56
- end
57
-
58
35
  def multiplicity(lowerBound, upperBound)
59
36
  return SRL::Regex::Multiplicity.new(lowerBound, upperBound, :greedy)
60
37
  end
@@ -15,7 +15,6 @@ module SRL
15
15
  attr_reader(:scanner)
16
16
  attr_reader(:lineno)
17
17
  attr_reader(:line_start)
18
- attr_reader(:name2symbol)
19
18
 
20
19
  @@lexeme2name = {
21
20
  '(' => 'LPAREN',
@@ -78,9 +77,8 @@ module SRL
78
77
 
79
78
  class ScanError < StandardError; end
80
79
 
81
- def initialize(source, aGrammar)
80
+ def initialize(source)
82
81
  @scanner = StringScanner.new(source)
83
- @name2symbol = aGrammar.name2symbol
84
82
  @lineno = 1
85
83
  end
86
84
 
@@ -132,9 +130,8 @@ module SRL
132
130
  end
133
131
 
134
132
  def build_token(aSymbolName, aLexeme)
135
- token_type = name2symbol[aSymbolName]
136
133
  begin
137
- token = Rley::Lexical::Token.new(aLexeme, token_type)
134
+ token = Rley::Lexical::Token.new(aLexeme, aSymbolName)
138
135
  rescue Exception => ex
139
136
  puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
140
137
  raise ex
@@ -1,19 +1,26 @@
1
1
  require_relative 'spec_helper' # Use the RSpec framework
2
- require_relative '../lib/parser'
2
+ require_relative '../lib/tokenizer'
3
+ require_relative '../lib/grammar'
3
4
  require_relative '../lib/ast_builder'
4
5
 
5
6
  describe 'Integration tests:' do
6
7
  def parse(someSRL)
7
- parser = SRL::Parser.new
8
- result = parser.parse_SRL(someSRL)
8
+ tokenizer = SRL::Tokenizer.new(someSRL)
9
+ @engine.parse(tokenizer.tokens)
9
10
  end
10
11
 
11
12
  def regexp_repr(aResult)
12
13
  # Generate an abstract syntax parse tree from the parse result
13
- regexp_expr_builder = ASTBuilder
14
- tree = aResult.parse_tree(regexp_expr_builder)
14
+ tree = @engine.convert(aResult)
15
15
  regexp = tree.root
16
16
  end
17
+
18
+ before(:each) do
19
+ @engine = Rley::Engine.new do |config|
20
+ config.repr_builder = ASTBuilder
21
+ end
22
+ @engine.use_grammar(SRL::Grammar)
23
+ end
17
24
 
18
25
  context 'Parsing character ranges:' do
19
26
  it "should parse 'letter from ... to ...' syntax" do
@@ -1,5 +1,4 @@
1
1
  require_relative 'spec_helper' # Use the RSpec framework
2
- require_relative '../lib/grammar'
3
2
  require_relative '../lib/tokenizer' # Load the class under test
4
3
 
5
4
 
@@ -8,18 +7,18 @@ module SRL
8
7
  def match_expectations(aTokenizer, theExpectations)
9
8
  aTokenizer.tokens.each_with_index do |token, i|
10
9
  terminal, lexeme = theExpectations[i]
11
- expect(token.terminal.name).to eq(terminal)
10
+ expect(token.terminal).to eq(terminal)
12
11
  expect(token.lexeme).to eq(lexeme)
13
12
  end
14
13
  end
15
14
 
16
15
 
17
- subject { Tokenizer.new('', SRL::Grammar) }
16
+ subject { Tokenizer.new('') }
18
17
 
19
18
  context 'Initialization:' do
20
19
 
21
20
  it 'should be initialized with a text to tokenize and a grammar' do
22
- expect { Tokenizer.new('anything', SRL::Grammar) }.not_to raise_error
21
+ expect { Tokenizer.new('anything') }.not_to raise_error
23
22
  end
24
23
 
25
24
  it 'should have its scanner initialized' do
@@ -28,20 +27,20 @@ module SRL
28
27
  end # context
29
28
 
30
29
  context 'Single token recognition:' do
31
- # it 'should tokenize delimiters and separators' do
32
- # subject.scanner.string = ','
33
- # token = subject.tokens.first
34
- # expect(token).to be_kind_of(Rley::Lexical::Token)
35
- # expect(token.terminal.name).to eq('COMMA')
36
- # expect(token.lexeme).to eq(',')
37
- # end
30
+ it 'should tokenize delimiters and separators' do
31
+ subject.scanner.string = ','
32
+ token = subject.tokens.first
33
+ expect(token).to be_kind_of(Rley::Lexical::Token)
34
+ expect(token.terminal).to eq('COMMA')
35
+ expect(token.lexeme).to eq(',')
36
+ end
38
37
 
39
38
  it 'should tokenize keywords' do
40
39
  sample = 'between Exactly oncE optional TWICE'
41
40
  subject.scanner.string = sample
42
41
  subject.tokens.each do |tok|
43
42
  expect(tok).to be_kind_of(Rley::Lexical::Token)
44
- expect(tok.terminal.name).to eq(tok.lexeme.upcase)
43
+ expect(tok.terminal).to eq(tok.lexeme.upcase)
45
44
  end
46
45
  end
47
46
 
@@ -49,7 +48,7 @@ module SRL
49
48
  subject.scanner.string = ' 123 '
50
49
  token = subject.tokens.first
51
50
  expect(token).to be_kind_of(Rley::Lexical::Token)
52
- expect(token.terminal.name).to eq('INTEGER')
51
+ expect(token.terminal).to eq('INTEGER')
53
52
  expect(token.lexeme).to eq('123')
54
53
  end
55
54
 
@@ -57,7 +56,7 @@ module SRL
57
56
  subject.scanner.string = ' 1 '
58
57
  token = subject.tokens.first
59
58
  expect(token).to be_kind_of(Rley::Lexical::Token)
60
- expect(token.terminal.name).to eq('DIGIT_LIT')
59
+ expect(token.terminal).to eq('DIGIT_LIT')
61
60
  expect(token.lexeme).to eq('1')
62
61
  end
63
62
  end # context
@@ -1,4 +1,5 @@
1
- require_relative './lib/parser'
1
+ require_relative './lib/tokenizer'
2
+ require_relative './lib/grammar'
2
3
  require_relative './lib/ast_builder'
3
4
 
4
5
  def print_title(aTitle)
@@ -17,9 +18,6 @@ def print_tree(aTitle, aParseTree)
17
18
  puts ''
18
19
  end
19
20
 
20
- # Create a calculator parser object
21
- parser = SRL::Parser.new
22
-
23
21
  # Parse the input expression in command-line
24
22
  if ARGV.empty?
25
23
  my_name = File.basename(__FILE__)
@@ -42,7 +40,16 @@ END_MSG
42
40
  exit(1)
43
41
  end
44
42
  puts ARGV[0]
45
- result = parser.parse_SRL(ARGV[0])
43
+
44
+ # Create a Rley facade object
45
+ engine = Rley::Engine.new
46
+
47
+ ########################################
48
+ # Step 1. Load a grammar for SRL
49
+ engine.use_grammar(SRL::Grammar)
50
+
51
+ lexer = SRL::Tokenizer.new(ARGV[0])
52
+ result = engine.parse(lexer.tokens)
46
53
 
47
54
  unless result.success?
48
55
  # Stop if the parse failed...
@@ -53,12 +60,12 @@ end
53
60
 
54
61
 
55
62
  # Generate a concrete syntax parse tree from the parse result
56
- cst_ptree = result.parse_tree
63
+ cst_ptree = engine.convert(result)
57
64
  print_tree('Concrete Syntax Tree (CST)', cst_ptree)
58
65
 
59
- # Generate a regexp literal representation from the parse result
60
- tree_builder = ASTBuilder
61
- ast_ptree = result.parse_tree(tree_builder)
66
+ # Generate an abstract syntax tree (AST) from the parse result
67
+ engine.configuration.repr_builder = ASTBuilder
68
+ ast_ptree = engine.convert(result)
62
69
 
63
70
  # Now output the regexp literal
64
71
  root = ast_ptree.root