rley 0.5.14 → 0.6.00

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -2
  3. data/README.md +29 -31
  4. data/examples/NLP/benchmark_pico_en.rb +34 -34
  5. data/examples/NLP/engtagger.rb +1 -1
  6. data/examples/NLP/nano_eng/nano_en_demo.rb +23 -28
  7. data/examples/NLP/nano_eng/nano_grammar.rb +1 -1
  8. data/examples/NLP/pico_en_demo.rb +28 -31
  9. data/examples/data_formats/JSON/json_ast_builder.rb +11 -70
  10. data/examples/data_formats/JSON/json_demo.rb +32 -14
  11. data/examples/data_formats/JSON/json_grammar.rb +1 -1
  12. data/examples/data_formats/JSON/json_lexer.rb +5 -11
  13. data/examples/general/SRL/lib/ast_builder.rb +5 -28
  14. data/examples/general/SRL/lib/tokenizer.rb +2 -5
  15. data/examples/general/SRL/spec/integration_spec.rb +12 -5
  16. data/examples/general/SRL/spec/tokenizer_spec.rb +13 -14
  17. data/examples/general/SRL/srl_demo.rb +16 -9
  18. data/examples/general/calc_iter1/calc_ast_builder.rb +29 -85
  19. data/examples/general/calc_iter1/calc_demo.rb +15 -6
  20. data/examples/general/calc_iter1/calc_lexer.rb +2 -5
  21. data/examples/general/calc_iter1/spec/calculator_spec.rb +18 -19
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +9 -107
  23. data/examples/general/calc_iter2/calc_demo.rb +15 -8
  24. data/examples/general/calc_iter2/calc_lexer.rb +3 -5
  25. data/examples/general/calc_iter2/spec/calculator_spec.rb +18 -31
  26. data/lib/rley.rb +2 -1
  27. data/lib/rley/constants.rb +1 -1
  28. data/lib/rley/engine.rb +122 -0
  29. data/lib/rley/parse_rep/ast_base_builder.rb +128 -0
  30. data/lib/rley/{parser → parse_rep}/cst_builder.rb +1 -1
  31. data/lib/rley/{parser → parse_rep}/parse_forest_builder.rb +1 -1
  32. data/lib/rley/{parser → parse_rep}/parse_forest_factory.rb +2 -2
  33. data/lib/rley/{parser → parse_rep}/parse_rep_creator.rb +3 -3
  34. data/lib/rley/{parser → parse_rep}/parse_tree_builder.rb +4 -4
  35. data/lib/rley/{parser → parse_rep}/parse_tree_factory.rb +1 -1
  36. data/lib/rley/parser/gfg_parsing.rb +16 -4
  37. data/spec/rley/engine_spec.rb +127 -0
  38. data/spec/rley/formatter/asciitree_spec.rb +11 -13
  39. data/spec/rley/formatter/bracket_notation_spec.rb +11 -13
  40. data/spec/rley/formatter/debug_spec.rb +13 -15
  41. data/spec/rley/formatter/json_spec.rb +10 -14
  42. data/spec/rley/{parser → parse_rep}/ambiguous_parse_spec.rb +3 -3
  43. data/spec/rley/{parser → parse_rep}/ast_builder_spec.rb +34 -83
  44. data/spec/rley/{parser → parse_rep}/cst_builder_spec.rb +3 -3
  45. data/spec/rley/{parser → parse_rep}/groucho_spec.rb +3 -3
  46. data/spec/rley/{parser → parse_rep}/parse_forest_builder_spec.rb +4 -4
  47. data/spec/rley/{parser → parse_rep}/parse_forest_factory_spec.rb +2 -2
  48. data/spec/rley/{parser → parse_rep}/parse_tree_factory_spec.rb +2 -2
  49. data/spec/rley/parse_tree_visitor_spec.rb +12 -15
  50. data/spec/rley/support/ast_builder.rb +403 -0
  51. data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
  52. metadata +27 -28
  53. data/examples/data_formats/JSON/json_parser.rb +0 -46
  54. data/examples/general/SRL/lib/ast_building.rb +0 -20
  55. data/examples/general/SRL/lib/parser.rb +0 -26
  56. data/examples/general/calc_iter1/calc_parser.rb +0 -24
  57. data/examples/general/calc_iter2/ast_building.rb +0 -20
  58. data/examples/general/calc_iter2/calc_parser.rb +0 -24
@@ -7,96 +7,15 @@ require_relative 'calc_ast_nodes'
7
7
  # The Builder pattern creates a complex object
8
8
  # (say, a parse tree) from simpler objects (terminal and non-terminal
9
9
  # nodes) and using a step by step approach.
10
- class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
10
+ class CalcASTBuilder < Rley::ParseRep::ASTBaseBuilder
11
11
  Terminal2NodeClass = {
12
12
  'NUMBER' => CalcNumberNode
13
13
  }.freeze
14
14
 
15
15
  protected
16
-
17
- def return_first_child(_range, _tokens, theChildren)
18
- return theChildren[0]
19
- end
20
-
21
- def return_second_child(_range, _tokens, theChildren)
22
- return theChildren[1]
23
- end
24
-
25
- def return_last_child(_range, _tokens, theChildren)
26
- return theChildren[-1]
27
- end
28
-
29
- # Overriding method.
30
- # Create a parse tree object with given
31
- # node as root node.
32
- def create_tree(aRootNode)
33
- return Rley::PTree::ParseTree.new(aRootNode)
34
- end
35
-
36
- # Overriding method.
37
- # Factory method for creating a node object for the given
38
- # input token.
39
- # @param aTerminal [Terminal] Terminal symbol associated with the token
40
- # @param aTokenPosition [Integer] Position of token in the input stream
41
- # @param aToken [Token] The input token
42
- def new_leaf_node(_production, aTerminal, aTokenPosition, aToken)
43
- klass = Terminal2NodeClass.fetch(aTerminal.name, CalcTerminalNode)
44
- node = if klass
45
- klass.new(aToken, aTokenPosition)
46
- else
47
- PTree::TerminalNode.new(aToken, aTokenPosition)
48
- end
49
-
50
- return node
51
- end
52
-
53
- # Method to override.
54
- # Factory method for creating a parent node object.
55
- # @param aProduction [Production] Production rule
56
- # @param aRange [Range] Range of tokens matched by the rule
57
- # @param theTokens [Array] The input tokens
58
- # @param theChildren [Array] Children nodes (one per rhs symbol)
59
- def new_parent_node(aProduction, aRange, theTokens, theChildren)
60
- node = case aProduction.name
61
- when 'expression_0' # rule 'expression' => 'simple_expression'
62
- return_first_child(aRange, theTokens, theChildren)
63
-
64
- when 'simple_expression_0' # rule 'simple_expression' => 'term'
65
- return_first_child(aRange, theTokens, theChildren)
66
-
67
- when 'simple_expression_1'
68
- # rule 'simple_expression' => %w[simple_expression add_operator term]
69
- reduce_simple_expression_1(aProduction, aRange, theTokens, theChildren)
70
-
71
- when 'term_0' # rule 'term' => 'factor'
72
- return_first_child(aRange, theTokens, theChildren)
73
-
74
- when 'term_1' # rule 'term' => %w[term mul_operator factor]
75
- reduce_term_1(aProduction, aRange, theTokens, theChildren)
76
-
77
- when 'factor_0' # rule 'factor' => 'NUMBER'
78
- return_first_child(aRange, theTokens, theChildren)
79
-
80
- when 'factor_1' # rule 'factor' => %w[LPAREN expression RPAREN]
81
- return_second_child(aRange, theTokens, theChildren)
82
-
83
- when 'add_operator_0' # rule 'add_operator' => 'PLUS'
84
- reduce_add_operator_0(aProduction, aRange, theTokens, theChildren)
85
-
86
- when 'add_operator_1' # rule 'add_operator' => 'MINUS'
87
- reduce_add_operator_1(aProduction, aRange, theTokens, theChildren)
88
-
89
- when 'mul_operator_0' # rule 'mul_operator' => 'STAR'
90
- reduce_mul_operator_0(aProduction, aRange, theTokens, theChildren)
91
-
92
- when 'mul_operator_1' # rule 'mul_operator' => 'DIVIDE'
93
- reduce_mul_operator_1(aProduction, aRange, theTokens, theChildren)
94
-
95
- else
96
- raise StandardError, "Don't know production #{aProduction.name}"
97
- end
98
-
99
- return node
16
+
17
+ def terminal2node()
18
+ Terminal2NodeClass
100
19
  end
101
20
 
102
21
  def reduce_binary_operator(theChildren)
@@ -105,16 +24,41 @@ class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
105
24
  operator_node.children << theChildren[2]
106
25
  return operator_node
107
26
  end
27
+
28
+ # rule 'expression' => 'simple_expression'
29
+ def reduce_expression_0(_aProd, _range, _tokens, theChildren)
30
+ return_first_child(_range, _tokens, theChildren)
31
+ end
32
+
33
+ # rule 'simple_expression' => 'term'
34
+ def reduce_simple_expression_0(_aProd, _range, _tokens, theChildren)
35
+ return_first_child(_range, _tokens, theChildren)
36
+ end
108
37
 
109
38
  # rule 'simple_expression' => %w[simple_expression add_operator term]
110
39
  def reduce_simple_expression_1(_production, _range, _tokens, theChildren)
111
40
  reduce_binary_operator(theChildren)
112
41
  end
42
+
43
+ # rule 'term' => 'factor'
44
+ def reduce_term_0(_aProd, _range, _tokens, theChildren)
45
+ return_first_child(_range, _tokens, theChildren)
46
+ end
113
47
 
114
48
  # rule 'term' => %w[term mul_operator factor]
115
49
  def reduce_term_1(_production, _range, _tokens, theChildren)
116
50
  reduce_binary_operator(theChildren)
117
51
  end
52
+
53
+ # rule 'factor' => 'NUMBER'
54
+ def reduce_factor_0(_aProd, _range, _tokens, theChildren)
55
+ return_first_child(_range, _tokens, theChildren)
56
+ end
57
+
58
+ # # rule 'factor' => %w[LPAREN expression RPAREN]
59
+ def reduce_factor_1(_aProd, _range, _tokens, theChildren)
60
+ return_second_child(_range, _tokens, theChildren)
61
+ end
118
62
 
119
63
  # rule 'add_operator' => 'PLUS'
120
64
  def reduce_add_operator_0(_production, _range, _tokens, theChildren)
@@ -1,4 +1,4 @@
1
- require_relative 'calc_parser'
1
+ require_relative 'calc_lexer'
2
2
  require_relative 'calc_ast_builder'
3
3
 
4
4
  # Retrieve input expression to parse from command-line
@@ -17,9 +17,19 @@ END_MSG
17
17
  exit(1)
18
18
  end
19
19
 
20
- # Create a calculator parser object
21
- parser = CalcParser.new
22
- result = parser.parse_expression(ARGV[0])
20
+ # Create a Rley facade object
21
+ engine = Rley::Engine.new do |cfg|
22
+ cfg.repr_builder = CalcASTBuilder
23
+ end
24
+
25
+ ########################################
26
+ # Step 1. Load a grammar for calculator
27
+ require_relative 'calc_grammar'
28
+ engine.use_grammar(CalcGrammar)
29
+
30
+
31
+ lexer = CalcLexer.new(ARGV[0])
32
+ result = engine.parse(lexer.tokens)
23
33
 
24
34
  unless result.success?
25
35
  # Stop if the parse failed...
@@ -28,10 +38,9 @@ unless result.success?
28
38
  exit(1)
29
39
  end
30
40
 
31
- tree_builder = CalcASTBuilder
32
41
 
33
42
  # Generate a parse tree from the parse result
34
- ptree = result.parse_tree(tree_builder)
43
+ ptree = engine.to_ptree(result)
35
44
 
36
45
  root = ptree.root
37
46
  puts root.interpret # Output the expression result
@@ -8,7 +8,6 @@ class CalcLexer
8
8
  attr_reader(:scanner)
9
9
  attr_reader(:lineno)
10
10
  attr_reader(:line_start)
11
- attr_reader(:name2symbol)
12
11
 
13
12
  @@lexeme2name = {
14
13
  '(' => 'LPAREN',
@@ -22,9 +21,8 @@ class CalcLexer
22
21
 
23
22
  class ScanError < StandardError; end
24
23
 
25
- def initialize(source, aGrammar)
24
+ def initialize(source)
26
25
  @scanner = StringScanner.new(source)
27
- @name2symbol = aGrammar.name2symbol
28
26
  @lineno = 1
29
27
  end
30
28
 
@@ -70,8 +68,7 @@ class CalcLexer
70
68
  end
71
69
 
72
70
  def build_token(aSymbolName, aLexeme)
73
- token_type = name2symbol[aSymbolName]
74
- return Rley::Lexical::Token.new(aLexeme, token_type)
71
+ return Rley::Lexical::Token.new(aLexeme, aSymbolName)
75
72
  end
76
73
 
77
74
  def skip_whitespaces()
@@ -1,13 +1,25 @@
1
1
  require 'rspec' # Use the RSpec framework
2
- require_relative '../calc_parser' # Load the class under test
2
+ require_relative '../calc_lexer'
3
+ require_relative '../calc_grammar'
3
4
  require_relative '../calc_ast_builder'
4
5
 
5
6
 
6
7
  describe 'Calculator' do
7
- def parse_expression(anExpression)
8
- # Create a calculator parser object
9
- parser = CalcParser.new
10
- result = parser.parse_expression(anExpression)
8
+ def expect_expr(anExpression)
9
+ # Create a Rley facade object
10
+ engine = Rley::Engine.new do |cfg|
11
+ cfg.repr_builder = CalcASTBuilder
12
+ end
13
+
14
+ engine.use_grammar(CalcGrammar)
15
+ raw_result = parse_expression(engine, anExpression)
16
+ ast = engine.to_ptree(raw_result)
17
+ return expect(ast.root.interpret)
18
+ end
19
+
20
+ def parse_expression(anEngine, anExpression)
21
+ lexer = CalcLexer.new(anExpression)
22
+ result = anEngine.parse(lexer.tokens)
11
23
 
12
24
  unless result.success?
13
25
  # Stop if the parse failed...
@@ -17,20 +29,7 @@ describe 'Calculator' do
17
29
  end
18
30
 
19
31
  return result
20
- end
21
-
22
- def build_ast(aParseResult)
23
- tree_builder = CalcASTBuilder
24
- # Generate an abstract syntax tree from the parse result
25
- ast = aParseResult.parse_tree(tree_builder)
26
- return ast.root
27
- end
28
-
29
- def expect_expr(anExpression)
30
- parsing = parse_expression(anExpression)
31
- ast = build_ast(parsing)
32
- return expect(ast.interpret)
33
- end
32
+ end
34
33
 
35
34
  it 'should evaluate simple number literals' do
36
35
  expect_expr('2').to eq(2)
@@ -1,4 +1,3 @@
1
- require_relative 'ast_building'
2
1
  require_relative 'calc_ast_nodes'
3
2
 
4
3
  # The purpose of a CalcASTBuilder is to build piece by piece an AST
@@ -8,8 +7,7 @@ require_relative 'calc_ast_nodes'
8
7
  # The Builder pattern creates a complex object
9
8
  # (say, a parse tree) from simpler objects (terminal and non-terminal
10
9
  # nodes) and using a step by step approach.
11
- class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
12
- include ASTBuilding
10
+ class CalcASTBuilder < Rley::ParseRep::ASTBaseBuilder
13
11
 
14
12
  Terminal2NodeClass = {
15
13
  # Lexical ambiguity: minus sign represents two very different concepts:
@@ -26,111 +24,10 @@ class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
26
24
 
27
25
  protected
28
26
 
29
- # Overriding method.
30
- # Factory method for creating a node object for the given
31
- # input token.
32
- # @param aTerminal [Terminal] Terminal symbol associated with the token
33
- # @param aTokenPosition [Integer] Position of token in the input stream
34
- # @param aToken [Token] The input token
35
- def new_leaf_node(aProduction, aTerminal, aTokenPosition, aToken)
36
- klass = Terminal2NodeClass.fetch(aTerminal.name, CalcTerminalNode)
37
- node = if klass
38
- if klass.is_a?(Hash)
39
- # Lexical ambiguity...
40
- klass = klass.fetch(aProduction.name)
41
- end
42
- klass.new(aToken, aTokenPosition)
43
- else
44
- PTree::TerminalNode.new(aToken, aTokenPosition)
45
- end
46
-
47
- return node
27
+ def terminal2node()
28
+ Terminal2NodeClass
48
29
  end
49
30
 
50
- # Method to override.
51
- # Factory method for creating a parent node object.
52
- # @param aProduction [Production] Production rule
53
- # @param aRange [Range] Range of tokens matched by the rule
54
- # @param theTokens [Array] The input tokens
55
- # @param theChildren [Array] Children nodes (one per rhs symbol)
56
- def new_parent_node(aProduction, aRange, theTokens, theChildren)
57
- node = case aProduction.name
58
- when 'expression_0' # rule 'expression' => %w[simple_expression]
59
- return_first_child(aRange, theTokens, theChildren)
60
-
61
- when 'simple_expression_0' # rule 'simple_expression' => 'term'
62
- return_first_child(aRange, theTokens, theChildren)
63
-
64
- when 'simple_expression_1'
65
- # rule 'simple_expression' => %w[simple_expression add_operator term]
66
- reduce_simple_expression_1(aProduction, aRange, theTokens, theChildren)
67
-
68
- when 'term_0' # rule 'term' => 'factor'
69
- return_first_child(aRange, theTokens, theChildren)
70
-
71
- when 'term_1' # rule 'term' => %w[term mul_operator factor]
72
- reduce_term_1(aProduction, aRange, theTokens, theChildren)
73
-
74
- when 'factor_0' # rule 'factor' => 'simple_factor'
75
- return_first_child(aRange, theTokens, theChildren)
76
-
77
- when 'factor_1' # rule 'factor' => %w[factor POWER simple_factor]
78
- reduce_factor_1(aProduction, aRange, theTokens, theChildren)
79
-
80
- when 'simple_factor_0' # rule 'simple_factor' => %[sign scalar]
81
- reduce_simple_factor_0(aProduction, aRange, theTokens, theChildren)
82
-
83
- when 'simple_factor_1' # rule 'simple_factor' => %w[unary_function in_parenthesis]
84
- reduce_simple_factor_1(aProduction, aRange, theTokens, theChildren)
85
-
86
- when 'simple_factor_2' # rule 'simple_factor' => %w[MINUS in_parenthesis]
87
- reduce_simple_factor_2(aProduction, aRange, theTokens, theChildren)
88
-
89
- when 'simple_factor_3' # rule 'simple_factor' => 'in_parenthesis'
90
- return_first_child(aRange, theTokens, theChildren)
91
-
92
- when 'sign_0' # rule 'sign' => 'PLUS'
93
- return_first_child(aRange, theTokens, theChildren)
94
-
95
- when 'sign_1' # rule 'sign' => 'MINUS'
96
- return_first_child(aRange, theTokens, theChildren)
97
-
98
- when 'sign_2' # rule 'sign' => []
99
- return_epsilon(aRange, theTokens, theChildren)
100
-
101
- when 'scalar_0' # rule 'scalar' => 'NUMBER'
102
- return_first_child(aRange, theTokens, theChildren)
103
-
104
- when 'scalar_1' # rule 'scalar' => 'PI'
105
- return_first_child(aRange, theTokens, theChildren)
106
-
107
- when 'scalar_2' # rule 'scalar' => 'E'
108
- return_first_child(aRange, theTokens, theChildren)
109
-
110
- when 'unary_function_0' # rule 'unary_function' => 'RESERVED'
111
- return_first_child(aRange, theTokens, theChildren)
112
-
113
- when 'in_parenthesis_0' # rule 'in_parenthesis' => %w[LPAREN expression RPAREN]
114
- return_second_child(aRange, theTokens, theChildren)
115
-
116
- when 'add_operator_0' # rule 'add_operator' => 'PLUS'
117
- reduce_add_operator_0(aProduction, aRange, theTokens, theChildren)
118
-
119
- when 'add_operator_1' # rule 'add_operator' => 'MINUS'
120
- reduce_add_operator_1(aProduction, aRange, theTokens, theChildren)
121
-
122
- when 'mul_operator_0' # rule 'mul_operator' => 'STAR'
123
- reduce_mul_operator_0(aProduction, aRange, theTokens, theChildren)
124
-
125
- when 'mul_operator_1' # rule 'mul_operator' => 'DIVIDE'
126
- reduce_mul_operator_1(aProduction, aRange, theTokens, theChildren)
127
-
128
- else
129
- raise StandardError, "Don't know production #{aProduction.name}"
130
- end
131
-
132
- return node
133
- end
134
31
 
135
32
  def reduce_binary_operator(theChildren)
136
33
  operator_node = theChildren[1]
@@ -179,12 +76,17 @@ class CalcASTBuilder < Rley::Parser::ParseTreeBuilder
179
76
  end
180
77
 
181
78
  # rule 'simple_factor' => %w[MINUS in_parenthesis]
182
- def reduce_simple_factor_2(aProduction, aRange, theTokens, theChildren)
79
+ def reduce_simple_factor_2(aProduction, aRange, _tokens, theChildren)
183
80
  negation = CalcNegateNode.new(theChildren[0].symbol, aRange.low)
184
81
  negation.children << theChildren[1]
185
82
  return negation
186
83
  end
187
84
 
85
+ # rule 'in_parenthesis' => %w[LPAREN expression RPAREN]
86
+ def reduce_in_parenthesis_0(_production, _range, _tokens, theChildren)
87
+ return_second_child(_range, _tokens, theChildren)
88
+ end
89
+
188
90
  # rule 'add_operator' => 'PLUS'
189
91
  def reduce_add_operator_0(_production, aRange, _tokens, theChildren)
190
92
  return CalcAddNode.new(theChildren[0].symbol, aRange)
@@ -1,4 +1,4 @@
1
- require_relative 'calc_parser'
1
+ require_relative 'calc_lexer'
2
2
  require_relative 'calc_ast_builder'
3
3
 
4
4
  def print_title(aTitle)
@@ -17,9 +17,6 @@ def print_tree(aTitle, aParseTree)
17
17
  puts ''
18
18
  end
19
19
 
20
- # Create a calculator parser object
21
- parser = CalcParser.new
22
-
23
20
  # Parse the input expression in command-line
24
21
  if ARGV.empty?
25
22
  my_name = File.basename(__FILE__)
@@ -41,7 +38,17 @@ END_MSG
41
38
  exit(1)
42
39
  end
43
40
  puts ARGV[0]
44
- result = parser.parse_expression(ARGV[0])
41
+
42
+ # Create a Rley facade object
43
+ engine = Rley::Engine.new
44
+
45
+ ########################################
46
+ # Step 1. Load a grammar for calculator
47
+ require_relative 'calc_grammar'
48
+ engine.use_grammar(CalcGrammar)
49
+
50
+ lexer = CalcLexer.new(ARGV[0])
51
+ result = engine.parse(lexer.tokens)
45
52
 
46
53
  unless result.success?
47
54
  # Stop if the parse failed...
@@ -52,12 +59,12 @@ end
52
59
 
53
60
 
54
61
  # Generate a concrete syntax parse tree from the parse result
55
- cst_ptree = result.parse_tree
62
+ cst_ptree = engine.convert(result)
56
63
  print_tree('Concrete Syntax Tree (CST)', cst_ptree)
57
64
 
58
65
  # Generate an abstract syntax parse tree from the parse result
59
- tree_builder = CalcASTBuilder
60
- ast_ptree = result.parse_tree(tree_builder)
66
+ engine.configuration.repr_builder = CalcASTBuilder
67
+ ast_ptree = engine.convert(result)
61
68
  print_tree('Abstract Syntax Tree (AST)', ast_ptree)
62
69
 
63
70
  # Now perform the computation of math expression