rley 0.5.14 → 0.6.00

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +12 -2
  3. data/README.md +29 -31
  4. data/examples/NLP/benchmark_pico_en.rb +34 -34
  5. data/examples/NLP/engtagger.rb +1 -1
  6. data/examples/NLP/nano_eng/nano_en_demo.rb +23 -28
  7. data/examples/NLP/nano_eng/nano_grammar.rb +1 -1
  8. data/examples/NLP/pico_en_demo.rb +28 -31
  9. data/examples/data_formats/JSON/json_ast_builder.rb +11 -70
  10. data/examples/data_formats/JSON/json_demo.rb +32 -14
  11. data/examples/data_formats/JSON/json_grammar.rb +1 -1
  12. data/examples/data_formats/JSON/json_lexer.rb +5 -11
  13. data/examples/general/SRL/lib/ast_builder.rb +5 -28
  14. data/examples/general/SRL/lib/tokenizer.rb +2 -5
  15. data/examples/general/SRL/spec/integration_spec.rb +12 -5
  16. data/examples/general/SRL/spec/tokenizer_spec.rb +13 -14
  17. data/examples/general/SRL/srl_demo.rb +16 -9
  18. data/examples/general/calc_iter1/calc_ast_builder.rb +29 -85
  19. data/examples/general/calc_iter1/calc_demo.rb +15 -6
  20. data/examples/general/calc_iter1/calc_lexer.rb +2 -5
  21. data/examples/general/calc_iter1/spec/calculator_spec.rb +18 -19
  22. data/examples/general/calc_iter2/calc_ast_builder.rb +9 -107
  23. data/examples/general/calc_iter2/calc_demo.rb +15 -8
  24. data/examples/general/calc_iter2/calc_lexer.rb +3 -5
  25. data/examples/general/calc_iter2/spec/calculator_spec.rb +18 -31
  26. data/lib/rley.rb +2 -1
  27. data/lib/rley/constants.rb +1 -1
  28. data/lib/rley/engine.rb +122 -0
  29. data/lib/rley/parse_rep/ast_base_builder.rb +128 -0
  30. data/lib/rley/{parser → parse_rep}/cst_builder.rb +1 -1
  31. data/lib/rley/{parser → parse_rep}/parse_forest_builder.rb +1 -1
  32. data/lib/rley/{parser → parse_rep}/parse_forest_factory.rb +2 -2
  33. data/lib/rley/{parser → parse_rep}/parse_rep_creator.rb +3 -3
  34. data/lib/rley/{parser → parse_rep}/parse_tree_builder.rb +4 -4
  35. data/lib/rley/{parser → parse_rep}/parse_tree_factory.rb +1 -1
  36. data/lib/rley/parser/gfg_parsing.rb +16 -4
  37. data/spec/rley/engine_spec.rb +127 -0
  38. data/spec/rley/formatter/asciitree_spec.rb +11 -13
  39. data/spec/rley/formatter/bracket_notation_spec.rb +11 -13
  40. data/spec/rley/formatter/debug_spec.rb +13 -15
  41. data/spec/rley/formatter/json_spec.rb +10 -14
  42. data/spec/rley/{parser → parse_rep}/ambiguous_parse_spec.rb +3 -3
  43. data/spec/rley/{parser → parse_rep}/ast_builder_spec.rb +34 -83
  44. data/spec/rley/{parser → parse_rep}/cst_builder_spec.rb +3 -3
  45. data/spec/rley/{parser → parse_rep}/groucho_spec.rb +3 -3
  46. data/spec/rley/{parser → parse_rep}/parse_forest_builder_spec.rb +4 -4
  47. data/spec/rley/{parser → parse_rep}/parse_forest_factory_spec.rb +2 -2
  48. data/spec/rley/{parser → parse_rep}/parse_tree_factory_spec.rb +2 -2
  49. data/spec/rley/parse_tree_visitor_spec.rb +12 -15
  50. data/spec/rley/support/ast_builder.rb +403 -0
  51. data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
  52. metadata +27 -28
  53. data/examples/data_formats/JSON/json_parser.rb +0 -46
  54. data/examples/general/SRL/lib/ast_building.rb +0 -20
  55. data/examples/general/SRL/lib/parser.rb +0 -26
  56. data/examples/general/calc_iter1/calc_parser.rb +0 -24
  57. data/examples/general/calc_iter2/ast_building.rb +0 -20
  58. data/examples/general/calc_iter2/calc_parser.rb +0 -24
@@ -7,7 +7,7 @@ class CalcLexer
7
7
  attr_reader(:scanner)
8
8
  attr_reader(:lineno)
9
9
  attr_reader(:line_start)
10
- attr_reader(:name2symbol)
10
+
11
11
 
12
12
  @@lexeme2name = {
13
13
  '(' => 'LPAREN',
@@ -30,9 +30,8 @@ class CalcLexer
30
30
 
31
31
  class ScanError < StandardError; end
32
32
 
33
- def initialize(source, aGrammar)
33
+ def initialize(source)
34
34
  @scanner = StringScanner.new(source)
35
- @name2symbol = aGrammar.name2symbol
36
35
  @lineno = 1
37
36
  end
38
37
 
@@ -78,8 +77,7 @@ class CalcLexer
78
77
  end
79
78
 
80
79
  def build_token(aSymbolName, aLexeme)
81
- token_type = name2symbol[aSymbolName]
82
- return Rley::Lexical::Token.new(aLexeme, token_type)
80
+ return Rley::Lexical::Token.new(aLexeme, aSymbolName)
83
81
  end
84
82
 
85
83
  def skip_whitespaces()
@@ -1,5 +1,6 @@
1
1
  require 'rspec' # Use the RSpec framework
2
- require_relative '../calc_parser' # Load the class under test
2
+ require_relative '../calc_lexer'
3
+ require_relative '../calc_grammar'
3
4
  require_relative '../calc_ast_builder'
4
5
 
5
6
  RSpec.configure do |config|
@@ -9,10 +10,21 @@ end
9
10
 
10
11
 
11
12
  describe 'Calculator' do
12
- def parse_expression(anExpression)
13
- # Create a calculator parser object
14
- parser = CalcParser.new
15
- result = parser.parse_expression(anExpression)
13
+ def expect_expr(anExpression)
14
+ # Create a Rley facade object
15
+ engine = Rley::Engine.new do |cfg|
16
+ cfg.repr_builder = CalcASTBuilder
17
+ end
18
+
19
+ engine.use_grammar(CalcGrammar)
20
+ raw_result = parse_expression(engine, anExpression)
21
+ ast = engine.to_ptree(raw_result)
22
+ return expect(ast.root.interpret)
23
+ end
24
+
25
+ def parse_expression(anEngine, anExpression)
26
+ lexer = CalcLexer.new(anExpression)
27
+ result = anEngine.parse(lexer.tokens)
16
28
 
17
29
  unless result.success?
18
30
  # Stop if the parse failed...
@@ -24,31 +36,6 @@ describe 'Calculator' do
24
36
  return result
25
37
  end
26
38
 
27
- def print_cst(aParseResult)
28
- # Generate a parse tree from the parse result
29
- ptree = aParseResult.parse_tree
30
-
31
- # Let's create a parse tree visitor
32
- visitor = Rley::ParseTreeVisitor.new(ptree)
33
-
34
- # Now output formatted parse tree
35
- renderer = Rley::Formatter::Asciitree.new($stdout)
36
- renderer.render(visitor)
37
- end
38
-
39
- def build_ast(aParseResult)
40
- tree_builder = CalcASTBuilder
41
- # Generate an abstract syntax tree from the parse result
42
- ast = aParseResult.parse_tree(tree_builder)
43
- return ast.root
44
- end
45
-
46
- def expect_expr(anExpression)
47
- parsing = parse_expression(anExpression)
48
- ast = build_ast(parsing)
49
- return expect(ast.interpret)
50
- end
51
-
52
39
  context 'Parsing valid expressions' do
53
40
  it 'should evaluate simple integer literals' do
54
41
  expect_expr('2').to eq(2)
@@ -143,7 +130,7 @@ describe 'Calculator' do
143
130
  end
144
131
 
145
132
  it 'should handle nested exponentiations' do
146
- expect_expr('2 ** 2**2)').to eq(16)
133
+ expect_expr('2 ** 2**2').to eq(16)
147
134
  end
148
135
 
149
136
  it 'should change sign of expression in parentheses' do
@@ -6,11 +6,12 @@ require_relative './rley/constants'
6
6
  require_relative './rley/syntax/grammar_builder'
7
7
  require_relative './rley/lexical/token'
8
8
  require_relative './rley/parser/gfg_earley_parser'
9
- require_relative './rley/parser/parse_tree_builder'
9
+ require_relative './rley/parse_rep/ast_base_builder'
10
10
  require_relative './rley/parse_tree_visitor'
11
11
  require_relative './rley/formatter/debug'
12
12
  require_relative './rley/formatter/json'
13
13
  require_relative './rley/formatter/asciitree'
14
14
  require_relative './rley/formatter/bracket_notation'
15
+ require_relative './rley/engine'
15
16
 
16
17
  # End of file
@@ -3,7 +3,7 @@
3
3
 
4
4
  module Rley # Module used as a namespace
5
5
  # The version number of the gem.
6
- Version = '0.5.14'.freeze
6
+ Version = '0.6.00'.freeze
7
7
 
8
8
  # Brief description of the gem.
9
9
  Description = "Ruby implementation of the Earley's parsing algorithm".freeze
@@ -0,0 +1,122 @@
1
+ require_relative './syntax/grammar_builder'
2
+ require_relative './parser/gfg_earley_parser'
3
+ require_relative './parse_rep/parse_tree_factory'
4
+
5
+ module Rley # This module is used as a namespace
6
+ EngineConfig = Struct.new(
7
+ :parse_repr,
8
+ :repr_builder
9
+ ) do
10
+ def initialize()
11
+ super()
12
+ self.parse_repr = :parse_tree
13
+ self.repr_builder = :default
14
+ end
15
+ end
16
+
17
+ # Implementation of the GoF Facade design pattern.
18
+ # An Engine object provides a higher-level interface that shields
19
+ # Rley client code from the lower-level classes.
20
+ class Engine
21
+ attr_reader :configuration
22
+ attr_reader :grammar
23
+
24
+ # Constructor.
25
+ # @param aConfigBlock [Proc, Lambda] Code block for setting the configuration.
26
+ def initialize(&aConfigBlock)
27
+ @configuration = EngineConfig.new
28
+
29
+ yield configuration if block_given?
30
+ end
31
+
32
+ # Factory method.
33
+ # @param aBlock [Proc, Lambda] Code block for creating the grammar.
34
+ def build_grammar(&aBlock)
35
+ builder = Rley::Syntax::GrammarBuilder.new(&aBlock)
36
+ @grammar = builder.grammar
37
+ end
38
+
39
+ # Use the given grammar.
40
+ # @param aGrammar [Rley::Syntax::Grammar]
41
+ def use_grammar(aGrammar)
42
+ @grammar = aGrammar
43
+ end
44
+
45
+ # Parse the sequence of tokens produced by the given tokenizer object.
46
+ # @param aTokenizer [#each]
47
+ # @return [Parser::GFGParsing]
48
+ def parse(aTokenizer)
49
+ tokens = []
50
+ aTokenizer.each do |a_token|
51
+ if a_token
52
+ term_name = a_token.terminal
53
+ term_symb = grammar.name2symbol[term_name]
54
+ a_token.instance_variable_set(:@terminal, term_symb)
55
+ tokens << a_token
56
+ end
57
+ end
58
+ parser = build_parser(grammar)
59
+ return parser.parse(tokens)
60
+ end
61
+
62
+ # Convert raw parse result into a more convenient representation
63
+ # (parse tree or parse forest) as specified by the configuration.
64
+ # @param aRawParse [Parser::GFGParsing]
65
+ def convert(aRawParse)
66
+ result = case configuration.parse_repr
67
+ when :parse_tree
68
+ to_ptree(aRawParse)
69
+ when :parse_forest
70
+ to_pforest(aRawParse)
71
+ end
72
+
73
+ return result
74
+ end
75
+
76
+ # Convert raw parse result into a parse tree representation
77
+ # @param aRawParse [Parser::GFGParsing]
78
+ def to_ptree(aRawParse)
79
+ factory = ParseRep::ParseTreeFactory.new(aRawParse)
80
+ if configuration.repr_builder == :default
81
+ result = factory.create(nil)
82
+ else
83
+ result = factory.create(configuration.repr_builder)
84
+ end
85
+
86
+ return result
87
+ end
88
+
89
+ # Convert raw parse result into a parse forest representation
90
+ # @param aRawParse [Parser::GFGParsing]
91
+ # def to_pforest(aRawParse)
92
+ # factory = ParseRep::ParseForestFactory.new(aRawParse)
93
+ # if configuration.repr_builder == :default
94
+ # result = factory.create(nil)
95
+ # else
96
+ # result = factory.create(configuration.repr_builder)
97
+ # end
98
+
99
+ # return result
100
+ # end
101
+
102
+ # Build a visitor for the given parse tree
103
+ # @param aPTree [PTree::ParseTree]
104
+ # @return [ParseTreeVisitor]
105
+ def ptree_visitor(aPTree)
106
+ return Rley::ParseTreeVisitor.new(aPTree)
107
+ end
108
+
109
+ # @param aPForest [SPPF::ParseForest]
110
+ # @return [ParseForestVisitor]
111
+ # def pforest_visitor(aPForest)
112
+ # return Rley::ParseForestVisitor.new(aPForest)
113
+ # end
114
+
115
+ protected
116
+
117
+ def build_parser(aGrammar)
118
+ return Parser::GFGEarleyParser.new(aGrammar)
119
+ end
120
+ end # class
121
+ end # module
122
+
@@ -0,0 +1,128 @@
1
+ require_relative '../ptree/parse_tree'
2
+ require_relative 'parse_tree_builder'
3
+
4
+ module Rley # This module is used as a namespace
5
+ module ParseRep # This module is used as a namespace
6
+ # Abstract class (to be subclassed).
7
+ # The purpose of an ASTBaseBuilder is to build piece by piece an AST
8
+ # (Abstract Syntax Tree) from a sequence of input tokens and
9
+ # visit events produced by walking over a GFGParsing object.
10
+ # It is an implementation of the Builder GoF pattern.
11
+ # The Builder pattern creates a complex object
12
+ # (say, a parse tree) from simpler objects (terminal and non-terminal
13
+ # nodes) and using a step by step approach.
14
+ class ASTBaseBuilder < ParseTreeBuilder
15
+ # Method to override in subclass.
16
+ # Returns a Hash
17
+ # @return [Hash{String => Class}, Hash{String => Hash{String => Class}}]
18
+ # Returned hash contains pairs of the form:
19
+ # terminal name => Class implementing the terminal tokens
20
+ # terminal name => Hash with pairs: rule name => Class
21
+ def terminal2node()
22
+ raise NotImplementedError
23
+ end
24
+
25
+ # Method to override in subclass.
26
+ # Default class for representing terminal nodes.
27
+ # @return [Class]
28
+ def terminalnode_class()
29
+ PTree::TerminalNode
30
+ end
31
+
32
+ # Default method name to invoke when production
33
+ # with given name is invoked.
34
+ # Override this method for other method naming convention.
35
+ # @param aProductionName [String]
36
+ # @return [String]
37
+ def method_name(aProductionName)
38
+ return 'reduce_' + aProductionName
39
+ end
40
+
41
+ # Utility method.
42
+ # Simply return the first child node
43
+ # @param _range [Lexical::TokenRange]
44
+ # @param _tokens [Array<Lexical::Token>]
45
+ # @param theChildren [Array<Object>]
46
+ def return_first_child(_range, _tokens, theChildren)
47
+ return theChildren[0]
48
+ end
49
+
50
+ # Utility method.
51
+ # Simply return the second child node
52
+ # @param _range [Lexical::TokenRange]
53
+ # @param _tokens [Array<Lexical::Token>]
54
+ # @param theChildren [Array<Object>]
55
+ def return_second_child(_range, _tokens, theChildren)
56
+ return theChildren[1]
57
+ end
58
+
59
+ # Utility method.
60
+ # Simply return the last child node
61
+ # @param _range [Lexical::TokenRange]
62
+ # @param _tokens [Array<Lexical::Token>]
63
+ # @param theChildren [Array<Object>]
64
+ def return_last_child(_range, _tokens, theChildren)
65
+ return theChildren[-1]
66
+ end
67
+
68
+ # Simply return an epsilon symbol
69
+ # @param _range [Lexical::TokenRange]
70
+ # @param _tokens [Array<Lexical::Token>]
71
+ # @param _children [Array<Object>]
72
+ def return_epsilon(_range, _tokens, _children)
73
+ return nil
74
+ end
75
+
76
+ protected
77
+
78
+ # Overriding method.
79
+ # Create a parse tree object with given
80
+ # node as root node.
81
+ def create_tree(aRootNode)
82
+ return Rley::PTree::ParseTree.new(aRootNode)
83
+ end
84
+
85
+ # Factory method for creating a node object for the given
86
+ # input token.
87
+ # @param aProduction [Syntax::Production] Relevant production rule
88
+ # @param aTerminal [Syntax::Terminal] Terminal symbol associated with the token
89
+ # @param aTokenPosition [Integer] Position of token in the input stream
90
+ # @param aToken [Lexical::Token] The input token
91
+ def new_leaf_node(aProduction, aTerminal, aTokenPosition, aToken)
92
+ klass = terminal2node.fetch(aTerminal.name, terminalnode_class)
93
+ if klass.is_a?(Hash)
94
+ # Lexical ambiguity...
95
+ klass = klass.fetch(aProduction.name)
96
+ end
97
+ node = klass.new(aToken, aTokenPosition)
98
+
99
+ return node
100
+ end
101
+
102
+ # Method to override.
103
+ # Factory method for creating a parent node object.
104
+ # @param aProduction [Production] Production rule
105
+ # @param aRange [Range] Range of tokens matched by the rule
106
+ # @param theTokens [Array] The input tokens
107
+ # @param theChildren [Array] Children nodes (one per rhs symbol)
108
+ def new_parent_node(aProduction, aRange, theTokens, theChildren)
109
+ mth_name = method_name(aProduction.name)
110
+ if self.respond_to?(mth_name, true)
111
+ node = send(mth_name, aProduction, aRange, theTokens, theChildren)
112
+ else
113
+ # Default action...
114
+ node = case aProduction.rhs.size
115
+ when 0
116
+ return_epsilon(aRange, theTokens, theChildren)
117
+ when 1
118
+ return_first_child(aRange, theTokens, theChildren)
119
+ else
120
+ raise StandardError, "Don't know production '#{aProduction.name}'"
121
+ end
122
+ end
123
+ return node
124
+ end
125
+ end # class
126
+ end # module
127
+ end # module
128
+ # End of file
@@ -4,7 +4,7 @@ require_relative '../ptree/terminal_node'
4
4
  require_relative '../ptree/parse_tree'
5
5
 
6
6
  module Rley # This module is used as a namespace
7
- module Parser # This module is used as a namespace
7
+ module ParseRep # This module is used as a namespace
8
8
  # The purpose of a CSTBuilder is to build piece by piece a CST
9
9
  # (Concrete Syntax Tree) from a sequence of input tokens and
10
10
  # visit events produced by walking over a GFGParsing object.
@@ -9,7 +9,7 @@ require_relative '../sppf/alternative_node'
9
9
  require_relative '../sppf/parse_forest'
10
10
 
11
11
  module Rley # This module is used as a namespace
12
- module Parser # This module is used as a namespace
12
+ module ParseRep # This module is used as a namespace
13
13
  # Builder GoF pattern. Builder pattern builds a complex object
14
14
  # (say, a parse forest) from simpler objects (terminal and non-terminal
15
15
  # nodes) and using a step by step approach.
@@ -2,7 +2,7 @@ require_relative 'parse_rep_creator'
2
2
  require_relative 'parse_forest_builder'
3
3
 
4
4
  module Rley # This module is used as a namespace
5
- module Parser # This module is used as a namespace
5
+ module ParseRep # This module is used as a namespace
6
6
  # Utility class that helps to create a ParseForest from
7
7
  # a given Parsing object.
8
8
  class ParseForestFactory < ParseRepCreator
@@ -14,7 +14,7 @@ module Rley # This module is used as a namespace
14
14
  ParseForestBuilder.new(aParseResult.tokens)
15
15
  end
16
16
 
17
- # When a end vertex is re-visited then jump
17
+ # When an end vertex is re-visited then jump
18
18
  # to its corresponding start vertex. This behaviour
19
19
  # makes sense for sharing nodes.
20
20
  def jump_to_start()
@@ -1,7 +1,7 @@
1
- require_relative 'parse_walker_factory'
1
+ require_relative '../parser/parse_walker_factory'
2
2
 
3
3
  module Rley # This module is used as a namespace
4
- module Parser # This module is used as a namespace
4
+ module ParseRep # This module is used as a namespace
5
5
  # Utility class that helps to create a representation of a parse from
6
6
  # a given Parsing object.
7
7
  class ParseRepCreator
@@ -39,7 +39,7 @@ module Rley # This module is used as a namespace
39
39
  # that will iterate over the relevant nodes (= parsing entries)
40
40
  # of a GFGParsing
41
41
  def walker(aParseResult)
42
- walker_factory = ParseWalkerFactory.new
42
+ walker_factory = Parser::ParseWalkerFactory.new
43
43
  accept_entry = aParseResult.accepting_entry
44
44
  accept_index = aParseResult.chart.last_index
45
45
  walker_factory.build_walker(accept_entry, accept_index, jump_to_start)
@@ -9,7 +9,7 @@ require_relative '../ptree/terminal_node'
9
9
  require_relative '../ptree/parse_tree'
10
10
 
11
11
  module Rley # This module is used as a namespace
12
- module Parser # This module is used as a namespace
12
+ module ParseRep # This module is used as a namespace
13
13
  # Structure used internally by ParseTreeBuilder class.
14
14
  CSTRawNode = Struct.new(:range, :symbol, :children) do
15
15
  # Constructor.
@@ -55,11 +55,11 @@ module Rley # This module is used as a namespace
55
55
  # @param anIndex [Integer] The token index associated with anEntry
56
56
  def receive_event(anEvent, anEntry, anIndex)
57
57
  # puts "Event: #{anEvent} #{anEntry} #{anIndex}"
58
- if anEntry.dotted_entry? # A N => alpha . beta pattern?
58
+ if anEntry.dotted_entry? # N => alpha . beta pattern?
59
59
  process_item_entry(anEvent, anEntry, anIndex)
60
- elsif anEntry.start_entry? # A .N pattern?
60
+ elsif anEntry.start_entry? # .N pattern?
61
61
  process_start_entry(anEvent, anEntry, anIndex)
62
- elsif anEntry.end_entry? # A N. pattern?
62
+ elsif anEntry.end_entry? # N. pattern?
63
63
  process_end_entry(anEvent, anEntry, anIndex)
64
64
  else
65
65
  raise NotImplementedError