rley 0.7.08 → 0.8.03

Sign up to get free protection for your applications and to get access to all the features.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +29 -5
  3. data/CHANGELOG.md +28 -4
  4. data/README.md +4 -5
  5. data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
  6. data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
  7. data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
  8. data/examples/data_formats/JSON/json_demo.rb +1 -2
  9. data/examples/data_formats/JSON/json_grammar.rb +11 -11
  10. data/examples/general/calc_iter1/calc_grammar.rb +5 -4
  11. data/examples/general/calc_iter2/calc_grammar.rb +9 -9
  12. data/examples/general/left.rb +1 -1
  13. data/examples/general/right.rb +1 -1
  14. data/lib/rley/base/dotted_item.rb +5 -0
  15. data/lib/rley/base/grm_items_builder.rb +6 -0
  16. data/lib/rley/constants.rb +1 -1
  17. data/lib/rley/engine.rb +2 -2
  18. data/lib/rley/interface.rb +16 -0
  19. data/lib/rley/notation/all_notation_nodes.rb +4 -0
  20. data/lib/rley/notation/ast_builder.rb +185 -0
  21. data/lib/rley/notation/ast_node.rb +44 -0
  22. data/lib/rley/notation/ast_visitor.rb +115 -0
  23. data/lib/rley/notation/grammar.rb +49 -0
  24. data/lib/rley/notation/grammar_builder.rb +505 -0
  25. data/lib/rley/notation/grouping_node.rb +23 -0
  26. data/lib/rley/notation/parser.rb +56 -0
  27. data/lib/rley/notation/sequence_node.rb +35 -0
  28. data/lib/rley/notation/symbol_node.rb +29 -0
  29. data/lib/rley/notation/tokenizer.rb +180 -0
  30. data/lib/rley/parse_rep/ast_base_builder.rb +44 -0
  31. data/lib/rley/parser/gfg_chart.rb +101 -6
  32. data/lib/rley/parser/gfg_earley_parser.rb +1 -1
  33. data/lib/rley/parser/gfg_parsing.rb +5 -3
  34. data/lib/rley/parser/parse_entry_set.rb +1 -1
  35. data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +53 -15
  36. data/lib/rley/syntax/grm_symbol.rb +1 -1
  37. data/lib/rley/syntax/match_closest.rb +43 -0
  38. data/lib/rley/syntax/production.rb +6 -0
  39. data/lib/rley.rb +1 -1
  40. data/spec/rley/engine_spec.rb +6 -6
  41. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  42. data/spec/rley/notation/grammar_builder_spec.rb +302 -0
  43. data/spec/rley/notation/parser_spec.rb +183 -0
  44. data/spec/rley/notation/tokenizer_spec.rb +364 -0
  45. data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
  46. data/spec/rley/parse_rep/groucho_spec.rb +1 -1
  47. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
  48. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
  49. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
  50. data/spec/rley/parser/dangling_else_spec.rb +447 -0
  51. data/spec/rley/parser/gfg_earley_parser_spec.rb +118 -10
  52. data/spec/rley/parser/gfg_parsing_spec.rb +2 -1
  53. data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
  54. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  55. data/spec/rley/support/grammar_abc_helper.rb +2 -2
  56. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  57. data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
  58. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  59. data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
  60. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  61. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  62. data/spec/rley/support/grammar_sppf_helper.rb +2 -2
  63. data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +29 -11
  64. data/spec/rley/syntax/match_closest_spec.rb +46 -0
  65. data/spec/rley/syntax/production_spec.rb +4 -0
  66. metadata +29 -14
  67. data/lib/rley/parser/parse_state.rb +0 -78
  68. data/lib/rley/parser/parse_state_tracker.rb +0 -59
  69. data/lib/rley/parser/state_set.rb +0 -100
  70. data/spec/rley/parser/parse_state_spec.rb +0 -125
  71. data/spec/rley/parser/parse_tracer_spec.rb +0 -200
  72. data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -11,7 +11,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
11
11
 
12
12
  # Grammar with left recursive rule.
13
13
  rule 'l_dots' => []
14
- rule 'l_dots' => %w[l_dots DOT]
14
+ rule 'l_dots' => 'l_dots DOT'
15
15
  end
16
16
 
17
17
  # And now, let's build the grammar...
@@ -11,7 +11,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
11
11
 
12
12
  # Grammar with right recursive rule.
13
13
  rule 'r_dots' => []
14
- rule 'r_dots' => %w[DOT r_dots]
14
+ rule 'r_dots' => 'DOT r_dots'
15
15
  end
16
16
 
17
17
  # And now, let's build the grammar...
@@ -28,6 +28,11 @@ module Rley # This module is used as a namespace
28
28
  # @return [Integer]
29
29
  attr_reader :position
30
30
 
31
+ # A possible constraint between the symbol to the left of the dot and
32
+ # the closest preceding given terminal
33
+ # @return [NilClass, Syntax::MatchClosest]
34
+ attr_accessor :constraint
35
+
31
36
  # @param aProduction [Syntax::Production]
32
37
  # @param aPosition [Integer] Position of the dot in rhs of production.
33
38
  def initialize(aProduction, aPosition)
@@ -12,12 +12,18 @@ module Rley # This module is used as a namespace
12
12
  def build_dotted_items(aGrammar)
13
13
  items = []
14
14
  aGrammar.rules.each do |prod|
15
+ index_prev = items.size
15
16
  rhs_size = prod.rhs.size
16
17
  if rhs_size.zero?
17
18
  items << DottedItem.new(prod, 0)
18
19
  else
19
20
  items += (0..rhs_size).map { |i| DottedItem.new(prod, i) }
20
21
  end
22
+
23
+ prod.constraints.each do |cs|
24
+ # Attach constraint to the dotted item at position idx_symbol + 1 of this production
25
+ items[index_prev + cs.idx_symbol + 1].constraint = cs
26
+ end
21
27
  end
22
28
 
23
29
  return items
@@ -5,7 +5,7 @@
5
5
 
6
6
  module Rley # Module used as a namespace
7
7
  # The version number of the gem.
8
- Version = '0.7.08'
8
+ Version = '0.8.03'
9
9
 
10
10
  # Brief description of the gem.
11
11
  Description = "Ruby implementation of the Earley's parsing algorithm"
data/lib/rley/engine.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative './syntax/grammar_builder'
3
+ require_relative './notation/grammar_builder'
4
4
  require_relative './parser/gfg_earley_parser'
5
5
  require_relative './parse_tree_visitor'
6
6
  require_relative './parse_forest_visitor'
@@ -61,7 +61,7 @@ module Rley # This module is used as a namespace
61
61
  # add_production('elements' => 'INTEGER')
62
62
  # end
63
63
  def build_grammar(&aBlock)
64
- builder = Rley::Syntax::GrammarBuilder.new(&aBlock)
64
+ builder = Rley::Notation::GrammarBuilder.new(&aBlock)
65
65
  @grammar = builder.grammar
66
66
  end
67
67
 
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative './notation/grammar_builder'
4
+
5
+ module Rley # Module used as a namespace
6
+ # Factory method.
7
+ # A grammar builder constructs a Rley grammar piece by piece
8
+ # from DSL instructions in a provided code block.
9
+ # @param aBlock [Proc] a code block
10
+ # @return [Rley::Notation::GrammarBuilder] An object that builds a grammar.
11
+ def self.grammar_builder(&aBlock)
12
+ Rley::Notation::GrammarBuilder.new(&aBlock)
13
+ end
14
+ end # module
15
+
16
+ # End of file
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'grouping_node'
4
+ require_relative 'symbol_node'
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../parse_rep/ast_base_builder'
4
+ require_relative '../engine'
5
+ require_relative 'all_notation_nodes'
6
+
7
+ module Rley
8
+ module Notation
9
+ # The purpose of ASTBuilder is to build piece by piece an AST
10
+ # (Abstract Syntax Tree) from a sequence of input tokens and
11
+ # visit events produced by walking over a GFGParsing object.
12
+ class ASTBuilder < Rley::ParseRep::ASTBaseBuilder
13
+ unless defined?(Name2special)
14
+ # Mapping Token name => operator | separator | delimiter characters
15
+ # @return [Hash{String => String}]
16
+ Name2special = {
17
+ 'COMMA' => ',',
18
+ 'ELLIPSIS' => '..',
19
+ 'LEFT_BRACE' => '{',
20
+ 'LEFT_PAREN' => '(',
21
+ 'PLUS' => '+',
22
+ 'QUESTION_MARK' => '?',
23
+ 'RIGHT_BRACE' => '}',
24
+ 'RIGHT_PAREN' => ')',
25
+ 'STAR' => '*'
26
+ }.freeze
27
+ end
28
+
29
+ protected
30
+
31
+ def terminal2node
32
+ Terminal2NodeClass
33
+ end
34
+
35
+ # Method override
36
+ def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
37
+ Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
38
+ end
39
+
40
+ # Factory method for creating a parent node object.
41
+ # @param aProduction [Production] Production rule
42
+ # @param aRange [Range] Range of tokens matched by the rule
43
+ # @param theTokens [Array] The input tokens
44
+ # @param theChildren [Array] Children nodes (one per rhs symbol)
45
+ def new_parent_node(aProduction, aRange, theTokens, theChildren)
46
+ mth_name = method_name(aProduction.name)
47
+ if respond_to?(mth_name, true)
48
+ node = send(mth_name, aProduction, aRange, theTokens, theChildren)
49
+ else
50
+ # Default action...
51
+ node = case aProduction.rhs.size
52
+ when 0
53
+ return_epsilon(aRange, theTokens, theChildren)
54
+ when 1
55
+ return_first_child(aRange, theTokens, theChildren)
56
+ else
57
+ node = Rley::PTree::NonTerminalNode.new(aProduction.lhs, aRange)
58
+ theChildren&.reverse_each do |child|
59
+ node.add_subnode(child) if child
60
+ end
61
+
62
+ node
63
+ end
64
+ end
65
+
66
+ node
67
+ end
68
+
69
+ # Return the AST node corresponding to the second symbol in the rhs
70
+ def reduce_to_2nd_symbol(_production, _range, _tokens, theChildren)
71
+ theChildren[1]
72
+ end
73
+
74
+ #####################################
75
+ # SEMANTIC ACTIONS
76
+ #####################################
77
+
78
+ # rule('rhs' => 'member_seq').tag 'sequence'
79
+ def reduce_sequence(_production, _range, _tokens, theChildren)
80
+ if theChildren[0].size == 1
81
+ theChildren[0].first
82
+ else
83
+ SequenceNode.new(theChildren[0].first.position, theChildren[0], nil)
84
+ end
85
+ end
86
+
87
+ # rule('member_seq' => 'member_seq member').tag 'more_members'
88
+ def reduce_more_members(_production, _range, _tokens, theChildren)
89
+ theChildren[0] << theChildren[1]
90
+ end
91
+
92
+ # rule('member_seq' => 'member')
93
+ def reduce_one_member(_production, _range, _tokens, theChildren)
94
+ [theChildren[0]]
95
+ end
96
+
97
+ # rule('strait_member' => 'base_member annotation')
98
+ def reduce_annotated_member(_production, _range, _tokens, theChildren)
99
+ theChildren[0].annotation = theChildren[1]
100
+
101
+ theChildren[0]
102
+ end
103
+
104
+ # rule('base_member' => 'SYMBOL')
105
+ def reduce_symbol(_production, _range, _tokens, theChildren)
106
+ SymbolNode.new(theChildren[0].token.position, theChildren[0].token.lexeme)
107
+ end
108
+
109
+ # rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN')
110
+ def reduce_grouping(_production, _range, tokens, theChildren)
111
+ if theChildren[1].size == 1
112
+ theChildren[1].first
113
+ else
114
+ rank = theChildren[0].range.high
115
+ pos = tokens[rank].position
116
+ GroupingNode.new(pos, theChildren[1], nil)
117
+ end
118
+ end
119
+
120
+ # rule('quantified_member' => 'base_member quantifier')
121
+ def reduce_quantified_member(_production, _range, _tokens, theChildren)
122
+ theChildren[0].repetition = theChildren[1]
123
+ theChildren[0]
124
+ end
125
+
126
+ # rule('quantifier' => 'QUESTION_MARK')
127
+ def reduce_question_mark(_production, _range, _tokens, _theChildren)
128
+ :zero_or_one
129
+ end
130
+
131
+ # rule('quantifier' => 'STAR')
132
+ def reduce_star(_production, _range, _tokens, _theChildren)
133
+ :zero_or_more
134
+ end
135
+
136
+ # rule('quantifier' => 'PLUS')
137
+ def reduce_plus(_production, _range, _tokens, _theChildren)
138
+ :one_or_more
139
+ end
140
+
141
+ # rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag 'annotation'
142
+ def reduce_annotation(_production, _range, _tokens, theChildren)
143
+ theChildren[1]
144
+ end
145
+
146
+ # rule('mapping' => 'mapping COMMA key_value')
147
+ def reduce_more_pairs(_production, _range, _tokens, theChildren)
148
+ hsh = theChildren[0]
149
+ hsh[theChildren[2].first] = theChildren[2].last
150
+
151
+ hsh
152
+ end
153
+
154
+ # rule('mapping' => 'key_value').tag 'one_pair'
155
+ def reduce_one_pair(_production, _range, _tokens, theChildren)
156
+ { theChildren[0].first => theChildren[0].last }
157
+ end
158
+
159
+ # rule('key_value' => 'KEY value')
160
+ def reduce_raw_pair(_production, _range, _tokens, theChildren)
161
+ key = theChildren[0].token.lexeme
162
+ value = if theChildren[1].kind_of?(Rley::PTree::TerminalNode)
163
+ theChildren[1].token.lexeme
164
+ else
165
+ theChildren[1]
166
+ end
167
+ [key, value]
168
+ end
169
+
170
+ # rule('range' => 'INT_LIT ELLIPSIS INT_LIT')
171
+ def reduce_bound_range(_production, _range, _tokens, theChildren)
172
+ low = theChildren[0].token.lexeme
173
+ high = theChildren[2].token.lexeme
174
+ case [low, high]
175
+ when %w[0 1]
176
+ :zero_or_one
177
+ when %w[1 1]
178
+ :exactly_one
179
+ else
180
+ Range.new(low.to_i, high.to_i)
181
+ end
182
+ end
183
+ end # class
184
+ end # module
185
+ end # module
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rley
4
+ module Notation
5
+ # Abstract class.
6
+ # Instances of its subclasses represent nodes of an abstract syntax tree
7
+ # that is the product of the parse of an input text.
8
+ class ASTNode
9
+ # @return [Rley::Lexical::Position] Position of the entry in the input stream.
10
+ attr_reader :position
11
+
12
+ # @return [Symbol, Range] Repetition of the node (defaults to :exactly_one)
13
+ attr_accessor :repetition
14
+
15
+ # @return [Hash]
16
+ attr_reader :annotation
17
+
18
+ # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
19
+ def initialize(aPosition)
20
+ @position = aPosition
21
+ @repetition = :exactly_one
22
+ @annotation = {}
23
+ end
24
+
25
+ def annotation=(aMapping)
26
+ repeat_key = 'repeat'
27
+ @repetition = aMapping.delete(repeat_key) if aMapping.include?(repeat_key)
28
+ @annotation = aMapping
29
+ end
30
+
31
+ # Notification that the parsing has successfully completed
32
+ def done!
33
+ # Default: do nothing ...
34
+ end
35
+
36
+ # Abstract method (must be overridden in subclasses).
37
+ # Part of the 'visitee' role in Visitor design pattern.
38
+ # @param _visitor [Notation::ASTVisitor] the visitor
39
+ def accept(_visitor)
40
+ raise NotImplementedError
41
+ end
42
+ end # class
43
+ end # module
44
+ end # module
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rley
4
+ module Notation
5
+ class ASTVisitor
6
+ # Link to the top node to visit
7
+ attr_reader(:top)
8
+
9
+ # List of objects that subscribed to the visit event notification.
10
+ attr_reader(:subscribers)
11
+
12
+ # Build a visitor for the given top.
13
+ # @param aTop [Notation::ASTNode] the parse tree to visit.
14
+ def initialize(aTop)
15
+ raise StandardError if aTop.nil?
16
+
17
+ @top = aTop
18
+ @subscribers = []
19
+ end
20
+
21
+ # Add a subscriber for the visit event notifications.
22
+ # @param aSubscriber [Object]
23
+ def subscribe(aSubscriber)
24
+ subscribers << aSubscriber
25
+ end
26
+
27
+ # Remove the given object from the subscription list.
28
+ # The object won't be notified of visit events.
29
+ # @param aSubscriber [Object]
30
+ def unsubscribe(aSubscriber)
31
+ subscribers.delete_if { |entry| entry == aSubscriber }
32
+ end
33
+
34
+ # The signal to begin the visit of the top.
35
+ def start
36
+ top.accept(self)
37
+ end
38
+
39
+ # Visit event. The visitor is about to visit the ptree.
40
+ # @param aParseTree [Rley::PTree::ParseTree] the ptree to visit.
41
+ def start_visit_ptree(aParseTree)
42
+ broadcast(:before_ptree, aParseTree)
43
+ end
44
+
45
+ # Visit event. The visitor has completed the visit of the ptree.
46
+ # @param aParseTree [Rley::PTree::ParseTree] the visited ptree.
47
+ def end_visit_ptree(aParseTree)
48
+ broadcast(:after_ptree, aParseTree)
49
+ end
50
+
51
+ # Visit event. The visitor is about to visit a symbol node.
52
+ # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
53
+ def visit_symbol_node(aSymbolNode)
54
+ broadcast(:before_symbol_node, aSymbolNode, self)
55
+ broadcast(:after_symbol_node, aSymbolNode, self)
56
+ end
57
+
58
+ # Visit event. The visitor is about to visit a sequence node.
59
+ # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
60
+ def visit_sequence_node(aSequenceNode)
61
+ broadcast(:before_sequence_node, aSequenceNode, self)
62
+ traverse_subnodes(aSequenceNode)
63
+ broadcast(:after_sequence_node, aSequenceNode, self)
64
+ end
65
+
66
+ # Visit event. The visitor is about to visit a grouping node.
67
+ # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
68
+ def visit_grouping_node(aGroupingNode)
69
+ broadcast(:before_grouping_node, aGroupingNode, self)
70
+ traverse_subnodes(aGroupingNode) if aGroupingNode.repetition == :exactly_one
71
+ broadcast(:after_grouping_node, aGroupingNode, self)
72
+ end
73
+
74
+ private
75
+
76
+ # Visit event. The visitor is about to visit the subnodes of a non
77
+ # terminal node.
78
+ # @param aParentNode [Notation::SequenceNode, Notation::GroupingNode] the parent node.
79
+ def traverse_subnodes(aParentNode)
80
+ subnodes = aParentNode.subnodes
81
+ broadcast(:before_subnodes, aParentNode, subnodes)
82
+
83
+ # Let's proceed with the visit of subnodes
84
+ subnodes.each { |a_node| a_node.accept(self) }
85
+
86
+ broadcast(:after_subnodes, aParentNode, subnodes)
87
+ end
88
+
89
+ # Visit event. The visitor is about to visit one given subnode of a non
90
+ # terminal node.
91
+ # @param aParentNode [Notation::SequenceNode, Notation::GroupingNode] the parent node.
92
+ # @param index [Integer] index of child subnode
93
+ def traverse_given_subnode(aParentNode, index)
94
+ subnode = aParentNode.subnodes[index]
95
+ broadcast(:before_given_subnode, aParentNode, subnode)
96
+
97
+ # Now, let's proceed with the visit of that subnode
98
+ subnode.accept(self)
99
+
100
+ broadcast(:after_given_subnode, aParentNode, subnode)
101
+ end
102
+
103
+ # Send a notification to all subscribers.
104
+ # @param msg [Symbol] event to notify
105
+ # @param args [Array] arguments of the notification.
106
+ def broadcast(msg, *args)
107
+ subscribers.each do |subscr|
108
+ next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)
109
+
110
+ subscr.send(msg, *args)
111
+ end
112
+ end
113
+ end # class
114
+ end # module
115
+ end # module