rley 0.7.08 → 0.8.03

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +29 -5
  3. data/CHANGELOG.md +28 -4
  4. data/README.md +4 -5
  5. data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
  6. data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
  7. data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
  8. data/examples/data_formats/JSON/json_demo.rb +1 -2
  9. data/examples/data_formats/JSON/json_grammar.rb +11 -11
  10. data/examples/general/calc_iter1/calc_grammar.rb +5 -4
  11. data/examples/general/calc_iter2/calc_grammar.rb +9 -9
  12. data/examples/general/left.rb +1 -1
  13. data/examples/general/right.rb +1 -1
  14. data/lib/rley/base/dotted_item.rb +5 -0
  15. data/lib/rley/base/grm_items_builder.rb +6 -0
  16. data/lib/rley/constants.rb +1 -1
  17. data/lib/rley/engine.rb +2 -2
  18. data/lib/rley/interface.rb +16 -0
  19. data/lib/rley/notation/all_notation_nodes.rb +4 -0
  20. data/lib/rley/notation/ast_builder.rb +185 -0
  21. data/lib/rley/notation/ast_node.rb +44 -0
  22. data/lib/rley/notation/ast_visitor.rb +115 -0
  23. data/lib/rley/notation/grammar.rb +49 -0
  24. data/lib/rley/notation/grammar_builder.rb +505 -0
  25. data/lib/rley/notation/grouping_node.rb +23 -0
  26. data/lib/rley/notation/parser.rb +56 -0
  27. data/lib/rley/notation/sequence_node.rb +35 -0
  28. data/lib/rley/notation/symbol_node.rb +29 -0
  29. data/lib/rley/notation/tokenizer.rb +180 -0
  30. data/lib/rley/parse_rep/ast_base_builder.rb +44 -0
  31. data/lib/rley/parser/gfg_chart.rb +101 -6
  32. data/lib/rley/parser/gfg_earley_parser.rb +1 -1
  33. data/lib/rley/parser/gfg_parsing.rb +5 -3
  34. data/lib/rley/parser/parse_entry_set.rb +1 -1
  35. data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +53 -15
  36. data/lib/rley/syntax/grm_symbol.rb +1 -1
  37. data/lib/rley/syntax/match_closest.rb +43 -0
  38. data/lib/rley/syntax/production.rb +6 -0
  39. data/lib/rley.rb +1 -1
  40. data/spec/rley/engine_spec.rb +6 -6
  41. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  42. data/spec/rley/notation/grammar_builder_spec.rb +302 -0
  43. data/spec/rley/notation/parser_spec.rb +183 -0
  44. data/spec/rley/notation/tokenizer_spec.rb +364 -0
  45. data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
  46. data/spec/rley/parse_rep/groucho_spec.rb +1 -1
  47. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
  48. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
  49. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
  50. data/spec/rley/parser/dangling_else_spec.rb +447 -0
  51. data/spec/rley/parser/gfg_earley_parser_spec.rb +118 -10
  52. data/spec/rley/parser/gfg_parsing_spec.rb +2 -1
  53. data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
  54. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  55. data/spec/rley/support/grammar_abc_helper.rb +2 -2
  56. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  57. data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
  58. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  59. data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
  60. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  61. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  62. data/spec/rley/support/grammar_sppf_helper.rb +2 -2
  63. data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +29 -11
  64. data/spec/rley/syntax/match_closest_spec.rb +46 -0
  65. data/spec/rley/syntax/production_spec.rb +4 -0
  66. metadata +29 -14
  67. data/lib/rley/parser/parse_state.rb +0 -78
  68. data/lib/rley/parser/parse_state_tracker.rb +0 -59
  69. data/lib/rley/parser/state_set.rb +0 -100
  70. data/spec/rley/parser/parse_state_spec.rb +0 -125
  71. data/spec/rley/parser/parse_tracer_spec.rb +0 -200
  72. data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -11,7 +11,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
11
11
 
12
12
  # Grammar with left recursive rule.
13
13
  rule 'l_dots' => []
14
- rule 'l_dots' => %w[l_dots DOT]
14
+ rule 'l_dots' => 'l_dots DOT'
15
15
  end
16
16
 
17
17
  # And now, let's build the grammar...
@@ -11,7 +11,7 @@ builder = Rley::Syntax::GrammarBuilder.new do
11
11
 
12
12
  # Grammar with right recursive rule.
13
13
  rule 'r_dots' => []
14
- rule 'r_dots' => %w[DOT r_dots]
14
+ rule 'r_dots' => 'DOT r_dots'
15
15
  end
16
16
 
17
17
  # And now, let's build the grammar...
@@ -28,6 +28,11 @@ module Rley # This module is used as a namespace
28
28
  # @return [Integer]
29
29
  attr_reader :position
30
30
 
31
+ # A possible constraint between the symbol on the left of the dot and
32
+ # the closest preceding given terminal
33
+ # @return [NilClass, Syntax::MatchClosest]
34
+ attr_accessor :constraint
35
+
31
36
  # @param aProduction [Syntax::Production]
32
37
  # @param aPosition [Integer] Position of the dot in rhs of production.
33
38
  def initialize(aProduction, aPosition)
@@ -12,12 +12,18 @@ module Rley # This module is used as a namespace
12
12
  def build_dotted_items(aGrammar)
13
13
  items = []
14
14
  aGrammar.rules.each do |prod|
15
+ index_prev = items.size
15
16
  rhs_size = prod.rhs.size
16
17
  if rhs_size.zero?
17
18
  items << DottedItem.new(prod, 0)
18
19
  else
19
20
  items += (0..rhs_size).map { |i| DottedItem.new(prod, i) }
20
21
  end
22
+
23
+ prod.constraints.each do |cs|
24
+ # Attach the constraint to the item at offset cs.idx_symbol + 1 among this production's dotted items
25
+ items[index_prev + cs.idx_symbol + 1].constraint = cs
26
+ end
21
27
  end
22
28
 
23
29
  return items
@@ -5,7 +5,7 @@
5
5
 
6
6
  module Rley # Module used as a namespace
7
7
  # The version number of the gem.
8
- Version = '0.7.08'
8
+ Version = '0.8.03'
9
9
 
10
10
  # Brief description of the gem.
11
11
  Description = "Ruby implementation of the Earley's parsing algorithm"
data/lib/rley/engine.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative './syntax/grammar_builder'
3
+ require_relative './notation/grammar_builder'
4
4
  require_relative './parser/gfg_earley_parser'
5
5
  require_relative './parse_tree_visitor'
6
6
  require_relative './parse_forest_visitor'
@@ -61,7 +61,7 @@ module Rley # This module is used as a namespace
61
61
  # add_production('elements' => 'INTEGER')
62
62
  # end
63
63
  def build_grammar(&aBlock)
64
- builder = Rley::Syntax::GrammarBuilder.new(&aBlock)
64
+ builder = Rley::Notation::GrammarBuilder.new(&aBlock)
65
65
  @grammar = builder.grammar
66
66
  end
67
67
 
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative './notation/grammar_builder'
4
+
5
+ module Rley # Module used as a namespace
6
+ # Factory method.
7
+ # A grammar builder constructs a Rley grammar piece by piece
8
+ # from DSL instructions in a provided code block.
9
+ # @param aBlock [Proc] a code block
10
+ # @return [Rley::Notation::GrammarBuilder] An object that builds a grammar.
11
+ def self.grammar_builder(&aBlock)
12
+ Rley::Notation::GrammarBuilder.new(&aBlock)
13
+ end
14
+ end # module
15
+
16
+ # End of file
@@ -0,0 +1,4 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'grouping_node'
4
+ require_relative 'symbol_node'
@@ -0,0 +1,185 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../parse_rep/ast_base_builder'
4
+ require_relative '../engine'
5
+ require_relative 'all_notation_nodes'
6
+
7
+ module Rley
8
+ module Notation
9
+ # The purpose of ASTBuilder is to build piece by piece an AST
10
+ # (Abstract Syntax Tree) from a sequence of input tokens and
11
+ # visit events produced by walking over a GFGParsing object.
12
+ class ASTBuilder < Rley::ParseRep::ASTBaseBuilder
13
+ unless defined?(Name2special)
14
+ # Mapping Token name => operator | separator | delimiter characters
15
+ # @return [Hash{String => String}]
16
+ Name2special = {
17
+ 'COMMA' => ',',
18
+ 'ELLIPSIS' => '..',
19
+ 'LEFT_BRACE' => '{',
20
+ 'LEFT_PAREN' => '(',
21
+ 'PLUS' => '+',
22
+ 'QUESTION_MARK' => '?',
23
+ 'RIGHT_BRACE' => '}',
24
+ 'RIGHT_PAREN' => ')',
25
+ 'STAR' => '*'
26
+ }.freeze
27
+ end
28
+
29
+ protected
30
+
31
+ def terminal2node
32
+ Terminal2NodeClass
33
+ end
34
+
35
+ # Method override
36
+ def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
37
+ Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
38
+ end
39
+
40
+ # Factory method for creating a parent node object.
41
+ # @param aProduction [Production] Production rule
42
+ # @param aRange [Range] Range of tokens matched by the rule
43
+ # @param theTokens [Array] The input tokens
44
+ # @param theChildren [Array] Children nodes (one per rhs symbol)
45
+ def new_parent_node(aProduction, aRange, theTokens, theChildren)
46
+ mth_name = method_name(aProduction.name)
47
+ if respond_to?(mth_name, true)
48
+ node = send(mth_name, aProduction, aRange, theTokens, theChildren)
49
+ else
50
+ # Default action...
51
+ node = case aProduction.rhs.size
52
+ when 0
53
+ return_epsilon(aRange, theTokens, theChildren)
54
+ when 1
55
+ return_first_child(aRange, theTokens, theChildren)
56
+ else
57
+ node = Rley::PTree::NonTerminalNode.new(aProduction.lhs, aRange)
58
+ theChildren&.reverse_each do |child|
59
+ node.add_subnode(child) if child
60
+ end
61
+
62
+ node
63
+ end
64
+ end
65
+
66
+ node
67
+ end
68
+
69
+ # Return the AST node corresponding to the second symbol in the rhs
70
+ def reduce_to_2nd_symbol(_production, _range, _tokens, theChildren)
71
+ theChildren[1]
72
+ end
73
+
74
+ #####################################
75
+ # SEMANTIC ACTIONS
76
+ #####################################
77
+
78
+ # rule('rhs' => 'member_seq').tag 'sequence'
79
+ def reduce_sequence(_production, _range, _tokens, theChildren)
80
+ if theChildren[0].size == 1
81
+ theChildren[0].first
82
+ else
83
+ SequenceNode.new(theChildren[0].first.position, theChildren[0], nil)
84
+ end
85
+ end
86
+
87
+ # rule('member_seq' => 'member_seq member').tag 'more_members'
88
+ def reduce_more_members(_production, _range, _tokens, theChildren)
89
+ theChildren[0] << theChildren[1]
90
+ end
91
+
92
+ # rule('member_seq' => 'member')
93
+ def reduce_one_member(_production, _range, _tokens, theChildren)
94
+ [theChildren[0]]
95
+ end
96
+
97
+ # rule('strait_member' => 'base_member annotation')
98
+ def reduce_annotated_member(_production, _range, _tokens, theChildren)
99
+ theChildren[0].annotation = theChildren[1]
100
+
101
+ theChildren[0]
102
+ end
103
+
104
+ # rule('base_member' => 'SYMBOL')
105
+ def reduce_symbol(_production, _range, _tokens, theChildren)
106
+ SymbolNode.new(theChildren[0].token.position, theChildren[0].token.lexeme)
107
+ end
108
+
109
+ # rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN')
110
+ def reduce_grouping(_production, _range, tokens, theChildren)
111
+ if theChildren[1].size == 1
112
+ theChildren[1].first
113
+ else
114
+ rank = theChildren[0].range.high
115
+ pos = tokens[rank].position
116
+ GroupingNode.new(pos, theChildren[1], nil)
117
+ end
118
+ end
119
+
120
+ # rule('quantified_member' => 'base_member quantifier')
121
+ def reduce_quantified_member(_production, _range, _tokens, theChildren)
122
+ theChildren[0].repetition = theChildren[1]
123
+ theChildren[0]
124
+ end
125
+
126
+ # rule('quantifier' => 'QUESTION_MARK')
127
+ def reduce_question_mark(_production, _range, _tokens, _theChildren)
128
+ :zero_or_one
129
+ end
130
+
131
+ # rule('quantifier' => 'STAR')
132
+ def reduce_star(_production, _range, _tokens, _theChildren)
133
+ :zero_or_more
134
+ end
135
+
136
+ # rule('quantifier' => 'PLUS')
137
+ def reduce_plus(_production, _range, _tokens, _theChildren)
138
+ :one_or_more
139
+ end
140
+
141
+ # rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag 'annotation'
142
+ def reduce_annotation(_production, _range, _tokens, theChildren)
143
+ theChildren[1]
144
+ end
145
+
146
+ # rule('mapping' => 'mapping COMMA key_value')
147
+ def reduce_more_pairs(_production, _range, _tokens, theChildren)
148
+ hsh = theChildren[0]
149
+ hsh[theChildren[2].first] = theChildren[2].last
150
+
151
+ hsh
152
+ end
153
+
154
+ # rule('mapping' => 'key_value').tag 'one_pair'
155
+ def reduce_one_pair(_production, _range, _tokens, theChildren)
156
+ { theChildren[0].first => theChildren[0].last }
157
+ end
158
+
159
+ # rule('key_value' => 'KEY value')
160
+ def reduce_raw_pair(_production, _range, _tokens, theChildren)
161
+ key = theChildren[0].token.lexeme
162
+ value = if theChildren[1].kind_of?(Rley::PTree::TerminalNode)
163
+ theChildren[1].token.lexeme
164
+ else
165
+ theChildren[1]
166
+ end
167
+ [key, value]
168
+ end
169
+
170
+ # rule('range' => 'INT_LIT ELLIPSIS INT_LIT')
171
+ def reduce_bound_range(_production, _range, _tokens, theChildren)
172
+ low = theChildren[0].token.lexeme
173
+ high = theChildren[2].token.lexeme
174
+ case [low, high]
175
+ when %w[0 1]
176
+ :zero_or_one
177
+ when %w[1 1]
178
+ :exactly_one
179
+ else
180
+ Range.new(low.to_i, high.to_i)
181
+ end
182
+ end
183
+ end # class
184
+ end # module
185
+ end # module
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rley
4
+ module Notation
5
+ # Abstract class.
6
+ # Instances of its subclasses represent nodes of an abstract syntax tree
7
+ # that is the product of the parse of an input text.
8
+ class ASTNode
9
+ # @return [Rley::Lexical::Position] Position of the entry in the input stream.
10
+ attr_reader :position
11
+
12
+ # @return [Symbol]
13
+ attr_accessor :repetition
14
+
15
+ # @return [Hash]
16
+ attr_reader :annotation
17
+
18
+ # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
19
+ def initialize(aPosition)
20
+ @position = aPosition
21
+ @repetition = :exactly_one
22
+ @annotation = {}
23
+ end
24
+
25
+ def annotation=(aMapping)
26
+ repeat_key = 'repeat'
27
+ @repetition = aMapping.delete(repeat_key) if aMapping.include?(repeat_key)
28
+ @annotation = aMapping
29
+ end
30
+
31
+ # Notification that the parsing has successfully completed
32
+ def done!
33
+ # Default: do nothing ...
34
+ end
35
+
36
+ # Abstract method (must be overridden in subclasses).
37
+ # Part of the 'visitee' role in Visitor design pattern.
38
+ # @param _visitor [Notation::ASTVisitor] the visitor
39
+ def accept(_visitor)
40
+ raise NotImplementedError
41
+ end
42
+ end # class
43
+ end # module
44
+ end # module
@@ -0,0 +1,115 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rley
4
+ module Notation
5
+ class ASTVisitor
6
+ # Link to the top node to visit
7
+ attr_reader(:top)
8
+
9
+ # List of objects that subscribed to the visit event notification.
10
+ attr_reader(:subscribers)
11
+
12
+ # Build a visitor for the given top.
13
+ # @param aTop [Notation::ASTNode] the parse tree to visit.
14
+ def initialize(aTop)
15
+ raise StandardError if aTop.nil?
16
+
17
+ @top = aTop
18
+ @subscribers = []
19
+ end
20
+
21
+ # Add a subscriber for the visit event notifications.
22
+ # @param aSubscriber [Object]
23
+ def subscribe(aSubscriber)
24
+ subscribers << aSubscriber
25
+ end
26
+
27
+ # Remove the given object from the subscription list.
28
+ # The object won't be notified of visit events.
29
+ # @param aSubscriber [Object]
30
+ def unsubscribe(aSubscriber)
31
+ subscribers.delete_if { |entry| entry == aSubscriber }
32
+ end
33
+
34
+ # The signal to begin the visit of the top.
35
+ def start
36
+ top.accept(self)
37
+ end
38
+
39
+ # Visit event. The visitor is about to visit the ptree.
40
+ # @param aParseTree [Rley::PTree::ParseTree] the ptree to visit.
41
+ def start_visit_ptree(aParseTree)
42
+ broadcast(:before_ptree, aParseTree)
43
+ end
44
+
45
+ # Visit event. The visitor has completed the visit of the ptree.
46
+ # @param aParseTree [Rley::PTree::ParseTree] the visited ptree.
47
+ def end_visit_ptree(aParseTree)
48
+ broadcast(:after_ptree, aParseTree)
49
+ end
50
+
51
+ # Visit event. The visitor is about to visit a symbol node.
52
+ # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
53
+ def visit_symbol_node(aSymbolNode)
54
+ broadcast(:before_symbol_node, aSymbolNode, self)
55
+ broadcast(:after_symbol_node, aSymbolNode, self)
56
+ end
57
+
58
+ # Visit event. The visitor is about to visit a sequence node.
59
+ # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
60
+ def visit_sequence_node(aSequenceNode)
61
+ broadcast(:before_sequence_node, aSequenceNode, self)
62
+ traverse_subnodes(aSequenceNode)
63
+ broadcast(:after_sequence_node, aSequenceNode, self)
64
+ end
65
+
66
+ # Visit event. The visitor is about to visit a grouping node.
67
+ # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
68
+ def visit_grouping_node(aGroupingNode)
69
+ broadcast(:before_grouping_node, aGroupingNode, self)
70
+ traverse_subnodes(aGroupingNode) if aGroupingNode.repetition == :exactly_one
71
+ broadcast(:after_grouping_node, aGroupingNode, self)
72
+ end
73
+
74
+ private
75
+
76
+ # Visit event. The visitor is about to visit the subnodes of a non
77
+ # terminal node.
78
+ # @param aParentNode [Notation::SequenceNode, Notation::GroupingNode] the parent node.
79
+ def traverse_subnodes(aParentNode)
80
+ subnodes = aParentNode.subnodes
81
+ broadcast(:before_subnodes, aParentNode, subnodes)
82
+
83
+ # Let's proceed with the visit of subnodes
84
+ subnodes.each { |a_node| a_node.accept(self) }
85
+
86
+ broadcast(:after_subnodes, aParentNode, subnodes)
87
+ end
88
+
89
+ # Visit event. The visitor is about to visit one given subnode of a non
90
+ # terminal node.
91
+ # @param aParentNode [Notation::ASTNode] the parent node; must respond to #subnodes.
92
+ # @param index [Integer] index of child subnode
93
+ def traverse_given_subnode(aParentNode, index)
94
+ subnode = aParentNode.subnodes[index]
95
+ broadcast(:before_given_subnode, aParentNode, subnode)
96
+
97
+ # Now, let's proceed with the visit of that subnode
98
+ subnode.accept(self)
99
+
100
+ broadcast(:after_given_subnode, aParentNode, subnode)
101
+ end
102
+
103
+ # Send a notification to all subscribers.
104
+ # @param msg [Symbol] event to notify
105
+ # @param args [Array] arguments of the notification.
106
+ def broadcast(msg, *args)
107
+ subscribers.each do |subscr|
108
+ next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)
109
+
110
+ subscr.send(msg, *args)
111
+ end
112
+ end
113
+ end # class
114
+ end # module
115
+ end # module