rley 0.7.08 → 0.8.00

Sign up to get free protection for your applications and to get access to all the features.
Files changed (71) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +4 -5
  4. data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
  5. data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
  6. data/examples/NLP/pico_en_demo.rb +2 -2
  7. data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
  8. data/examples/data_formats/JSON/json_demo.rb +1 -2
  9. data/examples/data_formats/JSON/json_grammar.rb +11 -11
  10. data/examples/general/calc_iter1/calc_grammar.rb +5 -4
  11. data/examples/general/calc_iter2/calc_grammar.rb +9 -9
  12. data/examples/general/left.rb +1 -1
  13. data/examples/general/right.rb +1 -1
  14. data/lib/rley.rb +1 -1
  15. data/lib/rley/base/dotted_item.rb +5 -0
  16. data/lib/rley/base/grm_items_builder.rb +6 -0
  17. data/lib/rley/constants.rb +1 -1
  18. data/lib/rley/engine.rb +2 -2
  19. data/lib/rley/interface.rb +16 -0
  20. data/lib/rley/notation/all_notation_nodes.rb +2 -0
  21. data/lib/rley/notation/ast_builder.rb +191 -0
  22. data/lib/rley/notation/ast_node.rb +44 -0
  23. data/lib/rley/notation/ast_visitor.rb +113 -0
  24. data/lib/rley/notation/grammar.rb +49 -0
  25. data/lib/rley/notation/grammar_builder.rb +451 -0
  26. data/lib/rley/notation/grouping_node.rb +23 -0
  27. data/lib/rley/notation/parser.rb +56 -0
  28. data/lib/rley/notation/sequence_node.rb +35 -0
  29. data/lib/rley/notation/symbol_node.rb +29 -0
  30. data/lib/rley/notation/tokenizer.rb +192 -0
  31. data/lib/rley/parse_rep/ast_base_builder.rb +13 -0
  32. data/lib/rley/parser/gfg_chart.rb +100 -6
  33. data/lib/rley/parser/gfg_parsing.rb +5 -3
  34. data/lib/rley/parser/parse_entry_set.rb +1 -1
  35. data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +45 -15
  36. data/lib/rley/syntax/grm_symbol.rb +1 -1
  37. data/lib/rley/syntax/match_closest.rb +43 -0
  38. data/lib/rley/syntax/production.rb +6 -0
  39. data/spec/rley/engine_spec.rb +6 -6
  40. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  41. data/spec/rley/notation/grammar_builder_spec.rb +295 -0
  42. data/spec/rley/notation/parser_spec.rb +184 -0
  43. data/spec/rley/notation/tokenizer_spec.rb +370 -0
  44. data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
  45. data/spec/rley/parse_rep/groucho_spec.rb +1 -1
  46. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
  47. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
  48. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
  49. data/spec/rley/parser/dangling_else_spec.rb +445 -0
  50. data/spec/rley/parser/gfg_earley_parser_spec.rb +95 -9
  51. data/spec/rley/parser/gfg_parsing_spec.rb +1 -1
  52. data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
  53. data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
  54. data/spec/rley/support/grammar_abc_helper.rb +2 -2
  55. data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
  56. data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
  57. data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
  58. data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
  59. data/spec/rley/support/grammar_l0_helper.rb +2 -2
  60. data/spec/rley/support/grammar_pb_helper.rb +2 -2
  61. data/spec/rley/support/grammar_sppf_helper.rb +2 -2
  62. data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +30 -11
  63. data/spec/rley/syntax/match_closest_spec.rb +46 -0
  64. data/spec/rley/syntax/production_spec.rb +4 -0
  65. metadata +29 -14
  66. data/lib/rley/parser/parse_state.rb +0 -78
  67. data/lib/rley/parser/parse_state_tracker.rb +0 -59
  68. data/lib/rley/parser/state_set.rb +0 -100
  69. data/spec/rley/parser/parse_state_spec.rb +0 -125
  70. data/spec/rley/parser/parse_tracer_spec.rb +0 -200
  71. data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,2 @@
1
+ require_relative 'grouping_node'
2
+ require_relative 'symbol_node'
@@ -0,0 +1,191 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../parse_rep/ast_base_builder'
4
+ require_relative '../engine'
5
+ require_relative 'all_notation_nodes'
6
+
7
+ module Rley
8
+ module Notation
9
+ # The purpose of ASTBuilder is to build piece by piece an AST
10
+ # (Abstract Syntax Tree) from a sequence of input tokens and
11
+ # visit events produced by walking over a GFGParsing object.
12
+ class ASTBuilder < Rley::ParseRep::ASTBaseBuilder
13
+ unless defined?(Name2special)
14
+ # Mapping Token name => operator | separator | delimiter characters
15
+ # @return [Hash{String => String}]
16
+ Name2special = {
17
+ 'COMMA' => ',',
18
+ 'ELLIPSIS' => '..',
19
+ 'LEFT_BRACE' => '{',
20
+ 'LEFT_PAREN' => '(',
21
+ 'PLUS' => '+',
22
+ 'QUESTION_MARK' => '?',
23
+ 'RIGHT_BRACE' => '}',
24
+ 'RIGHT_PAREN' => ')',
25
+ 'STAR' => '*'
26
+ }.freeze
27
+ end
28
+
29
+ # Create a new AST builder instance.
30
+ # @param theTokens [Array<Rley::Lexical::Token>] The sequence of input tokens.
31
+ def initialize(theTokens)
32
+ super(theTokens)
33
+ end
34
+
35
+ protected
36
+
37
+ def terminal2node
38
+ Terminal2NodeClass
39
+ end
40
+
41
+ # Method override
42
+ def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
43
+ Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
44
+ end
45
+
46
+ # Factory method for creating a parent node object.
47
+ # @param aProduction [Production] Production rule
48
+ # @param aRange [Range] Range of tokens matched by the rule
49
+ # @param theTokens [Array] The input tokens
50
+ # @param theChildren [Array] Children nodes (one per rhs symbol)
51
+ def new_parent_node(aProduction, aRange, theTokens, theChildren)
52
+ mth_name = method_name(aProduction.name)
53
+ if respond_to?(mth_name, true)
54
+ node = send(mth_name, aProduction, aRange, theTokens, theChildren)
55
+ else
56
+ # Default action...
57
+ node = case aProduction.rhs.size
58
+ when 0
59
+ return_epsilon(aRange, theTokens, theChildren)
60
+ when 1
61
+ return_first_child(aRange, theTokens, theChildren)
62
+ else
63
+ node = Rley::PTree::NonTerminalNode.new(aProduction.lhs, aRange)
64
+ theChildren&.reverse_each do |child|
65
+ node.add_subnode(child) if child
66
+ end
67
+
68
+ node
69
+ end
70
+ end
71
+
72
+ node
73
+ end
74
+
75
+ # Return the AST node corresponding to the second symbol in the rhs
76
+ def reduce_to_2nd_symbol(_production, _range, _tokens, theChildren)
77
+ theChildren[1]
78
+ end
79
+
80
+ #####################################
81
+ # SEMANTIC ACTIONS
82
+ #####################################
83
+
84
+ # rule('rhs' => 'member_seq').tag 'sequence'
85
+ def reduce_sequence(_production, _range, _tokens, theChildren)
86
+ if theChildren[0].size == 1
87
+ theChildren[0].first
88
+ else
89
+ SequenceNode.new(theChildren[0].first.position, theChildren[0], nil)
90
+ end
91
+ end
92
+
93
+ # rule('member_seq' => 'member_seq member').tag 'more_members'
94
+ def reduce_more_members(_production, _range, _tokens, theChildren)
95
+ theChildren[0] << theChildren[1]
96
+ end
97
+
98
+ # rule('member_seq' => 'member')
99
+ def reduce_one_member(_production, _range, _tokens, theChildren)
100
+ [theChildren[0]]
101
+ end
102
+
103
+ # rule('strait_member' => 'base_member annotation')
104
+ def reduce_annotated_member(_production, _range, _tokens, theChildren)
105
+ theChildren[0].annotation = theChildren[1]
106
+
107
+ theChildren[0]
108
+ end
109
+
110
+ # rule('base_member' => 'SYMBOL')
111
+ def reduce_symbol(_production, _range, _tokens, theChildren)
112
+ SymbolNode.new(theChildren[0].token.position, theChildren[0].token.lexeme)
113
+ end
114
+
115
+ # rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN')
116
+ def reduce_grouping(_production, _range, tokens, theChildren)
117
+ if theChildren[1].size == 1
118
+ theChildren[1].first
119
+ else
120
+ rank = theChildren[0].range.high
121
+ pos = tokens[rank].position
122
+ GroupingNode.new(pos, theChildren[1], nil)
123
+ end
124
+ end
125
+
126
+ # rule('quantified_member' => 'base_member quantifier')
127
+ def reduce_quantified_member(_production, _range, _tokens, theChildren)
128
+ theChildren[0].repetition = theChildren[1]
129
+ theChildren[0]
130
+ end
131
+
132
+ # rule('quantifier' => 'QUESTION_MARK')
133
+ def reduce_question_mark(_production, _range, _tokens, _theChildren)
134
+ :zero_or_one
135
+ end
136
+
137
+ # rule('quantifier' => 'STAR')
138
+ def reduce_star(_production, _range, _tokens, _theChildren)
139
+ :zero_or_more
140
+ end
141
+
142
+ # rule('quantifier' => 'PLUS')
143
+ def reduce_plus(_production, _range, _tokens, _theChildren)
144
+ :one_or_more
145
+ end
146
+
147
+ # rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag ''
148
+ def reduce_annotation(_production, _range, _tokens, theChildren)
149
+ theChildren[1]
150
+ end
151
+
152
+ # rule('mapping' => 'mapping COMMA key_value')
153
+ def reduce_more_pairs(_production, _range, _tokens, theChildren)
154
+ hsh = theChildren[0]
155
+ hsh[theChildren[2].first] = theChildren[2].last
156
+
157
+ hsh
158
+ end
159
+
160
+ # rule('mapping' => 'key_value').tag 'one_pair'
161
+ def reduce_one_pair(_production, _range, _tokens, theChildren)
162
+ { theChildren[0].first => theChildren[0].last }
163
+ end
164
+
165
+ # rule('key_value' => 'KEY value')
166
+ def reduce_raw_pair(_production, _range, _tokens, theChildren)
167
+ key = theChildren[0].token.lexeme
168
+ value = if theChildren[1].kind_of?(Rley::PTree::TerminalNode)
169
+ theChildren[1].token.lexeme
170
+ else
171
+ theChildren[1]
172
+ end
173
+ [key, value]
174
+ end
175
+
176
+ # rule('range' => 'INT_LIT ELLIPSIS INT_LIT')
177
+ def reduce_bound_range(_production, _range, _tokens, theChildren)
178
+ low = theChildren[0].token.lexeme
179
+ high = theChildren[2].token.lexeme
180
+ case [low, high]
181
+ when ['0', '1']
182
+ :zero_or_one
183
+ when ['1', '1']
184
+ :exactly_one
185
+ else
186
+ Range.new(low.to_i, high.to_i)
187
+ end
188
+ end
189
+ end # class
190
+ end # module
191
+ end # module
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rley
4
+ module Notation
5
# Abstract class.
# Instances of its subclasses represent nodes of an abstract syntax tree
# that is the product of the parse of an input text.
class ASTNode
  # @return [Rley::Lexical::Position] Position of the entry in the input stream.
  attr_reader :position

  # @return [Symbol] How many times the node may occur
  #   (initially :exactly_one).
  attr_accessor :repetition

  # @return [Hash] Annotations attached to the node (initially empty).
  attr_reader :annotation

  # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
  def initialize(aPosition)
    @position = aPosition
    @repetition = :exactly_one
    @annotation = {}
  end

  # Attach the given annotations to the node.
  # A 'repeat' entry, if any, isn't kept as an annotation: its value
  # becomes the repetition of the node.
  # The received Hash is left untouched (a copy is stored), so it may
  # be frozen or reused by the caller.
  # @param aMapping [Hash] the annotation entries
  def annotation=(aMapping)
    repeat_key = 'repeat'
    mapping = aMapping.dup
    @repetition = mapping.delete(repeat_key) if mapping.key?(repeat_key)
    @annotation = mapping
  end

  # Notification that the parsing has successfully completed
  def done!
    # Default: do nothing ...
  end

  # Abstract method (must be overridden in subclasses).
  # Part of the 'visitee' role in Visitor design pattern.
  # @param _visitor [Object] the visitor
  # @raise [NotImplementedError] always, unless overridden
  def accept(_visitor)
    raise NotImplementedError
  end
end # class
43
+ end # module
44
+ end # module
@@ -0,0 +1,113 @@
1
+ module Rley
2
+ module Notation
3
# Visitor that walks over a tree of notation nodes and notifies
# its subscribers of the nodes it meets.
class ASTVisitor
  # Link to the top node to visit
  attr_reader :top

  # List of objects that subscribed to the visit event notification.
  attr_reader :subscribers

  # Build a visitor for the given top.
  # @param aTop [Object] the tree to visit (it must respond to `accept`).
  # @raise [StandardError] when no top is given
  def initialize(aTop)
    if aTop.nil?
      raise StandardError
    end

    @subscribers = []
    @top = aTop
  end

  # Add a subscriber for the visit event notifications.
  # @param aSubscriber [Object]
  def subscribe(aSubscriber)
    subscribers << aSubscriber
  end

  # Remove the given object from the subscription list.
  # The object won't be notified of visit events.
  # @param aSubscriber [Object]
  def unsubscribe(aSubscriber)
    subscribers.delete_if do |registered|
      registered == aSubscriber
    end
  end

  # The signal to begin the visit of the top.
  def start
    top.accept(self)
  end

  # Visit event. The visitor is about to visit the ptree.
  # @param aParseTree [Rley::PTree::ParseTree] the ptree to visit.
  def start_visit_ptree(aParseTree)
    broadcast(:before_ptree, aParseTree)
  end

  # Visit event. The visitor has completed the visit of the ptree.
  # @param aParseTree [Rley::PTree::ParseTree] the visited ptree.
  def end_visit_ptree(aParseTree)
    broadcast(:after_ptree, aParseTree)
  end

  # Visit event. The visitor is visiting a symbol node: both the
  # 'before' and the 'after' notifications are sent in a row.
  # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
  def visit_symbol_node(aSymbolNode)
    %i[before_symbol_node after_symbol_node].each do |event|
      broadcast(event, aSymbolNode, self)
    end
    subscribers
  end

  # Visit event. The visitor is about to visit a sequence node.
  # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
  def visit_sequence_node(aSequenceNode)
    broadcast(:before_sequence_node, aSequenceNode, self)
    traverse_subnodes(aSequenceNode)
    broadcast(:after_sequence_node, aSequenceNode, self)
  end

  # Visit event. The visitor is about to visit a grouping node.
  # The members of the grouping are visited only when the grouping
  # occurs exactly once.
  # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
  def visit_grouping_node(aGroupingNode)
    broadcast(:before_grouping_node, aGroupingNode, self)
    if aGroupingNode.repetition == :exactly_one
      traverse_subnodes(aGroupingNode)
    end
    broadcast(:after_grouping_node, aGroupingNode, self)
  end

  private

  # Visit event. The visitor is about to visit the subnodes of a
  # parent node.
  # @param aParentNode [Object] the parent node (responds to `subnodes`).
  def traverse_subnodes(aParentNode)
    children = aParentNode.subnodes
    broadcast(:before_subnodes, aParentNode, children)

    # Let's proceed with the visit of subnodes
    children.each do |child|
      child.accept(self)
    end

    broadcast(:after_subnodes, aParentNode, children)
  end

  # Visit event. The visitor is about to visit one given subnode of a
  # parent node.
  # @param aParentNode [Object] the parent node (responds to `subnodes`).
  # @param index [Integer] index of child subnode
  def traverse_given_subnode(aParentNode, index)
    child = aParentNode.subnodes[index]
    broadcast(:before_given_subnode, aParentNode, child)

    # Now, let's proceed with the visit of that subnode
    child.accept(self)

    broadcast(:after_given_subnode, aParentNode, child)
  end

  # Send a notification to all subscribers.
  # A subscriber is notified when it responds to the event or when it
  # responds to `accept_all`.
  # @param msg [Symbol] event to notify
  # @param args [Array] arguments of the notification.
  def broadcast(msg, *args)
    subscribers.each do |listener|
      interested = listener.respond_to?(msg) || listener.respond_to?(:accept_all)
      listener.send(msg, *args) if interested
    end
  end
end # class
+ end # module
113
+ end # module
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../syntax/base_grammar_builder'
4
+
5
+ module Rley
6
+ module Notation
7
########################################
# Grammar of the notation used in the right-hand side of production rules
builder = Rley::Syntax::BaseGrammarBuilder.new do
  # '(' and ')': grouping delimiters
  add_terminals('LEFT_PAREN', 'RIGHT_PAREN')
  # '{' and '}': annotation delimiters
  add_terminals('LEFT_BRACE', 'RIGHT_BRACE')
  # Postfix quantifiers
  add_terminals('QUESTION_MARK', 'STAR', 'PLUS')
  add_terminals('COMMA', 'ELLIPSIS')

  # In order: string literal values, integer literal values,
  # grammar symbols, key literals
  %w[STR_LIT INT_LIT SYMBOL KEY].each { |term_name| add_terminals(term_name) }

  rule('notation' => 'rhs')
  rule('rhs' => 'member_seq').tag('sequence')
  rule('rhs' => [])
  rule('member_seq' => 'member_seq member').tag('more_members')
  rule('member_seq' => 'member').tag('one_member')
  rule('member' => 'strait_member')
  rule('member' => 'quantified_member')
  rule('strait_member' => 'base_member')
  rule('strait_member' => 'base_member annotation').tag('annotated_member')
  rule('base_member' => 'SYMBOL').tag('symbol')
  rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN').tag('grouping')
  rule('quantified_member' => 'base_member quantifier').tag('quantified_member')
  rule('quantifier' => 'QUESTION_MARK').tag('question_mark')
  rule('quantifier' => 'STAR').tag('star')
  rule('quantifier' => 'PLUS').tag('plus')
  rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag('annotation')
  rule('mapping' => 'mapping COMMA key_value').tag('more_pairs')
  rule('mapping' => 'key_value').tag('one_pair')
  rule('key_value' => 'KEY value').tag('raw_pair')
  rule('value' => 'STR_LIT')
  rule('value' => 'INT_LIT')
  rule('value' => 'range')
  rule('range' => 'INT_LIT ELLIPSIS INT_LIT').tag('bound_range')
  rule('range' => 'INT_LIT ELLIPSIS')
end

# The Rley Grammar Notation (RGN) grammar, as built from the rules above
RGNGrammar = builder.grammar
48
+ end # module
49
+ end # module
@@ -0,0 +1,451 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+ require_relative 'parser'
5
+ require_relative 'ast_visitor'
6
+ require_relative '../syntax/match_closest'
7
+
8
+ module Rley # This module is used as a namespace
9
+ module Notation # This module is used as a namespace
10
+ # Builder GoF pattern. Builder builds a complex object
11
+ # (say, a grammar) from simpler objects (terminals and productions)
12
+ # and using a step by step approach.
13
+ class GrammarBuilder
14
+ # @return [Hash{String, GrmSymbol}] The mapping of grammar symbol names
15
+ # to the matching grammar symbol object.
16
+ attr_reader(:symbols)
17
+
18
+ # @return [Notation::Parser] Parser for the right-side of productions
19
+ attr_reader(:parser)
20
+
21
+ # @return [Hash{ASTVisitor, Array}]
22
+ attr_reader(:visitor2rhs)
23
+
24
+ # @return [Array<Production>] The list of production rules for
25
+ # the grammar to build.
26
+ attr_reader(:productions)
27
+
28
+ # Creates a new grammar builder.
29
+ # @param aBlock [Proc] code block used to build the grammar.
30
+ # @example Building a tiny English grammar
31
+ # builder = Rley::Syntax::GrammarBuilder.new do
32
+ # add_terminals('n', 'v', 'adj', 'det')
33
+ # rule 'S' => %w[NP VP]
34
+ # rule 'VP' => %w[v NP]
35
+ # rule 'NP' => %w[det n]
36
+ # rule 'NP' => %w[adj NP]
37
+ # end
38
+ # tiny_eng = builder.grammar
39
+ def initialize(&aBlock)
40
+ @symbols = {}
41
+ @productions = []
42
+ @parser = Notation::Parser.new
43
+ @visitor2rhs = {}
44
+
45
+ instance_exec(&aBlock) if block_given?
46
+ end
47
+
48
+ # Retrieve a grammar symbol from its name.
49
+ # Raise an exception if not found.
50
+ # @param aSymbolName [String] the name of a grammar symbol.
51
+ # @return [GrmSymbol] the retrieved symbol object.
52
+ def [](aSymbolName)
53
+ symbols[aSymbolName]
54
+ end
55
+
56
+ # Add the given terminal symbols to the grammar of the language
57
+ # @param terminalSymbols [String or Terminal] 1..* terminal symbols.
58
+ # @return [void]
59
+ def add_terminals(*terminalSymbols)
60
+ new_symbs = build_symbols(Syntax::Terminal, terminalSymbols)
61
+ symbols.merge!(new_symbs)
62
+ end
63
+
64
+ # Add a production rule in the grammar given one
65
+ # key-value pair of the form: String => String.
66
+ # Where the key is the name of the non-terminal appearing in the
67
+ # left side of the rule.
68
+ # The value is a sequence of grammar symbol names (optionally quantified).
69
+ # The rule is created and inserted in the grammar.
70
+ # @example Equivalent call syntax
71
+ # builder.add_production('A' => 'a A c)
72
+ # builder.rule('A' => 'a A c]) # 'rule' is a synonym
73
+ # @param aProductionRepr [Hash{String, String}]
74
+ # A Hash-based representation of a production.
75
+ # @return [Production] The created Production instance
76
+ def add_production(aProductionRepr)
77
+ aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
78
+ lhs = get_grm_symbol(lhs_name)
79
+ rhs = rhs_repr.kind_of?(Array) && rhs_repr.empty? ? '' : rhs_repr.strip
80
+ constraints = []
81
+ if rhs.empty?
82
+ rhs_members = []
83
+ else
84
+ ast = parser.parse(rhs)
85
+ visitor = ASTVisitor.new(ast)
86
+ visitor2rhs[visitor] = []
87
+ visitor.subscribe(self)
88
+ visitor.start
89
+ root_node = ast.root
90
+ constraints = root_node.constraints unless root_node.kind_of?(SymbolNode)
91
+
92
+ rhs_members = visitor2rhs.delete(visitor)
93
+ end
94
+ new_prod = Syntax::Production.new(lhs, rhs_members)
95
+ new_prod.constraints = constraints
96
+ productions << new_prod
97
+ end
98
+
99
+ productions.last
100
+ end
101
+
102
+ # Given the grammar symbols and productions added to the builder,
103
+ # build the resulting grammar (if not yet done).
104
+ # @return [Grammar] the created grammar object.
105
+ def grammar
106
+ unless @grammar
107
+ raise StandardError, 'No symbol found for grammar' if symbols.empty?
108
+ if productions.empty?
109
+ raise StandardError, 'No production found for grammar'
110
+ end
111
+
112
+ # Check that each terminal appears at least in a rhs of a production
113
+ all_terminals = symbols.values.select do |a_symb|
114
+ a_symb.kind_of?(Syntax::Terminal)
115
+ end
116
+ in_use = Set.new
117
+ productions.each do |prod|
118
+ prod.rhs.members.each do |symb|
119
+ in_use << symb if symb.kind_of?(Syntax::Terminal)
120
+ end
121
+ end
122
+
123
+ unused = all_terminals.reject { |a_term| in_use.include?(a_term) }
124
+ unless unused.empty?
125
+ suffix = "#{unused.map(&:name).join(', ')}."
126
+ raise StandardError, "Useless terminal symbol(s): #{suffix}"
127
+ end
128
+
129
+ @grammar = Syntax::Grammar.new(productions.dup)
130
+ end
131
+
132
+ @grammar
133
+ end
134
+
135
+ alias rule add_production
136
+
137
+ # When a symbol, say symb, in a rhs is followed by a '*' modifier,
138
+ # then a rule will be generated with a lhs named symb * suffix_plus
139
+ # implicitly called: rule('declaration_star' => 'declaration_star declaration').tag suffix_star_more
140
+ # implicitly called: rule('declaration_star' => '').tag suffix_star_last
141
+ def suffix_qmark
142
+ '_qmark'
143
+ end
144
+
145
+ def suffix_qmark_one
146
+ '_qmark_one'
147
+ end
148
+
149
+ def suffix_qmark_none
150
+ '_qmark_none'
151
+ end
152
+
153
+ # When a symbol, say symb, in a rhs is followed by a '*' modifier,
154
+ # then a rule will be generated with a lhs named symb * suffix_plus
155
+ # implicitly called: rule('declaration_star' => 'declaration_star declaration').tag suffix_star_more
156
+ # implicitly called: rule('declaration_star' => '').tag suffix_star_last
157
+ def suffix_star
158
+ '_star'
159
+ end
160
+
161
+ def suffix_star_more
162
+ '_star_more'
163
+ end
164
+
165
+ def suffix_star_none
166
+ '_star_none'
167
+ end
168
+
169
+ # When a symbol, say symb, in a rhs is followed by a '+' modifier,
170
+ # then a rule will be generated with a lhs named symb + suffix_plus
171
+ # implicitly called: rule('digit_plus' => 'digit_plus digit').tag suffix_plus_more
172
+ # implicitly called: rule('digit_plus' => 'digit').tag suffix_plus_last
173
+ def suffix_plus
174
+ '_plus'
175
+ end
176
+
177
+ def suffix_plus_more
178
+ '_plus_more'
179
+ end
180
+
181
+ def suffix_plus_one
182
+ '_plus_one'
183
+ end
184
+
185
+ def repetition2suffix(aRepetition)
186
+ mapping = {
187
+ zero_or_one: suffix_qmark,
188
+ zero_or_more: suffix_star,
189
+ exactly_one: '',
190
+ one_or_more: suffix_plus
191
+ }
192
+
193
+ mapping[aRepetition]
194
+ end
195
+
196
+ def modifier2suffix(aModifier)
197
+ mapping = {
198
+ '?' => suffix_qmark,
199
+ '*' => suffix_star,
200
+ '+' => suffix_plus
201
+ }
202
+
203
+ mapping[aModifier]
204
+ end
205
+
206
+ ##################################
207
+ # AST visit notification events
208
+ # ################################
209
+ def after_symbol_node(aSymbolNode, aVisitor)
210
+ symb_name = aSymbolNode.name
211
+
212
+ case aSymbolNode.repetition
213
+ when :zero_or_one
214
+ # implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
215
+ # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
216
+ name_modified = "#{symb_name}#{suffix_qmark}"
217
+ unless symbols.include? name_modified
218
+ symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
219
+ rule(name_modified => "#{symb_name}" ).tag suffix_qmark_one
220
+ rule(name_modified => '').tag suffix_qmark_none
221
+ end
222
+ symb_name = name_modified
223
+
224
+ when :zero_or_more
225
+ # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
226
+ # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
227
+ name_modified = "#{symb_name}#{suffix_star}"
228
+ unless symbols.include? name_modified
229
+ symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
230
+ rule(name_modified => "#{name_modified} #{symb_name}").tag suffix_star_more
231
+ rule(name_modified => '').tag suffix_star_none
232
+ end
233
+ symb_name = name_modified
234
+
235
+ when :exactly_one
236
+ # Do nothing
237
+
238
+ when :one_or_more
239
+ name_modified = "#{symb_name}#{suffix_plus}"
240
+ unless symbols.include? name_modified
241
+ symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
242
+ rule(name_modified => "#{name_modified} #{symb_name}").tag suffix_plus_more
243
+ rule(name_modified => symb_name).tag suffix_plus_one
244
+ end
245
+ symb_name = name_modified
246
+ else
247
+ raise StandardError, 'Unhandled multiplicity'
248
+ end
249
+
250
+ symb = get_grm_symbol(symb_name)
251
+ visitor2rhs[aVisitor] << symb
252
+ end
253
+
254
+ def after_sequence_node(aSequenceNode, _visitor)
255
+ aSequenceNode.subnodes.each_with_index do |sn, i|
256
+ next if sn.annotation.empty?
257
+ matching = sn.annotation['match_closest']
258
+ aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
259
+ end
260
+ end
261
+
262
+ def after_grouping_node(aGroupingNode, aVisitor)
263
+ after_sequence_node(aGroupingNode, aVisitor)
264
+ symb_name = sequence_name(aGroupingNode)
265
+
266
+ unless symbols.include?(symb_name) || aGroupingNode.repetition == :exactly_one
267
+ symbols[symb_name] = Syntax::NonTerminal.new(symb_name)
268
+ simple_rule(symb_name => serialize_sequence(aGroupingNode) ).tag 'return_children'
269
+ prod = productions.last
270
+ prod.constraints = aGroupingNode.constraints
271
+ end
272
+ name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"
273
+
274
+ case aGroupingNode.repetition
275
+ when :zero_or_one
276
+ # implicitly called: rule('symb_name_qmark' => 'symb_name_qmark').tag suffix_qmark_one
277
+ # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
278
+ unless symbols.include? name_modified
279
+ symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
280
+ simple_rule(name_modified => symb_name).tag suffix_qmark_one
281
+ simple_rule(name_modified => []).tag suffix_qmark_none
282
+ end
283
+
284
+ when :zero_or_more
285
+ # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
286
+ # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
287
+ unless symbols.include? name_modified
288
+ symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
289
+ rule(name_modified => "#{name_modified} #{symb_name}").tag suffix_star_more
290
+ rule(name_modified => '').tag suffix_star_none
291
+ end
292
+
293
+ when :exactly_one
294
+ # Do nothing
295
+
296
+ when :one_or_more
297
+ unless symbols.include? name_modified
298
+ symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
299
+ rule(name_modified => "#{name_modified} #{symb_name}").tag suffix_plus_more
300
+ rule(name_modified => symb_name).tag suffix_plus_one
301
+ end
302
+ else
303
+ raise StandardError, 'Unhandled multiplicity'
304
+ end
305
+
306
+ unless aGroupingNode.repetition == :exactly_one
307
+ symb = get_grm_symbol(name_modified)
308
+ visitor2rhs[aVisitor] << symb
309
+ end
310
+ end
311
+
312
+ private
313
+
314
+ def simple_rule(aProductionRepr)
315
+ aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
316
+ lhs = get_grm_symbol(lhs_name)
317
+
318
+ if rhs_repr.kind_of?(String)
319
+ rhs = rhs_repr.strip.scan(/\S+/)
320
+ else
321
+ rhs = rhs_repr
322
+ end
323
+
324
+ members = rhs.map do |name|
325
+ if name.end_with?('?', '*', '+')
326
+ modifier = name[-1]
327
+ suffix = modifier2suffix(aModifier)
328
+ get_grm_symbol("#{name.chop}#{suffix}")
329
+ else
330
+ get_grm_symbol(name)
331
+ end
332
+ end
333
+ new_prod = Syntax::Production.new(lhs, members)
334
+ productions << new_prod
335
+ end
336
+
337
+ productions.last
338
+ end
339
+
340
+ # Add the given grammar symbols.
341
+ # @param aClass [Class] The class of grammar symbols to instantiate.
342
+ # @param theSymbols [Array] array of elements are treated as follows:
343
+ # if the element is already a grammar symbol, then it added as is,
344
+ # otherwise it is considered as the name of a grammar symbol
345
+ # of the specified class to build.
346
+ def build_symbols(aClass, theSymbols)
347
+ symbs = {}
348
+ theSymbols.each do |s|
349
+ new_symbol = build_symbol(aClass, s)
350
+ symbs[new_symbol.name] = new_symbol
351
+ end
352
+
353
+ symbs
354
+ end
355
+
356
+ # If the argument is already a grammar symbol object then it is
357
+ # returned as is. Otherwise, the argument is treated as a name
358
+ # for a new instance of the given class.
359
+ # @param aClass [Class] The class of grammar symbols to instantiate
360
+ # @param aSymbolArg [GrmSymbol-like or String]
361
+ # @return [Array] list of grammar symbols
362
+ def build_symbol(aClass, aSymbolArg)
363
+ if aSymbolArg.kind_of?(Syntax::GrmSymbol)
364
+ aSymbolArg
365
+ else
366
+ aClass.new(aSymbolArg)
367
+ end
368
+ end
369
+
370
+ # Retrieve the non-terminal symbol with given name.
371
+ # If it doesn't exist yet, then it is created on the fly.
372
+ # @param aSymbolName [String] the name of the grammar symbol to retrieve
373
+ # @return [NonTerminal]
374
+ def get_grm_symbol(aSymbolName)
375
+ unless aSymbolName.end_with?('+') && aSymbolName.length > 1
376
+ name = aSymbolName
377
+ else
378
+ name = aSymbolName.chop
379
+ case aSymbolName[-1]
380
+ when '+'
381
+ name_modified = "#{name}#{suffix_plus}"
382
+ unless symbols.include? name_modified
383
+ symbols[name_modified] = NonTerminal.new(name_modified)
384
+ rule(name_modified => [name_modified, name]).as suffix_plus_more
385
+ rule(name_modified => name).as suffix_plus_last
386
+ end
387
+ name = name_modified
388
+ else
389
+ err_msg = "Unknown symbol modifier #{aSymbolName[-1]}"
390
+ raise NotImplementedError, err_msg
391
+ end
392
+ end
393
+
394
+ symbols[name] = Syntax::NonTerminal.new(name) unless symbols.include? name
395
+
396
+ symbols[name]
397
+ end
398
+
399
+ def sequence_name(aSequenceNode)
400
+ subnode_names = +''
401
+ aSequenceNode.subnodes.each do |subn|
402
+ case subn
403
+ when SymbolNode
404
+ subnode_names << "_#{subn.name}"
405
+ when SequenceNode
406
+ subnode_names << "_#{sequence_name(subn)}"
407
+ end
408
+ suffix = repetition2suffix(subn.repetition)
409
+ subnode_names << suffix
410
+ end
411
+
412
+ "seq#{subnode_names}"
413
+ end
414
+
415
+ def node_base_name(aNode)
416
+ if aNode.kind_of?(SymbolNode)
417
+ aNode.name
418
+ else
419
+ sequence_name(aNode)
420
+ end
421
+ end
422
+
423
+ def node_decorated_name(aNdoe)
424
+ base_name = node_base_name(aNode)
425
+ suffix = repetition2suffix(aNode.repetition)
426
+
427
+ "#{base_name}#{suffix}"
428
+ end
429
+
430
+ def serialize_sequence(aSequenceNode)
431
+ text = +''
432
+ aSequenceNode.subnodes.each do |sn|
433
+ text << ' '
434
+ case sn
435
+ when SymbolNode
436
+ text << sn.name
437
+ when SequenceNode
438
+ text << sequence_name(sn)
439
+ end
440
+
441
+ suffix = suffix = repetition2suffix(sn.repetition)
442
+ text << suffix
443
+ end
444
+
445
+ text.strip
446
+ end
447
+ end # class
448
+ end # module
449
+ end # module
450
+
451
+ # End of file