rley 0.7.06 → 0.8.01

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +362 -62
  3. data/.travis.yml +6 -6
  4. data/CHANGELOG.md +20 -4
  5. data/LICENSE.txt +1 -1
  6. data/README.md +7 -7
  7. data/examples/NLP/engtagger.rb +193 -190
  8. data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
  9. data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
  10. data/examples/NLP/pico_en_demo.rb +2 -2
  11. data/examples/data_formats/JSON/cli_options.rb +1 -1
  12. data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
  13. data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
  14. data/examples/data_formats/JSON/json_demo.rb +1 -2
  15. data/examples/data_formats/JSON/json_grammar.rb +13 -13
  16. data/examples/data_formats/JSON/json_lexer.rb +8 -8
  17. data/examples/data_formats/JSON/json_minifier.rb +1 -1
  18. data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
  19. data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
  20. data/examples/general/calc_iter1/calc_grammar.rb +7 -6
  21. data/examples/general/calc_iter1/calc_lexer.rb +6 -4
  22. data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
  23. data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
  24. data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
  25. data/examples/general/calc_iter2/calc_grammar.rb +12 -12
  26. data/examples/general/calc_iter2/calc_lexer.rb +11 -10
  27. data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
  28. data/examples/general/left.rb +2 -2
  29. data/examples/general/right.rb +2 -2
  30. data/lib/rley.rb +1 -1
  31. data/lib/rley/base/dotted_item.rb +28 -31
  32. data/lib/rley/base/grm_items_builder.rb +6 -0
  33. data/lib/rley/constants.rb +2 -2
  34. data/lib/rley/engine.rb +22 -25
  35. data/lib/rley/formatter/asciitree.rb +3 -3
  36. data/lib/rley/formatter/bracket_notation.rb +1 -8
  37. data/lib/rley/formatter/debug.rb +6 -6
  38. data/lib/rley/formatter/json.rb +2 -2
  39. data/lib/rley/gfg/call_edge.rb +1 -1
  40. data/lib/rley/gfg/edge.rb +5 -5
  41. data/lib/rley/gfg/end_vertex.rb +2 -6
  42. data/lib/rley/gfg/epsilon_edge.rb +1 -5
  43. data/lib/rley/gfg/grm_flow_graph.rb +27 -23
  44. data/lib/rley/gfg/item_vertex.rb +10 -10
  45. data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
  46. data/lib/rley/gfg/scan_edge.rb +1 -1
  47. data/lib/rley/gfg/shortcut_edge.rb +2 -2
  48. data/lib/rley/gfg/start_vertex.rb +4 -8
  49. data/lib/rley/gfg/vertex.rb +43 -39
  50. data/lib/rley/interface.rb +16 -0
  51. data/lib/rley/lexical/token_range.rb +6 -6
  52. data/lib/rley/notation/all_notation_nodes.rb +2 -0
  53. data/lib/rley/notation/ast_builder.rb +191 -0
  54. data/lib/rley/notation/ast_node.rb +44 -0
  55. data/lib/rley/notation/ast_visitor.rb +113 -0
  56. data/lib/rley/notation/grammar.rb +49 -0
  57. data/lib/rley/notation/grammar_builder.rb +504 -0
  58. data/lib/rley/notation/grouping_node.rb +23 -0
  59. data/lib/rley/notation/parser.rb +56 -0
  60. data/lib/rley/notation/sequence_node.rb +35 -0
  61. data/lib/rley/notation/symbol_node.rb +29 -0
  62. data/lib/rley/notation/tokenizer.rb +192 -0
  63. data/lib/rley/parse_forest_visitor.rb +5 -5
  64. data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
  65. data/lib/rley/parse_rep/cst_builder.rb +5 -6
  66. data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
  67. data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
  68. data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
  69. data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
  70. data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
  71. data/lib/rley/parse_tree_visitor.rb +1 -1
  72. data/lib/rley/parser/error_reason.rb +4 -5
  73. data/lib/rley/parser/gfg_chart.rb +118 -26
  74. data/lib/rley/parser/gfg_parsing.rb +22 -33
  75. data/lib/rley/parser/parse_entry.rb +25 -31
  76. data/lib/rley/parser/parse_entry_set.rb +19 -16
  77. data/lib/rley/parser/parse_entry_tracker.rb +4 -4
  78. data/lib/rley/parser/parse_tracer.rb +13 -13
  79. data/lib/rley/parser/parse_walker_factory.rb +23 -28
  80. data/lib/rley/ptree/non_terminal_node.rb +7 -5
  81. data/lib/rley/ptree/parse_tree.rb +3 -3
  82. data/lib/rley/ptree/parse_tree_node.rb +5 -5
  83. data/lib/rley/ptree/terminal_node.rb +7 -7
  84. data/lib/rley/rley_error.rb +12 -12
  85. data/lib/rley/sppf/alternative_node.rb +6 -6
  86. data/lib/rley/sppf/composite_node.rb +7 -7
  87. data/lib/rley/sppf/epsilon_node.rb +3 -3
  88. data/lib/rley/sppf/leaf_node.rb +3 -3
  89. data/lib/rley/sppf/parse_forest.rb +16 -16
  90. data/lib/rley/sppf/sppf_node.rb +7 -8
  91. data/lib/rley/sppf/token_node.rb +3 -3
  92. data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
  93. data/lib/rley/syntax/grammar.rb +5 -5
  94. data/lib/rley/syntax/grm_symbol.rb +7 -7
  95. data/lib/rley/syntax/match_closest.rb +43 -0
  96. data/lib/rley/syntax/non_terminal.rb +9 -15
  97. data/lib/rley/syntax/production.rb +16 -10
  98. data/lib/rley/syntax/symbol_seq.rb +7 -9
  99. data/lib/rley/syntax/terminal.rb +4 -5
  100. data/lib/rley/syntax/verbatim_symbol.rb +3 -3
  101. data/lib/support/base_tokenizer.rb +19 -18
  102. data/spec/rley/base/dotted_item_spec.rb +2 -2
  103. data/spec/rley/engine_spec.rb +23 -21
  104. data/spec/rley/formatter/asciitree_spec.rb +7 -7
  105. data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
  106. data/spec/rley/formatter/json_spec.rb +1 -1
  107. data/spec/rley/gfg/end_vertex_spec.rb +5 -5
  108. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  109. data/spec/rley/gfg/item_vertex_spec.rb +10 -10
  110. data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
  111. data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
  112. data/spec/rley/gfg/start_vertex_spec.rb +5 -5
  113. data/spec/rley/gfg/vertex_spec.rb +3 -3
  114. data/spec/rley/lexical/token_range_spec.rb +16 -16
  115. data/spec/rley/lexical/token_spec.rb +2 -2
  116. data/spec/rley/notation/grammar_builder_spec.rb +302 -0
  117. data/spec/rley/notation/parser_spec.rb +184 -0
  118. data/spec/rley/notation/tokenizer_spec.rb +370 -0
  119. data/spec/rley/parse_forest_visitor_spec.rb +165 -163
  120. data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
  121. data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
  122. data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
  123. data/spec/rley/parse_rep/groucho_spec.rb +24 -26
  124. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
  125. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
  126. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
  127. data/spec/rley/parse_tree_visitor_spec.rb +10 -8
  128. data/spec/rley/parser/dangling_else_spec.rb +445 -0
  129. data/spec/rley/parser/error_reason_spec.rb +6 -6
  130. data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
  131. data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
  132. data/spec/rley/parser/parse_entry_spec.rb +19 -19
  133. data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
  134. data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
  135. data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
  136. data/spec/rley/ptree/terminal_node_spec.rb +6 -6
  137. data/spec/rley/sppf/alternative_node_spec.rb +6 -6
  138. data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
  139. data/spec/rley/sppf/token_node_spec.rb +4 -4
  140. data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
  141. data/spec/rley/support/grammar_abc_helper.rb +3 -5
  142. data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
  143. data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
  144. data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
  145. data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
  146. data/spec/rley/support/grammar_l0_helper.rb +14 -17
  147. data/spec/rley/support/grammar_pb_helper.rb +8 -7
  148. data/spec/rley/support/grammar_sppf_helper.rb +3 -3
  149. data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
  150. data/spec/rley/syntax/grammar_spec.rb +6 -6
  151. data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
  152. data/spec/rley/syntax/match_closest_spec.rb +46 -0
  153. data/spec/rley/syntax/non_terminal_spec.rb +8 -8
  154. data/spec/rley/syntax/production_spec.rb +17 -13
  155. data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
  156. data/spec/rley/syntax/terminal_spec.rb +5 -5
  157. data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
  158. data/spec/spec_helper.rb +0 -12
  159. data/spec/support/base_tokenizer_spec.rb +7 -2
  160. metadata +48 -74
  161. data/.simplecov +0 -7
  162. data/lib/rley/parser/parse_state.rb +0 -83
  163. data/lib/rley/parser/parse_state_tracker.rb +0 -59
  164. data/lib/rley/parser/state_set.rb +0 -101
  165. data/spec/rley/parser/parse_state_spec.rb +0 -125
  166. data/spec/rley/parser/parse_tracer_spec.rb +0 -200
  167. data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Rley
  module Notation
    # Abstract class.
    # Instances of its subclasses represent nodes of an abstract syntax tree
    # that is the product of the parse of an input text.
    class ASTNode
      # @return [Rley::Lexical::Position] Position of the entry in the input stream.
      attr_reader :position

      # @return [Symbol] Multiplicity of the node; `:exactly_one` by default.
      attr_accessor :repetition

      # @return [Hash] Annotation entries of the node (without the 'repeat' entry).
      attr_reader :annotation

      # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
      def initialize(aPosition)
        @position = aPosition
        @repetition = :exactly_one
        @annotation = {}
      end

      # Assign the annotation of the node.
      # When the mapping has a 'repeat' entry, its value becomes the node's
      # repetition and the entry is left out of the stored annotation.
      # The mapping passed by the caller is not modified: a copy is stored.
      # @param aMapping [Hash] the annotation entries
      def annotation=(aMapping)
        repeat_key = 'repeat'
        mapping = aMapping.dup
        @repetition = mapping.delete(repeat_key) if mapping.include?(repeat_key)
        @annotation = mapping
      end

      # Notification that the parsing has successfully completed
      def done!
        # Default: do nothing ...
      end

      # Abstract method (must be overridden in subclasses).
      # Part of the 'visitee' role in Visitor design pattern.
      # @param _visitor [Notation::ASTVisitor] the visitor
      # @raise [NotImplementedError] always, in this base class
      def accept(_visitor)
        raise NotImplementedError, "#{self.class}#accept is not implemented"
      end
    end # class
  end # module
end # module
@@ -0,0 +1,113 @@
1
# frozen_string_literal: true

module Rley
  module Notation
    # Visitor (in the sense of the Visitor design pattern) that walks over
    # the nodes reachable from a given top object and notifies its
    # subscribers of every step of the walk.
    class ASTVisitor
      # Link to the top node to visit
      attr_reader(:top)

      # List of objects that subscribed to the visit event notification.
      attr_reader(:subscribers)

      # Build a visitor for the given top.
      # @param aTop [#accept] the object to visit; it must accept a visitor.
      # @raise [ArgumentError] when aTop is nil
      def initialize(aTop)
        # ArgumentError is a StandardError: existing rescuers still work.
        raise ArgumentError, 'Cannot build a visitor for a nil top' if aTop.nil?

        @top = aTop
        @subscribers = []
      end

      # Add a subscriber for the visit event notifications.
      # @param aSubscriber [Object]
      def subscribe(aSubscriber)
        subscribers << aSubscriber
      end

      # Remove the given object from the subscription list.
      # The object won't be notified of visit events.
      # @param aSubscriber [Object]
      def unsubscribe(aSubscriber)
        subscribers.delete_if { |entry| entry == aSubscriber }
      end

      # The signal to begin the visit of the top.
      def start
        top.accept(self)
      end

      # Visit event. The visitor is about to visit the ptree.
      # @param aParseTree [Rley::PTree::ParseTree] the ptree to visit.
      def start_visit_ptree(aParseTree)
        broadcast(:before_ptree, aParseTree)
      end

      # Visit event. The visitor has completed the visit of the ptree.
      # @param aParseTree [Rley::PTree::ParseTree] the visited ptree.
      def end_visit_ptree(aParseTree)
        broadcast(:after_ptree, aParseTree)
      end

      # Visit event. The visitor is visiting a symbol node.
      # A symbol node has no subnode to traverse: both notifications are
      # sent back to back.
      # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
      def visit_symbol_node(aSymbolNode)
        broadcast(:before_symbol_node, aSymbolNode, self)
        broadcast(:after_symbol_node, aSymbolNode, self)
      end

      # Visit event. The visitor is about to visit a sequence node.
      # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
      def visit_sequence_node(aSequenceNode)
        broadcast(:before_sequence_node, aSequenceNode, self)
        traverse_subnodes(aSequenceNode)
        broadcast(:after_sequence_node, aSequenceNode, self)
      end

      # Visit event. The visitor is about to visit a grouping node.
      # The subnodes are traversed only when the grouping occurs exactly once.
      # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
      def visit_grouping_node(aGroupingNode)
        broadcast(:before_grouping_node, aGroupingNode, self)
        traverse_subnodes(aGroupingNode) if aGroupingNode.repetition == :exactly_one
        broadcast(:after_grouping_node, aGroupingNode, self)
      end

      private

      # Visit event. The visitor is about to visit the subnodes of a
      # composite node.
      # @param aParentNode [#subnodes] the parent node.
      def traverse_subnodes(aParentNode)
        subnodes = aParentNode.subnodes
        broadcast(:before_subnodes, aParentNode, subnodes)

        # Let's proceed with the visit of subnodes
        subnodes.each { |a_node| a_node.accept(self) }

        broadcast(:after_subnodes, aParentNode, subnodes)
      end

      # Visit event. The visitor is about to visit one given subnode of a
      # composite node.
      # @param aParentNode [#subnodes] the parent node.
      # @param index [Integer] index of child subnode
      def traverse_given_subnode(aParentNode, index)
        subnode = aParentNode.subnodes[index]
        broadcast(:before_given_subnode, aParentNode, subnode)

        # Now, let's proceed with the visit of that subnode
        subnode.accept(self)

        broadcast(:after_given_subnode, aParentNode, subnode)
      end

      # Send a notification to all subscribers.
      # A subscriber is skipped when it responds neither to the event
      # nor to `accept_all`.
      # @param msg [Symbol] event to notify
      # @param args [Array] arguments of the notification.
      def broadcast(msg, *args)
        subscribers.each do |subscr|
          next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)

          subscr.send(msg, *args)
        end
      end
    end # class
  end # module
end # module
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../syntax/base_grammar_builder'
4
+
5
+ module Rley
6
+ module Notation
7
+ ########################################
8
+ # Grammar of the notation accepted in the right-hand side of production rules
9
+ builder = Rley::Syntax::BaseGrammarBuilder.new do
10
+ add_terminals('LEFT_PAREN', 'RIGHT_PAREN') # For '(', ')' grouping delimiters
11
+ add_terminals('LEFT_BRACE', 'RIGHT_BRACE') # For '{', '}' annotation delimiters
12
+ add_terminals('QUESTION_MARK', 'STAR', 'PLUS') # For postfix quantifiers
13
+ add_terminals('COMMA', 'ELLIPSIS') # Separator of key-value pairs, range operator
14
+
15
+ add_terminals('STR_LIT') # For string literal values
16
+ add_terminals('INT_LIT') # For integer literal values
17
+ add_terminals('SYMBOL') # Grammar symbols
18
+ add_terminals('KEY') # Key of a key-value pair inside an annotation
19
+ # Production rules of the notation; most of them are tagged
20
+ rule('notation' => 'rhs')
21
+ rule('rhs' => 'member_seq').tag 'sequence'
22
+ rule('rhs' => [])
23
+ rule('member_seq' => 'member_seq member').tag 'more_members'
24
+ rule('member_seq' => 'member').tag 'one_member'
25
+ rule('member' => 'strait_member')
26
+ rule('member' => 'quantified_member')
27
+ rule('strait_member' => 'base_member')
28
+ rule('strait_member' => 'base_member annotation').tag 'annotated_member'
29
+ rule('base_member' => 'SYMBOL').tag 'symbol'
30
+ rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN').tag 'grouping'
31
+ rule('quantified_member' => 'base_member quantifier').tag 'quantified_member'
32
+ rule('quantifier' => 'QUESTION_MARK').tag 'question_mark'
33
+ rule('quantifier' => 'STAR').tag 'star'
34
+ rule('quantifier' => 'PLUS').tag 'plus'
35
+ rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag 'annotation'
36
+ rule('mapping' => 'mapping COMMA key_value').tag 'more_pairs'
37
+ rule('mapping' => 'key_value').tag 'one_pair'
38
+ rule('key_value' => 'KEY value').tag 'raw_pair'
39
+ rule('value' => 'STR_LIT')
40
+ rule('value' => 'INT_LIT')
41
+ rule('value' => 'range')
42
+ rule('range' => 'INT_LIT ELLIPSIS INT_LIT').tag 'bound_range'
43
+ rule('range' => 'INT_LIT ELLIPSIS') # Range without upper bound
44
+ end
45
+
46
+ # And now build the Rley Grammar Notation (RGN) grammar...
47
+ RGNGrammar = builder.grammar
48
+ end # module
49
+ end # module
@@ -0,0 +1,504 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
+ require_relative 'parser'
6
+ require_relative 'ast_visitor'
7
+ require_relative '../syntax/match_closest'
8
+
9
+ module Rley # This module is used as a namespace
10
+ module Notation # This module is used as a namespace
11
+ # Structure used for production rules that are implicitly generated by Rley: lhs name, rhs, rule tag, flag selecting the simplified rule creation, constraints
12
+ RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
13
+
14
+ # Builder GoF pattern. Builder builds a complex object
15
+ # (say, a grammar) from simpler objects (terminals and productions)
16
+ # and using a step by step approach.
17
+ class GrammarBuilder
18
+ # @return [Hash{String, GrmSymbol}] The mapping of grammar symbol names
19
+ # to the matching grammar symbol object.
20
+ attr_reader(:symbols)
21
+
22
+ # @return [Notation::Parser] Parser for the right-side of productions
23
+ attr_reader(:parser)
24
+
25
+ # @return [Hash{ASTVisitor, Array}]
26
+ attr_reader(:visitor2rhs)
27
+
28
+ # @return [Array<Production>] The list of production rules for
29
+ # the grammar to build.
30
+ attr_reader(:productions)
31
+
32
+ # @return [Hash{String => Array<RawRule>}] The synthesized raw rules, grouped by the name of their lhs
33
+ attr_reader(:synthetized)
34
+
35
# Build a new, empty grammar builder.
# When a block is given, it is evaluated in the context of the new
# builder and `grammar_complete!` is called right after it.
# @param aBlock [Proc] code block used to build the grammar.
# @example Building a tiny English grammar
#   builder = Rley::Notation::GrammarBuilder.new do
#     add_terminals('n', 'v', 'adj', 'det')
#     rule 'S' => 'NP VP'
#     rule 'VP' => 'v NP'
#     rule 'NP' => 'det n'
#     rule 'NP' => 'adj NP'
#   end
#   tiny_eng = builder.grammar
def initialize(&aBlock)
  @symbols = {}
  @productions = []
  @parser = Notation::Parser.new
  @visitor2rhs = {}
  @synthetized = {}
  return unless block_given?

  instance_exec(&aBlock)
  grammar_complete!
end
58
+
59
# Look up a grammar symbol by its name.
# @param aSymbolName [String] the name of a grammar symbol.
# @return [GrmSymbol, nil] the symbol registered under that name,
#   or nil when the builder has no symbol with that name.
def [](aSymbolName)
  symbols[aSymbolName]
end
66
+
67
# Register the given terminal symbols in the builder.
# @param terminalSymbols [String or Terminal] 1..* terminal symbols.
# @return [Hash] the updated name-to-symbol mapping of the builder
def add_terminals(*terminalSymbols)
  symbols.merge!(build_symbols(Syntax::Terminal, terminalSymbols))
end
74
+
75
# Register the given marker symbol in the builder.
# NOTE(review): Syntax::Marker is not defined by any file required here;
# confirm that it is loaded before this method gets called.
# @param aMarkerSymbol [String] A marker symbol
# @return [Object] the marker symbol that was registered
def add_marker(aMarkerSymbol)
  build_symbol(Syntax::Marker, aMarkerSymbol).tap do |marker|
    symbols[marker.name] = marker
  end
end
82
+
83
# Add production rule(s) to the grammar given key-value pair(s)
# of the form: String => String.
# Where the key is the name of the non-terminal appearing in the
# left side of the rule.
# The value is a sequence of grammar symbol names (optionally quantified).
# The value may also be an Array of such names: an empty Array stands
# for an empty right-hand side, otherwise the names are joined with
# spaces before being parsed.
# Each rule is created and appended to the productions of the builder.
# @example Equivalent call syntax
#   builder.add_production('A' => 'a A c')
#   builder.rule('A' => 'a A c') # 'rule' is a synonym
# @param aProductionRepr [Hash{String => String, Array<String>}]
#   A Hash-based representation of a production.
# @return [Production] The last Production in the builder's list
def add_production(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    # An Array is a list of member names: turn it into notation text
    # (previously a non-empty Array failed on the call to `strip`).
    rhs_text = rhs_repr.kind_of?(Array) ? rhs_repr.join(' ') : rhs_repr
    rhs = rhs_text.strip
    constraints = []
    if rhs.empty?
      rhs_members = []
    else
      ast = parser.parse(rhs)
      visitor = ASTVisitor.new(ast)
      # The visit events received by self fill this array with the rhs symbols
      visitor2rhs[visitor] = []
      visitor.subscribe(self)
      visitor.start
      root_node = ast.root
      constraints = root_node.constraints unless root_node.kind_of?(SymbolNode)

      rhs_members = visitor2rhs.delete(visitor)
    end
    new_prod = Syntax::Production.new(lhs, rhs_members)
    new_prod.constraints = constraints
    productions << new_prod
  end

  productions.last
end
120
+
121
# Build (once) and return the grammar made of the productions added
# to the builder. Subsequent calls return the same grammar object.
# @return [Grammar] the created grammar object.
# @raise [StandardError] when the builder has no symbol, no production,
#   or when a terminal doesn't occur in the rhs of any production.
def grammar
  return @grammar if @grammar

  raise StandardError, 'No symbol found for grammar' if symbols.empty?
  raise StandardError, 'No production found for grammar' if productions.empty?

  # Check that each terminal appears at least in a rhs of a production
  declared = symbols.values.grep(Syntax::Terminal)
  referenced = Set.new(
    productions.flat_map { |prod| prod.rhs.members }.grep(Syntax::Terminal)
  )
  useless = declared.reject { |terminal| referenced.include?(terminal) }
  unless useless.empty?
    names = useless.map(&:name).join(', ')
    raise StandardError, "Useless terminal symbol(s): #{names}."
  end

  @grammar = Syntax::Grammar.new(productions.dup)
end
153
+
154
+ alias rule add_production
155
+
156
# When a symbol, say symb, in a rhs is followed by a '?' modifier,
# then rules are generated with a lhs named symb + suffix_qmark.
# One generated rule is tagged with suffix_qmark_one,
# the other (empty rhs) is tagged with suffix_qmark_none.
# @return [String]
def suffix_qmark
  '_qmark'
end

# @return [String] tag of the generated rule with one occurrence
def suffix_qmark_one
  '_qmark_one'
end

# @return [String] tag of the generated rule with an empty rhs
def suffix_qmark_none
  '_qmark_none'
end
171
+
172
# When a symbol, say symb, in a rhs is followed by a '*' modifier,
# then rules are generated with a lhs named symb + suffix_star.
# The left-recursive generated rule is tagged with suffix_star_more,
# the other (empty rhs) is tagged with suffix_star_none.
# @return [String]
def suffix_star
  '_star'
end

# @return [String] tag of the generated left-recursive rule
def suffix_star_more
  '_star_more'
end

# @return [String] tag of the generated rule with an empty rhs
def suffix_star_none
  '_star_none'
end
187
+
188
# When a symbol, say symb, in a rhs is followed by a '+' modifier,
# then rules are generated with a lhs named symb + suffix_plus.
# The left-recursive generated rule is tagged with suffix_plus_more,
# the other (single occurrence) is tagged with suffix_plus_one.
# @return [String]
def suffix_plus
  '_plus'
end

# @return [String] tag of the generated left-recursive rule
def suffix_plus_more
  '_plus_more'
end

# @return [String] tag of the generated rule with a single occurrence
def suffix_plus_one
  '_plus_one'
end
203
+
204
# Convert a repetition into the suffix of the names generated for it.
# @param aRepetition [Symbol] one of: :zero_or_one, :zero_or_more,
#   :exactly_one, :one_or_more
# @return [String, nil] the suffix ('' for :exactly_one);
#   nil for any other argument
def repetition2suffix(aRepetition)
  case aRepetition
  when :zero_or_one then suffix_qmark
  when :zero_or_more then suffix_star
  when :exactly_one then ''
  when :one_or_more then suffix_plus
  end
end
214
+
215
# Convert a postfix modifier into the suffix of the names generated for it.
# @param aModifier [String] one of: '?', '*', '+'
# @return [String, nil] the matching suffix; nil for any other argument
def modifier2suffix(aModifier)
  case aModifier
  when '?' then suffix_qmark
  when '*' then suffix_star
  when '+' then suffix_plus
  end
end
224
+
225
+ ##################################
226
+ # RGN's AST visit notification events
227
+ # ################################
228
# Visit notification: a symbol node was visited.
# For a quantified symbol, a non-terminal named after the symbol and its
# quantifier is registered (unless already known) together with two raw
# rules; that non-terminal then stands for the symbol.
# The resulting grammar symbol is appended to the rhs under construction
# for the given visitor.
# @param aSymbolNode [Notation::SymbolNode] the visited node
# @param aVisitor [Notation::ASTVisitor] the visitor that sent the event
# @raise [StandardError] when the repetition of the node is unknown
def after_symbol_node(aSymbolNode, aVisitor)
  base_name = aSymbolNode.name

  final_name =
    case aSymbolNode.repetition
    when :exactly_one
      base_name

    when :zero_or_one
      # Raw rules: 'base_qmark' => 'base' and 'base_qmark' => ''
      optional = "#{base_name}#{suffix_qmark}"
      unless symbols.include?(optional)
        add_nonterminal(optional)
        add_raw_rule(optional, base_name.to_s, suffix_qmark_one)
        add_raw_rule(optional, '', suffix_qmark_none)
      end
      optional

    when :zero_or_more
      # Raw rules: 'base_star' => 'base_star base' and 'base_star' => []
      repeated = "#{base_name}#{suffix_star}"
      unless symbols.include?(repeated)
        add_nonterminal(repeated)
        add_raw_rule(repeated, "#{repeated} #{base_name}", suffix_star_more)
        add_raw_rule(repeated, [], suffix_star_none)
      end
      repeated

    when :one_or_more
      # Raw rules: 'base_plus' => 'base_plus base' and 'base_plus' => 'base'
      repeated = "#{base_name}#{suffix_plus}"
      unless symbols.include?(repeated)
        add_nonterminal(repeated)
        add_raw_rule(repeated, "#{repeated} #{base_name}", suffix_plus_more)
        add_raw_rule(repeated, base_name, suffix_plus_one)
      end
      repeated

    else
      raise StandardError, 'Unhandled multiplicity'
    end

  visitor2rhs[aVisitor] << get_grm_symbol(final_name)
end
272
+
273
# Visit notification: a sequence node was visited.
# For every subnode with a non-empty annotation, a MatchClosest
# constraint is appended to the constraints of the sequence node.
# NOTE(review): an annotation without a 'match_closest' entry yields a
# constraint built with nil - confirm this is intended.
# @param aSequenceNode [Notation::SequenceNode] the visited node
# @param _visitor [Notation::ASTVisitor] the visitor that sent the event
def after_sequence_node(aSequenceNode, _visitor)
  aSequenceNode.subnodes.each.with_index do |subnode, index|
    annotation = subnode.annotation
    unless annotation.empty?
      closest = annotation['match_closest']
      aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, index, closest)
    end
  end
end
280
+
281
# Visit notification: a grouping node was visited.
# The sequence-level processing is applied first. Then, for a quantified
# grouping:
# - a non-terminal standing for the grouped sequence is registered
#   (unless already known) with a raw rule tagged 'return_children';
# - a non-terminal standing for the quantified grouping is registered
#   (unless already known) with its two raw rules;
# - the latter symbol is appended to the rhs under construction for
#   the given visitor.
# Nothing else is done for a grouping that occurs exactly once.
# @param aGroupingNode [Notation::GroupingNode] the visited node
# @param aVisitor [Notation::ASTVisitor] the visitor that sent the event
# @raise [StandardError] when the repetition of the node is unknown
def after_grouping_node(aGroupingNode, aVisitor)
  after_sequence_node(aGroupingNode, aVisitor)
  group_name = sequence_name(aGroupingNode)
  repetition = aGroupingNode.repetition
  quantified = repetition != :exactly_one

  if quantified && !symbols.include?(group_name)
    add_nonterminal(group_name)
    body = serialize_sequence(aGroupingNode)
    add_raw_rule(group_name, body, 'return_children', true, aGroupingNode.constraints)
  end
  return unless quantified

  wrapper_name = "#{group_name}#{repetition2suffix(repetition)}"
  looping_rhs = "#{wrapper_name} #{group_name}"

  # Arguments (after the lhs name) of the two raw rules to generate
  wrapper_rules =
    case repetition
    when :zero_or_one
      [[group_name, suffix_qmark_one, true], [[], suffix_qmark_none, true]]
    when :zero_or_more
      [[looping_rhs, suffix_star_more], ['', suffix_star_none]]
    when :one_or_more
      [[looping_rhs, suffix_plus_more], [group_name, suffix_plus_one]]
    else
      raise StandardError, 'Unhandled multiplicity'
    end

  unless symbols.include?(wrapper_name)
    add_nonterminal(wrapper_name)
    wrapper_rules.each { |rule_args| add_raw_rule(wrapper_name, *rule_args) }
  end

  visitor2rhs[aVisitor] << get_grm_symbol(wrapper_name)
end
329
+
330
# A notification to the builder object that the programmer
# has completed the entry of terminals and production rules.
# The pending raw rules are then turned into productions.
def grammar_complete!
  process_raw_rules
end
335
+
336
+ private
337
+
338
# Register a new non-terminal symbol under the given name.
# Any symbol previously registered under that name is replaced.
# @param aName [String] name of the non-terminal
# @return [NonTerminal] the created symbol
def add_nonterminal(aName)
  symbols[aName] = Syntax::NonTerminal.new(aName)
end
341
+
342
# Add production rule(s) without parsing the right-hand side with the
# notation parser: the rhs is merely a list of symbol names.
# A name ending with '?', '*' or '+' is replaced by the name built from
# the name without its last character and the suffix of that modifier.
# @param aProductionRepr [Hash{String => String, Array<String>}]
#   lhs name => rhs; a String rhs is split on whitespace.
# @return [Production] The last Production in the builder's list
def simple_rule(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    names = rhs_repr.kind_of?(String) ? rhs_repr.strip.scan(/\S+/) : rhs_repr

    members = names.map do |name|
      if name.end_with?('?', '*', '+')
        # The last character is the modifier (the original code looked up
        # an undefined variable here and raised a NameError).
        suffix = modifier2suffix(name[-1])
        get_grm_symbol("#{name.chop}#{suffix}")
      else
        get_grm_symbol(name)
      end
    end
    productions << Syntax::Production.new(lhs, members)
  end

  productions.last
end
367
+
368
# Build a name-to-symbol mapping from the given elements.
# @param aClass [Class] The class of grammar symbols to instantiate.
# @param theSymbols [Array] array of elements are treated as follows:
#   if the element is already a grammar symbol, then it added as is,
#   otherwise it is considered as the name of a grammar symbol
#   of the specified class to build.
# @return [Hash] mapping: symbol name => grammar symbol
def build_symbols(aClass, theSymbols)
  theSymbols.each_with_object({}) do |element, mapping|
    grm_symbol = build_symbol(aClass, element)
    mapping[grm_symbol.name] = grm_symbol
  end
end
383
+
384
# Return the grammar symbol designated by the argument.
# An argument that is already a grammar symbol is returned as is.
# Otherwise, the argument is treated as a name
# for a new instance of the given class.
# @param aClass [Class] The class of grammar symbols to instantiate
# @param aSymbolArg [GrmSymbol-like or String]
# @return [GrmSymbol] the given or the newly created grammar symbol
def build_symbol(aClass, aSymbolArg)
  return aSymbolArg if aSymbolArg.kind_of?(Syntax::GrmSymbol)

  aClass.new(aSymbolArg)
end
397
+
398
# Retrieve the grammar symbol with given name.
# If it doesn't exist yet, then a non-terminal is created on the fly.
# A name longer than one character and ending with '+' designates the
# 'one or more' repetition of the symbol named before the '+': the
# returned symbol is then the non-terminal standing for that repetition.
# On its first use, its two production rules are added as well.
# @param aSymbolName [String] the name of the grammar symbol to retrieve
# @return [GrmSymbol] the retrieved or created grammar symbol
def get_grm_symbol(aSymbolName)
  name = aSymbolName

  if aSymbolName.length > 1 && aSymbolName.end_with?('+')
    base_name = aSymbolName.chop
    name = "#{base_name}#{suffix_plus}"
    unless symbols.include?(name)
      # Register the symbol first: both rules below refer to it.
      symbols[name] = Syntax::NonTerminal.new(name)
      rule(name => "#{name} #{base_name}").tag suffix_plus_more
      rule(name => base_name).tag suffix_plus_one
    end
  end

  symbols[name] = Syntax::NonTerminal.new(name) unless symbols.include?(name)

  symbols[name]
end
426
+
427
# Build the name of the symbol standing for the given sequence.
# The name begins with 'seq'; each symbol or sequence subnode contributes
# an underscore followed by its name, and every subnode contributes
# the suffix of its repetition.
# @param aSequenceNode [Notation::SequenceNode] the sequence to name
# @return [String]
def sequence_name(aSequenceNode)
  fragments = aSequenceNode.subnodes.map do |subn|
    label =
      case subn
      when SymbolNode then "_#{subn.name}"
      when SequenceNode then "_#{sequence_name(subn)}"
      else ''
      end

    label + repetition2suffix(subn.repetition)
  end

  "seq#{fragments.join}"
end
442
+
443
# Return the name of the given node, regardless of its repetition.
# @param aNode [Notation::ASTNode] a symbol node or a sequence-like node
# @return [String] the name of a symbol node; the sequence name otherwise
def node_base_name(aNode)
  aNode.kind_of?(SymbolNode) ? aNode.name : sequence_name(aNode)
end
450
+
451
# Return the name of the given node followed by the suffix
# of its repetition.
# @param aNode [Notation::ASTNode] a symbol node or a sequence-like node
# @return [String]
def node_decorated_name(aNode)
  base_name = node_base_name(aNode)
  suffix = repetition2suffix(aNode.repetition)

  "#{base_name}#{suffix}"
end
457
+
458
# Convert the subnodes of the given sequence into the text of a rhs:
# the space-separated names of the subnodes, each one followed by the
# suffix of its repetition.
# @param aSequenceNode [Notation::SequenceNode] the sequence to serialize
# @return [String]
def serialize_sequence(aSequenceNode)
  members = aSequenceNode.subnodes.map do |sn|
    base =
      case sn
      when SymbolNode then sn.name
      when SequenceNode then sequence_name(sn)
      else ''
      end

    base + repetition2suffix(sn.repetition)
  end

  members.join(' ').strip
end
475
+
476
# Record a raw rule; it will be converted into a production by
# `process_raw_rules`. Raw rules are grouped by the name of their lhs.
# @param aSymbol [String] name of the lhs
# @param aRHS [String, Array] representation of the rhs
# @param aTag [String] tag of the rule
# @param simplified [Boolean] true when the rule must be created
#   with `simple_rule`
# @param constraints [Array] constraints of the rule
# @return [Array<RawRule>] the raw rules recorded so far for that lhs
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
  pending = (synthetized[aSymbol] ||= [])
  pending << RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
end
484
+
485
# Convert all the pending raw rules into productions.
# The raw rules are consumed lhs by lhs, oldest entry first, until no
# entry is left; entries added in the meantime are processed as well.
# Each created production receives the tag and constraints of its raw rule.
# @return [nil]
def process_raw_rules
  until synthetized.empty?
    _lhs_name, raw_rules = synthetized.shift
    raw_rules.each do |raw|
      repr = { raw.lhs => raw.rhs }
      new_prod = raw.simple ? simple_rule(repr) : rule(repr)
      new_prod.tag(raw.tag)
      new_prod.constraints = raw.constraints
    end
  end
end
500
+ end # class
501
+ end # module
502
+ end # module
503
+
504
+ # End of file