rley 0.7.06 → 0.8.01

Sign up to get free protection for your applications and to get access to all the features.
Files changed (167) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +362 -62
  3. data/.travis.yml +6 -6
  4. data/CHANGELOG.md +20 -4
  5. data/LICENSE.txt +1 -1
  6. data/README.md +7 -7
  7. data/examples/NLP/engtagger.rb +193 -190
  8. data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
  9. data/examples/NLP/nano_eng/nano_grammar.rb +21 -21
  10. data/examples/NLP/pico_en_demo.rb +2 -2
  11. data/examples/data_formats/JSON/cli_options.rb +1 -1
  12. data/examples/data_formats/JSON/json_ast_builder.rb +21 -27
  13. data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
  14. data/examples/data_formats/JSON/json_demo.rb +1 -2
  15. data/examples/data_formats/JSON/json_grammar.rb +13 -13
  16. data/examples/data_formats/JSON/json_lexer.rb +8 -8
  17. data/examples/data_formats/JSON/json_minifier.rb +1 -1
  18. data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
  19. data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
  20. data/examples/general/calc_iter1/calc_grammar.rb +7 -6
  21. data/examples/general/calc_iter1/calc_lexer.rb +6 -4
  22. data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
  23. data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
  24. data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
  25. data/examples/general/calc_iter2/calc_grammar.rb +12 -12
  26. data/examples/general/calc_iter2/calc_lexer.rb +11 -10
  27. data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
  28. data/examples/general/left.rb +2 -2
  29. data/examples/general/right.rb +2 -2
  30. data/lib/rley.rb +1 -1
  31. data/lib/rley/base/dotted_item.rb +28 -31
  32. data/lib/rley/base/grm_items_builder.rb +6 -0
  33. data/lib/rley/constants.rb +2 -2
  34. data/lib/rley/engine.rb +22 -25
  35. data/lib/rley/formatter/asciitree.rb +3 -3
  36. data/lib/rley/formatter/bracket_notation.rb +1 -8
  37. data/lib/rley/formatter/debug.rb +6 -6
  38. data/lib/rley/formatter/json.rb +2 -2
  39. data/lib/rley/gfg/call_edge.rb +1 -1
  40. data/lib/rley/gfg/edge.rb +5 -5
  41. data/lib/rley/gfg/end_vertex.rb +2 -6
  42. data/lib/rley/gfg/epsilon_edge.rb +1 -5
  43. data/lib/rley/gfg/grm_flow_graph.rb +27 -23
  44. data/lib/rley/gfg/item_vertex.rb +10 -10
  45. data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
  46. data/lib/rley/gfg/scan_edge.rb +1 -1
  47. data/lib/rley/gfg/shortcut_edge.rb +2 -2
  48. data/lib/rley/gfg/start_vertex.rb +4 -8
  49. data/lib/rley/gfg/vertex.rb +43 -39
  50. data/lib/rley/interface.rb +16 -0
  51. data/lib/rley/lexical/token_range.rb +6 -6
  52. data/lib/rley/notation/all_notation_nodes.rb +2 -0
  53. data/lib/rley/notation/ast_builder.rb +191 -0
  54. data/lib/rley/notation/ast_node.rb +44 -0
  55. data/lib/rley/notation/ast_visitor.rb +113 -0
  56. data/lib/rley/notation/grammar.rb +49 -0
  57. data/lib/rley/notation/grammar_builder.rb +504 -0
  58. data/lib/rley/notation/grouping_node.rb +23 -0
  59. data/lib/rley/notation/parser.rb +56 -0
  60. data/lib/rley/notation/sequence_node.rb +35 -0
  61. data/lib/rley/notation/symbol_node.rb +29 -0
  62. data/lib/rley/notation/tokenizer.rb +192 -0
  63. data/lib/rley/parse_forest_visitor.rb +5 -5
  64. data/lib/rley/parse_rep/ast_base_builder.rb +48 -11
  65. data/lib/rley/parse_rep/cst_builder.rb +5 -6
  66. data/lib/rley/parse_rep/parse_forest_builder.rb +22 -18
  67. data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
  68. data/lib/rley/parse_rep/parse_rep_creator.rb +14 -16
  69. data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
  70. data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
  71. data/lib/rley/parse_tree_visitor.rb +1 -1
  72. data/lib/rley/parser/error_reason.rb +4 -5
  73. data/lib/rley/parser/gfg_chart.rb +118 -26
  74. data/lib/rley/parser/gfg_parsing.rb +22 -33
  75. data/lib/rley/parser/parse_entry.rb +25 -31
  76. data/lib/rley/parser/parse_entry_set.rb +19 -16
  77. data/lib/rley/parser/parse_entry_tracker.rb +4 -4
  78. data/lib/rley/parser/parse_tracer.rb +13 -13
  79. data/lib/rley/parser/parse_walker_factory.rb +23 -28
  80. data/lib/rley/ptree/non_terminal_node.rb +7 -5
  81. data/lib/rley/ptree/parse_tree.rb +3 -3
  82. data/lib/rley/ptree/parse_tree_node.rb +5 -5
  83. data/lib/rley/ptree/terminal_node.rb +7 -7
  84. data/lib/rley/rley_error.rb +12 -12
  85. data/lib/rley/sppf/alternative_node.rb +6 -6
  86. data/lib/rley/sppf/composite_node.rb +7 -7
  87. data/lib/rley/sppf/epsilon_node.rb +3 -3
  88. data/lib/rley/sppf/leaf_node.rb +3 -3
  89. data/lib/rley/sppf/parse_forest.rb +16 -16
  90. data/lib/rley/sppf/sppf_node.rb +7 -8
  91. data/lib/rley/sppf/token_node.rb +3 -3
  92. data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +61 -23
  93. data/lib/rley/syntax/grammar.rb +5 -5
  94. data/lib/rley/syntax/grm_symbol.rb +7 -7
  95. data/lib/rley/syntax/match_closest.rb +43 -0
  96. data/lib/rley/syntax/non_terminal.rb +9 -15
  97. data/lib/rley/syntax/production.rb +16 -10
  98. data/lib/rley/syntax/symbol_seq.rb +7 -9
  99. data/lib/rley/syntax/terminal.rb +4 -5
  100. data/lib/rley/syntax/verbatim_symbol.rb +3 -3
  101. data/lib/support/base_tokenizer.rb +19 -18
  102. data/spec/rley/base/dotted_item_spec.rb +2 -2
  103. data/spec/rley/engine_spec.rb +23 -21
  104. data/spec/rley/formatter/asciitree_spec.rb +7 -7
  105. data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
  106. data/spec/rley/formatter/json_spec.rb +1 -1
  107. data/spec/rley/gfg/end_vertex_spec.rb +5 -5
  108. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
  109. data/spec/rley/gfg/item_vertex_spec.rb +10 -10
  110. data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
  111. data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
  112. data/spec/rley/gfg/start_vertex_spec.rb +5 -5
  113. data/spec/rley/gfg/vertex_spec.rb +3 -3
  114. data/spec/rley/lexical/token_range_spec.rb +16 -16
  115. data/spec/rley/lexical/token_spec.rb +2 -2
  116. data/spec/rley/notation/grammar_builder_spec.rb +302 -0
  117. data/spec/rley/notation/parser_spec.rb +184 -0
  118. data/spec/rley/notation/tokenizer_spec.rb +370 -0
  119. data/spec/rley/parse_forest_visitor_spec.rb +165 -163
  120. data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
  121. data/spec/rley/parse_rep/ast_builder_spec.rb +6 -7
  122. data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
  123. data/spec/rley/parse_rep/groucho_spec.rb +24 -26
  124. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +27 -27
  125. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -8
  126. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +3 -3
  127. data/spec/rley/parse_tree_visitor_spec.rb +10 -8
  128. data/spec/rley/parser/dangling_else_spec.rb +445 -0
  129. data/spec/rley/parser/error_reason_spec.rb +6 -6
  130. data/spec/rley/parser/gfg_earley_parser_spec.rb +120 -12
  131. data/spec/rley/parser/gfg_parsing_spec.rb +6 -13
  132. data/spec/rley/parser/parse_entry_spec.rb +19 -19
  133. data/spec/rley/parser/parse_walker_factory_spec.rb +10 -10
  134. data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
  135. data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
  136. data/spec/rley/ptree/terminal_node_spec.rb +6 -6
  137. data/spec/rley/sppf/alternative_node_spec.rb +6 -6
  138. data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
  139. data/spec/rley/sppf/token_node_spec.rb +4 -4
  140. data/spec/rley/support/ambiguous_grammar_helper.rb +4 -5
  141. data/spec/rley/support/grammar_abc_helper.rb +3 -5
  142. data/spec/rley/support/grammar_ambig01_helper.rb +5 -6
  143. data/spec/rley/support/grammar_arr_int_helper.rb +5 -6
  144. data/spec/rley/support/grammar_b_expr_helper.rb +5 -6
  145. data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
  146. data/spec/rley/support/grammar_l0_helper.rb +14 -17
  147. data/spec/rley/support/grammar_pb_helper.rb +8 -7
  148. data/spec/rley/support/grammar_sppf_helper.rb +3 -3
  149. data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +35 -16
  150. data/spec/rley/syntax/grammar_spec.rb +6 -6
  151. data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
  152. data/spec/rley/syntax/match_closest_spec.rb +46 -0
  153. data/spec/rley/syntax/non_terminal_spec.rb +8 -8
  154. data/spec/rley/syntax/production_spec.rb +17 -13
  155. data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
  156. data/spec/rley/syntax/terminal_spec.rb +5 -5
  157. data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
  158. data/spec/spec_helper.rb +0 -12
  159. data/spec/support/base_tokenizer_spec.rb +7 -2
  160. metadata +48 -74
  161. data/.simplecov +0 -7
  162. data/lib/rley/parser/parse_state.rb +0 -83
  163. data/lib/rley/parser/parse_state_tracker.rb +0 -59
  164. data/lib/rley/parser/state_set.rb +0 -101
  165. data/spec/rley/parser/parse_state_spec.rb +0 -125
  166. data/spec/rley/parser/parse_tracer_spec.rb +0 -200
  167. data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
module Rley
  module Notation
    # Abstract class.
    # Instances of its subclasses represent nodes of an abstract syntax tree
    # that is the product of the parse of an input text.
    class ASTNode
      # @return [Rley::Lexical::Position] Position of the entry in the input stream.
      attr_reader :position

      # @return [Symbol] Multiplicity of the node; set to :exactly_one at creation.
      attr_accessor :repetition

      # @return [Hash] Annotation entries attached to the node; empty at creation.
      attr_reader :annotation

      # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
      def initialize(aPosition)
        @position = aPosition
        @repetition = :exactly_one
        @annotation = {}
      end

      # Attach an annotation to the node.
      # When the mapping has a 'repeat' entry, its value becomes the node's
      # repetition and the entry is left out of the stored annotation.
      # The node works on its own copy, so the caller's Hash is not modified.
      # @param aMapping [Hash] the annotation entries
      def annotation=(aMapping)
        repeat_key = 'repeat'
        mapping = aMapping.dup
        @repetition = mapping.delete(repeat_key) if mapping.key?(repeat_key)
        @annotation = mapping
      end

      # Notification that the parsing has successfully completed
      def done!
        # Default: do nothing ...
      end

      # Abstract method (must be overridden in subclasses).
      # Part of the 'visitee' role in Visitor design pattern.
      # @param _visitor [Notation::ASTVisitor] the visitor
      # @raise [NotImplementedError] always, in this base class
      def accept(_visitor)
        raise NotImplementedError
      end
    end # class
  end # module
end # module
@@ -0,0 +1,113 @@
1
module Rley
  module Notation
    # Visitor that walks over a tree of notation nodes and notifies its
    # subscribers of each visit event (Visitor + Observer design patterns).
    class ASTVisitor
      # Link to the top node to visit
      attr_reader(:top)

      # List of objects that subscribed to the visit event notification.
      attr_reader(:subscribers)

      # Build a visitor for the given top.
      # @param aTop [#accept] the top object to visit.
      # @raise [StandardError] when aTop is nil
      def initialize(aTop)
        raise StandardError, 'Cannot visit a nil top node' if aTop.nil?

        @top = aTop
        @subscribers = []
      end

      # Add a subscriber for the visit event notifications.
      # @param aSubscriber [Object]
      def subscribe(aSubscriber)
        subscribers << aSubscriber
      end

      # Remove the given object from the subscription list.
      # The object won't be notified of visit events.
      # @param aSubscriber [Object]
      def unsubscribe(aSubscriber)
        subscribers.delete_if { |entry| entry == aSubscriber }
      end

      # The signal to begin the visit of the top.
      def start
        top.accept(self)
      end

      # Visit event. The visitor is about to visit the ptree.
      # Broadcasts :before_ptree with the tree as sole argument.
      # NOTE(review): nothing in this class calls this method; confirm
      # whether a visitee is expected to invoke it.
      # @param aParseTree [Object] the ptree to visit.
      def start_visit_ptree(aParseTree)
        broadcast(:before_ptree, aParseTree)
      end

      # Visit event. The visitor has completed the visit of the ptree.
      # Broadcasts :after_ptree with the tree as sole argument.
      # @param aParseTree [Object] the visited ptree.
      def end_visit_ptree(aParseTree)
        broadcast(:after_ptree, aParseTree)
      end

      # Visit event. The visitor is visiting a symbol node.
      # Broadcasts :before_symbol_node then :after_symbol_node.
      # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
      def visit_symbol_node(aSymbolNode)
        broadcast(:before_symbol_node, aSymbolNode, self)
        broadcast(:after_symbol_node, aSymbolNode, self)
      end

      # Visit event. The visitor is about to visit a sequence node.
      # The subnodes are visited between the before/after notifications.
      # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
      def visit_sequence_node(aSequenceNode)
        broadcast(:before_sequence_node, aSequenceNode, self)
        traverse_subnodes(aSequenceNode)
        broadcast(:after_sequence_node, aSequenceNode, self)
      end

      # Visit event. The visitor is about to visit a grouping node.
      # The subnodes are visited only when the grouping has the
      # :exactly_one repetition; otherwise only the before/after
      # notifications are sent.
      # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
      def visit_grouping_node(aGroupingNode)
        broadcast(:before_grouping_node, aGroupingNode, self)
        traverse_subnodes(aGroupingNode) if aGroupingNode.repetition == :exactly_one
        broadcast(:after_grouping_node, aGroupingNode, self)
      end

      private

      # Visit all the subnodes of the given parent node, in order.
      # Broadcasts :before_subnodes and :after_subnodes around the visit.
      # @param aParentNode [#subnodes] the parent node.
      def traverse_subnodes(aParentNode)
        subnodes = aParentNode.subnodes
        broadcast(:before_subnodes, aParentNode, subnodes)

        # Let's proceed with the visit of subnodes
        subnodes.each { |a_node| a_node.accept(self) }

        broadcast(:after_subnodes, aParentNode, subnodes)
      end

      # Visit one given subnode of the parent node.
      # Broadcasts :before_given_subnode and :after_given_subnode around it.
      # @param aParentNode [#subnodes] the parent node.
      # @param index [Integer] index of child subnode
      def traverse_given_subnode(aParentNode, index)
        subnode = aParentNode.subnodes[index]
        broadcast(:before_given_subnode, aParentNode, subnode)

        # Now, let's proceed with the visit of that subnode
        subnode.accept(self)

        broadcast(:after_given_subnode, aParentNode, subnode)
      end

      # Send a notification to all subscribers.
      # A subscriber is skipped unless it responds to the message itself
      # or to :accept_all.
      # @param msg [Symbol] event to notify
      # @param args [Array] arguments of the notification.
      def broadcast(msg, *args)
        subscribers.each do |subscr|
          next unless subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)

          subscr.send(msg, *args)
        end
      end
    end # class
  end # module
end # module
@@ -0,0 +1,49 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../syntax/base_grammar_builder'
4
+
5
+ module Rley
6
+ module Notation
7
+ ########################################
8
+ # Syntax for right-hand side of production rules
9
+ builder = Rley::Syntax::BaseGrammarBuilder.new do
10
+ add_terminals('LEFT_PAREN', 'RIGHT_PAREN') # For '(', ')' grouping delimiters
11
+ add_terminals('LEFT_BRACE', 'RIGHT_BRACE') # For '{', '}' annotation delimiters
12
+ add_terminals('QUESTION_MARK', 'STAR', 'PLUS') # For postfix quantifiers
13
+ add_terminals('COMMA', 'ELLIPSIS')
14
+
15
+ add_terminals('STR_LIT') # For string literal values
16
+ add_terminals('INT_LIT') # For integer literal values
17
+ add_terminals('SYMBOL') # Grammar symbols
18
+ add_terminals('KEY') # Key literal
19
+
20
+ rule('notation' => 'rhs')
21
+ rule('rhs' => 'member_seq').tag 'sequence'
22
+ rule('rhs' => [])
23
+ rule('member_seq' => 'member_seq member').tag 'more_members'
24
+ rule('member_seq' => 'member').tag 'one_member'
25
+ rule('member' => 'strait_member')
26
+ rule('member' => 'quantified_member')
27
+ rule('strait_member' => 'base_member')
28
+ rule('strait_member' => 'base_member annotation').tag 'annotated_member'
29
+ rule('base_member' => 'SYMBOL').tag 'symbol'
30
+ rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN').tag 'grouping'
31
+ rule('quantified_member' => 'base_member quantifier').tag 'quantified_member'
32
+ rule('quantifier' => 'QUESTION_MARK').tag 'question_mark'
33
+ rule('quantifier' => 'STAR').tag 'star'
34
+ rule('quantifier' => 'PLUS').tag 'plus'
35
+ rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag 'annotation'
36
+ rule('mapping' => 'mapping COMMA key_value').tag 'more_pairs'
37
+ rule('mapping' => 'key_value').tag 'one_pair'
38
+ rule('key_value' => 'KEY value').tag 'raw_pair'
39
+ rule('value' => 'STR_LIT')
40
+ rule('value' => 'INT_LIT')
41
+ rule('value' => 'range')
42
+ rule('range' => 'INT_LIT ELLIPSIS INT_LIT').tag 'bound_range'
43
+ rule('range' => 'INT_LIT ELLIPSIS')
44
+ end
45
+
46
+ # And now build the Rley Grammar Notation (RGN) grammar...
47
+ RGNGrammar = builder.grammar
48
+ end # module
49
+ end # module
@@ -0,0 +1,504 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
5
+ require_relative 'parser'
6
+ require_relative 'ast_visitor'
7
+ require_relative '../syntax/match_closest'
8
+
9
+ module Rley # This module is used as a namespace
10
+ module Notation # This module is used as a namespace
11
+ # Structure used for production rules that are implicitly generated by Rley
12
+ RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
13
+
14
+ # Builder GoF pattern. Builder builds a complex object
15
+ # (say, a grammar) from simpler objects (terminals and productions)
16
+ # and using a step by step approach.
17
+ class GrammarBuilder
18
+ # @return [Hash{String, GrmSymbol}] The mapping of grammar symbol names
19
+ # to the matching grammar symbol object.
20
+ attr_reader(:symbols)
21
+
22
+ # @return [Notation::Parser] Parser for the right-side of productions
23
+ attr_reader(:parser)
24
+
25
+ # @return [Hash{ASTVisitor, Array}]
26
+ attr_reader(:visitor2rhs)
27
+
28
+ # @return [Array<Production>] The list of production rules for
29
+ # the grammar to build.
30
+ attr_reader(:productions)
31
+
32
+ # @return [Hash{String, String}] The synthesized raw productions
33
+ attr_reader(:synthetized)
34
+
35
+ # Creates a new grammar builder.
36
+ # @param aBlock [Proc] code block used to build the grammar.
37
+ # @example Building a tiny English grammar
38
+ # builder = Rley::Notation::GrammarBuilder.new do
39
+ # add_terminals('n', 'v', 'adj', 'det')
40
+ # rule 'S' => 'NP VP'
41
+ # rule 'VP' => 'v NP'
42
+ # rule 'NP' => 'det n'
43
+ # rule 'NP' => 'adj NP'
44
+ # end
45
+ # tiny_eng = builder.grammar
46
+ def initialize(&aBlock)
47
+ @symbols = {}
48
+ @productions = []
49
+ @parser = Notation::Parser.new
50
+ @visitor2rhs = {}
51
+ @synthetized = {}
52
+
53
+ if block_given?
54
+ instance_exec(&aBlock)
55
+ grammar_complete!
56
+ end
57
+ end
58
+
59
+ # Retrieve a grammar symbol from its name.
60
+ # Returns nil when no symbol with that name is known.
61
+ # @param aSymbolName [String] the name of a grammar symbol.
62
+ # @return [GrmSymbol] the retrieved symbol object.
63
+ def [](aSymbolName)
64
+ symbols[aSymbolName]
65
+ end
66
+
67
+ # Add the given terminal symbols to the grammar of the language
68
+ # @param terminalSymbols [String or Terminal] 1..* terminal symbols.
69
+ # @return [void]
70
+ def add_terminals(*terminalSymbols)
71
+ new_symbs = build_symbols(Syntax::Terminal, terminalSymbols)
72
+ symbols.merge!(new_symbs)
73
+ end
74
+
75
+ # Add the given marker symbol to the grammar of the language
76
+ # @param aMarkerSymbol [String] A marker symbol
77
+ # @return [void]
78
+ def add_marker(aMarkerSymbol)
79
+ new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
80
+ symbols[new_symb.name] = new_symb
81
+ end
82
+
83
+ # Add a production rule in the grammar given one
84
+ # key-value pair of the form: String => String.
85
+ # Where the key is the name of the non-terminal appearing in the
86
+ # left side of the rule.
87
+ # The value is a sequence of grammar symbol names (optionally quantified).
88
+ # The rule is created and inserted in the grammar.
89
+ # @example Equivalent call syntax
90
+ # builder.add_production('A' => 'a A c')
91
+ # builder.rule('A' => 'a A c') # 'rule' is a synonym
92
+ # @param aProductionRepr [Hash{String, String}]
93
+ # A Hash-based representation of a production.
94
+ # @return [Production] The created Production instance
95
def add_production(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    # The rhs may be given as a String or as an Array of symbol names.
    # An Array is turned into the equivalent space-separated text
    # (an empty Array thus stands for an empty right-hand side).
    rhs_text = rhs_repr.kind_of?(Array) ? rhs_repr.join(' ') : rhs_repr
    rhs = rhs_text.strip
    constraints = []
    if rhs.empty?
      rhs_members = []
    else
      # Parse the rhs text, then let the visitor notify this builder:
      # the notification handlers collect the rhs symbols in visitor2rhs.
      ast = parser.parse(rhs)
      visitor = ASTVisitor.new(ast)
      visitor2rhs[visitor] = []
      visitor.subscribe(self)
      visitor.start
      root_node = ast.root
      constraints = root_node.constraints unless root_node.kind_of?(SymbolNode)

      # The collected members are consumed: drop the visitor's entry
      rhs_members = visitor2rhs.delete(visitor)
    end
    new_prod = Syntax::Production.new(lhs, rhs_members)
    new_prod.constraints = constraints
    productions << new_prod
  end

  productions.last
end
120
+
121
+ # Given the grammar symbols and productions added to the builder,
122
+ # build the resulting grammar (if not yet done).
123
+ # @return [Grammar] the created grammar object.
124
+ def grammar
125
+ unless @grammar
126
+ raise StandardError, 'No symbol found for grammar' if symbols.empty?
127
+ if productions.empty?
128
+ raise StandardError, 'No production found for grammar'
129
+ end
130
+
131
+ # Check that each terminal appears at least in a rhs of a production
132
+ all_terminals = symbols.values.select do |a_symb|
133
+ a_symb.kind_of?(Syntax::Terminal)
134
+ end
135
+ in_use = Set.new
136
+ productions.each do |prod|
137
+ prod.rhs.members.each do |symb|
138
+ in_use << symb if symb.kind_of?(Syntax::Terminal)
139
+ end
140
+ end
141
+
142
+ unused = all_terminals.reject { |a_term| in_use.include?(a_term) }
143
+ unless unused.empty?
144
+ suffix = "#{unused.map(&:name).join(', ')}."
145
+ raise StandardError, "Useless terminal symbol(s): #{suffix}"
146
+ end
147
+
148
+ @grammar = Syntax::Grammar.new(productions.dup)
149
+ end
150
+
151
+ @grammar
152
+ end
153
+
154
+ alias rule add_production
155
+
156
+ # When a symbol, say symb, in a rhs is followed by a '?' modifier,
157
+ # then a rule will be generated with a lhs named symb + suffix_qmark
158
+ # implicitly called: rule('declaration_qmark' => 'declaration').tag suffix_qmark_one
159
+ # implicitly called: rule('declaration_qmark' => '').tag suffix_qmark_none
160
+ def suffix_qmark
161
+ '_qmark'
162
+ end
163
+
164
+ def suffix_qmark_one
165
+ '_qmark_one'
166
+ end
167
+
168
+ def suffix_qmark_none
169
+ '_qmark_none'
170
+ end
171
+
172
+ # When a symbol, say symb, in a rhs is followed by a '*' modifier,
173
+ # then a rule will be generated with a lhs named symb + suffix_star
174
+ # implicitly called: rule('declaration_star' => 'declaration_star declaration').tag suffix_star_more
175
+ # implicitly called: rule('declaration_star' => '').tag suffix_star_none
176
+ def suffix_star
177
+ '_star'
178
+ end
179
+
180
+ def suffix_star_more
181
+ '_star_more'
182
+ end
183
+
184
+ def suffix_star_none
185
+ '_star_none'
186
+ end
187
+
188
+ # When a symbol, say symb, in a rhs is followed by a '+' modifier,
189
+ # then a rule will be generated with a lhs named symb + suffix_plus
190
+ # implicitly called: rule('digit_plus' => 'digit_plus digit').tag suffix_plus_more
191
+ # implicitly called: rule('digit_plus' => 'digit').tag suffix_plus_one
192
+ def suffix_plus
193
+ '_plus'
194
+ end
195
+
196
+ def suffix_plus_more
197
+ '_plus_more'
198
+ end
199
+
200
+ def suffix_plus_one
201
+ '_plus_one'
202
+ end
203
+
204
+ def repetition2suffix(aRepetition)
205
+ mapping = {
206
+ zero_or_one: suffix_qmark,
207
+ zero_or_more: suffix_star,
208
+ exactly_one: '',
209
+ one_or_more: suffix_plus
210
+ }
211
+
212
+ mapping[aRepetition]
213
+ end
214
+
215
+ def modifier2suffix(aModifier)
216
+ mapping = {
217
+ '?' => suffix_qmark,
218
+ '*' => suffix_star,
219
+ '+' => suffix_plus
220
+ }
221
+
222
+ mapping[aModifier]
223
+ end
224
+
225
+ ##################################
226
+ # RGN's AST visit notification events
227
+ # ################################
228
+ def after_symbol_node(aSymbolNode, aVisitor)
229
+ symb_name = aSymbolNode.name
230
+
231
+ case aSymbolNode.repetition
232
+ when :zero_or_one
233
+ # implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
234
+ # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
235
+ name_modified = "#{symb_name}#{suffix_qmark}"
236
+ unless symbols.include? name_modified
237
+ add_nonterminal(name_modified)
238
+ add_raw_rule(name_modified, "#{symb_name}", suffix_qmark_one)
239
+ add_raw_rule(name_modified, '', suffix_qmark_none)
240
+ end
241
+ symb_name = name_modified
242
+
243
+ when :zero_or_more
244
+ # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
245
+ # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
246
+ name_modified = "#{symb_name}#{suffix_star}"
247
+ unless symbols.include? name_modified
248
+ add_nonterminal(name_modified)
249
+ add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
250
+ add_raw_rule(name_modified, [], suffix_star_none)
251
+ end
252
+ symb_name = name_modified
253
+
254
+ when :exactly_one
255
+ # Do nothing
256
+
257
+ when :one_or_more
258
+ name_modified = "#{symb_name}#{suffix_plus}"
259
+ unless symbols.include? name_modified
260
+ add_nonterminal(name_modified)
261
+ add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
262
+ add_raw_rule(name_modified, symb_name, suffix_plus_one)
263
+ end
264
+ symb_name = name_modified
265
+ else
266
+ raise StandardError, 'Unhandled multiplicity'
267
+ end
268
+
269
+ symb = get_grm_symbol(symb_name)
270
+ visitor2rhs[aVisitor] << symb
271
+ end
272
+
273
# Visit notification: the subnodes of a sequence node were visited.
# For every subnode annotated with 'match_closest', a MatchClosest
# constraint (sequence, subnode index, annotation value) is appended to
# the constraints of the sequence node.
# @param aSequenceNode [SequenceNode] the visited sequence node
# @param _visitor [ASTVisitor] the visitor (unused)
def after_sequence_node(aSequenceNode, _visitor)
  aSequenceNode.subnodes.each_with_index do |sn, i|
    # An annotation without 'match_closest' entry (or no annotation at all)
    # must not produce a constraint with a nil symbol name.
    matching = sn.annotation['match_closest']
    next if matching.nil?

    aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
  end
end
280
+
281
+ def after_grouping_node(aGroupingNode, aVisitor)
282
+ after_sequence_node(aGroupingNode, aVisitor)
283
+ symb_name = sequence_name(aGroupingNode)
284
+
285
+ unless symbols.include?(symb_name) || aGroupingNode.repetition == :exactly_one
286
+ add_nonterminal(symb_name)
287
+ rhs = serialize_sequence(aGroupingNode)
288
+ add_raw_rule(symb_name, rhs, 'return_children', true, aGroupingNode.constraints)
289
+ end
290
+ name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"
291
+
292
+ case aGroupingNode.repetition
293
+ when :zero_or_one
294
+ # implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
295
+ # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
296
+ unless symbols.include? name_modified
297
+ add_nonterminal(name_modified)
298
+ add_raw_rule(name_modified, symb_name, suffix_qmark_one, true)
299
+ add_raw_rule(name_modified, [], suffix_qmark_none, true)
300
+ end
301
+
302
+ when :zero_or_more
303
+ # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
304
+ # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
305
+ unless symbols.include? name_modified
306
+ add_nonterminal(name_modified)
307
+ add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
308
+ add_raw_rule(name_modified, '', suffix_star_none)
309
+ end
310
+
311
+ when :exactly_one
312
+ # Do nothing
313
+
314
+ when :one_or_more
315
+ unless symbols.include? name_modified
316
+ add_nonterminal(name_modified)
317
+ add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
318
+ add_raw_rule(name_modified, symb_name, suffix_plus_one)
319
+ end
320
+ else
321
+ raise StandardError, 'Unhandled multiplicity'
322
+ end
323
+
324
+ unless aGroupingNode.repetition == :exactly_one
325
+ symb = get_grm_symbol(name_modified)
326
+ visitor2rhs[aVisitor] << symb
327
+ end
328
+ end
329
+
330
+ # A notification to the builder object that the programmer
331
+ # has completed the entry of terminals and production rules
332
+ def grammar_complete!
333
+ process_raw_rules()
334
+ end
335
+
336
+ private
337
+
338
+ def add_nonterminal(aName)
339
+ symbols[aName] = Syntax::NonTerminal.new(aName)
340
+ end
341
+
342
# Add a production given as a lhs => rhs pair, without running the
# notation parser on the right-hand side.
# A String rhs is split on whitespace; any other rhs is used as a list of
# names. A name ending with '?', '*' or '+' is replaced by the name
# obtained by swapping that modifier for its suffix (see modifier2suffix).
# @param aProductionRepr [Hash{String => String, Array<String>}]
# @return [Production] the last production in the builder's list
def simple_rule(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    rhs = rhs_repr.kind_of?(String) ? rhs_repr.strip.scan(/\S+/) : rhs_repr

    members = rhs.map do |name|
      if name.end_with?('?', '*', '+')
        # The last character is the modifier: map it to its name suffix
        suffix = modifier2suffix(name[-1])
        get_grm_symbol("#{name.chop}#{suffix}")
      else
        get_grm_symbol(name)
      end
    end
    productions << Syntax::Production.new(lhs, members)
  end

  productions.last
end
367
+
368
+ # Add the given grammar symbols.
369
+ # @param aClass [Class] The class of grammar symbols to instantiate.
370
+ # @param theSymbols [Array] elements are treated as follows:
371
+ # if the element is already a grammar symbol, then it is added as is,
372
+ # otherwise it is considered as the name of a grammar symbol
373
+ # of the specified class to build.
374
+ def build_symbols(aClass, theSymbols)
375
+ symbs = {}
376
+ theSymbols.each do |s|
377
+ new_symbol = build_symbol(aClass, s)
378
+ symbs[new_symbol.name] = new_symbol
379
+ end
380
+
381
+ symbs
382
+ end
383
+
384
+ # If the argument is already a grammar symbol object then it is
385
+ # returned as is. Otherwise, the argument is treated as a name
386
+ # for a new instance of the given class.
387
+ # @param aClass [Class] The class of grammar symbols to instantiate
388
+ # @param aSymbolArg [GrmSymbol-like or String]
389
+ # @return [GrmSymbol] the given symbol, or a new instance of aClass
390
+ def build_symbol(aClass, aSymbolArg)
391
+ if aSymbolArg.kind_of?(Syntax::GrmSymbol)
392
+ aSymbolArg
393
+ else
394
+ aClass.new(aSymbolArg)
395
+ end
396
+ end
397
+
398
+ # Retrieve the non-terminal symbol with given name.
399
+ # If it doesn't exist yet, then it is created on the fly.
400
+ # @param aSymbolName [String] the name of the grammar symbol to retrieve
401
+ # @return [NonTerminal]
402
def get_grm_symbol(aSymbolName)
  name = aSymbolName

  # A name of the form 'x+' (but not a lone '+') stands for 'one or more x'
  if aSymbolName.end_with?('+') && aSymbolName.length > 1
    base_name = aSymbolName.chop
    name = "#{base_name}#{suffix_plus}"
    unless symbols.include?(name)
      # First occurrence: register the repetition symbol and its two rules
      symbols[name] = Syntax::NonTerminal.new(name)
      rule(name => "#{name} #{base_name}").tag suffix_plus_more
      rule(name => base_name).tag suffix_plus_one
    end
  end

  # Unknown names are created on the fly as non-terminals
  symbols[name] = Syntax::NonTerminal.new(name) unless symbols.include?(name)

  symbols[name]
end
426
+
427
+ def sequence_name(aSequenceNode)
428
+ subnode_names = +''
429
+ aSequenceNode.subnodes.each do |subn|
430
+ case subn
431
+ when SymbolNode
432
+ subnode_names << "_#{subn.name}"
433
+ when SequenceNode
434
+ subnode_names << "_#{sequence_name(subn)}"
435
+ end
436
+ suffix = repetition2suffix(subn.repetition)
437
+ subnode_names << suffix
438
+ end
439
+
440
+ "seq#{subnode_names}"
441
+ end
442
+
443
+ def node_base_name(aNode)
444
+ if aNode.kind_of?(SymbolNode)
445
+ aNode.name
446
+ else
447
+ sequence_name(aNode)
448
+ end
449
+ end
450
+
451
# Name of the given node followed by the suffix matching its repetition.
# @param aNode [SymbolNode, SequenceNode] a node of the notation AST
# @return [String] base name (see node_base_name) + repetition suffix
def node_decorated_name(aNode)
  base_name = node_base_name(aNode)
  suffix = repetition2suffix(aNode.repetition)

  "#{base_name}#{suffix}"
end
457
+
458
# Text representation of the subnodes of a sequence node: the decorated
# names of the subnodes separated by one space.
# A symbol node contributes its name, a sequence node contributes its
# sequence name; each is followed by the suffix of its repetition.
# @param aSequenceNode [SequenceNode] the node to serialize
# @return [String]
def serialize_sequence(aSequenceNode)
  pieces = aSequenceNode.subnodes.map do |sn|
    base = case sn
           when SymbolNode then sn.name
           when SequenceNode then sequence_name(sn)
           else ''
           end
    base + repetition2suffix(sn.repetition)
  end

  pieces.join(' ').strip
end
475
+
476
+ def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
477
+ raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
478
+ if synthetized.include?(aSymbol)
479
+ @synthetized[aSymbol] << raw_rule
480
+ else
481
+ @synthetized[aSymbol] = [raw_rule]
482
+ end
483
+ end
484
+
485
+ def process_raw_rules
486
+ until synthetized.empty? do
487
+ raw_rules = synthetized.delete(synthetized.keys.first)
488
+ raw_rules.each do |raw|
489
+ new_prod = nil
490
+ if raw.simple
491
+ new_prod = simple_rule(raw.lhs => raw.rhs)
492
+ else
493
+ new_prod = rule(raw.lhs => raw.rhs)
494
+ end
495
+ new_prod.tag(raw.tag)
496
+ new_prod.constraints = raw.constraints
497
+ end
498
+ end
499
+ end
500
+ end # class
501
+ end # module
502
+ end # module
503
+
504
+ # End of file