rley 0.7.08 → 0.8.00
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +4 -5
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +11 -11
- data/examples/general/calc_iter1/calc_grammar.rb +5 -4
- data/examples/general/calc_iter2/calc_grammar.rb +9 -9
- data/examples/general/left.rb +1 -1
- data/examples/general/right.rb +1 -1
- data/lib/rley.rb +1 -1
- data/lib/rley/base/dotted_item.rb +5 -0
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +2 -2
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/notation/all_notation_nodes.rb +2 -0
- data/lib/rley/notation/ast_builder.rb +191 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +113 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +451 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +192 -0
- data/lib/rley/parse_rep/ast_base_builder.rb +13 -0
- data/lib/rley/parser/gfg_chart.rb +100 -6
- data/lib/rley/parser/gfg_parsing.rb +5 -3
- data/lib/rley/parser/parse_entry_set.rb +1 -1
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +45 -15
- data/lib/rley/syntax/grm_symbol.rb +1 -1
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/production.rb +6 -0
- data/spec/rley/engine_spec.rb +6 -6
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +295 -0
- data/spec/rley/notation/parser_spec.rb +184 -0
- data/spec/rley/notation/tokenizer_spec.rb +370 -0
- data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
- data/spec/rley/parse_rep/groucho_spec.rb +1 -1
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
- data/spec/rley/parser/dangling_else_spec.rb +445 -0
- data/spec/rley/parser/gfg_earley_parser_spec.rb +95 -9
- data/spec/rley/parser/gfg_parsing_spec.rb +1 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_abc_helper.rb +2 -2
- data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
- data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
- data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +2 -2
- data/spec/rley/support/grammar_pb_helper.rb +2 -2
- data/spec/rley/support/grammar_sppf_helper.rb +2 -2
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +30 -11
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/production_spec.rb +4 -0
- metadata +29 -14
- data/lib/rley/parser/parse_state.rb +0 -78
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -100
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,191 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../parse_rep/ast_base_builder'
|
4
|
+
require_relative '../engine'
|
5
|
+
require_relative 'all_notation_nodes'
|
6
|
+
|
7
|
+
module Rley
|
8
|
+
module Notation
|
9
|
+
# ASTBuilder assembles, piece by piece, the AST (Abstract Syntax Tree) of a
# production right-hand side written in the Rley grammar notation.
# The tree is built from a sequence of input tokens and from the visit
# events produced by walking over a GFGParsing object.
class ASTBuilder < Rley::ParseRep::ASTBaseBuilder
  unless defined?(Name2special)
    # Mapping Token name => operator | separator | delimiter characters
    # @return [Hash{String => String}]
    Name2special = {
      'COMMA' => ',',
      'ELLIPSIS' => '..',
      'LEFT_BRACE' => '{',
      'LEFT_PAREN' => '(',
      'PLUS' => '+',
      'QUESTION_MARK' => '?',
      'RIGHT_BRACE' => '}',
      'RIGHT_PAREN' => ')',
      'STAR' => '*'
    }.freeze
  end

  # Create a new AST builder instance.
  # @param theTokens [Array<Rley::Lexical::Token>] The sequence of input tokens.
  def initialize(theTokens)
    super
  end

  protected

  # NOTE(review): Terminal2NodeClass is not defined anywhere in this file;
  # confirm it is provided elsewhere before relying on this method.
  def terminal2node
    Terminal2NodeClass
  end

  # Method override: every terminal becomes a plain terminal node that
  # wraps the token and its position.
  def new_leaf_node(_production, _terminal, aTokenPosition, aToken)
    Rley::PTree::TerminalNode.new(aToken, aTokenPosition)
  end

  # Factory method for creating a parent node object.
  # When a method matching the production name exists, it is invoked as
  # the semantic action; otherwise a default action is selected from the
  # number of symbols in the rhs of the production.
  # @param aProduction [Production] Production rule
  # @param aRange [Range] Range of tokens matched by the rule
  # @param theTokens [Array] The input tokens
  # @param theChildren [Array] Children nodes (one per rhs symbol)
  def new_parent_node(aProduction, aRange, theTokens, theChildren)
    action = method_name(aProduction.name)
    if respond_to?(action, true)
      return send(action, aProduction, aRange, theTokens, theChildren)
    end

    # Default action...
    rhs_size = aProduction.rhs.size
    return return_epsilon(aRange, theTokens, theChildren) if rhs_size == 0
    return return_first_child(aRange, theTokens, theChildren) if rhs_size == 1

    parent = Rley::PTree::NonTerminalNode.new(aProduction.lhs, aRange)
    theChildren&.reverse_each { |child| parent.add_subnode(child) if child }
    parent
  end

  # Return the AST node corresponding to the second symbol in the rhs
  def reduce_to_2nd_symbol(_production, _range, _tokens, theChildren)
    theChildren[1]
  end

  #####################################
  # SEMANTIC ACTIONS
  #####################################

  # rule('rhs' => 'member_seq').tag 'sequence'
  # A one-member sequence collapses to that single member.
  def reduce_sequence(_production, _range, _tokens, theChildren)
    members = theChildren[0]
    return members.first if members.size == 1

    SequenceNode.new(members.first.position, members, nil)
  end

  # rule('member_seq' => 'member_seq member').tag 'more_members'
  def reduce_more_members(_production, _range, _tokens, theChildren)
    member_seq, member = theChildren
    member_seq << member
  end

  # rule('member_seq' => 'member').tag 'one_member'
  def reduce_one_member(_production, _range, _tokens, theChildren)
    [theChildren[0]]
  end

  # rule('strait_member' => 'base_member annotation').tag 'annotated_member'
  def reduce_annotated_member(_production, _range, _tokens, theChildren)
    member = theChildren[0]
    member.annotation = theChildren[1]
    member
  end

  # rule('base_member' => 'SYMBOL').tag 'symbol'
  def reduce_symbol(_production, _range, _tokens, theChildren)
    token = theChildren[0].token
    SymbolNode.new(token.position, token.lexeme)
  end

  # rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN').tag 'grouping'
  # A grouping that encloses one single member collapses to that member.
  def reduce_grouping(_production, _range, tokens, theChildren)
    members = theChildren[1]
    return members.first if members.size == 1

    # The grouping position is taken from the token at the upper bound
    # of the range of the opening parenthesis node.
    rank = theChildren[0].range.high
    GroupingNode.new(tokens[rank].position, members, nil)
  end

  # rule('quantified_member' => 'base_member quantifier').tag 'quantified_member'
  def reduce_quantified_member(_production, _range, _tokens, theChildren)
    member = theChildren[0]
    member.repetition = theChildren[1]
    member
  end

  # rule('quantifier' => 'QUESTION_MARK').tag 'question_mark'
  def reduce_question_mark(_production, _range, _tokens, _theChildren)
    :zero_or_one
  end

  # rule('quantifier' => 'STAR').tag 'star'
  def reduce_star(_production, _range, _tokens, _theChildren)
    :zero_or_more
  end

  # rule('quantifier' => 'PLUS').tag 'plus'
  def reduce_plus(_production, _range, _tokens, _theChildren)
    :one_or_more
  end

  # rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag 'annotation'
  def reduce_annotation(_production, _range, _tokens, theChildren)
    theChildren[1]
  end

  # rule('mapping' => 'mapping COMMA key_value').tag 'more_pairs'
  # The new pair is added to the Hash built so far (updated in place).
  def reduce_more_pairs(_production, _range, _tokens, theChildren)
    pair = theChildren[2]
    theChildren[0].tap { |mapping| mapping[pair.first] = pair.last }
  end

  # rule('mapping' => 'key_value').tag 'one_pair'
  def reduce_one_pair(_production, _range, _tokens, theChildren)
    pair = theChildren[0]
    { pair.first => pair.last }
  end

  # rule('key_value' => 'KEY value').tag 'raw_pair'
  # @return [Array] a couple [key, value]; a terminal value is replaced
  #   by its lexeme, any other value is kept as is.
  def reduce_raw_pair(_production, _range, _tokens, theChildren)
    key = theChildren[0].token.lexeme
    raw_value = theChildren[1]
    is_terminal = raw_value.kind_of?(Rley::PTree::TerminalNode)
    [key, is_terminal ? raw_value.token.lexeme : raw_value]
  end

  # rule('range' => 'INT_LIT ELLIPSIS INT_LIT').tag 'bound_range'
  # @return [Symbol, Range] :zero_or_one for 0..1, :exactly_one for 1..1,
  #   otherwise an Integer range.
  def reduce_bound_range(_production, _range, _tokens, theChildren)
    low = theChildren[0].token.lexeme
    high = theChildren[2].token.lexeme
    return :zero_or_one if low == '0' && high == '1'
    return :exactly_one if low == '1' && high == '1'

    Range.new(low.to_i, high.to_i)
  end
end # class
|
190
|
+
end # module
|
191
|
+
end # module
|
@@ -0,0 +1,44 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Rley
|
4
|
+
module Notation
|
5
|
+
# Abstract class.
# Instances of its subclasses are the nodes of the abstract syntax tree
# that results from the parse of an input text.
class ASTNode
  # @return [Rley::Lexical::Position] Position of the entry in the input stream.
  # @return [Hash] the annotation mapping (minus any 'repeat' entry)
  attr_reader :position, :annotation

  # @return [Object] the repetition of the node; :exactly_one by default,
  #   or whatever value was assigned directly or via a 'repeat' annotation.
  attr_accessor :repetition

  # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
  def initialize(aPosition)
    @position = aPosition
    @repetition = :exactly_one
    @annotation = {}
  end

  # Attach an annotation to the node.
  # A 'repeat' entry, when present, is removed from the given mapping
  # (the argument itself is modified) and becomes the node's repetition.
  # @param aMapping [Hash] the annotation key/value pairs
  def annotation=(aMapping)
    @repetition = aMapping.delete('repeat') if aMapping.key?('repeat')
    @annotation = aMapping
  end

  # Notification that the parsing has successfully completed
  def done!
    # Default: do nothing ...
  end

  # Abstract method (must be overridden in subclasses).
  # Part of the 'visitee' role in Visitor design pattern.
  # @param _visitor [Object] the visitor
  # @raise [NotImplementedError] always, in this base class
  def accept(_visitor)
    raise NotImplementedError
  end
end # class
|
43
|
+
end # module
|
44
|
+
end # module
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module Rley
|
2
|
+
module Notation
|
3
|
+
# Visitor (GoF pattern) that walks over the nodes of a notation AST and
# notifies its subscribers of each step of the visit.
class ASTVisitor
  # Link to the top node to visit
  attr_reader :top

  # List of objects that subscribed to the visit event notification.
  attr_reader :subscribers

  # Build a visitor for the given top.
  # @param aTop [Object] the object to visit; it must respond to `accept`.
  # @raise [StandardError] when aTop is nil
  def initialize(aTop)
    raise StandardError if aTop.nil?

    @subscribers = []
    @top = aTop
  end

  # Add a subscriber for the visit event notifications.
  # @param aSubscriber [Object]
  # @return [Array] the updated list of subscribers
  def subscribe(aSubscriber)
    subscribers.push(aSubscriber)
  end

  # Remove the given object from the subscription list.
  # The object won't be notified of visit events.
  # @param aSubscriber [Object]
  # @return [Array] the updated list of subscribers
  def unsubscribe(aSubscriber)
    subscribers.delete(aSubscriber)
    subscribers
  end

  # The signal to begin the visit of the top.
  def start
    top.accept(self)
  end

  # Visit event. The visitor is about to visit the ptree.
  # @param aParseTree [Rley::PTree::ParseTree] the ptree to visit.
  def start_visit_ptree(aParseTree)
    broadcast(:before_ptree, aParseTree)
  end

  # Visit event. The visitor has completed the visit of the ptree.
  # @param aParseTree [Rley::PTree::ParseTree] the visited ptree.
  def end_visit_ptree(aParseTree)
    broadcast(:after_ptree, aParseTree)
  end

  # Visit event. The visitor is visiting a symbol node (a leaf):
  # the before and after events are sent one after the other.
  # @param aSymbolNode [Notation::SymbolNode] the symbol node to visit
  def visit_symbol_node(aSymbolNode)
    %i[before_symbol_node after_symbol_node].each do |event|
      broadcast(event, aSymbolNode, self)
    end
  end

  # Visit event. The visitor is about to visit a sequence node.
  # @param aSequenceNode [Notation::SequenceNode] the sequence node to visit
  def visit_sequence_node(aSequenceNode)
    broadcast(:before_sequence_node, aSequenceNode, self)
    traverse_subnodes(aSequenceNode)
    broadcast(:after_sequence_node, aSequenceNode, self)
  end

  # Visit event. The visitor is about to visit a grouping node.
  # The subnodes are traversed only when the grouping has the
  # :exactly_one repetition.
  # @param aGroupingNode [Notation::GroupingNode] the grouping node to visit
  def visit_grouping_node(aGroupingNode)
    broadcast(:before_grouping_node, aGroupingNode, self)
    unquantified = aGroupingNode.repetition == :exactly_one
    traverse_subnodes(aGroupingNode) if unquantified
    broadcast(:after_grouping_node, aGroupingNode, self)
  end

  private

  # Visit event. The visitor is about to visit the subnodes of a
  # compound node.
  # @param aParentNode [Object] the parent node (responds to `subnodes`).
  def traverse_subnodes(aParentNode)
    children = aParentNode.subnodes
    broadcast(:before_subnodes, aParentNode, children)

    # Let's proceed with the visit of subnodes
    children.each { |child| child.accept(self) }

    broadcast(:after_subnodes, aParentNode, children)
  end

  # Visit event. The visitor is about to visit one given subnode of a
  # compound node.
  # @param aParentNode [Object] the parent node (responds to `subnodes`).
  # @param index [Integer] index of child subnode
  def traverse_given_subnode(aParentNode, index)
    child = aParentNode.subnodes[index]
    broadcast(:before_given_subnode, aParentNode, child)

    # Now, let's proceed with the visit of that subnode
    child.accept(self)

    broadcast(:after_given_subnode, aParentNode, child)
  end

  # Send a notification to all subscribers.
  # A subscriber is notified when it responds to the event or to
  # `accept_all`; the others are skipped.
  # @param msg [Symbol] event to notify
  # @param args [Array] arguments of the notification.
  def broadcast(msg, *args)
    subscribers.each do |subscr|
      interested = subscr.respond_to?(msg) || subscr.respond_to?(:accept_all)
      subscr.send(msg, *args) if interested
    end
  end
end # class
|
112
|
+
end # module
|
113
|
+
end # module
|
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../syntax/base_grammar_builder'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module Notation
|
7
|
+
########################################
# Syntax for right-hand side of production rules,
# i.e. the grammar of the Rley Grammar Notation (RGN) itself.
builder = Rley::Syntax::BaseGrammarBuilder.new do
  # Punctuation and operators
  add_terminals('LEFT_PAREN', 'RIGHT_PAREN') # For '(', ')' grouping delimiters
  add_terminals('LEFT_BRACE', 'RIGHT_BRACE') # For '{', '}' annotation delimiters
  add_terminals('QUESTION_MARK', 'STAR', 'PLUS') # For postfix quantifiers
  add_terminals('COMMA', 'ELLIPSIS')

  # Literals and names
  add_terminals('STR_LIT') # For string literal values
  add_terminals('INT_LIT') # For integer literal values
  add_terminals('SYMBOL') # Grammar symbols
  add_terminals('KEY') # Key literal

  # A right-hand side is a possibly empty sequence of members
  rule('notation' => 'rhs')
  rule('rhs' => 'member_seq').tag('sequence')
  rule('rhs' => [])
  rule('member_seq' => 'member_seq member').tag('more_members')
  rule('member_seq' => 'member').tag('one_member')

  # A member is either plain (optionally annotated) or quantified
  rule('member' => 'strait_member')
  rule('member' => 'quantified_member')
  rule('strait_member' => 'base_member')
  rule('strait_member' => 'base_member annotation').tag('annotated_member')
  rule('base_member' => 'SYMBOL').tag('symbol')
  rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN').tag('grouping')
  rule('quantified_member' => 'base_member quantifier').tag('quantified_member')
  rule('quantifier' => 'QUESTION_MARK').tag('question_mark')
  rule('quantifier' => 'STAR').tag('star')
  rule('quantifier' => 'PLUS').tag('plus')

  # Annotations: comma-separated key/value pairs between braces
  rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag('annotation')
  rule('mapping' => 'mapping COMMA key_value').tag('more_pairs')
  rule('mapping' => 'key_value').tag('one_pair')
  rule('key_value' => 'KEY value').tag('raw_pair')
  rule('value' => 'STR_LIT')
  rule('value' => 'INT_LIT')
  rule('value' => 'range')
  rule('range' => 'INT_LIT ELLIPSIS INT_LIT').tag('bound_range')
  rule('range' => 'INT_LIT ELLIPSIS')
end

# And now build the Rley Grammar Notation (RGN) grammar...
RGNGrammar = builder.grammar
|
48
|
+
end # module
|
49
|
+
end # module
|
@@ -0,0 +1,451 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
require_relative 'parser'
|
5
|
+
require_relative 'ast_visitor'
|
6
|
+
require_relative '../syntax/match_closest'
|
7
|
+
|
8
|
+
module Rley # This module is used as a namespace
|
9
|
+
module Notation # This module is used as a namespace
|
10
|
+
# Builder GoF pattern. Builder builds a complex object
# (say, a grammar) from simpler objects (terminals and productions)
# and using a step by step approach.
# The right-hand side of each rule is a String written in the Rley grammar
# notation: it is parsed, and the builder, acting as a subscriber of the
# AST visit, turns the visited nodes into grammar symbols (generating
# extra rules for quantified members).
class GrammarBuilder
  # @return [Hash{String, GrmSymbol}] The mapping of grammar symbol names
  # to the matching grammar symbol object.
  attr_reader(:symbols)

  # @return [Notation::Parser] Parser for the right-side of productions
  attr_reader(:parser)

  # @return [Hash{ASTVisitor, Array}] For each visit in progress, the
  # grammar symbols collected so far for the rhs being built.
  attr_reader(:visitor2rhs)

  # @return [Array<Production>] The list of production rules for
  # the grammar to build.
  attr_reader(:productions)

  # Creates a new grammar builder.
  # @param aBlock [Proc] code block used to build the grammar.
  # @example Building a tiny English grammar
  #   builder = Rley::Notation::GrammarBuilder.new do
  #     add_terminals('n', 'v', 'adj', 'det')
  #     rule 'S' => 'NP VP'
  #     rule 'VP' => 'v NP'
  #     rule 'NP' => 'det n'
  #     rule 'NP' => 'adj NP'
  #   end
  #   tiny_eng = builder.grammar
  def initialize(&aBlock)
    @symbols = {}
    @productions = []
    @parser = Notation::Parser.new
    @visitor2rhs = {}

    instance_exec(&aBlock) if block_given?
  end

  # Retrieve a grammar symbol from its name.
  # @param aSymbolName [String] the name of a grammar symbol.
  # @return [GrmSymbol, nil] the retrieved symbol object, nil if not found.
  def [](aSymbolName)
    symbols[aSymbolName]
  end

  # Add the given terminal symbols to the grammar of the language
  # @param terminalSymbols [String or Terminal] 1..* terminal symbols.
  # @return [Hash] the updated mapping of symbols
  def add_terminals(*terminalSymbols)
    new_symbs = build_symbols(Syntax::Terminal, terminalSymbols)
    symbols.merge!(new_symbs)
  end

  # Add a production rule in the grammar given one
  # key-value pair of the form: String => String.
  # Where the key is the name of the non-terminal appearing in the
  # left side of the rule.
  # The value is a sequence of grammar symbol names (optionally quantified).
  # An empty Array or an empty String stands for an empty right-hand side.
  # The rule is created and inserted in the grammar.
  # @example Equivalent call syntax
  #   builder.add_production('A' => 'a A c')
  #   builder.rule('A' => 'a A c') # 'rule' is a synonym
  # @param aProductionRepr [Hash{String, String}]
  #   A Hash-based representation of a production.
  # @return [Production] The created Production instance
  def add_production(aProductionRepr)
    aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
      lhs = get_grm_symbol(lhs_name)
      rhs = rhs_repr.kind_of?(Array) && rhs_repr.empty? ? '' : rhs_repr.strip
      constraints = []
      if rhs.empty?
        rhs_members = []
      else
        ast = parser.parse(rhs)
        visitor = ASTVisitor.new(ast)
        # The after_* notifications below fill this array during the visit
        visitor2rhs[visitor] = []
        visitor.subscribe(self)
        visitor.start
        root_node = ast.root
        constraints = root_node.constraints unless root_node.kind_of?(SymbolNode)

        rhs_members = visitor2rhs.delete(visitor)
      end
      new_prod = Syntax::Production.new(lhs, rhs_members)
      new_prod.constraints = constraints
      productions << new_prod
    end

    productions.last
  end

  # Given the grammar symbols and productions added to the builder,
  # build the resulting grammar (if not yet done).
  # @return [Grammar] the created grammar object.
  # @raise [StandardError] when there is no symbol, no production or
  #   when a terminal appears in no production.
  def grammar
    unless @grammar
      raise StandardError, 'No symbol found for grammar' if symbols.empty?
      if productions.empty?
        raise StandardError, 'No production found for grammar'
      end

      # Check that each terminal appears at least in a rhs of a production
      all_terminals = symbols.values.select do |a_symb|
        a_symb.kind_of?(Syntax::Terminal)
      end
      in_use = Set.new
      productions.each do |prod|
        prod.rhs.members.each do |symb|
          in_use << symb if symb.kind_of?(Syntax::Terminal)
        end
      end

      unused = all_terminals.reject { |a_term| in_use.include?(a_term) }
      unless unused.empty?
        suffix = "#{unused.map(&:name).join(', ')}."
        raise StandardError, "Useless terminal symbol(s): #{suffix}"
      end

      @grammar = Syntax::Grammar.new(productions.dup)
    end

    @grammar
  end

  alias rule add_production

  # When a symbol, say symb, in a rhs is followed by a '?' modifier,
  # then a rule will be generated with a lhs named symb + suffix_qmark
  # implicitly called: rule('declaration_qmark' => 'declaration').tag suffix_qmark_one
  # implicitly called: rule('declaration_qmark' => '').tag suffix_qmark_none
  def suffix_qmark
    '_qmark'
  end

  def suffix_qmark_one
    '_qmark_one'
  end

  def suffix_qmark_none
    '_qmark_none'
  end

  # When a symbol, say symb, in a rhs is followed by a '*' modifier,
  # then a rule will be generated with a lhs named symb + suffix_star
  # implicitly called: rule('declaration_star' => 'declaration_star declaration').tag suffix_star_more
  # implicitly called: rule('declaration_star' => '').tag suffix_star_none
  def suffix_star
    '_star'
  end

  def suffix_star_more
    '_star_more'
  end

  def suffix_star_none
    '_star_none'
  end

  # When a symbol, say symb, in a rhs is followed by a '+' modifier,
  # then a rule will be generated with a lhs named symb + suffix_plus
  # implicitly called: rule('digit_plus' => 'digit_plus digit').tag suffix_plus_more
  # implicitly called: rule('digit_plus' => 'digit').tag suffix_plus_one
  def suffix_plus
    '_plus'
  end

  def suffix_plus_more
    '_plus_more'
  end

  def suffix_plus_one
    '_plus_one'
  end

  # Map a repetition symbol to the suffix used for generated symbol names.
  # @param aRepetition [Symbol] one of: :zero_or_one, :zero_or_more,
  #   :exactly_one, :one_or_more
  # @return [String, nil] the suffix; nil for any other repetition value
  def repetition2suffix(aRepetition)
    mapping = {
      zero_or_one: suffix_qmark,
      zero_or_more: suffix_star,
      exactly_one: '',
      one_or_more: suffix_plus
    }

    mapping[aRepetition]
  end

  # Map a quantifier character to the suffix used for generated symbol names.
  # @param aModifier [String] one of: '?', '*', '+'
  # @return [String, nil] the suffix; nil for any other modifier
  def modifier2suffix(aModifier)
    mapping = {
      '?' => suffix_qmark,
      '*' => suffix_star,
      '+' => suffix_plus
    }

    mapping[aModifier]
  end

  ##################################
  # AST visit notification events
  # ################################

  # Notification: a symbol node was visited. The matching grammar symbol
  # is appended to the rhs under construction; for a quantified symbol,
  # a helper non-terminal and its rules are generated on first use.
  # @param aSymbolNode [SymbolNode] the visited node
  # @param aVisitor [ASTVisitor] the visitor that sent the notification
  def after_symbol_node(aSymbolNode, aVisitor)
    symb_name = aSymbolNode.name

    case aSymbolNode.repetition
    when :zero_or_one
      # implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
      # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
      name_modified = "#{symb_name}#{suffix_qmark}"
      unless symbols.include? name_modified
        symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
        rule(name_modified => symb_name).tag suffix_qmark_one
        rule(name_modified => '').tag suffix_qmark_none
      end
      symb_name = name_modified

    when :zero_or_more
      # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
      # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
      name_modified = "#{symb_name}#{suffix_star}"
      unless symbols.include? name_modified
        symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
        rule(name_modified => "#{name_modified} #{symb_name}").tag suffix_star_more
        rule(name_modified => '').tag suffix_star_none
      end
      symb_name = name_modified

    when :exactly_one
      # Do nothing

    when :one_or_more
      # implicitly called: rule('symb_name_plus' => 'symb_name_plus symb_name').tag suffix_plus_more
      # implicitly called: rule('symb_name_plus' => 'symb_name').tag suffix_plus_one
      name_modified = "#{symb_name}#{suffix_plus}"
      unless symbols.include? name_modified
        symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
        rule(name_modified => "#{name_modified} #{symb_name}").tag suffix_plus_more
        rule(name_modified => symb_name).tag suffix_plus_one
      end
      symb_name = name_modified
    else
      raise StandardError, 'Unhandled multiplicity'
    end

    symb = get_grm_symbol(symb_name)
    visitor2rhs[aVisitor] << symb
  end

  # Notification: a sequence node was visited. A MatchClosest constraint
  # is added to the sequence for each annotated subnode.
  # NOTE(review): any non-empty annotation triggers a constraint, even
  # when it has no 'match_closest' entry (the value is then nil) - confirm.
  # @param aSequenceNode [SequenceNode] the visited node
  # @param _visitor [ASTVisitor] the visitor that sent the notification
  def after_sequence_node(aSequenceNode, _visitor)
    aSequenceNode.subnodes.each_with_index do |sn, i|
      next if sn.annotation.empty?

      matching = sn.annotation['match_closest']
      aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
    end
  end

  # Notification: a grouping node was visited. For a quantified grouping,
  # a non-terminal standing for the enclosed sequence is generated (with
  # a rule tagged 'return_children'), then the helper non-terminal and
  # rules for the quantifier; the resulting symbol is appended to the rhs
  # under construction. Nothing is appended for an unquantified grouping.
  # @param aGroupingNode [GroupingNode] the visited node
  # @param aVisitor [ASTVisitor] the visitor that sent the notification
  def after_grouping_node(aGroupingNode, aVisitor)
    after_sequence_node(aGroupingNode, aVisitor)
    symb_name = sequence_name(aGroupingNode)

    unless symbols.include?(symb_name) || aGroupingNode.repetition == :exactly_one
      symbols[symb_name] = Syntax::NonTerminal.new(symb_name)
      simple_rule(symb_name => serialize_sequence(aGroupingNode)).tag 'return_children'
      prod = productions.last
      prod.constraints = aGroupingNode.constraints
    end
    name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"

    case aGroupingNode.repetition
    when :zero_or_one
      # implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
      # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
      unless symbols.include? name_modified
        symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
        simple_rule(name_modified => symb_name).tag suffix_qmark_one
        simple_rule(name_modified => []).tag suffix_qmark_none
      end

    when :zero_or_more
      # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
      # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
      unless symbols.include? name_modified
        symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
        rule(name_modified => "#{name_modified} #{symb_name}").tag suffix_star_more
        rule(name_modified => '').tag suffix_star_none
      end

    when :exactly_one
      # Do nothing

    when :one_or_more
      unless symbols.include? name_modified
        symbols[name_modified] = Syntax::NonTerminal.new(name_modified)
        rule(name_modified => "#{name_modified} #{symb_name}").tag suffix_plus_more
        rule(name_modified => symb_name).tag suffix_plus_one
      end
    else
      raise StandardError, 'Unhandled multiplicity'
    end

    unless aGroupingNode.repetition == :exactly_one
      symb = get_grm_symbol(name_modified)
      visitor2rhs[aVisitor] << symb
    end
  end

  private

  # Add a production without invoking the notation parser: the rhs is
  # split on whitespace (or given as an Array of names). A name ending
  # with '?', '*' or '+' refers to the symbol with the matching suffix.
  # @param aProductionRepr [Hash{String, String|Array<String>}]
  # @return [Production] The created Production instance
  def simple_rule(aProductionRepr)
    aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
      lhs = get_grm_symbol(lhs_name)
      rhs = rhs_repr.kind_of?(String) ? rhs_repr.strip.scan(/\S+/) : rhs_repr

      members = rhs.map do |name|
        if name.end_with?('?', '*', '+')
          # Fix: the original passed an undefined local (aModifier) here
          suffix = modifier2suffix(name[-1])
          get_grm_symbol("#{name.chop}#{suffix}")
        else
          get_grm_symbol(name)
        end
      end
      new_prod = Syntax::Production.new(lhs, members)
      productions << new_prod
    end

    productions.last
  end

  # Add the given grammar symbols.
  # @param aClass [Class] The class of grammar symbols to instantiate.
  # @param theSymbols [Array] array of elements are treated as follows:
  #   if the element is already a grammar symbol, then it added as is,
  #   otherwise it is considered as the name of a grammar symbol
  # of the specified class to build.
  # @return [Hash{String, GrmSymbol}] mapping symbol name => symbol
  def build_symbols(aClass, theSymbols)
    theSymbols.each_with_object({}) do |s, symbs|
      new_symbol = build_symbol(aClass, s)
      symbs[new_symbol.name] = new_symbol
    end
  end

  # If the argument is already a grammar symbol object then it is
  # returned as is. Otherwise, the argument is treated as a name
  # for a new instance of the given class.
  # @param aClass [Class] The class of grammar symbols to instantiate
  # @param aSymbolArg [GrmSymbol-like or String]
  # @return [GrmSymbol] the grammar symbol
  def build_symbol(aClass, aSymbolArg)
    return aSymbolArg if aSymbolArg.kind_of?(Syntax::GrmSymbol)

    aClass.new(aSymbolArg)
  end

  # Retrieve the grammar symbol with given name.
  # If it doesn't exist yet, then a non-terminal is created on the fly.
  # A name (longer than one character) ending with '+' designates the
  # 'one or more' variant of the symbol: the helper non-terminal and its
  # two rules are generated when missing.
  # @param aSymbolName [String] the name of the grammar symbol to retrieve
  # @return [GrmSymbol]
  def get_grm_symbol(aSymbolName)
    name = aSymbolName
    if aSymbolName.end_with?('+') && aSymbolName.length > 1
      base_name = aSymbolName.chop
      name = "#{base_name}#{suffix_plus}"
      unless symbols.include? name
        # Same rule generation as in after_symbol_node for :one_or_more
        symbols[name] = Syntax::NonTerminal.new(name)
        rule(name => "#{name} #{base_name}").tag suffix_plus_more
        rule(name => base_name).tag suffix_plus_one
      end
    end

    symbols[name] = Syntax::NonTerminal.new(name) unless symbols.include? name

    symbols[name]
  end

  # Derive a symbol name for a sequence: 'seq' followed, for each symbol
  # or sequence subnode, by '_' + its name, then by its repetition suffix.
  # @param aSequenceNode [SequenceNode]
  # @return [String]
  def sequence_name(aSequenceNode)
    subnode_names = +''
    aSequenceNode.subnodes.each do |subn|
      case subn
      when SymbolNode
        subnode_names << "_#{subn.name}"
      when SequenceNode
        subnode_names << "_#{sequence_name(subn)}"
      end
      subnode_names << repetition2suffix(subn.repetition)
    end

    "seq#{subnode_names}"
  end

  # @param aNode [SymbolNode, SequenceNode]
  # @return [String] the symbol name, or the derived name of a sequence
  def node_base_name(aNode)
    aNode.kind_of?(SymbolNode) ? aNode.name : sequence_name(aNode)
  end

  # @param aNode [SymbolNode, SequenceNode]
  # @return [String] the base name followed by the repetition suffix
  def node_decorated_name(aNode)
    base_name = node_base_name(aNode)
    suffix = repetition2suffix(aNode.repetition)

    "#{base_name}#{suffix}"
  end

  # Convert the subnodes of a sequence into the text of a rhs that
  # simple_rule can process: space-separated names with repetition suffix.
  # @param aSequenceNode [SequenceNode]
  # @return [String]
  def serialize_sequence(aSequenceNode)
    text = +''
    aSequenceNode.subnodes.each do |sn|
      text << ' '
      case sn
      when SymbolNode
        text << sn.name
      when SequenceNode
        text << sequence_name(sn)
      end

      text << repetition2suffix(sn.repetition)
    end

    text.strip
  end
end # class
|
448
|
+
end # module
|
449
|
+
end # module
|
450
|
+
|
451
|
+
# End of file
|