rley 0.7.08 → 0.8.03
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +29 -5
- data/CHANGELOG.md +28 -4
- data/README.md +4 -5
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
- data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +11 -11
- data/examples/general/calc_iter1/calc_grammar.rb +5 -4
- data/examples/general/calc_iter2/calc_grammar.rb +9 -9
- data/examples/general/left.rb +1 -1
- data/examples/general/right.rb +1 -1
- data/lib/rley/base/dotted_item.rb +5 -0
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +2 -2
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/notation/all_notation_nodes.rb +4 -0
- data/lib/rley/notation/ast_builder.rb +185 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +115 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +505 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +180 -0
- data/lib/rley/parse_rep/ast_base_builder.rb +44 -0
- data/lib/rley/parser/gfg_chart.rb +101 -6
- data/lib/rley/parser/gfg_earley_parser.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +5 -3
- data/lib/rley/parser/parse_entry_set.rb +1 -1
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +53 -15
- data/lib/rley/syntax/grm_symbol.rb +1 -1
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/production.rb +6 -0
- data/lib/rley.rb +1 -1
- data/spec/rley/engine_spec.rb +6 -6
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +183 -0
- data/spec/rley/notation/tokenizer_spec.rb +364 -0
- data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
- data/spec/rley/parse_rep/groucho_spec.rb +1 -1
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
- data/spec/rley/parser/dangling_else_spec.rb +447 -0
- data/spec/rley/parser/gfg_earley_parser_spec.rb +118 -10
- data/spec/rley/parser/gfg_parsing_spec.rb +2 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_abc_helper.rb +2 -2
- data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
- data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
- data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +2 -2
- data/spec/rley/support/grammar_pb_helper.rb +2 -2
- data/spec/rley/support/grammar_sppf_helper.rb +2 -2
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +29 -11
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/production_spec.rb +4 -0
- metadata +29 -14
- data/lib/rley/parser/parse_state.rb +0 -78
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -100
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../syntax/base_grammar_builder'
|
4
|
+
|
5
|
+
module Rley
  module Notation
    ########################################
    # Grammar of the notation used in the right-hand side of production rules.
    # The order of the rules below is kept as is: it determines the order
    # of the productions in the resulting grammar.
    rgn_builder = Rley::Syntax::BaseGrammarBuilder.new do
      add_terminals(*%w[LEFT_PAREN RIGHT_PAREN]) # For '(', ')' grouping delimiters
      add_terminals(*%w[LEFT_BRACE RIGHT_BRACE]) # For '{', '}' annotation delimiters
      add_terminals(*%w[QUESTION_MARK STAR PLUS]) # For postfix quantifiers
      add_terminals(*%w[COMMA ELLIPSIS])

      add_terminals('STR_LIT') # For string literal values
      add_terminals('INT_LIT') # For integer literal values
      add_terminals('SYMBOL') # Grammar symbols
      add_terminals('KEY') # Key literal

      # A right-hand side is a (possibly empty) sequence of members
      rule('notation' => 'rhs')
      rule('rhs' => 'member_seq').tag('sequence')
      rule('rhs' => [])
      rule('member_seq' => 'member_seq member').tag('more_members')
      rule('member_seq' => 'member').tag('one_member')

      # A member is a symbol or a parenthesized group, optionally annotated
      # or followed by a quantifier
      rule('member' => 'strait_member')
      rule('member' => 'quantified_member')
      rule('strait_member' => 'base_member')
      rule('strait_member' => 'base_member annotation').tag('annotated_member')
      rule('base_member' => 'SYMBOL').tag('symbol')
      rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN').tag('grouping')
      rule('quantified_member' => 'base_member quantifier').tag('quantified_member')
      rule('quantifier' => 'QUESTION_MARK').tag('question_mark')
      rule('quantifier' => 'STAR').tag('star')
      rule('quantifier' => 'PLUS').tag('plus')

      # An annotation is a brace-delimited, comma-separated list of key-value pairs
      rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag('annotation')
      rule('mapping' => 'mapping COMMA key_value').tag('more_pairs')
      rule('mapping' => 'key_value').tag('one_pair')
      rule('key_value' => 'KEY value').tag('raw_pair')
      rule('value' => 'STR_LIT')
      rule('value' => 'INT_LIT')
      rule('value' => 'range')
      rule('range' => 'INT_LIT ELLIPSIS INT_LIT').tag('bound_range')
      rule('range' => 'INT_LIT ELLIPSIS')
    end

    # And now build the Rley Grammar Notation (RGN) grammar...
    RGNGrammar = rgn_builder.grammar
  end # module
end # module
|
@@ -0,0 +1,505 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
require_relative 'parser'
|
6
|
+
require_relative 'ast_visitor'
|
7
|
+
require_relative '../syntax/match_closest'
|
8
|
+
|
9
|
+
module Rley # This module is used as a namespace
|
10
|
+
module Notation # This module is used as a namespace
|
11
|
+
# Structure used for production rules that are implicitly generated by Rley
|
12
|
+
# Members, as filled in by GrammarBuilder#add_raw_rule:
#   lhs         - name of the synthesized non-terminal (left-hand side)
#   rhs         - right-hand side: a String of symbol names, '' or an empty Array
#   tag         - tag applied to the production once it is created
#   simple      - when truthy, the production is created with #simple_rule
#                 instead of #rule (see GrammarBuilder#process_raw_rules)
#   constraints - constraints assigned to the created production
RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
|
13
|
+
|
14
|
+
# Builder GoF pattern. Builder builds a complex object
|
15
|
+
# (say, a grammar) from simpler objects (terminals and productions)
|
16
|
+
# and using a step by step approach.
|
17
|
+
class GrammarBuilder
|
18
|
+
# @return [Hash{String, GrmSymbol}] The mapping of grammar symbol names
|
19
|
+
# to the matching grammar symbol object.
|
20
|
+
attr_reader(:symbols)
|
21
|
+
|
22
|
+
# @return [Notation::Parser] Parser for the right-side of productions
|
23
|
+
attr_reader(:parser)
|
24
|
+
|
25
|
+
# @return [Hash{ASTVisitor, Array}]
|
26
|
+
attr_reader(:visitor2rhs)
|
27
|
+
|
28
|
+
# @return [Array<Production>] The list of production rules for
|
29
|
+
# the grammar to build.
|
30
|
+
attr_reader(:productions)
|
31
|
+
|
32
|
+
# @return [Hash{String => Array<RawRule>}] The synthesized raw productions, keyed by lhs name
|
33
|
+
attr_reader(:synthetized)
|
34
|
+
|
35
|
+
# Set up an empty grammar builder.
# When a block is supplied, it is evaluated in the context of the new
# builder (so add_terminals, rule, ... can be called without a receiver)
# and grammar_complete! is invoked right after it.
# @param aBlock [Proc] optional code block used to build the grammar.
# @example Building a tiny English grammar
#   builder = Rley::Notation::GrammarBuilder.new do
#     add_terminals('n', 'v', 'adj', 'det')
#     rule 'S' => 'NP VP'
#     rule 'VP' => 'v NP'
#     rule 'NP' => 'det n'
#     rule 'NP' => 'adj NP'
#   end
#   tiny_eng = builder.grammar
def initialize(&aBlock)
  @symbols = {}
  @productions = []
  @synthetized = {}
  @visitor2rhs = {}
  @parser = Notation::Parser.new

  return unless block_given?

  instance_exec(&aBlock)
  grammar_complete!
end
|
58
|
+
|
59
|
+
# Retrieve a grammar symbol from its name.
# No exception is raised for an unknown name: the lookup is a plain
# access to the symbols mapping.
# @param aSymbolName [String] the name of a grammar symbol.
# @return [GrmSymbol, nil] the retrieved symbol object, or nil when
#   no symbol is registered under that name.
def [](aSymbolName)
  symbols[aSymbolName]
end
|
66
|
+
|
67
|
+
# Register the given terminal symbols in the grammar under construction.
# @param terminalSymbols [String or Terminal] 1..* terminal symbols.
# @return [void]
def add_terminals(*terminalSymbols)
  symbols.merge!(build_symbols(Syntax::Terminal, terminalSymbols))
end
|
74
|
+
|
75
|
+
# Register the given marker symbol in the grammar under construction.
# @param aMarkerSymbol [String] A marker symbol
# @return [void]
def add_marker(aMarkerSymbol)
  marker = build_symbol(Syntax::Marker, aMarkerSymbol)
  symbols.store(marker.name, marker)
end
|
82
|
+
|
83
|
+
# Add a production rule in the grammar given one
# key-value pair of the form: String => String.
#   Where the key is the name of the non-terminal appearing in the
#   left side of the rule.
#   The value is a sequence of grammar symbol names (optionally quantified).
#   An Array of such names is accepted as well; an empty Array (or a blank
#   String) stands for an empty right-hand side.
# The rule is created and inserted in the grammar.
# @example Equivalent call syntax
#   builder.add_production('A' => 'a A c')
#   builder.rule('A' => 'a A c') # 'rule' is a synonym
# @param aProductionRepr [Hash{String => String, Array<String>}]
#   A Hash-based representation of a production.
# @return [Production] The created Production instance
def add_production(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    # Array elements are joined into the textual notation understood by the parser
    rhs_text = rhs_repr.kind_of?(Array) ? rhs_repr.join(' ') : rhs_repr
    rhs = rhs_text.strip
    constraints = []
    if rhs.empty?
      rhs_members = []
    else
      ast = parser.parse(rhs)
      visitor = ASTVisitor.new(ast)
      # The visit notifications (after_..._node) fill this array
      visitor2rhs[visitor] = []
      begin
        visitor.subscribe(self)
        visitor.start
        root_node = ast.root
        constraints = root_node.constraints unless root_node.kind_of?(SymbolNode)
      ensure
        # Never keep a finished (or failed) visitor in the mapping
        rhs_members = visitor2rhs.delete(visitor)
      end
    end
    new_prod = Syntax::Production.new(lhs, rhs_members)
    new_prod.constraints = constraints
    productions << new_prod
  end

  productions.last
end
|
120
|
+
|
121
|
+
# Build (once) the grammar from the symbols and productions
# that were added to the builder; later calls return the same object.
# @return [Grammar] the created grammar object.
# @raise [StandardError] when there is no symbol, no production, or when
#   a terminal doesn't occur in the rhs of any production.
def grammar
  return @grammar if @grammar

  raise StandardError, 'No symbol found for grammar' if symbols.empty?
  raise StandardError, 'No production found for grammar' if productions.empty?

  # Collect the terminals that occur in the rhs of at least one production
  referenced = productions.each_with_object(Set.new) do |prod, found|
    prod.rhs.members.each do |member|
      found << member if member.kind_of?(Syntax::Terminal)
    end
  end

  idle = symbols.values.select do |a_symb|
    a_symb.kind_of?(Syntax::Terminal) && !referenced.include?(a_symb)
  end
  unless idle.empty?
    raise StandardError, "Useless terminal symbol(s): #{idle.map(&:name).join(', ')}."
  end

  @grammar = Syntax::Grammar.new(productions.dup)
end
|
153
|
+
|
154
|
+
# `rule` is a synonym of add_production.
alias rule add_production
|
155
|
+
|
156
|
+
# When a symbol, say symb, in a rhs is followed by a '?' modifier,
# then a non-terminal named symb + suffix_qmark is generated with the rules:
# implicitly called: rule('symb_qmark' => 'symb').tag suffix_qmark_one
# implicitly called: rule('symb_qmark' => '').tag suffix_qmark_none
# @return [String] suffix appended to the name of the optional symbol
def suffix_qmark
  '_qmark'
end

# @return [String] tag of the generated rule that matches one occurrence
def suffix_qmark_one
  '_qmark_one'
end

# @return [String] tag of the generated rule that matches no occurrence
def suffix_qmark_none
  '_qmark_none'
end
|
171
|
+
|
172
|
+
# When a symbol, say symb, in a rhs is followed by a '*' modifier,
# then a non-terminal named symb + suffix_star is generated with the rules:
# implicitly called: rule('declaration_star' => 'declaration_star declaration').tag suffix_star_more
# implicitly called: rule('declaration_star' => '').tag suffix_star_none
# @return [String] suffix appended to the name of the repeated symbol
def suffix_star
  '_star'
end

# @return [String] tag of the generated rule that matches one more occurrence
def suffix_star_more
  '_star_more'
end

# @return [String] tag of the generated rule that matches no occurrence
def suffix_star_none
  '_star_none'
end
|
187
|
+
|
188
|
+
# When a symbol, say symb, in a rhs is followed by a '+' modifier,
# then a non-terminal named symb + suffix_plus is generated with the rules:
# implicitly called: rule('digit_plus' => 'digit_plus digit').tag suffix_plus_more
# implicitly called: rule('digit_plus' => 'digit').tag suffix_plus_one
# @return [String] suffix appended to the name of the repeated symbol
def suffix_plus
  '_plus'
end

# @return [String] tag of the generated rule that matches one more occurrence
def suffix_plus_more
  '_plus_more'
end

# @return [String] tag of the generated rule that matches a single occurrence
def suffix_plus_one
  '_plus_one'
end
|
203
|
+
|
204
|
+
# Map a repetition (multiplicity) to the suffix used in generated names.
# @param aRepetition [Symbol] one of: :zero_or_one, :zero_or_more,
#   :exactly_one, :one_or_more
# @return [String, nil] the matching suffix ('' for :exactly_one);
#   nil for any other value.
def repetition2suffix(aRepetition)
  case aRepetition
  when :zero_or_one then suffix_qmark
  when :zero_or_more then suffix_star
  when :exactly_one then ''
  when :one_or_more then suffix_plus
  end
end
|
214
|
+
|
215
|
+
# Map a postfix modifier character to the suffix used in generated names.
# @param aModifier [String] one of: '?', '*', '+'
# @return [String, nil] the matching suffix; nil for any other value.
def modifier2suffix(aModifier)
  case aModifier
  when '?' then suffix_qmark
  when '*' then suffix_star
  when '+' then suffix_plus
  end
end
|
224
|
+
|
225
|
+
##################################
|
226
|
+
# RGN's AST visit notification events
|
227
|
+
# ################################
|
228
|
+
# Visit notification: a symbol node was visited.
# For a quantified symbol, a non-terminal named after the symbol plus the
# quantifier suffix is registered (once) together with its raw rules; the
# resulting grammar symbol is appended to the rhs collected for the visitor.
# @param aSymbolNode [SymbolNode] the visited node
# @param aVisitor [ASTVisitor] the visitor that sent the notification
# @raise [StandardError] when the repetition of the node is unknown
def after_symbol_node(aSymbolNode, aVisitor)
  plain_name = aSymbolNode.name

  final_name =
    case aSymbolNode.repetition
    when :exactly_one
      plain_name

    when :zero_or_one
      # implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
      # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
      optional = "#{plain_name}#{suffix_qmark}"
      unless symbols.include?(optional)
        add_nonterminal(optional)
        add_raw_rule(optional, plain_name, suffix_qmark_one)
        add_raw_rule(optional, '', suffix_qmark_none)
      end
      optional

    when :zero_or_more
      # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
      # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
      starred = "#{plain_name}#{suffix_star}"
      unless symbols.include?(starred)
        add_nonterminal(starred)
        add_raw_rule(starred, "#{starred} #{plain_name}", suffix_star_more)
        add_raw_rule(starred, [], suffix_star_none)
      end
      starred

    when :one_or_more
      # implicitly called: rule('symb_name_plus' => 'symb_name_plus symb_name').tag suffix_plus_more
      # implicitly called: rule('symb_name_plus' => 'symb_name').tag suffix_plus_one
      repeated = "#{plain_name}#{suffix_plus}"
      unless symbols.include?(repeated)
        add_nonterminal(repeated)
        add_raw_rule(repeated, "#{repeated} #{plain_name}", suffix_plus_more)
        add_raw_rule(repeated, plain_name, suffix_plus_one)
      end
      repeated

    else
      raise StandardError, 'Unhandled multiplicity'
    end

  visitor2rhs[aVisitor] << get_grm_symbol(final_name)
end
|
272
|
+
|
273
|
+
# Visit notification: a sequence node was visited.
# For every sub-node that bears a non-empty annotation, a MatchClosest
# constraint (built from the sequence node, the index of the sub-node and
# the 'match_closest' value of the annotation) is appended to the
# constraints of the sequence node.
# @param aSequenceNode [SequenceNode] the visited node
# @param _visitor [ASTVisitor] unused
def after_sequence_node(aSequenceNode, _visitor)
  aSequenceNode.subnodes.each_with_index do |child, position|
    unless child.annotation.empty?
      closest = child.annotation['match_closest']
      constraint = Syntax::MatchClosest.new(aSequenceNode, position, closest)
      aSequenceNode.constraints << constraint
    end
  end
end
|
281
|
+
|
282
|
+
# Visit notification: a grouping (parenthesized sequence) node was visited.
# The constraints of the group are collected first. A quantified group
# then gets a non-terminal standing for the group itself (with a
# 'return_children' raw rule) and another one for the quantified group;
# the latter is appended to the rhs collected for the visitor.
# Nothing is generated for a group that occurs exactly once.
# @param aGroupingNode [GroupingNode] the visited node
# @param aVisitor [ASTVisitor] the visitor that sent the notification
# @raise [StandardError] when the repetition of the node is unknown
def after_grouping_node(aGroupingNode, aVisitor)
  after_sequence_node(aGroupingNode, aVisitor)
  group_name = sequence_name(aGroupingNode)
  repetition = aGroupingNode.repetition
  return if repetition == :exactly_one

  unless symbols.include?(group_name)
    add_nonterminal(group_name)
    group_rhs = serialize_sequence(aGroupingNode)
    add_raw_rule(group_name, group_rhs, 'return_children', true, aGroupingNode.constraints)
  end
  quantified = "#{group_name}#{repetition2suffix(repetition)}"

  # Each entry: rhs, tag and (optionally) the 'simplified' flag of a raw rule
  pending =
    case repetition
    when :zero_or_one
      # implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
      # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
      [[group_name, suffix_qmark_one, true], [[], suffix_qmark_none, true]]
    when :zero_or_more
      # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
      # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
      [["#{quantified} #{group_name}", suffix_star_more], ['', suffix_star_none]]
    when :one_or_more
      [["#{quantified} #{group_name}", suffix_plus_more], [group_name, suffix_plus_one]]
    else
      raise StandardError, 'Unhandled multiplicity'
    end

  unless symbols.include?(quantified)
    add_nonterminal(quantified)
    pending.each { |(rhs, tag, *flags)| add_raw_rule(quantified, rhs, tag, *flags) }
  end

  visitor2rhs[aVisitor] << get_grm_symbol(quantified)
end
|
330
|
+
|
331
|
+
# Notify the builder object that the programmer has finished
# entering the terminals and production rules.
# The pending raw (synthesized) rules are then processed.
def grammar_complete!
  process_raw_rules
end
|
336
|
+
|
337
|
+
private
|
338
|
+
|
339
|
+
# Create a non-terminal with the given name and register it in the
# symbols mapping (replacing any symbol already stored under that name).
# @param aName [String] name of the non-terminal
# @return [Syntax::NonTerminal] the created symbol
def add_nonterminal(aName)
  symbols.store(aName, Syntax::NonTerminal.new(aName))
end
|
342
|
+
|
343
|
+
# Add production(s) without going through the notation parser.
# The rhs is either a String of whitespace-separated symbol names or an
# Array of symbol names. A name ending with '?', '*' or '+' is replaced
# by the name of the matching generated non-terminal.
# @param aProductionRepr [Hash{String => String, Array<String>}]
# @return [Production] the last created production
def simple_rule(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    names = rhs_repr.kind_of?(String) ? rhs_repr.strip.scan(/\S+/) : rhs_repr

    members = names.map do |nm|
      resolved = nm
      # Quantified name: drop the modifier and append its suffix
      resolved = "#{nm.chop}#{modifier2suffix(nm[-1])}" if nm.end_with?('?', '*', '+')
      get_grm_symbol(resolved)
    end
    productions << Syntax::Production.new(lhs, members)
  end

  productions.last
end
|
368
|
+
|
369
|
+
# Build a name-to-symbol mapping for the given grammar symbols.
# @param aClass [Class] The class of grammar symbols to instantiate.
# @param theSymbols [Array] array of elements that are treated as follows:
#   if the element is already a grammar symbol, then it is added as is,
#   otherwise it is considered as the name of a grammar symbol
#   of the specified class to build.
# @return [Hash] mapping from symbol name to grammar symbol
def build_symbols(aClass, theSymbols)
  theSymbols.each_with_object({}) do |item, by_name|
    grm_symbol = build_symbol(aClass, item)
    by_name[grm_symbol.name] = grm_symbol
  end
end
|
384
|
+
|
385
|
+
# Return the argument untouched when it is already a grammar symbol
# object; otherwise use it as the name of a new instance of the
# given class.
# @param aClass [Class] The class of grammar symbols to instantiate
# @param aSymbolArg [GrmSymbol-like or String]
# @return [GrmSymbol] the given or newly created grammar symbol
def build_symbol(aClass, aSymbolArg)
  return aSymbolArg if aSymbolArg.kind_of?(Syntax::GrmSymbol)

  aClass.new(aSymbolArg)
end
|
398
|
+
|
399
|
+
# Retrieve the grammar symbol with given name.
# If it doesn't exist yet, then a non-terminal is created on the fly.
# A name (longer than one character) that ends with '+' denotes a repeated
# symbol: the non-terminal named after the base name + suffix_plus is
# returned. On its first use, the two productions that define it are
# created right away:
#   base_plus => base_plus base   (tagged suffix_plus_more)
#   base_plus => base             (tagged suffix_plus_one)
# @param aSymbolName [String] the name of the grammar symbol to retrieve
# @return [GrmSymbol] the symbol registered under the (resolved) name
def get_grm_symbol(aSymbolName)
  name = aSymbolName
  if aSymbolName.length > 1 && aSymbolName.end_with?('+')
    base_name = aSymbolName.chop
    name = "#{base_name}#{suffix_plus}"
    unless symbols.include?(name)
      # Register the symbol first: simple_rule looks it up by name
      add_nonterminal(name)
      simple_rule(name => [name, base_name]).tag(suffix_plus_more)
      simple_rule(name => [base_name]).tag(suffix_plus_one)
    end
  end

  symbols.include?(name) ? symbols[name] : add_nonterminal(name)
end
|
427
|
+
|
428
|
+
# Derive a name for a sequence node from its sub-nodes.
# The name starts with 'seq'; for each sub-node, an underscore followed by
# the symbol name (or by the name of the nested sequence) is appended,
# then the suffix matching the repetition of the sub-node.
# @param aSequenceNode [SequenceNode]
# @return [String]
def sequence_name(aSequenceNode)
  pieces = aSequenceNode.subnodes.map do |child|
    label =
      case child
      when SymbolNode then "_#{child.name}"
      when SequenceNode then "_#{sequence_name(child)}"
      else ''
      end
    label + repetition2suffix(child.repetition)
  end

  "seq#{pieces.join}"
end
|
443
|
+
|
444
|
+
# Name of a node without any repetition suffix.
# @param aNode [SymbolNode, SequenceNode]
# @return [String] the symbol name for a symbol node, otherwise the
#   name derived by sequence_name
def node_base_name(aNode)
  return aNode.name if aNode.kind_of?(SymbolNode)

  sequence_name(aNode)
end
|
451
|
+
|
452
|
+
# Name of a node followed by the suffix that matches its repetition.
# @param aNode [SymbolNode, SequenceNode]
# @return [String]
def node_decorated_name(aNode)
  "#{node_base_name(aNode)}#{repetition2suffix(aNode.repetition)}"
end
|
458
|
+
|
459
|
+
# Build the textual rhs for a sequence node: the space-separated names of
# its sub-nodes (symbol name, or sequence_name for a nested sequence), each
# followed by the suffix matching its repetition.
# @param aSequenceNode [SequenceNode]
# @return [String]
def serialize_sequence(aSequenceNode)
  pieces = aSequenceNode.subnodes.map do |child|
    label =
      case child
      when SymbolNode then child.name
      when SequenceNode then sequence_name(child)
      else ''
      end
    label + repetition2suffix(child.repetition)
  end

  pieces.join(' ').strip
end
|
476
|
+
|
477
|
+
# Record a raw (not yet processed) rule for a synthesized symbol.
# The raw rules are grouped per lhs name in the synthetized mapping.
# @param aSymbol [String] name of the lhs non-terminal
# @param aRHS [String, Array] right-hand side of the rule
# @param aTag [String] tag for the future production
# @param simplified [Boolean] value of the 'simple' member of the raw rule
# @param constraints [Array] constraints for the future production
# @return [Array<RawRule>] the raw rules recorded so far for aSymbol
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
  entry = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
  return @synthetized[aSymbol] << entry if synthetized.include?(aSymbol)

  @synthetized[aSymbol] = [entry]
end
|
485
|
+
|
486
|
+
# Turn every pending raw rule into a production, one lhs at a time,
# until the synthetized mapping is empty (raw rules recorded while
# processing are handled as well).
# A raw rule flagged as simple goes through simple_rule, any other one
# through rule; the production then receives the tag and the constraints
# of the raw rule.
def process_raw_rules
  until synthetized.empty?
    # Remove the first entry before processing it
    _lhs_name, pending = synthetized.shift
    pending.each do |raw|
      repr = { raw.lhs => raw.rhs }
      production = raw.simple ? simple_rule(repr) : rule(repr)
      production.tag(raw.tag)
      production.constraints = raw.constraints
    end
  end
end
|
501
|
+
end # class
|
502
|
+
end # module
|
503
|
+
end # module
|
504
|
+
|
505
|
+
# End of file
|