rley 0.7.08 → 0.8.03
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +29 -5
- data/CHANGELOG.md +28 -4
- data/README.md +4 -5
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
- data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +11 -11
- data/examples/general/calc_iter1/calc_grammar.rb +5 -4
- data/examples/general/calc_iter2/calc_grammar.rb +9 -9
- data/examples/general/left.rb +1 -1
- data/examples/general/right.rb +1 -1
- data/lib/rley/base/dotted_item.rb +5 -0
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +2 -2
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/notation/all_notation_nodes.rb +4 -0
- data/lib/rley/notation/ast_builder.rb +185 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +115 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +505 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +180 -0
- data/lib/rley/parse_rep/ast_base_builder.rb +44 -0
- data/lib/rley/parser/gfg_chart.rb +101 -6
- data/lib/rley/parser/gfg_earley_parser.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +5 -3
- data/lib/rley/parser/parse_entry_set.rb +1 -1
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +53 -15
- data/lib/rley/syntax/grm_symbol.rb +1 -1
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/production.rb +6 -0
- data/lib/rley.rb +1 -1
- data/spec/rley/engine_spec.rb +6 -6
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +183 -0
- data/spec/rley/notation/tokenizer_spec.rb +364 -0
- data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
- data/spec/rley/parse_rep/groucho_spec.rb +1 -1
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
- data/spec/rley/parser/dangling_else_spec.rb +447 -0
- data/spec/rley/parser/gfg_earley_parser_spec.rb +118 -10
- data/spec/rley/parser/gfg_parsing_spec.rb +2 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_abc_helper.rb +2 -2
- data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
- data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
- data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +2 -2
- data/spec/rley/support/grammar_pb_helper.rb +2 -2
- data/spec/rley/support/grammar_sppf_helper.rb +2 -2
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +29 -11
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/production_spec.rb +4 -0
- metadata +29 -14
- data/lib/rley/parser/parse_state.rb +0 -78
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -100
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,49 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative '../syntax/base_grammar_builder'
|
4
|
+
|
5
|
+
module Rley
  module Notation
    ########################################
    # Grammar of the Rley Grammar Notation (RGN), i.e. the syntax accepted
    # in the right-hand side of production rules.
    rgn_builder = Rley::Syntax::BaseGrammarBuilder.new do
      # Delimiters: '(' and ')' for grouping, '{' and '}' for annotations
      add_terminals('LEFT_PAREN', 'RIGHT_PAREN')
      add_terminals('LEFT_BRACE', 'RIGHT_BRACE')
      # Postfix quantifiers
      add_terminals('QUESTION_MARK', 'STAR', 'PLUS')
      add_terminals('COMMA', 'ELLIPSIS')
      # String literals, integer literals, grammar symbols and key literals
      add_terminals('STR_LIT', 'INT_LIT', 'SYMBOL', 'KEY')

      # A right-hand side is a (possibly empty) sequence of members
      rule('notation' => 'rhs')
      rule('rhs' => 'member_seq').tag('sequence')
      rule('rhs' => [])
      rule('member_seq' => 'member_seq member').tag('more_members')
      rule('member_seq' => 'member').tag('one_member')

      # A member is either plain (optionally annotated) or quantified
      rule('member' => 'strait_member')
      rule('member' => 'quantified_member')
      rule('strait_member' => 'base_member')
      rule('strait_member' => 'base_member annotation').tag('annotated_member')
      rule('base_member' => 'SYMBOL').tag('symbol')
      rule('base_member' => 'LEFT_PAREN member_seq RIGHT_PAREN').tag('grouping')
      rule('quantified_member' => 'base_member quantifier').tag('quantified_member')
      rule('quantifier' => 'QUESTION_MARK').tag('question_mark')
      rule('quantifier' => 'STAR').tag('star')
      rule('quantifier' => 'PLUS').tag('plus')

      # Annotations: comma-separated key/value pairs between braces
      rule('annotation' => 'LEFT_BRACE mapping RIGHT_BRACE').tag('annotation')
      rule('mapping' => 'mapping COMMA key_value').tag('more_pairs')
      rule('mapping' => 'key_value').tag('one_pair')
      rule('key_value' => 'KEY value').tag('raw_pair')
      rule('value' => 'STR_LIT')
      rule('value' => 'INT_LIT')
      rule('value' => 'range')
      rule('range' => 'INT_LIT ELLIPSIS INT_LIT').tag('bound_range')
      rule('range' => 'INT_LIT ELLIPSIS')
    end

    # And now build the Rley Grammar Notation (RGN) grammar...
    RGNGrammar = rgn_builder.grammar
  end # module
end # module
|
@@ -0,0 +1,505 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
|
5
|
+
require_relative 'parser'
|
6
|
+
require_relative 'ast_visitor'
|
7
|
+
require_relative '../syntax/match_closest'
|
8
|
+
|
9
|
+
module Rley # This module is used as a namespace
|
10
|
+
module Notation # This module is used as a namespace
|
11
|
+
# Structure used for production rules that are implicitly generated by Rley
|
12
|
+
RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
|
13
|
+
|
14
|
+
# Builder GoF pattern. Builder builds a complex object
|
15
|
+
# (say, a grammar) from simpler objects (terminals and productions)
|
16
|
+
# and using a step by step approach.
|
17
|
+
class GrammarBuilder
|
18
|
+
# @return [Hash{String, GrmSymbol}] The mapping of grammar symbol names
|
19
|
+
# to the matching grammar symbol object.
|
20
|
+
attr_reader(:symbols)
|
21
|
+
|
22
|
+
# @return [Notation::Parser] Parser for the right-side of productions
|
23
|
+
attr_reader(:parser)
|
24
|
+
|
25
|
+
# @return [Hash{ASTVisitor, Array}]
|
26
|
+
attr_reader(:visitor2rhs)
|
27
|
+
|
28
|
+
# @return [Array<Production>] The list of production rules for
|
29
|
+
# the grammar to build.
|
30
|
+
attr_reader(:productions)
|
31
|
+
|
32
|
+
# @return [Hash{String => Array<RawRule>}] The synthesized raw rules, grouped by the name of their left-hand side
|
33
|
+
attr_reader(:synthetized)
|
34
|
+
|
35
|
+
# Creates a new grammar builder.
# When a block is given, it is evaluated in the context of the new builder
# (so `add_terminals`, `rule`, ... can be called without receiver) and the
# builder is then notified that the grammar is complete.
# @param aBlock [Proc] code block used to build the grammar.
# @example Building a tiny English grammar
#   builder = Rley::Notation::GrammarBuilder.new do
#     add_terminals('n', 'v', 'adj', 'det')
#     rule 'S' => 'NP VP'
#     rule 'VP' => 'v NP'
#     rule 'NP' => 'det n'
#     rule 'NP' => 'adj NP'
#   end
#   tiny_eng = builder.grammar
def initialize(&aBlock)
  @symbols = {}
  @productions = []
  @parser = Notation::Parser.new
  @visitor2rhs = {}
  @synthetized = {}
  return unless block_given?

  instance_exec(&aBlock)
  grammar_complete!
end
|
58
|
+
|
59
|
+
# Retrieve a grammar symbol from its name.
# The lookup is a plain Hash access on `symbols`: no exception is raised
# for an unknown name.
# @param aSymbolName [String] the name of a grammar symbol.
# @return [GrmSymbol, nil] the retrieved symbol object, or nil when no
#   symbol is registered under that name.
def [](aSymbolName)
  symbols[aSymbolName]
end
|
66
|
+
|
67
|
+
# Register the given terminal symbols in the grammar of the language.
# Each argument is turned into a terminal (see #build_symbols) and merged
# into the `symbols` mapping under its name.
# @param terminalSymbols [String or Terminal] 1..* terminal symbols.
# @return [void]
def add_terminals(*terminalSymbols)
  symbols.merge!(build_symbols(Syntax::Terminal, terminalSymbols))
end
|
74
|
+
|
75
|
+
# Register the given marker symbol in the grammar of the language.
# The marker is stored in the `symbols` mapping under its name.
# @param aMarkerSymbol [String] A marker symbol
# @return [void]
def add_marker(aMarkerSymbol)
  marker = build_symbol(Syntax::Marker, aMarkerSymbol)
  symbols.store(marker.name, marker)
end
|
82
|
+
|
83
|
+
# Add a production rule in the grammar given one
# key-value pair of the form: String => String.
# Where the key is the name of the non-terminal appearing in the
# left side of the rule.
# The value is a sequence of grammar symbol names (optionally quantified).
# An Array value is also accepted: an empty Array denotes an empty
# right-hand side, otherwise its elements are joined with spaces
# and handled like the equivalent String.
# The rule is created and inserted in the grammar.
# @example Equivalent call syntax
#   builder.add_production('A' => 'a A c')
#   builder.rule('A' => 'a A c') # 'rule' is a synonym
# @param aProductionRepr [Hash{String => String, Array<String>}]
#   A Hash-based representation of a production.
# @return [Production] The created Production instance
#   (the last element of `productions`)
def add_production(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    lhs = get_grm_symbol(lhs_name)
    # Arrays have no #strip: convert them to RGN text first
    rhs_text = rhs_repr.kind_of?(Array) ? rhs_repr.join(' ') : rhs_repr
    rhs = rhs_text.strip
    constraints = []
    if rhs.empty?
      rhs_members = []
    else
      ast = parser.parse(rhs)
      visitor = ASTVisitor.new(ast)
      # The visit events (after_symbol_node, ...) fill this array
      visitor2rhs[visitor] = []
      visitor.subscribe(self)
      visitor.start
      root_node = ast.root
      constraints = root_node.constraints unless root_node.kind_of?(SymbolNode)

      rhs_members = visitor2rhs.delete(visitor)
    end
    new_prod = Syntax::Production.new(lhs, rhs_members)
    new_prod.constraints = constraints
    productions << new_prod
  end

  productions.last
end
|
120
|
+
|
121
|
+
# Given the grammar symbols and productions added to the builder,
# build the resulting grammar (if not yet done).
# The grammar is built once and memoized; later calls return the same object.
# @return [Grammar] the created grammar object.
# @raise [StandardError] when no symbol or no production was added, or
#   when a declared terminal never occurs in the rhs of a production.
def grammar
  return @grammar if @grammar

  raise StandardError, 'No symbol found for grammar' if symbols.empty?
  raise StandardError, 'No production found for grammar' if productions.empty?

  # Check that each terminal appears at least in a rhs of a production
  declared = symbols.values.grep(Syntax::Terminal)
  referenced = Set.new
  productions.each do |prod|
    prod.rhs.members.each do |member|
      referenced << member if member.kind_of?(Syntax::Terminal)
    end
  end

  orphans = declared.reject { |term| referenced.include?(term) }
  unless orphans.empty?
    listing = orphans.map(&:name).join(', ')
    raise StandardError, "Useless terminal symbol(s): #{listing}."
  end

  @grammar = Syntax::Grammar.new(productions.dup)
end
|
153
|
+
|
154
|
+
alias rule add_production
|
155
|
+
|
156
|
+
# When a symbol, say symb, in a rhs is followed by a '?' modifier,
# then rules will be generated with a lhs named symb + suffix_qmark
# implicitly called: rule('declaration_qmark' => 'declaration').tag suffix_qmark_one
# implicitly called: rule('declaration_qmark' => '').tag suffix_qmark_none
# @return [String] suffix appended to the name of an optional symbol
def suffix_qmark
  '_qmark'
end

# @return [String] tag of the synthesized rule matching one occurrence
def suffix_qmark_one
  '_qmark_one'
end

# @return [String] tag of the synthesized rule matching no occurrence
def suffix_qmark_none
  '_qmark_none'
end
|
171
|
+
|
172
|
+
# When a symbol, say symb, in a rhs is followed by a '*' modifier,
# then rules will be generated with a lhs named symb + suffix_star
# implicitly called: rule('declaration_star' => 'declaration_star declaration').tag suffix_star_more
# implicitly called: rule('declaration_star' => '').tag suffix_star_none
# @return [String] suffix appended to the name of a symbol repeated zero or more times
def suffix_star
  '_star'
end

# @return [String] tag of the synthesized rule matching one more occurrence
def suffix_star_more
  '_star_more'
end

# @return [String] tag of the synthesized rule matching no occurrence
def suffix_star_none
  '_star_none'
end
|
187
|
+
|
188
|
+
# When a symbol, say symb, in a rhs is followed by a '+' modifier,
# then rules will be generated with a lhs named symb + suffix_plus
# implicitly called: rule('digit_plus' => 'digit_plus digit').tag suffix_plus_more
# implicitly called: rule('digit_plus' => 'digit').tag suffix_plus_one
# @return [String] suffix appended to the name of a symbol repeated one or more times
def suffix_plus
  '_plus'
end

# @return [String] tag of the synthesized rule matching one more occurrence
def suffix_plus_more
  '_plus_more'
end

# @return [String] tag of the synthesized rule matching a single occurrence
def suffix_plus_one
  '_plus_one'
end
|
203
|
+
|
204
|
+
# Map a repetition indicator to the suffix used in generated symbol names.
# @param aRepetition [Symbol] one of: :zero_or_one, :zero_or_more,
#   :exactly_one, :one_or_more
# @return [String, nil] the matching suffix (empty for :exactly_one);
#   nil for any other value
def repetition2suffix(aRepetition)
  case aRepetition
  when :zero_or_one then suffix_qmark
  when :zero_or_more then suffix_star
  when :exactly_one then ''
  when :one_or_more then suffix_plus
  end
end
|
214
|
+
|
215
|
+
# Map a quantifier character to the suffix used in generated symbol names.
# @param aModifier [String] one of: '?', '*', '+'
# @return [String, nil] the matching suffix; nil for any other value
def modifier2suffix(aModifier)
  case aModifier
  when '?' then suffix_qmark
  when '*' then suffix_star
  when '+' then suffix_plus
  end
end
|
224
|
+
|
225
|
+
##################################
|
226
|
+
# RGN's AST visit notification events
|
227
|
+
# ################################
|
228
|
+
# Visit event: a symbol node was visited.
# For a quantified symbol, a non-terminal named after the symbol and its
# quantifier is registered (once) together with two raw rules defining it.
# The grammar symbol standing for the node is then appended to the rhs
# under construction for the given visitor.
# @param aSymbolNode [SymbolNode] the visited node
# @param aVisitor [ASTVisitor] key of the rhs being built in `visitor2rhs`
# @raise [StandardError] when the repetition of the node is unknown
def after_symbol_node(aSymbolNode, aVisitor)
  plain_name = aSymbolNode.name

  rhs_name =
    case aSymbolNode.repetition
    when :exactly_one
      plain_name

    when :zero_or_one
      # implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
      # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
      optional_name = "#{plain_name}#{suffix_qmark}"
      unless symbols.include?(optional_name)
        add_nonterminal(optional_name)
        add_raw_rule(optional_name, plain_name, suffix_qmark_one)
        add_raw_rule(optional_name, '', suffix_qmark_none)
      end
      optional_name

    when :zero_or_more
      # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
      # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
      star_name = "#{plain_name}#{suffix_star}"
      unless symbols.include?(star_name)
        add_nonterminal(star_name)
        add_raw_rule(star_name, "#{star_name} #{plain_name}", suffix_star_more)
        add_raw_rule(star_name, [], suffix_star_none)
      end
      star_name

    when :one_or_more
      # implicitly called: rule('symb_name_plus' => 'symb_name_plus symb_name').tag suffix_plus_more
      # implicitly called: rule('symb_name_plus' => 'symb_name').tag suffix_plus_one
      plus_name = "#{plain_name}#{suffix_plus}"
      unless symbols.include?(plus_name)
        add_nonterminal(plus_name)
        add_raw_rule(plus_name, "#{plus_name} #{plain_name}", suffix_plus_more)
        add_raw_rule(plus_name, plain_name, suffix_plus_one)
      end
      plus_name

    else
      raise StandardError, 'Unhandled multiplicity'
    end

  visitor2rhs[aVisitor] << get_grm_symbol(rhs_name)
end
|
272
|
+
|
273
|
+
# Visit event: a sequence node was visited.
# For every subnode carrying a non-empty annotation, a MatchClosest
# constraint is appended to the constraints of the sequence. The constraint
# receives the sequence, the position of the subnode and the value found
# under the 'match_closest' key of the annotation.
# @param aSequenceNode [SequenceNode] the visited node
# @param _visitor [ASTVisitor] unused
def after_sequence_node(aSequenceNode, _visitor)
  aSequenceNode.subnodes.each.with_index do |child, position|
    annotation = child.annotation
    next if annotation.empty?

    closest = annotation['match_closest']
    constraint = Syntax::MatchClosest.new(aSequenceNode, position, closest)
    aSequenceNode.constraints << constraint
  end
end
|
281
|
+
|
282
|
+
# Visit event: a grouping node was visited.
# The node is first handled like a sequence node. For a quantified
# grouping, a non-terminal standing for the grouped sequence is registered
# (once), then a non-terminal for the quantified grouping with its two raw
# rules (once), and finally the latter symbol is appended to the rhs under
# construction for the given visitor. Nothing else happens for a grouping
# that occurs exactly once.
# @param aGroupingNode [GroupingNode] the visited node
# @param aVisitor [ASTVisitor] key of the rhs being built in `visitor2rhs`
# @raise [StandardError] when the repetition of the node is unknown
def after_grouping_node(aGroupingNode, aVisitor)
  after_sequence_node(aGroupingNode, aVisitor)
  group_name = sequence_name(aGroupingNode)
  multiplicity = aGroupingNode.repetition
  quantified = multiplicity != :exactly_one

  if quantified && !symbols.include?(group_name)
    add_nonterminal(group_name)
    add_raw_rule(
      group_name,
      serialize_sequence(aGroupingNode),
      'return_children',
      true,
      aGroupingNode.constraints
    )
  end
  decorated_name = "#{group_name}#{repetition2suffix(multiplicity)}"

  # Each recipe is: [rhs, tag] optionally followed by the 'simplified' flag
  recipes =
    case multiplicity
    when :zero_or_one
      # implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
      # implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
      [[group_name, suffix_qmark_one, true], [[], suffix_qmark_none, true]]
    when :zero_or_more
      # implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
      # implicitly called: rule('symb_name_star' => '').tag suffix_star_none
      [["#{decorated_name} #{group_name}", suffix_star_more], ['', suffix_star_none]]
    when :exactly_one
      []
    when :one_or_more
      [["#{decorated_name} #{group_name}", suffix_plus_more], [group_name, suffix_plus_one]]
    else
      raise StandardError, 'Unhandled multiplicity'
    end
  return unless quantified

  unless symbols.include?(decorated_name)
    add_nonterminal(decorated_name)
    recipes.each { |rhs, tag, *flags| add_raw_rule(decorated_name, rhs, tag, *flags) }
  end

  visitor2rhs[aVisitor] << get_grm_symbol(decorated_name)
end
|
330
|
+
|
331
|
+
# A notification to the builder object that the programmer
# has completed the entry of terminals and production rules.
# The raw rules synthesized so far are turned into productions.
def grammar_complete!
  process_raw_rules
end
|
336
|
+
|
337
|
+
private
|
338
|
+
|
339
|
+
# Create a non-terminal with the given name and register it in `symbols`
# (replacing any symbol already stored under that name).
# @param aName [String] name of the non-terminal
# @return [Syntax::NonTerminal] the new symbol
def add_nonterminal(aName)
  symbols.store(aName, Syntax::NonTerminal.new(aName))
end
|
342
|
+
|
343
|
+
# Add production rule(s) without going through the RGN parser.
# The rhs is either an Array of symbol names or a String that is split on
# whitespace. A name ending with '?', '*' or '+' is replaced by the name
# without that character followed by the matching suffix.
# @param aProductionRepr [Hash{String => String, Array<String>}]
# @return [Production] the last element of `productions`
def simple_rule(aProductionRepr)
  aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
    head = get_grm_symbol(lhs_name)
    names = rhs_repr.kind_of?(String) ? rhs_repr.strip.scan(/\S+/) : rhs_repr

    body = names.map do |raw_name|
      next get_grm_symbol(raw_name) unless raw_name.end_with?('?', '*', '+')

      get_grm_symbol("#{raw_name.chop}#{modifier2suffix(raw_name[-1])}")
    end
    productions << Syntax::Production.new(head, body)
  end

  productions.last
end
|
368
|
+
|
369
|
+
# Build a name => symbol mapping for the given grammar symbols.
# @param aClass [Class] The class of grammar symbols to instantiate.
# @param theSymbols [Array] array of elements that are treated as follows:
#   if the element is already a grammar symbol, then it is added as is,
#   otherwise it is considered as the name of a grammar symbol
#   of the specified class to build.
# @return [Hash] the symbols, keyed by their name
def build_symbols(aClass, theSymbols)
  theSymbols.each_with_object({}) do |element, by_name|
    grm_symbol = build_symbol(aClass, element)
    by_name[grm_symbol.name] = grm_symbol
  end
end
|
384
|
+
|
385
|
+
# If the argument is already a grammar symbol object then it is
# returned as is. Otherwise, the argument is treated as a name
# for a new instance of the given class.
# @param aClass [Class] The class of grammar symbols to instantiate
# @param aSymbolArg [GrmSymbol-like or String]
# @return [GrmSymbol] the given or the newly created grammar symbol
def build_symbol(aClass, aSymbolArg)
  return aSymbolArg if aSymbolArg.kind_of?(Syntax::GrmSymbol)

  aClass.new(aSymbolArg)
end
|
398
|
+
|
399
|
+
# Retrieve the grammar symbol with given name.
# If it doesn't exist yet, then a non-terminal is created on the fly.
# A name longer than one character and ending with '+' is read as
# 'one or more occurrences': the returned symbol is then the non-terminal
# named after the base name followed by suffix_plus; on its first use, that
# non-terminal is registered along with two raw rules:
#   rule('digit_plus' => 'digit_plus digit').tag suffix_plus_more
#   rule('digit_plus' => 'digit').tag suffix_plus_one
# Like all raw rules, these become productions in #process_raw_rules.
# @param aSymbolName [String] the name of the grammar symbol to retrieve
# @return [GrmSymbol] the symbol registered under the (resolved) name
def get_grm_symbol(aSymbolName)
  name = aSymbolName
  if aSymbolName.length > 1 && aSymbolName.end_with?('+')
    base_name = aSymbolName.chop
    name = "#{base_name}#{suffix_plus}"
    unless symbols.include?(name)
      add_nonterminal(name)
      add_raw_rule(name, "#{name} #{base_name}", suffix_plus_more)
      add_raw_rule(name, base_name, suffix_plus_one)
    end
  end

  symbols[name] = Syntax::NonTerminal.new(name) unless symbols.include?(name)

  symbols[name]
end
|
427
|
+
|
428
|
+
# Derive the name of the non-terminal standing for a sequence node.
# The name is 'seq' followed, for each subnode, by '_' + the subnode name
# (or the derived name of a nested sequence) and its repetition suffix.
# @param aSequenceNode [SequenceNode]
# @return [String]
def sequence_name(aSequenceNode)
  fragments = aSequenceNode.subnodes.map do |child|
    prefix =
      case child
      when SymbolNode then "_#{child.name}"
      when SequenceNode then "_#{sequence_name(child)}"
      else ''
      end
    prefix + repetition2suffix(child.repetition)
  end

  "seq#{fragments.join}"
end
|
443
|
+
|
444
|
+
# Name of a node without any repetition suffix.
# @param aNode [SymbolNode, SequenceNode]
# @return [String] the name of a symbol node, otherwise the name derived
#   by #sequence_name
def node_base_name(aNode)
  return aNode.name if aNode.kind_of?(SymbolNode)

  sequence_name(aNode)
end
|
451
|
+
|
452
|
+
# Name of a node followed by the suffix matching its repetition.
# @param aNode [SymbolNode, SequenceNode]
# @return [String]
def node_decorated_name(aNode)
  [node_base_name(aNode), repetition2suffix(aNode.repetition)].join
end
|
458
|
+
|
459
|
+
# Convert the subnodes of a sequence into the text of a rhs:
# the space-separated names of the subnodes (or derived names for nested
# sequences), each followed by its repetition suffix.
# @param aSequenceNode [SequenceNode]
# @return [String]
def serialize_sequence(aSequenceNode)
  tokens = aSequenceNode.subnodes.map do |child|
    stem =
      case child
      when SymbolNode then child.name
      when SequenceNode then sequence_name(child)
      else ''
      end
    stem + repetition2suffix(child.repetition)
  end

  tokens.join(' ').strip
end
|
476
|
+
|
477
|
+
# Record a raw rule, to be turned into a production by #process_raw_rules.
# Raw rules are grouped in `synthetized` by the name of their lhs.
# @param aSymbol [String] name of the lhs symbol
# @param aRHS [String, Array] representation of the rhs
# @param aTag [String] tag to give to the production
# @param simplified [Boolean] whether the rule must be built with #simple_rule
# @param constraints [Array] constraints to attach to the production
# @return [Array<RawRule>] the raw rules recorded so far for aSymbol
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
  bucket = (synthetized[aSymbol] ||= [])
  bucket << RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
end
|
485
|
+
|
486
|
+
# Turn every recorded raw rule into a production.
# Entries are removed from `synthetized` one lhs at a time until it is
# empty, so raw rules recorded while processing are handled as well.
# Each raw rule is built with #simple_rule when flagged as simple, with
# #rule otherwise; the production then receives the tag and the
# constraints of the raw rule.
def process_raw_rules
  until synthetized.empty?
    _lhs_name, pending = synthetized.shift
    pending.each do |raw|
      production = raw.simple ? simple_rule(raw.lhs => raw.rhs) : rule(raw.lhs => raw.rhs)
      production.tag(raw.tag)
      production.constraints = raw.constraints
    end
  end
end
|
501
|
+
end # class
|
502
|
+
end # module
|
503
|
+
end # module
|
504
|
+
|
505
|
+
# End of file
|