yadriggy 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,524 @@
1
+ # Copyright (C) 2017- Shigeru Chiba. All rights reserved.
2
+
3
+ require 'yadriggy/ast'
4
+ require 'yadriggy/ast_location'
5
+
6
+ module Yadriggy
7
# Defines syntax and returns a {Syntax} object.
# The rules written in the block are reified and first validated with
# {Syntax.check_syntax}; only valid rules are turned into a {Syntax}.
# {Yadriggy#define_syntax} is not available in a method body.
# @param [Proc] block the syntax definition.
# @return [Syntax] the defined syntax.
# @raise [RuntimeError] carrying the message of {Syntax.last_error}
#   when the rules in the block are rejected.
def self.define_syntax(&block)
  reified = reify(block)
  raise Syntax.last_error unless Syntax.check_syntax(reified.tree)

  Syntax.new(reified)
end
19
+
20
class ASTnode
  # This class is also defined in ast.rb; it is reopened here only
  # to add the user-type attribute below.

  # The user type (or non-terminal symbol) corresponding
  # to this node.
  # @return [Symbol|nil] the user type.
  attr_reader :usertype

  # Records the user type (or non-terminal symbol) for this node.
  attr_writer :usertype
end
28
+
29
# The exception raised by the syntax checker: {Syntax#raise_error}
# raises it with the message built by {Syntax#error}, and it is also
# raised when a grammar rule tests a property that the checked AST
# node does not define.
class SyntaxError < RuntimeError
end
32
+
33
+ # Syntax checker.
34
+ #
35
+ class Syntax
36
# @return [Hash] the grammar rules.  Each key is produced by
#   to_hash_key from the left-hand side of a rule (the name of an
#   AST node class, or the name of a user type) and each value is
#   the AST of the right-hand side of that rule.
attr_reader :hash

# debugging mode is on if true.
# While it is on, check_rule and check_hash report failed checks
# through warn.
attr_writer :debug

# The syntax of the rule DSL itself (a class-level instance variable).
# It stays nil until it is assigned further down in this class body;
# while it is nil, {Syntax.check_syntax} accepts any rules.
@syntax = nil     # initialized later
43
+
44
# Checks the syntax of the grammar rules against the syntax of the
# rule DSL.  Everything is accepted while that syntax has not been
# initialized yet.
#
# @param [ASTnode] block the grammar rules.
# @return [Boolean] false if a syntax error is found in the given
#   grammar rules.
def self.check_syntax(block)
  return true if @syntax.nil?

  @syntax.check(block)
end
52
+
53
# @return [String] the error message for the grammar rule
#   last checked.  It is an empty string when the syntax of the rule
#   DSL has not been initialized yet, since nothing can have been
#   rejected in that state.
def self.last_error
  # check_syntax treats a nil @syntax as "no error"; stay consistent
  # with it instead of failing with NoMethodError on nil.
  return '' if @syntax.nil?

  @syntax.error
end
58
+
59
# Creates a syntax whose rule table is filled from the given
# reified grammar rules.  No error is recorded and debugging mode
# is off initially.
# @param [ASTree] ast the grammar rules.
def initialize(ast)
  @hash = {}
  @debug = false
  @error_loc = @error_msg = nil
  update_hash(ast.tree.body)
end
67
+
68
# Registers the rules found in the body of a rule block.
# The body is either a single rule (a {Binary}) or an object whose
# +expressions+ are rules; a rule for a key that is already
# registered replaces the old one.
# @private
# @param [Body] body
# @return [void]
def update_hash(body)
  case body
  when nil
    nil
  when Binary
    @hash[to_hash_key(body.left)] = body.right
  else
    body.expressions.each { |rule| @hash[to_hash_key(rule.left)] = rule.right }
  end
end
83
+
84
# Adds rules.
# When a {Syntax} is given, its rules are copied and the block is not
# used; otherwise the rules written in the block are validated and
# added.
# @param [Syntax] syntax the rules of this given syntax are added.
#   This parameter is optional.
# @yield the rules in the block are added.
#   The block is optional.
# @raise [RuntimeError] carrying the message of {Syntax.last_error}
#   when the rules in the block are rejected.
def add_rules(syntax=nil, &block)
  if syntax.is_a?(Syntax)
    return syntax.hash.each { |name, rule| @hash[name] = rule }
  end
  return unless block.is_a?(Proc)

  reified = Yadriggy::reify(block)
  raise Syntax.last_error unless Syntax.check_syntax(reified.tree)

  update_hash(reified.tree.body)
end
103
+
104
# Checks the syntax of the given AST and raises an error if
# a syntax error is found.  An {ASTree} is unwrapped to its tree
# before checking.
#
# @param [ASTree|ASTnode] astree the AST returned by {Yadriggy::reify}.
# @return [void]
# @raise [SyntaxError] when a syntax error is found.
def check_error(astree)
  tree = astree
  tree = astree.tree if astree.is_a?(ASTree)
  return if check(tree)

  raise_error
end
113
+
114
# Checks the syntax of the given AST.
# The rule is looked up by the class of the root node (or one of its
# superclasses); having no rule for the root node is an error.
# The previously recorded error is discarded first.
#
# @param [ASTnode] tree the AST.
# @return [Boolean] true when the given AST is syntactically correct.
def check(tree)
  error_cleared!
  rule = find_hash_entry(tree.class)
  return error_found!(tree, tree, "no rule for #{tree.class}") unless rule

  check_expr(rule, tree, false) || error_found!(tree, tree)
end
127
+
128
# Checks whether the given AST matches the grammar rule for
# the given user type.  The previously recorded error is discarded
# first.
#
# @param [String|Symbol] user_type the name of the user type.
# @param [ASTnode] tree the AST.
# @return [Boolean] true if the given AST matches.
def check_usertype(user_type, tree)
  error_cleared!
  type_name = user_type.to_s
  check_rule_usertype(type_name, tree, false)
end
138
+
139
# Returns an error message.
# @return [String] an error message when the last invocation of {#check}
#   returns false.  It is an empty string when neither an error
#   location nor an error detail has been recorded.
def error
  nothing_recorded = @error_loc.nil? && @error_msg.nil?
  nothing_recorded ? '' : "#{@error_loc} DSL syntax error#{@error_msg}"
end
149
+
150
# Raises a syntax-error exception whose message is the current
# value of {#error}.
# @raise [SyntaxError] always.
def raise_error
  raise SyntaxError, error
end
155
+
156
+ private
157
+
158
# Resolves a constant name used in a grammar rule by looking it up
# from the Yadriggy module.
# @param [String|Symbol] name the constant name.
# @return the constant found (an AST node class in typical rules).
def to_node_class(name)
  Yadriggy.const_get(name)
end
161
+
162
# Computes the key of the rule table for the left-hand side of a rule.
# A constant is resolved to a class and the name of that class is the
# key; anything else is keyed by its own name (a user type).
def to_hash_key(left)
  return left.name unless left.is_a?(Const)

  to_node_class(left.name).name
end
169
+
170
# Records a syntax error and returns false, so that it can be used
# as the last operand of an || chain.
# The location is recorded only once (the first error found wins);
# it is taken from ast1 if that is an ASTnode, otherwise from ast2,
# otherwise it is empty.  A message, when given, is put in front of
# the messages recorded so far.
def error_found!(ast1, ast2, msg=nil)
  if @error_loc.nil?
    node = ast1.is_a?(ASTnode) ? ast1 : ast2
    @error_loc = node.is_a?(ASTnode) ? node.source_location_string : ''
  end
  @error_msg = ", #{msg}#{@error_msg}" unless msg.nil?
  false
end
183
+
184
# Forgets the recorded error and returns true, so that it can be
# used as the last operand of an && chain.
def error_cleared!
  @error_loc = @error_msg = nil
  true
end
189
+
190
# Returns true if no rule for the given (non-user) type is found
# or if ast matches the rule.
#
# The rule is looked up by the class of ast when in_hash is true,
# and by node_class otherwise.  In debugging mode a failed check is
# reported through warn; the report is also printed when check_hash
# has just flagged a failed property (@debug == 1), after which the
# flag is reset to true.
def check_rule(node_class, ast, in_hash)
  rule = find_hash_entry(in_hash ? ast.class : node_class)
  passed = rule.nil? || check_expr(rule, ast, false) || error_found!(ast, ast)
  if @debug == 1 || (@debug && !passed)
    warn "check rules for #{node_class}, #{passed}"
    @debug = true
  end
  passed
end
206
+
207
# Finds the rule registered for the given class or, failing that,
# for the nearest superclass that has one.
# @return the right-hand side of the rule, or nil if neither the class
#   nor any of its superclasses has a rule.
def find_hash_entry(node_class)
  klass = node_class
  loop do
    rule = @hash[klass.name]
    return rule unless rule.nil?

    parent = klass.superclass
    return rule if parent.nil?

    klass = parent
  end
end
215
+
216
# Checks ast against the rule registered for the given user type.
# A missing rule and a failed match are both recorded as an error.
def check_rule_usertype(node_type_name, ast, in_hash)
  rule = @hash[node_type_name]
  matched = rule && tag_and_check_expr(node_type_name, rule, ast, in_hash)
  matched || error_found!(ast, ast)
end
220
+
221
# Adds a user-type tag to the given AST.
# The node is tagged before the rule is checked and the previous tag
# is restored when the check fails.  An array is never accepted
# (nil is returned), and a nil AST is checked without tagging.
def tag_and_check_expr(node_type_name, expr, ast, in_hash)
  return if ast.is_a?(Array)
  return check_expr(expr, ast, in_hash) if ast.nil?

  previous = ast.usertype
  ast.usertype = node_type_name.to_sym
  matched = check_expr(expr, ast, in_hash)
  ast.usertype = previous unless matched
  matched
end
235
+
236
# Checks ast against the right-hand side of a rule.
# `a | b` is an ordered choice: b is tried only when a fails, and
# the error recorded while trying a is discarded when b succeeds.
def check_expr(expr, ast, in_hash)
  choice = expr.is_a?(Binary) && expr.op == :|
  return check_add_expr(expr, ast, in_hash) unless choice

  first = check_expr(expr.left, ast, in_hash)
  return first if first

  check_add_expr(expr.right, ast, in_hash) && error_cleared!
end
244
+
245
# Checks ast against a `+` chain: every operand of the chain has to
# accept ast, and the operands are tried from left to right until
# one fails.
def check_add_expr(expr, ast, in_hash)
  sum = expr.is_a?(Binary) && expr.op == :+
  return check_operand(expr, ast, in_hash) unless sum

  check_add_expr(expr.left, ast, in_hash) && check_operand(expr.right, ast, in_hash)
end
253
+
254
# Checks ast against one operand of a rule:
# a constant requires ast to be an instance of that class and to pass
# the rule for it, a reserved word accepts nil or an empty array,
# an identifier refers to a user type, and a hash literal constrains
# the properties of ast.  Any other operand never matches.
def check_operand(operand, ast, in_hash)
  case operand
  when Const
    node_class = to_node_class(operand.name)
    ast.is_a?(node_class) && check_rule(node_class, ast, in_hash)
  when Reserved
    ast == nil || ast == []
  when IdentifierOrCall
    check_rule_usertype(operand.name, ast, in_hash)
  when HashLiteral
    check_hash(operand, ast)
  else
    false
  end
end
268
+
269
# Checks the properties of ast against the constraints of a hash
# literal.  Every key names a method of ast and the value returned
# by that method has to satisfy the constraint paired with the key.
# Checking stops at the first property that fails, which is recorded
# as an error.  A nil ast never matches.
# A key that is not a method of ast is a mistake in the grammar and
# is reported by raise_hash_error.
def check_hash(hash, ast)
  return false if ast.nil?

  hash.pairs.all? do |pair|
    field = pair[0].name
    raise_hash_error(field, ast) unless ast.class.method_defined?(field)
    next true if check_or_constraint(pair[1], ast.send(field))

    if @debug
      warn " failed to check \##{field}"
      # tells check_rule to report the enclosing rule as well
      @debug = 1
    end
    offending = ast.send(field)
    owner = ast.usertype.nil? ? ast.class : ast.usertype
    error_found!(offending, ast, "#{field} in #{owner}?")
  end
end
285
+
286
# Reports that a grammar rule tests a property (method) that the
# class of the checked AST node does not define.
# @raise [SyntaxError] always.
def raise_hash_error(field, ast)
  message = "unknown method `#{field}' in #{ast.class} tested during syntax checking (wrong grammar?)"
  raise SyntaxError, message
end
289
+
290
# Checks a property value against a constraint that may be an
# ordered choice `a | b`: b is tried only when a fails, and the error
# recorded while trying a is discarded when b succeeds.
def check_or_constraint(or_con, ast)
  choice = or_con.is_a?(Binary) && or_con.op == :|
  return check_constraint(or_con, ast) unless choice

  first = check_or_constraint(or_con.left, ast)
  return first if first

  check_constraint(or_con.right, ast) && error_cleared!
end
298
+
299
# Checks a property value against a single constraint.
# - a string or symbol literal has to be equal to the value,
# - a reserved word accepts nil or an empty array,
# - a constant or an identifier is checked against the rule for that
#   class or user type (a one-element array is unwrapped first),
# - a parenthesized constraint is optional: nil is accepted,
# - an array literal requires an array.  [] accepts only an empty
#   array; [c] requires every element to satisfy c; a longer pattern
#   is handled by check_array_elements.
# Any other constraint never matches.
def check_constraint(con, ast)
  case con
  when StringLiteral
    con.value == ast
  when SymbolLiteral
    con.to_sym == ast
  when Reserved
    ast == nil || ast == []
  when Const
    check_const_constraint(con, mabye_one_element_array(ast))
  when IdentifierOrCall
    check_rule_usertype(con.name, mabye_one_element_array(ast), true)
  when Paren
    ast == nil || check_or_constraint(con.expression, ast)
  when ArrayLiteral
    return false unless ast.is_a?(Array)

    patterns = con.elements
    case patterns.size
    when 0
      ast.empty?
    when 1
      ast.all? do |item|
        check_one_array_element(patterns[0], item) || error_found!(item, ast)
      end
    else
      patterns.size - 1 <= ast.size && check_array_elements(patterns, ast)
    end
  else
    false
  end
end
329
+
330
# Checks an array against an array pattern with two or more elements.
# All patterns but the last are matched one by one against the
# leading elements; a parenthesized (optional) pattern that does not
# match is skipped without consuming an element.  Every remaining
# element then has to match the last pattern.
# The first element that fails is recorded as an error.
def check_array_elements(con_elements, ast_elements)
  position = 0
  con_elements[0...-1].each do |pattern|
    candidate = ast_elements[position]
    if check_one_array_element(pattern, candidate)
      position += 1
    elsif !pattern.is_a?(Paren)
      return error_found!(candidate, ast_elements)
    end
  end

  repeated = con_elements.last
  ast_elements.drop(position).each do |candidate|
    next if check_one_array_element(repeated, candidate)

    return error_found!(candidate, ast_elements)
  end
  true
end
351
+
352
# Checks one element of an array.  An element that is itself an array
# is matched as a tuple against a `a * b` pattern, starting from its
# last component; any other element is matched as a single value.
def check_one_array_element(con, element)
  return check_or_constraint(con, element) unless element.is_a?(Array)

  check_pair_element(con, element, element.size - 1)
end
359
+
360
# Matches the components element[0..idx] of a tuple against a
# left-associative `a * b * ...` pattern, working from the last
# component towards the first.  The number of components has to be
# equal to the number of operands of the pattern.
def check_pair_element(con, element, idx)
  unless con.is_a?(Binary) && con.op == :*
    return idx.zero? && check_or_constraint(con, element[0])
  end

  idx > 0 &&
    check_or_constraint(con.right, element[idx]) &&
    check_pair_element(con.left, element, idx - 1)
end
368
+
369
# Checks a property value against a class constraint: the value has
# to be an instance of the class named by the constant and has to
# pass the rule found for its own class.
def check_const_constraint(con, ast)
  expected = to_node_class(con.name)
  ast.is_a?(expected) && check_rule(ast.class, ast, true)
end
373
+
374
# Rule 'expr <= term' accepts a single element array of term.
# Recall that 'expr <= [ term ]' also accepts an array of term
# but the length of the array may be more than one.
#
# @return the only element when ast is an array of exactly one
#   element; otherwise ast itself.
def mabye_one_element_array(ast)
  single = ast.is_a?(Array) && ast.size == 1
  single ? ast[0] : ast
end
385
+
386
# The syntax of the BNF-like DSL, which is used for describing
# a syntax in this system.  The block below is not executed; it is
# reified by {Yadriggy.define_syntax} and interpreted as grammar rules.
#
# A rule is written as `<left> = <pattern>` or `<left> <= <pattern>`,
# where <left> is the name of an AST node class or of a user type.
# The left-hand side of = cannot be Const when the = expression
# is in a method body (Ruby does not allow assigning a constant
# there).  In that case, use <=.
#
# The operator | is ordered choice as in PEG (parsing expression
# grammar).
#
# (<pat>) specifies <pat> is optional.
#
# [<pat>] specifies an array of <pat>.
#
# <pat> may match a single-element array of <pat>.
#
# The hash literal specifies constraints on node properties.
# For example, `Binary = { op: :+ }` specifies that the `op` property
# of `Binary` has to be `:+`.  Note that the other properties such
# as `left` and `right` are not checked.  Hence, when the rules are
# <pre>Binary = { op: :+ }
# Unary = { op: :! }</pre>
# `a + -b` causes no syntax error since the unary expression `-b` is
# the right operand of the binary expression.  The rule for {Binary}
# is passed and hence the rule for {Unary} is not applied to `-b`.
# To have the right operand checked against the rule for {Unary},
# the property has to be mentioned in the rule for {Binary}:
# <pre>Binary = { op: :+, right: Unary }
# Unary = { op: :! }</pre>
#
# An AST subtree passes syntax checking if no rule is found for that
# subtree.
#
@syntax = Yadriggy.define_syntax do
  nil_value = Reserved + { name: 'nil' }

  # constraints on a node property (the values of a hash literal)
  ArrayLiteral = { elements: [ array_elem ] }
  array_elem = Binary + { op: :*, left: array_elem,
                          right: or_constraint } |
               or_constraint
  Paren = { expression: or_constraint }
  constraint = Const | IdentifierOrCall | Paren | ArrayLiteral |
               StringLiteral | SymbolLiteral | nil_value
  or_constraint = Binary + { op: :|, left: or_constraint,
                             right: constraint } |
                  constraint
  HashLiteral = { pairs: [ Label * or_constraint ] }

  # the right-hand side of a rule
  operand = Const | IdentifierOrCall | HashLiteral | nil_value
  add_expr = Binary + { op: :+, left: add_expr, right: operand } |
             operand
  expr = Binary + { op: :|, left: expr, right: add_expr } |
         add_expr

  # a rule, and the block that contains rules (it takes no parameters)
  rule = Binary +
         { left: Const | IdentifierOrCall, op: :'=' | :'<=',
           right: expr }
  Exprs = { expressions: [ rule ] }
  Parameters = { params: [],
                 optionals: [],
                 rest_of_params: nil,
                 params_after_rest: [],
                 keywords: [],
                 rest_of_keywords: nil,
                 block_param: nil }
  Block = Parameters + { body: nil | rule | Exprs }
end
450
+
451
+ public
452
+
453
# Defines Ruby syntax and returns its Syntax object.
# A new {Syntax} object is built by {Yadriggy.define_syntax} on every
# call.  The rules are written with <= since this is a method body.
# +expr+ and +exprs+ are user types; all the other left-hand sides
# are the names of AST node classes.
# @return [Syntax] the Ruby syntax.
def self.ruby_syntax
  Yadriggy.define_syntax do
    expr <= Name | Number | Super | Binary | Unary | SymbolLiteral |
            ConstPathRef | StringLiteral | StringInterpolation |
            ArrayLiteral | Paren | Call | ArrayRef | HashLiteral |
            Return | ForLoop | Loop | Conditional | Break |
            Lambda | BeginEnd | Def | ModuleDef
    exprs <= Exprs | expr

    Name <= { name: String }
    Number <= { value: Numeric }
    Super <= {}
    Identifier <= Name
    SymbolLiteral <= { name: String }
    VariableCall <= Name
    InstanceVariable <= Name
    GlobalVariable <= Name
    Label <= Name
    Reserved <= Name
    Const <= Name
    Binary <= { left: expr, op: Symbol, right: expr }
    ArrayRef <= { array: expr, indexes: [ expr ] }
    ArrayRefField <= ArrayRef
    Assign <= Binary
    Dots <= Binary
    Unary <= { op: Symbol, expr: expr }
    ConstPathRef <= { scope: (ConstPathRef | Const), name: Const }
    ConstPathField <= ConstPathRef
    StringLiteral <= { value: String }
    StringInterpolation <= { contents: [ exprs ] }
    ArrayLiteral <= { elements: [ expr ] }
    Paren <= { expression: expr }
    HashLiteral <= { pairs: [ expr * expr ] }
    Return <= { values: [ expr ] }
    ForLoop <= {vars: [ Identifier ], set: expr, body: exprs }
    Loop <= { op: Symbol, cond: expr, body: exprs }
    Conditional <= { op: Symbol, cond: expr, then: exprs,
                     all_elsif: [expr * exprs], else: (exprs) }
    Parameters <= { params: [ Identifier ],
                    optionals: [ Identifier * expr ],
                    rest_of_params: (Identifier),
                    params_after_rest: [ Identifier ],
                    keywords: [ Label * expr ],
                    rest_of_keywords: (Identifier),
                    block_param: (Identifier) }
    Block <= Parameters + { body: exprs }
    Lambda <= Block
    Call <= { receiver: (expr), op: (Symbol), name: Identifier,
              args: [ expr ], block_arg: (expr), block: (Block) }
    Command <= Call
    Exprs <= { expressions: [ expr ] }
    Rescue <= { types: [ Const | ConstPathRef ],
                parameter: (Identifier),
                body: (exprs), nested_rescue: (Rescue),
                else: (exprs), ensure: (exprs) }
    BeginEnd <= { body: exprs, rescue: (Rescue) }
    Def <= Parameters +
           { singular: (expr), name: Identifier, body: exprs,
             rescue: (Rescue) }
    ModuleDef <= { name: Const | ConstPathRef, body: exprs,
                   rescue: (Rescue) }
    ClassDef <= ModuleDef +
                { superclass: (Const | ConstPathRef) }
    SingularClassDef <= { name: expr, body: exprs,
                          rescue: (Rescue) }
    Program <= { elements: exprs }
  end
end
523
+ end
524
+ end