rley 0.8.00 → 0.8.05
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +47 -3
- data/CHANGELOG.md +32 -4
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/examples/data_formats/JSON/README.md +34 -0
- data/examples/data_formats/JSON/sample01.json +3 -0
- data/examples/data_formats/JSON/sample01.svg +36 -0
- data/examples/data_formats/JSON/sample02.json +6 -0
- data/examples/data_formats/JSON/sample02.svg +128 -0
- data/examples/data_formats/JSON/sample03.json +88 -0
- data/examples/general/calc_iter1/README.md +26 -0
- data/examples/general/calc_iter2/README.md +55 -0
- data/examples/general/general_examples.md +37 -0
- data/examples/tokenizer/README.md +46 -0
- data/examples/tokenizer/loxxy_raw_scanner.rex +98 -0
- data/examples/tokenizer/loxxy_raw_scanner.rex.rb +256 -0
- data/examples/tokenizer/loxxy_tokenizer.rb +94 -0
- data/examples/tokenizer/run_tokenizer.rb +29 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/lexical/literal.rb +29 -0
- data/lib/rley/lexical/token.rb +7 -4
- data/lib/rley/notation/all_notation_nodes.rb +3 -1
- data/lib/rley/notation/ast_builder.rb +185 -191
- data/lib/rley/notation/ast_node.rb +5 -5
- data/lib/rley/notation/ast_visitor.rb +3 -1
- data/lib/rley/notation/grammar.rb +1 -1
- data/lib/rley/notation/grammar_builder.rb +87 -33
- data/lib/rley/notation/grouping_node.rb +1 -1
- data/lib/rley/notation/parser.rb +56 -56
- data/lib/rley/notation/sequence_node.rb +3 -3
- data/lib/rley/notation/symbol_node.rb +2 -2
- data/lib/rley/notation/tokenizer.rb +3 -15
- data/lib/rley/parse_rep/ast_base_builder.rb +35 -4
- data/lib/rley/parser/gfg_chart.rb +5 -4
- data/lib/rley/parser/gfg_earley_parser.rb +1 -1
- data/lib/rley/syntax/base_grammar_builder.rb +8 -2
- data/lib/rley/syntax/match_closest.rb +7 -7
- data/lib/rley.rb +1 -1
- data/spec/rley/lexical/literal_spec.rb +33 -0
- data/spec/rley/lexical/token_spec.rb +15 -4
- data/spec/rley/notation/grammar_builder_spec.rb +57 -50
- data/spec/rley/notation/parser_spec.rb +183 -184
- data/spec/rley/notation/tokenizer_spec.rb +98 -104
- data/spec/rley/parser/dangling_else_spec.rb +20 -20
- data/spec/rley/parser/gfg_chart_spec.rb +0 -1
- data/spec/rley/parser/gfg_earley_parser_spec.rb +166 -147
- data/spec/rley/parser/gfg_parsing_spec.rb +2 -2
- data/spec/rley/syntax/base_grammar_builder_spec.rb +7 -8
- data/spec/rley/syntax/grammar_spec.rb +6 -9
- data/spec/rley/syntax/match_closest_spec.rb +4 -4
- metadata +19 -9
- data/lib/rley/parser/parse_tracer.rb +0 -103
- data/lib/rley/syntax/literal.rb +0 -20
- data/lib/rley/syntax/verbatim_symbol.rb +0 -27
- data/spec/rley/syntax/literal_spec.rb +0 -31
- data/spec/rley/syntax/verbatim_symbol_spec.rb +0 -38
@@ -1,12 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'set'
|
4
|
+
|
4
5
|
require_relative 'parser'
|
5
6
|
require_relative 'ast_visitor'
|
6
7
|
require_relative '../syntax/match_closest'
|
7
8
|
|
8
9
|
module Rley # This module is used as a namespace
|
9
10
|
module Notation # This module is used as a namespace
|
11
|
+
# Structure used for production rules that are implicitly generated by Rley
|
12
|
+
RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
|
13
|
+
|
10
14
|
# Builder GoF pattern. Builder builds a complex object
|
11
15
|
# (say, a grammar) from simpler objects (terminals and productions)
|
12
16
|
# and using a step by step approach.
|
@@ -25,15 +29,18 @@ module Rley # This module is used as a namespace
|
|
25
29
|
# the grammar to build.
|
26
30
|
attr_reader(:productions)
|
27
31
|
|
32
|
+
# @return [Hash{String => Array<RawRule>}] The synthesized raw productions, keyed by lhs symbol name
|
33
|
+
attr_reader(:synthetized)
|
34
|
+
|
28
35
|
# Creates a new grammar builder.
|
29
36
|
# @param aBlock [Proc] code block used to build the grammar.
|
30
37
|
# @example Building a tiny English grammar
|
31
|
-
# builder = Rley::
|
38
|
+
# builder = Rley::Notation::GrammarBuilder.new do
|
32
39
|
# add_terminals('n', 'v', 'adj', 'det')
|
33
|
-
# rule 'S' =>
|
34
|
-
# rule 'VP' =>
|
35
|
-
# rule 'NP' =>
|
36
|
-
# rule 'NP' =>
|
40
|
+
# rule 'S' => 'NP VP'
|
41
|
+
# rule 'VP' => 'v NP'
|
42
|
+
# rule 'NP' => 'det n'
|
43
|
+
# rule 'NP' => 'adj NP'
|
37
44
|
# end
|
38
45
|
# tiny_eng = builder.grammar
|
39
46
|
def initialize(&aBlock)
|
@@ -41,8 +48,12 @@ module Rley # This module is used as a namespace
|
|
41
48
|
@productions = []
|
42
49
|
@parser = Notation::Parser.new
|
43
50
|
@visitor2rhs = {}
|
51
|
+
@synthetized = {}
|
44
52
|
|
45
|
-
|
53
|
+
if block_given?
|
54
|
+
instance_exec(&aBlock)
|
55
|
+
grammar_complete!
|
56
|
+
end
|
46
57
|
end
|
47
58
|
|
48
59
|
# Retrieve a grammar symbol from its name.
|
@@ -61,6 +72,14 @@ module Rley # This module is used as a namespace
|
|
61
72
|
symbols.merge!(new_symbs)
|
62
73
|
end
|
63
74
|
|
75
|
+
# Add the given marker symbol to the grammar of the language
|
76
|
+
# @param aMarkerSymbol [String] A marker symbol
|
77
|
+
# @return [void]
|
78
|
+
def add_marker(aMarkerSymbol)
|
79
|
+
new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
|
80
|
+
symbols[new_symb.name] = new_symb
|
81
|
+
end
|
82
|
+
|
64
83
|
# Add a production rule in the grammar given one
|
65
84
|
# key-value pair of the form: String => String.
|
66
85
|
# Where the key is the name of the non-terminal appearing in the
|
@@ -204,7 +223,7 @@ module Rley # This module is used as a namespace
|
|
204
223
|
end
|
205
224
|
|
206
225
|
##################################
|
207
|
-
# AST visit notification events
|
226
|
+
# RGN's AST visit notification events
|
208
227
|
# ################################
|
209
228
|
def after_symbol_node(aSymbolNode, aVisitor)
|
210
229
|
symb_name = aSymbolNode.name
|
@@ -215,9 +234,9 @@ module Rley # This module is used as a namespace
|
|
215
234
|
# implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
|
216
235
|
name_modified = "#{symb_name}#{suffix_qmark}"
|
217
236
|
unless symbols.include? name_modified
|
218
|
-
|
219
|
-
|
220
|
-
|
237
|
+
add_nonterminal(name_modified)
|
238
|
+
add_raw_rule(name_modified, symb_name, suffix_qmark_one)
|
239
|
+
add_raw_rule(name_modified, '', suffix_qmark_none)
|
221
240
|
end
|
222
241
|
symb_name = name_modified
|
223
242
|
|
@@ -226,21 +245,21 @@ module Rley # This module is used as a namespace
|
|
226
245
|
# implicitly called: rule('symb_name_star' => '').tag suffix_star_none
|
227
246
|
name_modified = "#{symb_name}#{suffix_star}"
|
228
247
|
unless symbols.include? name_modified
|
229
|
-
|
230
|
-
|
231
|
-
|
248
|
+
add_nonterminal(name_modified)
|
249
|
+
add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
|
250
|
+
add_raw_rule(name_modified, [], suffix_star_none)
|
232
251
|
end
|
233
252
|
symb_name = name_modified
|
234
253
|
|
235
254
|
when :exactly_one
|
236
255
|
# Do nothing
|
237
256
|
|
238
|
-
when
|
257
|
+
when :one_or_more
|
239
258
|
name_modified = "#{symb_name}#{suffix_plus}"
|
240
259
|
unless symbols.include? name_modified
|
241
|
-
|
242
|
-
|
243
|
-
|
260
|
+
add_nonterminal(name_modified)
|
261
|
+
add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
|
262
|
+
add_raw_rule(name_modified, symb_name, suffix_plus_one)
|
244
263
|
end
|
245
264
|
symb_name = name_modified
|
246
265
|
else
|
@@ -254,6 +273,7 @@ module Rley # This module is used as a namespace
|
|
254
273
|
def after_sequence_node(aSequenceNode, _visitor)
|
255
274
|
aSequenceNode.subnodes.each_with_index do |sn, i|
|
256
275
|
next if sn.annotation.empty?
|
276
|
+
|
257
277
|
matching = sn.annotation['match_closest']
|
258
278
|
aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
|
259
279
|
end
|
@@ -264,10 +284,9 @@ module Rley # This module is used as a namespace
|
|
264
284
|
symb_name = sequence_name(aGroupingNode)
|
265
285
|
|
266
286
|
unless symbols.include?(symb_name) || aGroupingNode.repetition == :exactly_one
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
prod.constraints = aGroupingNode.constraints
|
287
|
+
add_nonterminal(symb_name)
|
288
|
+
rhs = serialize_sequence(aGroupingNode)
|
289
|
+
add_raw_rule(symb_name, rhs, 'return_children', true, aGroupingNode.constraints)
|
271
290
|
end
|
272
291
|
name_modified = "#{symb_name}#{repetition2suffix(aGroupingNode.repetition)}"
|
273
292
|
|
@@ -276,18 +295,18 @@ module Rley # This module is used as a namespace
|
|
276
295
|
# implicitly called: rule('symb_name_qmark' => 'symb_name').tag suffix_qmark_one
|
277
296
|
# implicitly called: rule('symb_name_qmark' => '').tag suffix_qmark_none
|
278
297
|
unless symbols.include? name_modified
|
279
|
-
|
280
|
-
|
281
|
-
|
298
|
+
add_nonterminal(name_modified)
|
299
|
+
add_raw_rule(name_modified, symb_name, suffix_qmark_one, true)
|
300
|
+
add_raw_rule(name_modified, [], suffix_qmark_none, true)
|
282
301
|
end
|
283
302
|
|
284
303
|
when :zero_or_more
|
285
304
|
# implicitly called: rule('symb_name_star' => 'symb_name_star symb_name').tag suffix_star_more
|
286
305
|
# implicitly called: rule('symb_name_star' => '').tag suffix_star_none
|
287
306
|
unless symbols.include? name_modified
|
288
|
-
|
289
|
-
|
290
|
-
|
307
|
+
add_nonterminal(name_modified)
|
308
|
+
add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_star_more)
|
309
|
+
add_raw_rule(name_modified, '', suffix_star_none)
|
291
310
|
end
|
292
311
|
|
293
312
|
when :exactly_one
|
@@ -295,9 +314,9 @@ module Rley # This module is used as a namespace
|
|
295
314
|
|
296
315
|
when :one_or_more
|
297
316
|
unless symbols.include? name_modified
|
298
|
-
|
299
|
-
|
300
|
-
|
317
|
+
add_nonterminal(name_modified)
|
318
|
+
add_raw_rule(name_modified, "#{name_modified} #{symb_name}", suffix_plus_more)
|
319
|
+
add_raw_rule(name_modified, symb_name, suffix_plus_one)
|
301
320
|
end
|
302
321
|
else
|
303
322
|
raise StandardError, 'Unhandled multiplicity'
|
@@ -309,8 +328,18 @@ module Rley # This module is used as a namespace
|
|
309
328
|
end
|
310
329
|
end
|
311
330
|
|
331
|
+
# A notification to the builder object that the programmer
|
332
|
+
# has completed the entry of terminals and production rules
|
333
|
+
def grammar_complete!
|
334
|
+
process_raw_rules
|
335
|
+
end
|
336
|
+
|
312
337
|
private
|
313
338
|
|
339
|
+
def add_nonterminal(aName)
|
340
|
+
symbols[aName] = Syntax::NonTerminal.new(aName)
|
341
|
+
end
|
342
|
+
|
314
343
|
def simple_rule(aProductionRepr)
|
315
344
|
aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
|
316
345
|
lhs = get_grm_symbol(lhs_name)
|
@@ -324,7 +353,7 @@ module Rley # This module is used as a namespace
|
|
324
353
|
members = rhs.map do |name|
|
325
354
|
if name.end_with?('?', '*', '+')
|
326
355
|
modifier = name[-1]
|
327
|
-
suffix = modifier2suffix(
|
356
|
+
suffix = modifier2suffix(modifier)
|
328
357
|
get_grm_symbol("#{name.chop}#{suffix}")
|
329
358
|
else
|
330
359
|
get_grm_symbol(name)
|
@@ -420,7 +449,7 @@ module Rley # This module is used as a namespace
|
|
420
449
|
end
|
421
450
|
end
|
422
451
|
|
423
|
-
def node_decorated_name(
|
452
|
+
def node_decorated_name(aNode)
|
424
453
|
base_name = node_base_name(aNode)
|
425
454
|
suffix = repetition2suffix(aNode.repetition)
|
426
455
|
|
@@ -438,12 +467,37 @@ module Rley # This module is used as a namespace
|
|
438
467
|
text << sequence_name(sn)
|
439
468
|
end
|
440
469
|
|
441
|
-
suffix =
|
470
|
+
suffix = repetition2suffix(sn.repetition)
|
442
471
|
text << suffix
|
443
472
|
end
|
444
473
|
|
445
474
|
text.strip
|
446
475
|
end
|
476
|
+
|
477
|
+
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
|
478
|
+
raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
|
479
|
+
if synthetized.include?(aSymbol)
|
480
|
+
@synthetized[aSymbol] << raw_rule
|
481
|
+
else
|
482
|
+
@synthetized[aSymbol] = [raw_rule]
|
483
|
+
end
|
484
|
+
end
|
485
|
+
|
486
|
+
def process_raw_rules
|
487
|
+
until synthetized.empty? do
|
488
|
+
raw_rules = synthetized.delete(synthetized.keys.first)
|
489
|
+
raw_rules.each do |raw|
|
490
|
+
new_prod = nil
|
491
|
+
if raw.simple
|
492
|
+
new_prod = simple_rule(raw.lhs => raw.rhs)
|
493
|
+
else
|
494
|
+
new_prod = rule(raw.lhs => raw.rhs)
|
495
|
+
end
|
496
|
+
new_prod.tag(raw.tag)
|
497
|
+
new_prod.constraints = raw.constraints
|
498
|
+
end
|
499
|
+
end
|
500
|
+
end
|
447
501
|
end # class
|
448
502
|
end # module
|
449
503
|
end # module
|
@@ -5,7 +5,7 @@ require_relative 'sequence_node'
|
|
5
5
|
module Rley
|
6
6
|
module Notation
|
7
7
|
# A syntax node representing an expression bracketed by parentheses.
|
8
|
-
class GroupingNode < SequenceNode
|
8
|
+
class GroupingNode < SequenceNode
|
9
9
|
# @param aPosition [Rley::Lexical::Position] Start position.
|
10
10
|
# @param sequence [Array<ASTNode>] sequence of AST nodes
|
11
11
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
data/lib/rley/notation/parser.rb
CHANGED
@@ -1,56 +1,56 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'tokenizer'
|
4
|
-
require_relative 'grammar'
|
5
|
-
require_relative 'ast_builder'
|
6
|
-
|
7
|
-
module Rley
|
8
|
-
module Notation
|
9
|
-
# A Lox parser that produce concrete parse trees.
|
10
|
-
# Concrete parse trees are the default kind of parse tree
|
11
|
-
# generated by the Rley library.
|
12
|
-
# They consist of two node types only:
|
13
|
-
# - NonTerminalNode
|
14
|
-
# - TerminalNode
|
15
|
-
# A NonTerminalNode has zero or more child nodes (called subnodes)
|
16
|
-
# A TerminalNode is leaf node, that is, it has no child node.
|
17
|
-
# While concrete parse tree nodes can be generated out of the box,
|
18
|
-
# they have the following drawbacks:
|
19
|
-
# - Generic node classes that aren't always suited for the needs of
|
20
|
-
# the language being processing.
|
21
|
-
# - Concrete parse tree tend to be deeply nested, which may complicate
|
22
|
-
# further processing.
|
23
|
-
class Parser
|
24
|
-
# @return [Rley::Engine] A facade object for the Rley parsing library
|
25
|
-
attr_reader(:engine)
|
26
|
-
|
27
|
-
def initialize
|
28
|
-
# Create a Rley facade object
|
29
|
-
@engine = Rley::Engine.new do |cfg|
|
30
|
-
cfg.diagnose = true
|
31
|
-
cfg.repr_builder = Notation::ASTBuilder
|
32
|
-
end
|
33
|
-
|
34
|
-
# Step 1. Load RGN grammar
|
35
|
-
@engine.use_grammar(Rley::Notation::RGNGrammar)
|
36
|
-
end
|
37
|
-
|
38
|
-
# Parse the given Lox program into a parse tree.
|
39
|
-
# @param source [String] Lox program to parse
|
40
|
-
# @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
|
41
|
-
def parse(source)
|
42
|
-
lexer = Tokenizer.new(source)
|
43
|
-
result = engine.parse(lexer.tokens)
|
44
|
-
|
45
|
-
unless result.success?
|
46
|
-
# Stop if the parse failed...
|
47
|
-
line1 = "Parsing failed\n"
|
48
|
-
line2 = "Reason: #{result.failure_reason.message}"
|
49
|
-
raise SyntaxError, line1 + line2
|
50
|
-
end
|
51
|
-
|
52
|
-
return engine.convert(result) # engine.to_ptree(result)
|
53
|
-
end
|
54
|
-
end # class
|
55
|
-
end # module
|
56
|
-
end # module
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'tokenizer'
|
4
|
+
require_relative 'grammar'
|
5
|
+
require_relative 'ast_builder'
|
6
|
+
|
7
|
+
module Rley
|
8
|
+
module Notation
|
9
|
+
# A Lox parser that produce concrete parse trees.
|
10
|
+
# Concrete parse trees are the default kind of parse tree
|
11
|
+
# generated by the Rley library.
|
12
|
+
# They consist of two node types only:
|
13
|
+
# - NonTerminalNode
|
14
|
+
# - TerminalNode
|
15
|
+
# A NonTerminalNode has zero or more child nodes (called subnodes)
|
16
|
+
# A TerminalNode is leaf node, that is, it has no child node.
|
17
|
+
# While concrete parse tree nodes can be generated out of the box,
|
18
|
+
# they have the following drawbacks:
|
19
|
+
# - Generic node classes that aren't always suited for the needs of
|
20
|
+
# the language being processing.
|
21
|
+
# - Concrete parse tree tend to be deeply nested, which may complicate
|
22
|
+
# further processing.
|
23
|
+
class Parser
|
24
|
+
# @return [Rley::Engine] A facade object for the Rley parsing library
|
25
|
+
attr_reader(:engine)
|
26
|
+
|
27
|
+
def initialize
|
28
|
+
# Create a Rley facade object
|
29
|
+
@engine = Rley::Engine.new do |cfg|
|
30
|
+
cfg.diagnose = true
|
31
|
+
cfg.repr_builder = Notation::ASTBuilder
|
32
|
+
end
|
33
|
+
|
34
|
+
# Step 1. Load RGN grammar
|
35
|
+
@engine.use_grammar(Rley::Notation::RGNGrammar)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Parse the given Lox program into a parse tree.
|
39
|
+
# @param source [String] Lox program to parse
|
40
|
+
# @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
|
41
|
+
def parse(source)
|
42
|
+
lexer = Tokenizer.new(source)
|
43
|
+
result = engine.parse(lexer.tokens)
|
44
|
+
|
45
|
+
unless result.success?
|
46
|
+
# Stop if the parse failed...
|
47
|
+
line1 = "Parsing failed\n"
|
48
|
+
line2 = "Reason: #{result.failure_reason.message}"
|
49
|
+
raise SyntaxError, line1 + line2
|
50
|
+
end
|
51
|
+
|
52
|
+
return engine.convert(result) # engine.to_ptree(result)
|
53
|
+
end
|
54
|
+
end # class
|
55
|
+
end # module
|
56
|
+
end # module
|
@@ -8,16 +8,16 @@ module Rley
|
|
8
8
|
class SequenceNode < ASTNode
|
9
9
|
# @return [Array<ASTNode>]
|
10
10
|
attr_reader :subnodes
|
11
|
-
|
11
|
+
|
12
12
|
attr_accessor :constraints
|
13
|
-
|
13
|
+
|
14
14
|
# @param aPosition [Rley::Lexical::Position] Start position.
|
15
15
|
# @param sequence [Array<ASTNode>] sequence of AST nodes
|
16
16
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
17
17
|
def initialize(aPosition, sequence, theRepetition = nil)
|
18
18
|
super(aPosition)
|
19
19
|
@subnodes = sequence
|
20
|
-
repetition=
|
20
|
+
self.repetition = theRepetition if theRepetition
|
21
21
|
@constraints = []
|
22
22
|
end
|
23
23
|
|
@@ -8,14 +8,14 @@ module Rley
|
|
8
8
|
class SymbolNode < ASTNode
|
9
9
|
# @return [String] name of grammar symbol
|
10
10
|
attr_reader :name
|
11
|
-
|
11
|
+
|
12
12
|
# @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
|
13
13
|
# @param aName [String] name of grammar symbol
|
14
14
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
15
15
|
def initialize(aPosition, aName, theRepetition = nil)
|
16
16
|
super(aPosition)
|
17
17
|
@name = aName
|
18
|
-
repetition=
|
18
|
+
self.repetition = theRepetition if theRepetition
|
19
19
|
end
|
20
20
|
|
21
21
|
# Abstract method (must be overridden in subclasses).
|
@@ -35,11 +35,11 @@ module Rley
|
|
35
35
|
'*' => 'STAR',
|
36
36
|
'..' => 'ELLIPSIS'
|
37
37
|
}.freeze
|
38
|
-
|
38
|
+
|
39
39
|
# Here are all the implemented Rley notation keywords
|
40
40
|
@@keywords = %w[
|
41
41
|
match_closest repeat
|
42
|
-
].map { |x| [x, x] }.to_h
|
42
|
+
].map { |x| [x, x] }.to_h
|
43
43
|
|
44
44
|
# Constructor. Initialize a tokenizer for Lox input.
|
45
45
|
# @param source [String] Lox text to tokenize.
|
@@ -84,7 +84,7 @@ module Rley
|
|
84
84
|
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
85
85
|
elsif '?*+,'.include? curr_ch # modifier character
|
86
86
|
# modifiers without prefix text are symbols
|
87
|
-
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
87
|
+
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
88
88
|
token = build_token(symb, scanner.getch)
|
89
89
|
elsif (lexeme = scanner.scan(/\.\./))
|
90
90
|
# One or two special character tokens
|
@@ -114,7 +114,6 @@ module Rley
|
|
114
114
|
col = scanner.pos - lex_length - @line_start + 1
|
115
115
|
pos = Rley::Lexical::Position.new(@lineno, col)
|
116
116
|
token = Rley::Lexical::Token.new(aLexeme.dup, aSymbolName, pos)
|
117
|
-
|
118
117
|
rescue StandardError => e
|
119
118
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
120
119
|
raise e
|
@@ -148,21 +147,10 @@ module Rley
|
|
148
147
|
when "\n"
|
149
148
|
next_line
|
150
149
|
literal << special
|
151
|
-
# when '\\'
|
152
|
-
# ch = scanner.scan(/./)
|
153
|
-
# next unless ch
|
154
|
-
|
155
|
-
# escaped = @@escape_chars[ch]
|
156
|
-
# if escaped
|
157
|
-
# literal << escaped
|
158
|
-
# else
|
159
|
-
# literal << ch
|
160
|
-
# end
|
161
150
|
end
|
162
151
|
end
|
163
152
|
end
|
164
153
|
pos = Rley::Lexical::Position.new(line, column_start)
|
165
|
-
lexeme = scanner.string[scan_pos - 1..scanner.pos - 1]
|
166
154
|
Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
|
167
155
|
end
|
168
156
|
|
@@ -126,16 +126,47 @@ module Rley # This module is used as a namespace
|
|
126
126
|
|
127
127
|
# Standard method for handling one or more modifier: symbol+
|
128
128
|
# rule('symbol_plus' => 'symbol_plus symbol')
|
129
|
-
def reduce_base_plus_more(_production, _range, _tokens, theChildren)
|
130
|
-
theChildren[0] << theChildren[1]
|
131
|
-
end
|
129
|
+
# def reduce_base_plus_more(_production, _range, _tokens, theChildren)
|
130
|
+
# theChildren[0] << theChildren[1]
|
131
|
+
# end
|
132
132
|
|
133
133
|
# Standard rule method handling one or more modifier: symbol+
|
134
134
|
# rule('symbol_plus' => 'symbol')
|
135
|
-
def reduce_base_plus_last(_production, _range, _tokens, theChildren)
|
135
|
+
# def reduce_base_plus_last(_production, _range, _tokens, theChildren)
|
136
|
+
# [theChildren[0]]
|
137
|
+
# end
|
138
|
+
|
139
|
+
# Implicit rule generated for * modifier
|
140
|
+
# rule('X') => 'X item'.as '_star_more'
|
141
|
+
def reduce__star_more(_production, _range, _tokens, theChildren)
|
142
|
+
theChildren[0] << theChildren[1]
|
143
|
+
theChildren[0]
|
144
|
+
end
|
145
|
+
|
146
|
+
# Implicit rule generated for * modifier
|
147
|
+
# rule('X') => ''.as '_star_none'
|
148
|
+
def reduce__star_none(_production, _range, _tokens, _children)
|
149
|
+
[]
|
150
|
+
end
|
151
|
+
|
152
|
+
# Implicit rule generated for + modifier
|
153
|
+
# rule('X') => 'X item'.as '_plus_more'
|
154
|
+
def reduce__plus_more(_production, _range, _tokens, theChildren)
|
155
|
+
theChildren[0] << theChildren[1]
|
156
|
+
theChildren[0]
|
157
|
+
end
|
158
|
+
|
159
|
+
# Implicit rule generated for + modifier
|
160
|
+
# rule('X') => 'item'.as '_plus_one'
|
161
|
+
def reduce__plus_one(_production, _range, _tokens, theChildren)
|
136
162
|
[theChildren[0]]
|
137
163
|
end
|
138
164
|
|
165
|
+
# Implicit rule generated for a grouping (parenthesized sequence)
|
166
|
+
# rule('X') => 'item1 item2 ...'.as 'return_children'
|
167
|
+
def reduce_return_children(_production, _range, _tokens, theChildren)
|
168
|
+
theChildren
|
169
|
+
end
|
139
170
|
end # class
|
140
171
|
end # module
|
141
172
|
end # module
|
@@ -20,7 +20,7 @@ module Rley # This module is used as a namespace
|
|
20
20
|
# @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
|
21
21
|
def initialize(aGFGraph)
|
22
22
|
@sets = [ParseEntrySet.new]
|
23
|
-
@constraints = [[]]
|
23
|
+
@constraints = [[]]
|
24
24
|
push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
|
25
25
|
end
|
26
26
|
|
@@ -174,7 +174,7 @@ module Rley # This module is used as a namespace
|
|
174
174
|
when :before # terminal before dot
|
175
175
|
term_name = criteria[keyword]
|
176
176
|
if e.dotted_entry? && e.vertex.dotted_item.position > -2
|
177
|
-
found << e if e.prev_symbol&.name ==
|
177
|
+
found << e if e.prev_symbol&.name == term_name
|
178
178
|
end
|
179
179
|
end
|
180
180
|
end
|
@@ -209,12 +209,13 @@ module Rley # This module is used as a namespace
|
|
209
209
|
first_entry = sets[i][0]
|
210
210
|
prev_symbol = first_entry.prev_symbol
|
211
211
|
break if prev_symbol.name == aConstraint.closest_symb
|
212
|
+
|
212
213
|
i -= 1
|
213
|
-
break if i
|
214
|
+
break if i.negative?
|
214
215
|
end
|
215
216
|
|
216
217
|
# Retrieve all entries of the kind: closest_symb .
|
217
|
-
if i
|
218
|
+
if i.positive?
|
218
219
|
entries = sets[i].entries.select do |en|
|
219
220
|
if en.prev_symbol
|
220
221
|
en.prev_symbol.name == aConstraint.closest_symb
|
@@ -51,7 +51,7 @@ module Rley # This module is used as a namespace
|
|
51
51
|
result.chart[index].each do |entry|
|
52
52
|
# Is entry of the form? [A => alpha . B beta, k]...
|
53
53
|
next_symbol = entry.next_symbol
|
54
|
-
if next_symbol
|
54
|
+
if next_symbol.kind_of?(Syntax::NonTerminal)
|
55
55
|
# ...apply the Call rule
|
56
56
|
call_rule(result, entry, index)
|
57
57
|
end
|
@@ -3,8 +3,6 @@
|
|
3
3
|
require 'set'
|
4
4
|
require_relative 'terminal'
|
5
5
|
require_relative 'non_terminal'
|
6
|
-
require_relative 'literal'
|
7
|
-
require_relative 'verbatim_symbol'
|
8
6
|
require_relative 'production'
|
9
7
|
require_relative 'grammar'
|
10
8
|
|
@@ -56,6 +54,14 @@ module Rley # This module is used as a namespace
|
|
56
54
|
symbols.merge!(new_symbs)
|
57
55
|
end
|
58
56
|
|
57
|
+
# Add the given marker symbol to the grammar of the language
|
58
|
+
# @param aMarkerSymbol [Syntax::Marker] A marker symbol
|
59
|
+
# @return [void]
|
60
|
+
def add_marker(aMarkerSymbol)
|
61
|
+
new_symb = build_symbol(Marker, aMarkerSymbol)
|
62
|
+
symbols[aMarkerSymbol.name] = new_symb
|
63
|
+
end
|
64
|
+
|
59
65
|
# Add a production rule in the grammar given one
|
60
66
|
# key-value pair of the form: String => Array.
|
61
67
|
# Where the key is the name of the non-terminal appearing in the
|
@@ -7,13 +7,13 @@ module Rley # This module is used as a namespace
|
|
7
7
|
class MatchClosest
|
8
8
|
# @return [Integer] index of constrained symbol to match
|
9
9
|
attr_reader(:idx_symbol)
|
10
|
-
|
10
|
+
|
11
11
|
# @return [String] name of closest preceding symbol to pair
|
12
12
|
attr_reader(:closest_symb)
|
13
13
|
|
14
14
|
# @return [NilClass, Array<Parser::ParseEntry>] set of entries with closest symbol
|
15
15
|
attr_accessor(:entries)
|
16
|
-
|
16
|
+
|
17
17
|
# @param aSymbolSeq [Rley::Syntax::SymbolSeq] a sequence of grammar symbols
|
18
18
|
# @param idxSymbol [Integer] index of symbol
|
19
19
|
# @param nameClosest [String] Terminal symbol name
|
@@ -21,18 +21,18 @@ module Rley # This module is used as a namespace
|
|
21
21
|
@idx_symbol = valid_idx_symbol(idxSymbol, aSymbolSeq)
|
22
22
|
@closest_symb = valid_name_closest(nameClosest)
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
private
|
26
|
-
|
26
|
+
|
27
27
|
# Check that the provided index is within plausible bounds
|
28
28
|
def valid_idx_symbol(idxSymbol, aSymbolSeq)
|
29
|
-
bounds = 0
|
29
|
+
bounds = 0..aSymbolSeq.size - 1
|
30
30
|
err_msg_outbound = 'Index of symbol out of bound'
|
31
31
|
raise StandardError, err_msg_outbound unless bounds.include? idxSymbol
|
32
|
-
|
32
|
+
|
33
33
|
idxSymbol
|
34
34
|
end
|
35
|
-
|
35
|
+
|
36
36
|
def valid_name_closest(nameClosest)
|
37
37
|
nameClosest
|
38
38
|
end
|
data/lib/rley.rb
CHANGED
@@ -6,7 +6,7 @@
|
|
6
6
|
|
7
7
|
require_relative './rley/constants'
|
8
8
|
require_relative './rley/interface'
|
9
|
-
require_relative './rley/lexical/
|
9
|
+
require_relative './rley/lexical/literal'
|
10
10
|
require_relative './rley/parser/gfg_earley_parser'
|
11
11
|
require_relative './rley/parse_rep/ast_base_builder'
|
12
12
|
require_relative './rley/parse_tree_visitor'
|