rley 0.8.01 → 0.8.02
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +29 -5
- data/CHANGELOG.md +7 -0
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/notation/all_notation_nodes.rb +3 -1
- data/lib/rley/notation/ast_builder.rb +185 -191
- data/lib/rley/notation/ast_node.rb +5 -5
- data/lib/rley/notation/ast_visitor.rb +3 -1
- data/lib/rley/notation/grammar.rb +1 -1
- data/lib/rley/notation/grammar_builder.rb +16 -15
- data/lib/rley/notation/grouping_node.rb +1 -1
- data/lib/rley/notation/parser.rb +56 -56
- data/lib/rley/notation/sequence_node.rb +3 -3
- data/lib/rley/notation/symbol_node.rb +2 -2
- data/lib/rley/notation/tokenizer.rb +3 -15
- data/lib/rley/parse_rep/ast_base_builder.rb +5 -6
- data/lib/rley/parser/gfg_chart.rb +5 -4
- data/lib/rley/parser/gfg_earley_parser.rb +1 -1
- data/lib/rley/syntax/base_grammar_builder.rb +3 -3
- data/lib/rley/syntax/match_closest.rb +7 -7
- data/spec/rley/notation/grammar_builder_spec.rb +6 -6
- data/spec/rley/notation/parser_spec.rb +183 -184
- data/spec/rley/notation/tokenizer_spec.rb +98 -104
- data/spec/rley/parser/dangling_else_spec.rb +15 -13
- data/spec/rley/parser/gfg_earley_parser_spec.rb +11 -9
- data/spec/rley/parser/gfg_parsing_spec.rb +1 -0
- data/spec/rley/syntax/base_grammar_builder_spec.rb +0 -1
- data/spec/rley/syntax/match_closest_spec.rb +4 -4
- metadata +2 -2
@@ -8,25 +8,25 @@ module Rley
|
|
8
8
|
class ASTNode
|
9
9
|
# @return [Rley::Lexical::Position] Position of the entry in the input stream.
|
10
10
|
attr_reader :position
|
11
|
-
|
11
|
+
|
12
12
|
# @return [Symbol]
|
13
13
|
attr_accessor :repetition
|
14
14
|
|
15
15
|
# @return [Hash]
|
16
|
-
attr_reader :annotation
|
16
|
+
attr_reader :annotation
|
17
17
|
|
18
18
|
# @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
|
19
19
|
def initialize(aPosition)
|
20
20
|
@position = aPosition
|
21
21
|
@repetition = :exactly_one
|
22
|
-
@annotation = {}
|
22
|
+
@annotation = {}
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
def annotation=(aMapping)
|
26
26
|
repeat_key = 'repeat'
|
27
27
|
@repetition = aMapping.delete(repeat_key) if aMapping.include?(repeat_key)
|
28
28
|
@annotation = aMapping
|
29
|
-
end
|
29
|
+
end
|
30
30
|
|
31
31
|
# Notification that the parsing has successfully completed
|
32
32
|
def done!
|
@@ -10,7 +10,7 @@ module Rley # This module is used as a namespace
|
|
10
10
|
module Notation # This module is used as a namespace
|
11
11
|
# Structure used for production rules that are implicitly generated by Rley
|
12
12
|
RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
|
13
|
-
|
13
|
+
|
14
14
|
# Builder GoF pattern. Builder builds a complex object
|
15
15
|
# (say, a grammar) from simpler objects (terminals and productions)
|
16
16
|
# and using a step by step approach.
|
@@ -28,7 +28,7 @@ module Rley # This module is used as a namespace
|
|
28
28
|
# @return [Array<Production>] The list of production rules for
|
29
29
|
# the grammar to build.
|
30
30
|
attr_reader(:productions)
|
31
|
-
|
31
|
+
|
32
32
|
# @return [Hash{String, String}] The synthesized raw productions
|
33
33
|
attr_reader(:synthetized)
|
34
34
|
|
@@ -71,14 +71,14 @@ module Rley # This module is used as a namespace
|
|
71
71
|
new_symbs = build_symbols(Syntax::Terminal, terminalSymbols)
|
72
72
|
symbols.merge!(new_symbs)
|
73
73
|
end
|
74
|
-
|
74
|
+
|
75
75
|
# Add the given marker symbol to the grammar of the language
|
76
76
|
# @param aMarkerSymbol [String] A marker symbol
|
77
|
-
# @return [void]
|
77
|
+
# @return [void]
|
78
78
|
def add_marker(aMarkerSymbol)
|
79
79
|
new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
|
80
80
|
symbols[new_symb.name] = new_symb
|
81
|
-
end
|
81
|
+
end
|
82
82
|
|
83
83
|
# Add a production rule in the grammar given one
|
84
84
|
# key-value pair of the form: String => String.
|
@@ -235,7 +235,7 @@ module Rley # This module is used as a namespace
|
|
235
235
|
name_modified = "#{symb_name}#{suffix_qmark}"
|
236
236
|
unless symbols.include? name_modified
|
237
237
|
add_nonterminal(name_modified)
|
238
|
-
add_raw_rule(name_modified,
|
238
|
+
add_raw_rule(name_modified, symb_name, suffix_qmark_one)
|
239
239
|
add_raw_rule(name_modified, '', suffix_qmark_none)
|
240
240
|
end
|
241
241
|
symb_name = name_modified
|
@@ -254,7 +254,7 @@ module Rley # This module is used as a namespace
|
|
254
254
|
when :exactly_one
|
255
255
|
# Do nothing
|
256
256
|
|
257
|
-
when
|
257
|
+
when :one_or_more
|
258
258
|
name_modified = "#{symb_name}#{suffix_plus}"
|
259
259
|
unless symbols.include? name_modified
|
260
260
|
add_nonterminal(name_modified)
|
@@ -273,6 +273,7 @@ module Rley # This module is used as a namespace
|
|
273
273
|
def after_sequence_node(aSequenceNode, _visitor)
|
274
274
|
aSequenceNode.subnodes.each_with_index do |sn, i|
|
275
275
|
next if sn.annotation.empty?
|
276
|
+
|
276
277
|
matching = sn.annotation['match_closest']
|
277
278
|
aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
|
278
279
|
end
|
@@ -330,14 +331,14 @@ module Rley # This module is used as a namespace
|
|
330
331
|
# A notification to the builder object that the programmer
|
331
332
|
# has completed the entry of terminals and production rules
|
332
333
|
def grammar_complete!
|
333
|
-
process_raw_rules
|
334
|
+
process_raw_rules
|
334
335
|
end
|
335
336
|
|
336
337
|
private
|
337
|
-
|
338
|
-
def add_nonterminal(aName)
|
338
|
+
|
339
|
+
def add_nonterminal(aName)
|
339
340
|
symbols[aName] = Syntax::NonTerminal.new(aName)
|
340
|
-
end
|
341
|
+
end
|
341
342
|
|
342
343
|
def simple_rule(aProductionRepr)
|
343
344
|
aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
|
@@ -352,7 +353,7 @@ module Rley # This module is used as a namespace
|
|
352
353
|
members = rhs.map do |name|
|
353
354
|
if name.end_with?('?', '*', '+')
|
354
355
|
modifier = name[-1]
|
355
|
-
suffix = modifier2suffix(
|
356
|
+
suffix = modifier2suffix(modifier)
|
356
357
|
get_grm_symbol("#{name.chop}#{suffix}")
|
357
358
|
else
|
358
359
|
get_grm_symbol(name)
|
@@ -448,7 +449,7 @@ module Rley # This module is used as a namespace
|
|
448
449
|
end
|
449
450
|
end
|
450
451
|
|
451
|
-
def node_decorated_name(
|
452
|
+
def node_decorated_name(aNode)
|
452
453
|
base_name = node_base_name(aNode)
|
453
454
|
suffix = repetition2suffix(aNode.repetition)
|
454
455
|
|
@@ -466,13 +467,13 @@ module Rley # This module is used as a namespace
|
|
466
467
|
text << sequence_name(sn)
|
467
468
|
end
|
468
469
|
|
469
|
-
suffix =
|
470
|
+
suffix = repetition2suffix(sn.repetition)
|
470
471
|
text << suffix
|
471
472
|
end
|
472
473
|
|
473
474
|
text.strip
|
474
475
|
end
|
475
|
-
|
476
|
+
|
476
477
|
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
|
477
478
|
raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
|
478
479
|
if synthetized.include?(aSymbol)
|
@@ -5,7 +5,7 @@ require_relative 'sequence_node'
|
|
5
5
|
module Rley
|
6
6
|
module Notation
|
7
7
|
# A syntax node representing an expression bracketed by parentheses.
|
8
|
-
class GroupingNode < SequenceNode
|
8
|
+
class GroupingNode < SequenceNode
|
9
9
|
# @param aPosition [Rley::Lexical::Position] Start position.
|
10
10
|
# @param sequence [Array<ASTNode>] sequence of AST nodes
|
11
11
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
data/lib/rley/notation/parser.rb
CHANGED
@@ -1,56 +1,56 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'tokenizer'
|
4
|
-
require_relative 'grammar'
|
5
|
-
require_relative 'ast_builder'
|
6
|
-
|
7
|
-
module Rley
|
8
|
-
module Notation
|
9
|
-
# A Lox parser that produce concrete parse trees.
|
10
|
-
# Concrete parse trees are the default kind of parse tree
|
11
|
-
# generated by the Rley library.
|
12
|
-
# They consist of two node types only:
|
13
|
-
# - NonTerminalNode
|
14
|
-
# - TerminalNode
|
15
|
-
# A NonTerminalNode has zero or more child nodes (called subnodes)
|
16
|
-
# A TerminalNode is leaf node, that is, it has no child node.
|
17
|
-
# While concrete parse tree nodes can be generated out of the box,
|
18
|
-
# they have the following drawbacks:
|
19
|
-
# - Generic node classes that aren't always suited for the needs of
|
20
|
-
# the language being processing.
|
21
|
-
# - Concrete parse tree tend to be deeply nested, which may complicate
|
22
|
-
# further processing.
|
23
|
-
class Parser
|
24
|
-
# @return [Rley::Engine] A facade object for the Rley parsing library
|
25
|
-
attr_reader(:engine)
|
26
|
-
|
27
|
-
def initialize
|
28
|
-
# Create a Rley facade object
|
29
|
-
@engine = Rley::Engine.new do |cfg|
|
30
|
-
cfg.diagnose = true
|
31
|
-
cfg.repr_builder = Notation::ASTBuilder
|
32
|
-
end
|
33
|
-
|
34
|
-
# Step 1. Load RGN grammar
|
35
|
-
@engine.use_grammar(Rley::Notation::RGNGrammar)
|
36
|
-
end
|
37
|
-
|
38
|
-
# Parse the given Lox program into a parse tree.
|
39
|
-
# @param source [String] Lox program to parse
|
40
|
-
# @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
|
41
|
-
def parse(source)
|
42
|
-
lexer = Tokenizer.new(source)
|
43
|
-
result = engine.parse(lexer.tokens)
|
44
|
-
|
45
|
-
unless result.success?
|
46
|
-
# Stop if the parse failed...
|
47
|
-
line1 = "Parsing failed\n"
|
48
|
-
line2 = "Reason: #{result.failure_reason.message}"
|
49
|
-
raise SyntaxError, line1 + line2
|
50
|
-
end
|
51
|
-
|
52
|
-
return engine.convert(result) # engine.to_ptree(result)
|
53
|
-
end
|
54
|
-
end # class
|
55
|
-
end # module
|
56
|
-
end # module
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'tokenizer'
|
4
|
+
require_relative 'grammar'
|
5
|
+
require_relative 'ast_builder'
|
6
|
+
|
7
|
+
module Rley
|
8
|
+
module Notation
|
9
|
+
# A Lox parser that produce concrete parse trees.
|
10
|
+
# Concrete parse trees are the default kind of parse tree
|
11
|
+
# generated by the Rley library.
|
12
|
+
# They consist of two node types only:
|
13
|
+
# - NonTerminalNode
|
14
|
+
# - TerminalNode
|
15
|
+
# A NonTerminalNode has zero or more child nodes (called subnodes)
|
16
|
+
# A TerminalNode is leaf node, that is, it has no child node.
|
17
|
+
# While concrete parse tree nodes can be generated out of the box,
|
18
|
+
# they have the following drawbacks:
|
19
|
+
# - Generic node classes that aren't always suited for the needs of
|
20
|
+
# the language being processing.
|
21
|
+
# - Concrete parse tree tend to be deeply nested, which may complicate
|
22
|
+
# further processing.
|
23
|
+
class Parser
|
24
|
+
# @return [Rley::Engine] A facade object for the Rley parsing library
|
25
|
+
attr_reader(:engine)
|
26
|
+
|
27
|
+
def initialize
|
28
|
+
# Create a Rley facade object
|
29
|
+
@engine = Rley::Engine.new do |cfg|
|
30
|
+
cfg.diagnose = true
|
31
|
+
cfg.repr_builder = Notation::ASTBuilder
|
32
|
+
end
|
33
|
+
|
34
|
+
# Step 1. Load RGN grammar
|
35
|
+
@engine.use_grammar(Rley::Notation::RGNGrammar)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Parse the given Lox program into a parse tree.
|
39
|
+
# @param source [String] Lox program to parse
|
40
|
+
# @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
|
41
|
+
def parse(source)
|
42
|
+
lexer = Tokenizer.new(source)
|
43
|
+
result = engine.parse(lexer.tokens)
|
44
|
+
|
45
|
+
unless result.success?
|
46
|
+
# Stop if the parse failed...
|
47
|
+
line1 = "Parsing failed\n"
|
48
|
+
line2 = "Reason: #{result.failure_reason.message}"
|
49
|
+
raise SyntaxError, line1 + line2
|
50
|
+
end
|
51
|
+
|
52
|
+
return engine.convert(result) # engine.to_ptree(result)
|
53
|
+
end
|
54
|
+
end # class
|
55
|
+
end # module
|
56
|
+
end # module
|
@@ -8,16 +8,16 @@ module Rley
|
|
8
8
|
class SequenceNode < ASTNode
|
9
9
|
# @return [Array<ASTNode>]
|
10
10
|
attr_reader :subnodes
|
11
|
-
|
11
|
+
|
12
12
|
attr_accessor :constraints
|
13
|
-
|
13
|
+
|
14
14
|
# @param aPosition [Rley::Lexical::Position] Start position.
|
15
15
|
# @param sequence [Array<ASTNode>] sequence of AST nodes
|
16
16
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
17
17
|
def initialize(aPosition, sequence, theRepetition = nil)
|
18
18
|
super(aPosition)
|
19
19
|
@subnodes = sequence
|
20
|
-
repetition=
|
20
|
+
self.repetition = theRepetition if theRepetition
|
21
21
|
@constraints = []
|
22
22
|
end
|
23
23
|
|
@@ -8,14 +8,14 @@ module Rley
|
|
8
8
|
class SymbolNode < ASTNode
|
9
9
|
# @return [String] name of grammar symbol
|
10
10
|
attr_reader :name
|
11
|
-
|
11
|
+
|
12
12
|
# @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
|
13
13
|
# @param aName [String] name of grammar symbol
|
14
14
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
15
15
|
def initialize(aPosition, aName, theRepetition = nil)
|
16
16
|
super(aPosition)
|
17
17
|
@name = aName
|
18
|
-
repetition=
|
18
|
+
self.repetition = theRepetition if theRepetition
|
19
19
|
end
|
20
20
|
|
21
21
|
# Abstract method (must be overridden in subclasses).
|
@@ -35,11 +35,11 @@ module Rley
|
|
35
35
|
'*' => 'STAR',
|
36
36
|
'..' => 'ELLIPSIS'
|
37
37
|
}.freeze
|
38
|
-
|
38
|
+
|
39
39
|
# Here are all the implemented Rley notation keywords
|
40
40
|
@@keywords = %w[
|
41
41
|
match_closest repeat
|
42
|
-
].map { |x| [x, x] }.to_h
|
42
|
+
].map { |x| [x, x] }.to_h
|
43
43
|
|
44
44
|
# Constructor. Initialize a tokenizer for Lox input.
|
45
45
|
# @param source [String] Lox text to tokenize.
|
@@ -84,7 +84,7 @@ module Rley
|
|
84
84
|
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
85
85
|
elsif '?*+,'.include? curr_ch # modifier character
|
86
86
|
# modifiers without prefix text are symbols
|
87
|
-
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
87
|
+
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
88
88
|
token = build_token(symb, scanner.getch)
|
89
89
|
elsif (lexeme = scanner.scan(/\.\./))
|
90
90
|
# One or two special character tokens
|
@@ -114,7 +114,6 @@ module Rley
|
|
114
114
|
col = scanner.pos - lex_length - @line_start + 1
|
115
115
|
pos = Rley::Lexical::Position.new(@lineno, col)
|
116
116
|
token = Rley::Lexical::Token.new(aLexeme.dup, aSymbolName, pos)
|
117
|
-
|
118
117
|
rescue StandardError => e
|
119
118
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
120
119
|
raise e
|
@@ -148,21 +147,10 @@ module Rley
|
|
148
147
|
when "\n"
|
149
148
|
next_line
|
150
149
|
literal << special
|
151
|
-
# when '\\'
|
152
|
-
# ch = scanner.scan(/./)
|
153
|
-
# next unless ch
|
154
|
-
|
155
|
-
# escaped = @@escape_chars[ch]
|
156
|
-
# if escaped
|
157
|
-
# literal << escaped
|
158
|
-
# else
|
159
|
-
# literal << ch
|
160
|
-
# end
|
161
150
|
end
|
162
151
|
end
|
163
152
|
end
|
164
153
|
pos = Rley::Lexical::Position.new(line, column_start)
|
165
|
-
lexeme = scanner.string[scan_pos - 1..scanner.pos - 1]
|
166
154
|
Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
|
167
155
|
end
|
168
156
|
|
@@ -135,24 +135,24 @@ module Rley # This module is used as a namespace
|
|
135
135
|
# def reduce_base_plus_last(_production, _range, _tokens, theChildren)
|
136
136
|
# [theChildren[0]]
|
137
137
|
# end
|
138
|
-
|
138
|
+
|
139
139
|
# Implicit rule generated for * modifier
|
140
140
|
# rule('X') => 'X item'.as '_star_more'
|
141
141
|
def reduce__star_more(_production, _range, _tokens, theChildren)
|
142
|
-
theChildren[0]
|
142
|
+
theChildren[0] << theChildren[1]
|
143
143
|
theChildren[0]
|
144
144
|
end
|
145
145
|
|
146
146
|
# Implicit rule generated for * modifier
|
147
147
|
# rule('X') => ''.as '_star_none'
|
148
|
-
def reduce__star_none(_production, _range, _tokens,
|
148
|
+
def reduce__star_none(_production, _range, _tokens, _children)
|
149
149
|
[]
|
150
150
|
end
|
151
151
|
|
152
152
|
# Implicit rule generated for + modifier
|
153
153
|
# rule('X') => 'X item'.as '_plus_more'
|
154
154
|
def reduce__plus_more(_production, _range, _tokens, theChildren)
|
155
|
-
theChildren[0]
|
155
|
+
theChildren[0] << theChildren[1]
|
156
156
|
theChildren[0]
|
157
157
|
end
|
158
158
|
|
@@ -160,8 +160,7 @@ module Rley # This module is used as a namespace
|
|
160
160
|
# rule('X') => 'item'.as '_plus_one'
|
161
161
|
def reduce__plus_one(_production, _range, _tokens, theChildren)
|
162
162
|
[theChildren[0]]
|
163
|
-
end
|
164
|
-
|
163
|
+
end
|
165
164
|
end # class
|
166
165
|
end # module
|
167
166
|
end # module
|
@@ -20,7 +20,7 @@ module Rley # This module is used as a namespace
|
|
20
20
|
# @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
|
21
21
|
def initialize(aGFGraph)
|
22
22
|
@sets = [ParseEntrySet.new]
|
23
|
-
@constraints = [[]]
|
23
|
+
@constraints = [[]]
|
24
24
|
push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
|
25
25
|
end
|
26
26
|
|
@@ -174,7 +174,7 @@ module Rley # This module is used as a namespace
|
|
174
174
|
when :before # terminal before dot
|
175
175
|
term_name = criteria[keyword]
|
176
176
|
if e.dotted_entry? && e.vertex.dotted_item.position > -2
|
177
|
-
found << e if e.prev_symbol&.name ==
|
177
|
+
found << e if e.prev_symbol&.name == term_name
|
178
178
|
end
|
179
179
|
end
|
180
180
|
end
|
@@ -209,12 +209,13 @@ module Rley # This module is used as a namespace
|
|
209
209
|
first_entry = sets[i][0]
|
210
210
|
prev_symbol = first_entry.prev_symbol
|
211
211
|
break if prev_symbol.name == aConstraint.closest_symb
|
212
|
+
|
212
213
|
i -= 1
|
213
|
-
break if i
|
214
|
+
break if i.negative?
|
214
215
|
end
|
215
216
|
|
216
217
|
# Retrieve all entries of the kind: closest_symb .
|
217
|
-
if i
|
218
|
+
if i.positive?
|
218
219
|
entries = sets[i].entries.select do |en|
|
219
220
|
if en.prev_symbol
|
220
221
|
en.prev_symbol.name == aConstraint.closest_symb
|