rley 0.8.01 → 0.8.02
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +29 -5
- data/CHANGELOG.md +7 -0
- data/examples/NLP/pico_en_demo.rb +2 -2
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/notation/all_notation_nodes.rb +3 -1
- data/lib/rley/notation/ast_builder.rb +185 -191
- data/lib/rley/notation/ast_node.rb +5 -5
- data/lib/rley/notation/ast_visitor.rb +3 -1
- data/lib/rley/notation/grammar.rb +1 -1
- data/lib/rley/notation/grammar_builder.rb +16 -15
- data/lib/rley/notation/grouping_node.rb +1 -1
- data/lib/rley/notation/parser.rb +56 -56
- data/lib/rley/notation/sequence_node.rb +3 -3
- data/lib/rley/notation/symbol_node.rb +2 -2
- data/lib/rley/notation/tokenizer.rb +3 -15
- data/lib/rley/parse_rep/ast_base_builder.rb +5 -6
- data/lib/rley/parser/gfg_chart.rb +5 -4
- data/lib/rley/parser/gfg_earley_parser.rb +1 -1
- data/lib/rley/syntax/base_grammar_builder.rb +3 -3
- data/lib/rley/syntax/match_closest.rb +7 -7
- data/spec/rley/notation/grammar_builder_spec.rb +6 -6
- data/spec/rley/notation/parser_spec.rb +183 -184
- data/spec/rley/notation/tokenizer_spec.rb +98 -104
- data/spec/rley/parser/dangling_else_spec.rb +15 -13
- data/spec/rley/parser/gfg_earley_parser_spec.rb +11 -9
- data/spec/rley/parser/gfg_parsing_spec.rb +1 -0
- data/spec/rley/syntax/base_grammar_builder_spec.rb +0 -1
- data/spec/rley/syntax/match_closest_spec.rb +4 -4
- metadata +2 -2
@@ -8,25 +8,25 @@ module Rley
|
|
8
8
|
class ASTNode
|
9
9
|
# @return [Rley::Lexical::Position] Position of the entry in the input stream.
|
10
10
|
attr_reader :position
|
11
|
-
|
11
|
+
|
12
12
|
# @return [Symbol]
|
13
13
|
attr_accessor :repetition
|
14
14
|
|
15
15
|
# @return [Hash]
|
16
|
-
attr_reader :annotation
|
16
|
+
attr_reader :annotation
|
17
17
|
|
18
18
|
# @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
|
19
19
|
def initialize(aPosition)
|
20
20
|
@position = aPosition
|
21
21
|
@repetition = :exactly_one
|
22
|
-
@annotation = {}
|
22
|
+
@annotation = {}
|
23
23
|
end
|
24
|
-
|
24
|
+
|
25
25
|
def annotation=(aMapping)
|
26
26
|
repeat_key = 'repeat'
|
27
27
|
@repetition = aMapping.delete(repeat_key) if aMapping.include?(repeat_key)
|
28
28
|
@annotation = aMapping
|
29
|
-
end
|
29
|
+
end
|
30
30
|
|
31
31
|
# Notification that the parsing has successfully completed
|
32
32
|
def done!
|
@@ -10,7 +10,7 @@ module Rley # This module is used as a namespace
|
|
10
10
|
module Notation # This module is used as a namespace
|
11
11
|
# Structure used for production rules that are implicitly generated by Rley
|
12
12
|
RawRule = Struct.new(:lhs, :rhs, :tag, :simple, :constraints)
|
13
|
-
|
13
|
+
|
14
14
|
# Builder GoF pattern. Builder builds a complex object
|
15
15
|
# (say, a grammar) from simpler objects (terminals and productions)
|
16
16
|
# and using a step by step approach.
|
@@ -28,7 +28,7 @@ module Rley # This module is used as a namespace
|
|
28
28
|
# @return [Array<Production>] The list of production rules for
|
29
29
|
# the grammar to build.
|
30
30
|
attr_reader(:productions)
|
31
|
-
|
31
|
+
|
32
32
|
# @return [Hash{String, String}] The synthesized raw productions
|
33
33
|
attr_reader(:synthetized)
|
34
34
|
|
@@ -71,14 +71,14 @@ module Rley # This module is used as a namespace
|
|
71
71
|
new_symbs = build_symbols(Syntax::Terminal, terminalSymbols)
|
72
72
|
symbols.merge!(new_symbs)
|
73
73
|
end
|
74
|
-
|
74
|
+
|
75
75
|
# Add the given marker symbol to the grammar of the language
|
76
76
|
# @param aMarkerSymbol [String] A marker symbol
|
77
|
-
# @return [void]
|
77
|
+
# @return [void]
|
78
78
|
def add_marker(aMarkerSymbol)
|
79
79
|
new_symb = build_symbol(Syntax::Marker, aMarkerSymbol)
|
80
80
|
symbols[new_symb.name] = new_symb
|
81
|
-
end
|
81
|
+
end
|
82
82
|
|
83
83
|
# Add a production rule in the grammar given one
|
84
84
|
# key-value pair of the form: String => String.
|
@@ -235,7 +235,7 @@ module Rley # This module is used as a namespace
|
|
235
235
|
name_modified = "#{symb_name}#{suffix_qmark}"
|
236
236
|
unless symbols.include? name_modified
|
237
237
|
add_nonterminal(name_modified)
|
238
|
-
add_raw_rule(name_modified,
|
238
|
+
add_raw_rule(name_modified, symb_name, suffix_qmark_one)
|
239
239
|
add_raw_rule(name_modified, '', suffix_qmark_none)
|
240
240
|
end
|
241
241
|
symb_name = name_modified
|
@@ -254,7 +254,7 @@ module Rley # This module is used as a namespace
|
|
254
254
|
when :exactly_one
|
255
255
|
# Do nothing
|
256
256
|
|
257
|
-
when
|
257
|
+
when :one_or_more
|
258
258
|
name_modified = "#{symb_name}#{suffix_plus}"
|
259
259
|
unless symbols.include? name_modified
|
260
260
|
add_nonterminal(name_modified)
|
@@ -273,6 +273,7 @@ module Rley # This module is used as a namespace
|
|
273
273
|
def after_sequence_node(aSequenceNode, _visitor)
|
274
274
|
aSequenceNode.subnodes.each_with_index do |sn, i|
|
275
275
|
next if sn.annotation.empty?
|
276
|
+
|
276
277
|
matching = sn.annotation['match_closest']
|
277
278
|
aSequenceNode.constraints << Syntax::MatchClosest.new(aSequenceNode, i, matching)
|
278
279
|
end
|
@@ -330,14 +331,14 @@ module Rley # This module is used as a namespace
|
|
330
331
|
# A notification to the builder object that the programmer
|
331
332
|
# has completed the entry of terminals and production rules
|
332
333
|
def grammar_complete!
|
333
|
-
process_raw_rules
|
334
|
+
process_raw_rules
|
334
335
|
end
|
335
336
|
|
336
337
|
private
|
337
|
-
|
338
|
-
def add_nonterminal(aName)
|
338
|
+
|
339
|
+
def add_nonterminal(aName)
|
339
340
|
symbols[aName] = Syntax::NonTerminal.new(aName)
|
340
|
-
end
|
341
|
+
end
|
341
342
|
|
342
343
|
def simple_rule(aProductionRepr)
|
343
344
|
aProductionRepr.each_pair do |(lhs_name, rhs_repr)|
|
@@ -352,7 +353,7 @@ module Rley # This module is used as a namespace
|
|
352
353
|
members = rhs.map do |name|
|
353
354
|
if name.end_with?('?', '*', '+')
|
354
355
|
modifier = name[-1]
|
355
|
-
suffix = modifier2suffix(
|
356
|
+
suffix = modifier2suffix(modifier)
|
356
357
|
get_grm_symbol("#{name.chop}#{suffix}")
|
357
358
|
else
|
358
359
|
get_grm_symbol(name)
|
@@ -448,7 +449,7 @@ module Rley # This module is used as a namespace
|
|
448
449
|
end
|
449
450
|
end
|
450
451
|
|
451
|
-
def node_decorated_name(
|
452
|
+
def node_decorated_name(aNode)
|
452
453
|
base_name = node_base_name(aNode)
|
453
454
|
suffix = repetition2suffix(aNode.repetition)
|
454
455
|
|
@@ -466,13 +467,13 @@ module Rley # This module is used as a namespace
|
|
466
467
|
text << sequence_name(sn)
|
467
468
|
end
|
468
469
|
|
469
|
-
suffix =
|
470
|
+
suffix = repetition2suffix(sn.repetition)
|
470
471
|
text << suffix
|
471
472
|
end
|
472
473
|
|
473
474
|
text.strip
|
474
475
|
end
|
475
|
-
|
476
|
+
|
476
477
|
def add_raw_rule(aSymbol, aRHS, aTag, simplified = false, constraints = [])
|
477
478
|
raw_rule = RawRule.new(aSymbol, aRHS, aTag, simplified, constraints)
|
478
479
|
if synthetized.include?(aSymbol)
|
@@ -5,7 +5,7 @@ require_relative 'sequence_node'
|
|
5
5
|
module Rley
|
6
6
|
module Notation
|
7
7
|
# A syntax node representing an expression bracketed by parentheses.
|
8
|
-
class GroupingNode < SequenceNode
|
8
|
+
class GroupingNode < SequenceNode
|
9
9
|
# @param aPosition [Rley::Lexical::Position] Start position.
|
10
10
|
# @param sequence [Array<ASTNode>] sequence of AST nodes
|
11
11
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
data/lib/rley/notation/parser.rb
CHANGED
@@ -1,56 +1,56 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'tokenizer'
|
4
|
-
require_relative 'grammar'
|
5
|
-
require_relative 'ast_builder'
|
6
|
-
|
7
|
-
module Rley
|
8
|
-
module Notation
|
9
|
-
# A Lox parser that produces concrete parse trees.
|
10
|
-
# Concrete parse trees are the default kind of parse tree
|
11
|
-
# generated by the Rley library.
|
12
|
-
# They consist of two node types only:
|
13
|
-
# - NonTerminalNode
|
14
|
-
# - TerminalNode
|
15
|
-
# A NonTerminalNode has zero or more child nodes (called subnodes)
|
16
|
-
# A TerminalNode is a leaf node, that is, it has no child node.
|
17
|
-
# While concrete parse tree nodes can be generated out of the box,
|
18
|
-
# they have the following drawbacks:
|
19
|
-
# - Generic node classes that aren't always suited for the needs of
|
20
|
-
# the language being processed.
|
21
|
-
# - Concrete parse trees tend to be deeply nested, which may complicate
|
22
|
-
# further processing.
|
23
|
-
class Parser
|
24
|
-
# @return [Rley::Engine] A facade object for the Rley parsing library
|
25
|
-
attr_reader(:engine)
|
26
|
-
|
27
|
-
def initialize
|
28
|
-
# Create a Rley facade object
|
29
|
-
@engine = Rley::Engine.new do |cfg|
|
30
|
-
cfg.diagnose = true
|
31
|
-
cfg.repr_builder = Notation::ASTBuilder
|
32
|
-
end
|
33
|
-
|
34
|
-
# Step 1. Load RGN grammar
|
35
|
-
@engine.use_grammar(Rley::Notation::RGNGrammar)
|
36
|
-
end
|
37
|
-
|
38
|
-
# Parse the given Lox program into a parse tree.
|
39
|
-
# @param source [String] Lox program to parse
|
40
|
-
# @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
|
41
|
-
def parse(source)
|
42
|
-
lexer = Tokenizer.new(source)
|
43
|
-
result = engine.parse(lexer.tokens)
|
44
|
-
|
45
|
-
unless result.success?
|
46
|
-
# Stop if the parse failed...
|
47
|
-
line1 = "Parsing failed\n"
|
48
|
-
line2 = "Reason: #{result.failure_reason.message}"
|
49
|
-
raise SyntaxError, line1 + line2
|
50
|
-
end
|
51
|
-
|
52
|
-
return engine.convert(result) # engine.to_ptree(result)
|
53
|
-
end
|
54
|
-
end # class
|
55
|
-
end # module
|
56
|
-
end # module
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'tokenizer'
|
4
|
+
require_relative 'grammar'
|
5
|
+
require_relative 'ast_builder'
|
6
|
+
|
7
|
+
module Rley
|
8
|
+
module Notation
|
9
|
+
# A Lox parser that produces concrete parse trees.
|
10
|
+
# Concrete parse trees are the default kind of parse tree
|
11
|
+
# generated by the Rley library.
|
12
|
+
# They consist of two node types only:
|
13
|
+
# - NonTerminalNode
|
14
|
+
# - TerminalNode
|
15
|
+
# A NonTerminalNode has zero or more child nodes (called subnodes)
|
16
|
+
# A TerminalNode is a leaf node, that is, it has no child node.
|
17
|
+
# While concrete parse tree nodes can be generated out of the box,
|
18
|
+
# they have the following drawbacks:
|
19
|
+
# - Generic node classes that aren't always suited for the needs of
|
20
|
+
# the language being processed.
|
21
|
+
# - Concrete parse trees tend to be deeply nested, which may complicate
|
22
|
+
# further processing.
|
23
|
+
class Parser
|
24
|
+
# @return [Rley::Engine] A facade object for the Rley parsing library
|
25
|
+
attr_reader(:engine)
|
26
|
+
|
27
|
+
def initialize
|
28
|
+
# Create a Rley facade object
|
29
|
+
@engine = Rley::Engine.new do |cfg|
|
30
|
+
cfg.diagnose = true
|
31
|
+
cfg.repr_builder = Notation::ASTBuilder
|
32
|
+
end
|
33
|
+
|
34
|
+
# Step 1. Load RGN grammar
|
35
|
+
@engine.use_grammar(Rley::Notation::RGNGrammar)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Parse the given Lox program into a parse tree.
|
39
|
+
# @param source [String] Lox program to parse
|
40
|
+
# @return [Rley::ParseTree] A parse tree equivalent to the Lox input.
|
41
|
+
def parse(source)
|
42
|
+
lexer = Tokenizer.new(source)
|
43
|
+
result = engine.parse(lexer.tokens)
|
44
|
+
|
45
|
+
unless result.success?
|
46
|
+
# Stop if the parse failed...
|
47
|
+
line1 = "Parsing failed\n"
|
48
|
+
line2 = "Reason: #{result.failure_reason.message}"
|
49
|
+
raise SyntaxError, line1 + line2
|
50
|
+
end
|
51
|
+
|
52
|
+
return engine.convert(result) # engine.to_ptree(result)
|
53
|
+
end
|
54
|
+
end # class
|
55
|
+
end # module
|
56
|
+
end # module
|
@@ -8,16 +8,16 @@ module Rley
|
|
8
8
|
class SequenceNode < ASTNode
|
9
9
|
# @return [Array<ASTNode>]
|
10
10
|
attr_reader :subnodes
|
11
|
-
|
11
|
+
|
12
12
|
attr_accessor :constraints
|
13
|
-
|
13
|
+
|
14
14
|
# @param aPosition [Rley::Lexical::Position] Start position.
|
15
15
|
# @param sequence [Array<ASTNode>] sequence of AST nodes
|
16
16
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
17
17
|
def initialize(aPosition, sequence, theRepetition = nil)
|
18
18
|
super(aPosition)
|
19
19
|
@subnodes = sequence
|
20
|
-
repetition=
|
20
|
+
self.repetition = theRepetition if theRepetition
|
21
21
|
@constraints = []
|
22
22
|
end
|
23
23
|
|
@@ -8,14 +8,14 @@ module Rley
|
|
8
8
|
class SymbolNode < ASTNode
|
9
9
|
# @return [String] name of grammar symbol
|
10
10
|
attr_reader :name
|
11
|
-
|
11
|
+
|
12
12
|
# @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
|
13
13
|
# @param aName [String] name of grammar symbol
|
14
14
|
# @param theRepetition [Symbol] indicates how many times the symbol can be repeated
|
15
15
|
def initialize(aPosition, aName, theRepetition = nil)
|
16
16
|
super(aPosition)
|
17
17
|
@name = aName
|
18
|
-
repetition=
|
18
|
+
self.repetition = theRepetition if theRepetition
|
19
19
|
end
|
20
20
|
|
21
21
|
# Abstract method (must be overridden in subclasses).
|
@@ -35,11 +35,11 @@ module Rley
|
|
35
35
|
'*' => 'STAR',
|
36
36
|
'..' => 'ELLIPSIS'
|
37
37
|
}.freeze
|
38
|
-
|
38
|
+
|
39
39
|
# Here are all the implemented Rley notation keywords
|
40
40
|
@@keywords = %w[
|
41
41
|
match_closest repeat
|
42
|
-
].map { |x| [x, x] }.to_h
|
42
|
+
].map { |x| [x, x] }.to_h
|
43
43
|
|
44
44
|
# Constructor. Initialize a tokenizer for Lox input.
|
45
45
|
# @param source [String] Lox text to tokenize.
|
@@ -84,7 +84,7 @@ module Rley
|
|
84
84
|
token = build_token(@@lexeme2name[curr_ch], scanner.getch)
|
85
85
|
elsif '?*+,'.include? curr_ch # modifier character
|
86
86
|
# modifiers without prefix text are symbols
|
87
|
-
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
87
|
+
symb = ws_found ? 'SYMBOL' : @@lexeme2name[curr_ch]
|
88
88
|
token = build_token(symb, scanner.getch)
|
89
89
|
elsif (lexeme = scanner.scan(/\.\./))
|
90
90
|
# One or two special character tokens
|
@@ -114,7 +114,6 @@ module Rley
|
|
114
114
|
col = scanner.pos - lex_length - @line_start + 1
|
115
115
|
pos = Rley::Lexical::Position.new(@lineno, col)
|
116
116
|
token = Rley::Lexical::Token.new(aLexeme.dup, aSymbolName, pos)
|
117
|
-
|
118
117
|
rescue StandardError => e
|
119
118
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
120
119
|
raise e
|
@@ -148,21 +147,10 @@ module Rley
|
|
148
147
|
when "\n"
|
149
148
|
next_line
|
150
149
|
literal << special
|
151
|
-
# when '\\'
|
152
|
-
# ch = scanner.scan(/./)
|
153
|
-
# next unless ch
|
154
|
-
|
155
|
-
# escaped = @@escape_chars[ch]
|
156
|
-
# if escaped
|
157
|
-
# literal << escaped
|
158
|
-
# else
|
159
|
-
# literal << ch
|
160
|
-
# end
|
161
150
|
end
|
162
151
|
end
|
163
152
|
end
|
164
153
|
pos = Rley::Lexical::Position.new(line, column_start)
|
165
|
-
lexeme = scanner.string[scan_pos - 1..scanner.pos - 1]
|
166
154
|
Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
|
167
155
|
end
|
168
156
|
|
@@ -135,24 +135,24 @@ module Rley # This module is used as a namespace
|
|
135
135
|
# def reduce_base_plus_last(_production, _range, _tokens, theChildren)
|
136
136
|
# [theChildren[0]]
|
137
137
|
# end
|
138
|
-
|
138
|
+
|
139
139
|
# Implicit rule generated for * modifier
|
140
140
|
# rule('X') => 'X item'.as '_star_more'
|
141
141
|
def reduce__star_more(_production, _range, _tokens, theChildren)
|
142
|
-
theChildren[0]
|
142
|
+
theChildren[0] << theChildren[1]
|
143
143
|
theChildren[0]
|
144
144
|
end
|
145
145
|
|
146
146
|
# Implicit rule generated for * modifier
|
147
147
|
# rule('X') => ''.as '_star_none'
|
148
|
-
def reduce__star_none(_production, _range, _tokens,
|
148
|
+
def reduce__star_none(_production, _range, _tokens, _children)
|
149
149
|
[]
|
150
150
|
end
|
151
151
|
|
152
152
|
# Implicit rule generated for + modifier
|
153
153
|
# rule('X') => 'X item'.as '_plus_more'
|
154
154
|
def reduce__plus_more(_production, _range, _tokens, theChildren)
|
155
|
-
theChildren[0]
|
155
|
+
theChildren[0] << theChildren[1]
|
156
156
|
theChildren[0]
|
157
157
|
end
|
158
158
|
|
@@ -160,8 +160,7 @@ module Rley # This module is used as a namespace
|
|
160
160
|
# rule('X') => 'item'.as '_plus_one'
|
161
161
|
def reduce__plus_one(_production, _range, _tokens, theChildren)
|
162
162
|
[theChildren[0]]
|
163
|
-
end
|
164
|
-
|
163
|
+
end
|
165
164
|
end # class
|
166
165
|
end # module
|
167
166
|
end # module
|
@@ -20,7 +20,7 @@ module Rley # This module is used as a namespace
|
|
20
20
|
# @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
|
21
21
|
def initialize(aGFGraph)
|
22
22
|
@sets = [ParseEntrySet.new]
|
23
|
-
@constraints = [[]]
|
23
|
+
@constraints = [[]]
|
24
24
|
push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
|
25
25
|
end
|
26
26
|
|
@@ -174,7 +174,7 @@ module Rley # This module is used as a namespace
|
|
174
174
|
when :before # terminal before dot
|
175
175
|
term_name = criteria[keyword]
|
176
176
|
if e.dotted_entry? && e.vertex.dotted_item.position > -2
|
177
|
-
found << e if e.prev_symbol&.name ==
|
177
|
+
found << e if e.prev_symbol&.name == term_name
|
178
178
|
end
|
179
179
|
end
|
180
180
|
end
|
@@ -209,12 +209,13 @@ module Rley # This module is used as a namespace
|
|
209
209
|
first_entry = sets[i][0]
|
210
210
|
prev_symbol = first_entry.prev_symbol
|
211
211
|
break if prev_symbol.name == aConstraint.closest_symb
|
212
|
+
|
212
213
|
i -= 1
|
213
|
-
break if i
|
214
|
+
break if i.negative?
|
214
215
|
end
|
215
216
|
|
216
217
|
# Retrieve all entries of the kind: closest_symb .
|
217
|
-
if i
|
218
|
+
if i.positive?
|
218
219
|
entries = sets[i].entries.select do |en|
|
219
220
|
if en.prev_symbol
|
220
221
|
en.prev_symbol.name == aConstraint.closest_symb
|