rley 0.7.08 → 0.8.03
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +29 -5
- data/CHANGELOG.md +28 -4
- data/README.md +4 -5
- data/examples/NLP/nano_eng/nano_en_demo.rb +7 -11
- data/examples/NLP/nano_eng/nano_grammar.rb +18 -18
- data/examples/data_formats/JSON/json_ast_builder.rb +9 -18
- data/examples/data_formats/JSON/json_demo.rb +1 -2
- data/examples/data_formats/JSON/json_grammar.rb +11 -11
- data/examples/general/calc_iter1/calc_grammar.rb +5 -4
- data/examples/general/calc_iter2/calc_grammar.rb +9 -9
- data/examples/general/left.rb +1 -1
- data/examples/general/right.rb +1 -1
- data/lib/rley/base/dotted_item.rb +5 -0
- data/lib/rley/base/grm_items_builder.rb +6 -0
- data/lib/rley/constants.rb +1 -1
- data/lib/rley/engine.rb +2 -2
- data/lib/rley/interface.rb +16 -0
- data/lib/rley/notation/all_notation_nodes.rb +4 -0
- data/lib/rley/notation/ast_builder.rb +185 -0
- data/lib/rley/notation/ast_node.rb +44 -0
- data/lib/rley/notation/ast_visitor.rb +115 -0
- data/lib/rley/notation/grammar.rb +49 -0
- data/lib/rley/notation/grammar_builder.rb +505 -0
- data/lib/rley/notation/grouping_node.rb +23 -0
- data/lib/rley/notation/parser.rb +56 -0
- data/lib/rley/notation/sequence_node.rb +35 -0
- data/lib/rley/notation/symbol_node.rb +29 -0
- data/lib/rley/notation/tokenizer.rb +180 -0
- data/lib/rley/parse_rep/ast_base_builder.rb +44 -0
- data/lib/rley/parser/gfg_chart.rb +101 -6
- data/lib/rley/parser/gfg_earley_parser.rb +1 -1
- data/lib/rley/parser/gfg_parsing.rb +5 -3
- data/lib/rley/parser/parse_entry_set.rb +1 -1
- data/lib/rley/syntax/{grammar_builder.rb → base_grammar_builder.rb} +53 -15
- data/lib/rley/syntax/grm_symbol.rb +1 -1
- data/lib/rley/syntax/match_closest.rb +43 -0
- data/lib/rley/syntax/production.rb +6 -0
- data/lib/rley.rb +1 -1
- data/spec/rley/engine_spec.rb +6 -6
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -2
- data/spec/rley/notation/grammar_builder_spec.rb +302 -0
- data/spec/rley/notation/parser_spec.rb +183 -0
- data/spec/rley/notation/tokenizer_spec.rb +364 -0
- data/spec/rley/parse_rep/ast_builder_spec.rb +0 -1
- data/spec/rley/parse_rep/groucho_spec.rb +1 -1
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +1 -1
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +2 -2
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +1 -1
- data/spec/rley/parser/dangling_else_spec.rb +447 -0
- data/spec/rley/parser/gfg_earley_parser_spec.rb +118 -10
- data/spec/rley/parser/gfg_parsing_spec.rb +2 -1
- data/spec/rley/parser/parse_walker_factory_spec.rb +2 -2
- data/spec/rley/support/ambiguous_grammar_helper.rb +2 -2
- data/spec/rley/support/grammar_abc_helper.rb +2 -2
- data/spec/rley/support/grammar_ambig01_helper.rb +2 -2
- data/spec/rley/support/grammar_arr_int_helper.rb +2 -2
- data/spec/rley/support/grammar_b_expr_helper.rb +2 -2
- data/spec/rley/support/grammar_int_seq_helper.rb +51 -0
- data/spec/rley/support/grammar_l0_helper.rb +2 -2
- data/spec/rley/support/grammar_pb_helper.rb +2 -2
- data/spec/rley/support/grammar_sppf_helper.rb +2 -2
- data/spec/rley/syntax/{grammar_builder_spec.rb → base_grammar_builder_spec.rb} +29 -11
- data/spec/rley/syntax/match_closest_spec.rb +46 -0
- data/spec/rley/syntax/production_spec.rb +4 -0
- metadata +29 -14
- data/lib/rley/parser/parse_state.rb +0 -78
- data/lib/rley/parser/parse_state_tracker.rb +0 -59
- data/lib/rley/parser/state_set.rb +0 -100
- data/spec/rley/parser/parse_state_spec.rb +0 -125
- data/spec/rley/parser/parse_tracer_spec.rb +0 -200
- data/spec/rley/parser/state_set_spec.rb +0 -130
@@ -0,0 +1,23 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'sequence_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module Notation
|
7
|
+
# A syntax node representing an expression bracketed by parentheses.
|
8
|
+
class GroupingNode < SequenceNode
  # Build a node for a parenthesized group of notation elements.
  # The three arguments are forwarded untouched to the SequenceNode constructor.
  # @param aPosition [Rley::Lexical::Position] Start position.
  # @param sequence [Array<ASTNode>] the AST nodes enclosed by the parentheses
  # @param theRepetition [Symbol, nil] how many times the group may be repeated
  def initialize(aPosition, sequence, theRepetition = nil)
    # Bare `super` re-sends exactly the arguments received by this method
    super
  end

  # Visitee side of the Visitor design pattern:
  # dispatch to the hook dedicated to grouping nodes.
  # @param visitor [Notation::ASTVisitor] the visitor
  # @return [Object] whatever `visit_grouping_node` returns
  def accept(visitor)
    visitor.visit_grouping_node(self)
  end
end # class
|
22
|
+
end # module
|
23
|
+
end # module
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'tokenizer'
|
4
|
+
require_relative 'grammar'
|
5
|
+
require_relative 'ast_builder'
|
6
|
+
|
7
|
+
module Rley
|
8
|
+
module Notation
|
9
|
+
# A parser for the Rley grammar notation (RGN).
# It owns a Rley::Engine facade that is configured with:
# - diagnostics turned on,
# - Notation::ASTBuilder as the builder of the parse representation,
# - Rley::Notation::RGNGrammar as the grammar in use.
# The text to parse is first split into tokens by Notation::Tokenizer.
# NOTE(review): the result of #parse is presumably an AST made of
# Notation nodes rather than a concrete parse tree -- confirm against
# ASTBuilder.
class Parser
  # @return [Rley::Engine] A facade object for the Rley parsing library
  attr_reader :engine

  # Create the engine facade, configure it and load the RGN grammar.
  def initialize
    @engine = Rley::Engine.new do |config|
      config.diagnose = true
      config.repr_builder = Notation::ASTBuilder
    end

    @engine.use_grammar(Rley::Notation::RGNGrammar)
  end

  # Parse the given notation text.
  # @param source [String] text in Rley grammar notation
  # @return [Object] the value returned by the engine's `convert` step
  # @raise [SyntaxError] when the engine reports a parse failure
  def parse(source)
    tokenizer = Tokenizer.new(source)
    outcome = engine.parse(tokenizer.tokens)
    return engine.convert(outcome) if outcome.success?

    # Parse failed: report the reason provided by the engine
    raise SyntaxError, "Parsing failed\nReason: #{outcome.failure_reason.message}"
  end
end # class
|
55
|
+
end # module
|
56
|
+
end # module
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'ast_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module Notation
|
7
|
+
# A syntax node for a sequence of AST nodes
|
8
|
+
class SequenceNode < ASTNode
  # @return [Array<ASTNode>] the member nodes, as passed to the constructor
  attr_reader(:subnodes)

  # @return [Array] constraints attached to the sequence (initially empty)
  attr_accessor(:constraints)

  # @param aPosition [Rley::Lexical::Position] Start position.
  # @param sequence [Array<ASTNode>] sequence of AST nodes
  # @param theRepetition [Symbol, nil] how many times the sequence may be
  #   repeated; the repetition is left untouched when nil (or false) is given
  def initialize(aPosition, sequence, theRepetition = nil)
    super(aPosition)
    @subnodes = sequence
    self.repetition = theRepetition if theRepetition
    @constraints = []
  end

  # @return [Integer] number of subnodes in the sequence
  def size
    subnodes.size
  end

  # Visitee side of the Visitor design pattern:
  # dispatch to the hook dedicated to sequence nodes.
  # @param visitor [Notation::ASTVisitor] the visitor
  # @return [Object] whatever `visit_sequence_node` returns
  def accept(visitor)
    visitor.visit_sequence_node(self)
  end
end # class
|
34
|
+
end # module
|
35
|
+
end # module
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'ast_node'
|
4
|
+
|
5
|
+
module Rley
|
6
|
+
module Notation
|
7
|
+
# A syntax node for a grammar symbol occurring in rhs of a rule
|
8
|
+
class SymbolNode < ASTNode
  # @return [String] name of grammar symbol
  attr_reader(:name)

  # @param aPosition [Rley::Lexical::Position] Position of the entry in the input stream.
  # @param aName [String] name of grammar symbol
  # @param theRepetition [Symbol, nil] how many times the symbol may be
  #   repeated; the repetition is left untouched when nil (or false) is given
  def initialize(aPosition, aName, theRepetition = nil)
    super(aPosition)
    @name = aName
    self.repetition = theRepetition if theRepetition
  end

  # Visitee side of the Visitor design pattern:
  # dispatch to the hook dedicated to symbol nodes.
  # @param visitor [Notation::ASTVisitor] the visitor
  # @return [Object] whatever `visit_symbol_node` returns
  def accept(visitor)
    visitor.visit_symbol_node(self)
  end
end # class
|
28
|
+
end # module
|
29
|
+
end # module
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'strscan'
|
4
|
+
require_relative '../lexical/token'
|
5
|
+
|
6
|
+
module Rley
|
7
|
+
module Notation
|
8
|
+
# A tokenizer for the Rley notation language.
# Responsibility: break input into a sequence of token objects.
# The tokenizer emits the following terminals:
#   LEFT_PAREN, RIGHT_PAREN, LEFT_BRACE, RIGHT_BRACE, COMMA: single characters
#   PLUS, QUESTION_MARK, STAR: modifier characters glued to the preceding text
#   ELLIPSIS: the two-character sequence '..'
#   STR_LIT: text delimited by single or double quotes
#   INT_LIT: a run of decimal digits
#   KEY: a known keyword immediately followed by a colon
#   SYMBOL: any other run of characters, or a modifier preceded by whitespace
class Tokenizer
  # Terminal names of the one or two special character tokens.
  LEXEME_TO_NAME = {
    '(' => 'LEFT_PAREN',
    ')' => 'RIGHT_PAREN',
    '{' => 'LEFT_BRACE',
    '}' => 'RIGHT_BRACE',
    ',' => 'COMMA',
    '+' => 'PLUS',
    '?' => 'QUESTION_MARK',
    '*' => 'STAR',
    '..' => 'ELLIPSIS'
  }.freeze

  # All the implemented Rley notation keywords (written without the colon).
  KEYWORDS = %w[match_closest repeat].freeze

  # @return [StringScanner] Low-level input scanner
  attr_reader(:scanner)

  # @return [Integer] The current line number
  attr_reader(:lineno)

  # @return [Integer] Position of last start of line in the input
  attr_reader(:line_start)

  # Constructor. Initialize a tokenizer for Rley notation input.
  # @param source [String, nil] notation text to tokenize (optional)
  def initialize(source = nil)
    @scanner = StringScanner.new('')
    # Line bookkeeping is valid even when no source is provided yet
    @lineno = 1
    @line_start = 0
    start_with(source) if source
  end

  # Reset the tokenizer and make the given text, the current input.
  # @param source [String] notation text to tokenize.
  def start_with(source)
    @scanner.string = source
    @lineno = 1
    @line_start = 0
  end

  # Scan the source and return an array of tokens.
  # @return [Array<Rley::Lexical::Token>] the sequence of tokens
  # @raise [ScanError] on an unexpected character or unterminated string
  def tokens
    tok_sequence = []
    until @scanner.eos?
      token = _next_token
      tok_sequence << token unless token.nil?
    end

    tok_sequence
  end

  private

  # Recognize the next token, if any.
  # @return [Rley::Lexical::Token, nil] nil when only whitespace remained
  #   or when the scanned text yields no token
  def _next_token
    pos_before = scanner.pos
    skip_intertoken_spaces
    ws_found = scanner.pos > pos_before
    curr_ch = scanner.peek(1)
    return nil if curr_ch.nil? || curr_ch.empty?

    if '(){},'.include?(curr_ch)
      # Single delimiter or separator character
      build_token(LEXEME_TO_NAME[curr_ch], scanner.getch)
    elsif '?*+'.include?(curr_ch)
      # A modifier not glued to preceding text is a plain symbol
      terminal = ws_found ? 'SYMBOL' : LEXEME_TO_NAME[curr_ch]
      build_token(terminal, scanner.getch)
    elsif (lexeme = scanner.scan(/\.\./))
      build_token(LEXEME_TO_NAME[lexeme], lexeme)
    elsif scanner.check(/"|'/) # Start of string detected...
      build_string_token
    elsif (lexeme = scanner.scan(/\d+/))
      build_token('INT_LIT', lexeme)
    elsif (lexeme = scanner.scan(/[a-zA-Z_][a-zA-Z_0-9]*:/))
      keyword = lexeme.chop # Drop the trailing colon
      # The scanner consumed the colon as well: use the full length so
      # that the reported column is the one of the keyword's first character.
      # NOTE(review): an unknown name followed by ':' produces no token and
      # is silently skipped; this looks like an unfinished error case.
      build_token('KEY', keyword, lexeme.size) if KEYWORDS.include?(keyword)
    elsif (lexeme = scanner.scan(/[^?*+,:(){}\s]+/))
      build_token('SYMBOL', lexeme)
    else # Unknown token
      col = scanner.pos - @line_start + 1
      scanner.scan(/./) # Consume the offending character
      raise ScanError, "Error: [line #{lineno}:#{col}]: Unexpected character."
    end
  end

  # Create a token located at the text that was just consumed.
  # @param aSymbolName [String] name of the terminal symbol
  # @param aLexeme [String] text of the token
  # @param aLength [Integer, nil] number of characters consumed from the
  #   input for this token; defaults to the lexeme length
  # @return [Rley::Lexical::Token]
  def build_token(aSymbolName, aLexeme, aLength = nil)
    consumed = aLength || (aLexeme ? aLexeme.size : 0)
    col = scanner.pos - consumed - @line_start + 1
    pos = Rley::Lexical::Position.new(@lineno, col)
    Rley::Lexical::Token.new(aLexeme.dup, aSymbolName, pos)
  end

  # Build a STR_LIT token; its lexeme is the text between the quotes.
  # No escape sequence is interpreted: every character but the closing
  # delimiter is copied verbatim, line breaks included.
  # precondition: current position at leading quote
  # @return [Rley::Lexical::Token]
  # @raise [ScanError] when the input ends before the closing quote
  def build_string_token
    delimiter = scanner.scan(/./)
    line = @lineno
    column_start = scanner.pos - @line_start
    literal = +''
    loop do
      substr = scanner.scan(/[^"'\\\r\n]*/)
      if scanner.eos?
        pos_start = "line #{line}:#{column_start}"
        raise ScanError, "Error: [#{pos_start}]: Unterminated string."
      end

      literal << substr
      # A CR-LF pair is consumed as one unit ('.' cannot match a line feed)
      special = scanner.scan(/\r\n|["'\\\r\n]/)
      break if special == delimiter # Terminating quote found

      # Other quote, backslash or line break: part of the literal
      literal << special
      # Register the new line once the line break is fully consumed
      next_line if special.end_with?("\r", "\n")
    end
    pos = Rley::Lexical::Position.new(line, column_start)
    Rley::Lexical::Token.new(literal, 'STR_LIT', pos)
  end

  # Skip non-significant whitespaces (blanks, tabs, form feeds, line breaks).
  # Advance the scanner until something significant is found.
  # @return [Integer] scanner position after the skipped text
  def skip_intertoken_spaces
    loop do
      blanks = scanner.skip(/[ \t\f]+/)
      newline = scanner.skip(/(?:\r\n)|\r|\n/)
      next_line if newline
      break unless blanks || newline
    end

    scanner.pos
  end

  # Register that a line break was just consumed.
  def next_line
    @lineno += 1
    @line_start = scanner.pos
  end
end # class
|
179
|
+
end # module
|
180
|
+
end # module
|
@@ -123,6 +123,50 @@ module Rley # This module is used as a namespace
|
|
123
123
|
end
|
124
124
|
return node
|
125
125
|
end
|
126
|
+
|
127
|
+
# Standard method for handling one or more modifier: symbol+
|
128
|
+
# rule('symbol_plus' => 'symbol_plus symbol')
|
129
|
+
# def reduce_base_plus_more(_production, _range, _tokens, theChildren)
|
130
|
+
# theChildren[0] << theChildren[1]
|
131
|
+
# end
|
132
|
+
|
133
|
+
# Standard rule method handling one or more modifier: symbol+
|
134
|
+
# rule('symbol_plus' => 'symbol')
|
135
|
+
# def reduce_base_plus_last(_production, _range, _tokens, theChildren)
|
136
|
+
# [theChildren[0]]
|
137
|
+
# end
|
138
|
+
|
139
|
+
# Implicit rule generated for * modifier
|
140
|
+
# rule('X') => 'X item'.as '_star_more'
|
141
|
+
def reduce__star_more(_production, _range, _tokens, theChildren)
  # First child: what was collected so far; second child: the extra item.
  # The collection is extended in place and handed back.
  collected = theChildren[0]
  collected << theChildren[1]
  collected
end
|
145
|
+
|
146
|
+
# Implicit rule generated for * modifier
|
147
|
+
# rule('X') => ''.as '_star_none'
|
148
|
+
def reduce__star_none(_production, _range, _tokens, _children)
  # Zero occurrence: start from a brand new empty collection
  []
end
|
151
|
+
|
152
|
+
# Implicit rule generated for + modifier
|
153
|
+
# rule('X') => 'X item'.as '_plus_more'
|
154
|
+
def reduce__plus_more(_production, _range, _tokens, theChildren)
  # Append the new item (second child) to the items gathered so far
  # (first child), then return the extended collection itself.
  theChildren[0].tap { |gathered| gathered << theChildren[1] }
end
|
158
|
+
|
159
|
+
# Implicit rule generated for + modifier
|
160
|
+
# rule('X') => 'item'.as '_plus_one'
|
161
|
+
def reduce__plus_one(_production, _range, _tokens, theChildren)
  # Single occurrence: wrap the sole item in a new one-element array
  only_item = theChildren[0]
  [only_item]
end
|
164
|
+
|
165
|
+
# Generic reduce action that is not tied to a specific modifier.
|
166
|
+
# It returns the children of the reduced production unchanged.
|
167
|
+
def reduce_return_children(_production, _range, _tokens, theChildren)
  # Pass-through: the very same children object is handed back
  theChildren
end
|
126
170
|
end # class
|
127
171
|
end # module
|
128
172
|
end # module
|
@@ -12,11 +12,15 @@ module Rley # This module is used as a namespace
|
|
12
12
|
# the chart is an array with n + 1 entry sets.
|
13
13
|
class GFGChart
|
14
14
|
# @return [Array<ParseEntrySet>] entry sets (one per input token + 1)
|
15
|
-
attr_reader
|
15
|
+
attr_reader :sets
|
16
|
+
|
17
|
+
# @return [Array<Array<Syntax::MatchClosest>>]
|
18
|
+
attr_reader :constraints
|
16
19
|
|
17
20
|
# @param aGFGraph [GFG::GrmFlowGraph] The GFG for the grammar in use.
|
18
21
|
def initialize(aGFGraph)
  # One list of constraints per entry set; both arrays grow in parallel
  @constraints = [[]]
  @sets = [ParseEntrySet.new]
  # Seed the chart: start vertex of the graph, origin 0, entry set 0
  push_entry(aGFGraph.start_vertex, 0, 0, :start_rule)
end
|
22
26
|
|
@@ -42,6 +46,18 @@ module Rley # This module is used as a namespace
|
|
42
46
|
end
|
43
47
|
end
|
44
48
|
|
49
|
+
# if an entry corresponds to dotted item with a constraint
|
50
|
+
# make this constraint active for this index
|
51
|
+
# :before 'IF'
|
52
|
+
# search backwards to find nearest 'IF' scan rule
|
53
|
+
# in n+1, retrieve all items with IF . pattern
|
54
|
+
# create a lambda
|
55
|
+
# for every subsequent push_entry with same index,
|
56
|
+
# the lambda checks the condition (i.e pattern: ELSE . )
|
57
|
+
# if the condition is false, then push new entry
|
58
|
+
# if the condition is true but the consequent is false, then discard push action
|
59
|
+
# consequent: candidate refers to same dotted_item and same origin, then condition is false
|
60
|
+
|
45
61
|
# Push a parse entry for the chart entry with given index
|
46
62
|
# @param anIndex [Integer] The rank of the token in the input stream.
|
47
63
|
# @return [ParseEntry] the passed parse entry if it is pushed
|
@@ -51,14 +67,48 @@ module Rley # This module is used as a namespace
|
|
51
67
|
# puts " anOrigin: #{anOrigin}"
|
52
68
|
# puts " anIndex: #{anIndex}"
|
53
69
|
# puts " _reason: #{_reason}"
|
54
|
-
new_entry = ParseEntry.new(aVertex, anOrigin)
|
55
70
|
if anIndex == sets.size
|
56
|
-
|
57
|
-
|
71
|
+
if reason == :scan_rule
|
72
|
+
add_entry_set
|
73
|
+
else
|
74
|
+
err_msg = "Internal error: unexpected push reason #{reason}"
|
75
|
+
raise StandardError, err_msg
|
76
|
+
end
|
77
|
+
end
|
78
|
+
|
79
|
+
reject = false
|
80
|
+
unless constraints[anIndex].empty?
|
81
|
+
constraints[anIndex].each do |ct|
|
82
|
+
case ct
|
83
|
+
when Syntax::MatchClosest
|
84
|
+
not_found = sets[anIndex][0].prev_symbol != aVertex.prev_symbol
|
85
|
+
next if not_found
|
86
|
+
|
87
|
+
some_mismatch = ct.entries.find do |en|
|
88
|
+
(en.vertex.dotted_item.production == aVertex.dotted_item.production) &&
|
89
|
+
(en.origin != anOrigin)
|
90
|
+
end
|
91
|
+
reject = true if some_mismatch
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
|
96
|
+
return nil if reject
|
97
|
+
|
98
|
+
new_entry = ParseEntry.new(aVertex, anOrigin)
|
99
|
+
result = self[anIndex].push_entry(new_entry)
|
58
100
|
|
59
|
-
|
101
|
+
if aVertex.kind_of?(GFG::ItemVertex) && aVertex.dotted_item.constraint
|
102
|
+
ct = aVertex.dotted_item.constraint
|
103
|
+
|
104
|
+
case ct
|
105
|
+
when Syntax::MatchClosest
|
106
|
+
update_match_closest(ct, anIndex)
|
107
|
+
end
|
108
|
+
constraints[anIndex] << ct
|
60
109
|
end
|
61
|
-
|
110
|
+
|
111
|
+
result
|
62
112
|
end
|
63
113
|
|
64
114
|
# Retrieve the first parse entry added to this chart
|
@@ -113,6 +163,25 @@ module Rley # This module is used as a namespace
|
|
113
163
|
end
|
114
164
|
# rubocop: enable Lint/UselessAssignment
|
115
165
|
|
166
|
+
# Retrieve all entries that have a given terminal before the dot.
|
167
|
+
# @param criteria [Hash{Symbol => String}]
|
168
|
+
def search_entries(atIndex, criteria)
  # atIndex: rank of the entry set to search in.
  # Only the first key of criteria is taken into account.
  candidates = sets[atIndex].entries
  keyword = criteria.keys.first
  # :before (terminal located just before the dot) is the sole supported
  # criterion; anything else selects no entry.
  return [] unless keyword == :before

  term_name = criteria[keyword]
  candidates.select do |entry|
    entry.dotted_entry? &&
      entry.vertex.dotted_item.position > -2 &&
      entry.prev_symbol&.name == term_name
  end
end
|
184
|
+
|
116
185
|
# @return [String] A human-readable representation of the chart.
|
117
186
|
def to_s
|
118
187
|
result = +''
|
@@ -130,6 +199,32 @@ module Rley # This module is used as a namespace
|
|
130
199
|
|
131
200
|
def add_entry_set
  # Grow both parallel arrays: a new entry set and its own constraint list
  @sets.push(ParseEntrySet.new)
  @constraints.push([])
end
|
204
|
+
|
205
|
+
def update_match_closest(aConstraint, anIndex)
  # Purpose: find the closest entry set located before anIndex whose first
  # entry has the constraint's `closest_symb` just before the dot, then
  # store in the constraint all entries of that set having that symbol
  # before the dot. The constraint is left untouched when no set qualifies.
  # aConstraint must respond to `closest_symb` and `entries=`.
  wanted = aConstraint.closest_symb
  # An entry without a symbol before the dot can never match
  preceded_by_wanted = lambda do |entry|
    symb = entry&.prev_symbol
    symb ? symb.name == wanted : false
  end

  # Search backwards. Set 0 is never selected, as before, and an entry
  # lacking a previous symbol no longer raises NoMethodError.
  nearest = (anIndex - 1).downto(1).find do |rank|
    preceded_by_wanted.call(sets[rank][0])
  end
  return unless nearest

  # Retrieve all entries of the kind: closest_symb .
  aConstraint.entries = sets[nearest].entries.select do |entry|
    preceded_by_wanted.call(entry)
  end
end
|
134
229
|
end # class
|
135
230
|
end # module
|