rley 0.7.07 → 0.7.08
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +348 -54
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/examples/NLP/engtagger.rb +193 -190
- data/examples/NLP/nano_eng/nano_grammar.rb +5 -5
- data/examples/data_formats/JSON/cli_options.rb +1 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +12 -9
- data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
- data/examples/data_formats/JSON/json_grammar.rb +2 -2
- data/examples/data_formats/JSON/json_lexer.rb +8 -8
- data/examples/data_formats/JSON/json_minifier.rb +1 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
- data/examples/general/calc_iter1/calc_grammar.rb +2 -2
- data/examples/general/calc_iter1/calc_lexer.rb +6 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
- data/examples/general/calc_iter2/calc_grammar.rb +3 -3
- data/examples/general/calc_iter2/calc_lexer.rb +11 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
- data/examples/general/left.rb +2 -2
- data/examples/general/right.rb +2 -2
- data/lib/rley/base/dotted_item.rb +23 -31
- data/lib/rley/constants.rb +2 -2
- data/lib/rley/engine.rb +20 -23
- data/lib/rley/formatter/asciitree.rb +3 -3
- data/lib/rley/formatter/bracket_notation.rb +1 -8
- data/lib/rley/formatter/debug.rb +6 -6
- data/lib/rley/formatter/json.rb +2 -2
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/edge.rb +5 -5
- data/lib/rley/gfg/end_vertex.rb +2 -6
- data/lib/rley/gfg/epsilon_edge.rb +1 -5
- data/lib/rley/gfg/grm_flow_graph.rb +27 -23
- data/lib/rley/gfg/item_vertex.rb +10 -10
- data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
- data/lib/rley/gfg/scan_edge.rb +1 -1
- data/lib/rley/gfg/shortcut_edge.rb +2 -2
- data/lib/rley/gfg/start_vertex.rb +4 -8
- data/lib/rley/gfg/vertex.rb +43 -39
- data/lib/rley/lexical/token_range.rb +6 -6
- data/lib/rley/parse_forest_visitor.rb +5 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +9 -11
- data/lib/rley/parse_rep/cst_builder.rb +5 -6
- data/lib/rley/parse_rep/parse_forest_builder.rb +20 -18
- data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +11 -13
- data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
- data/lib/rley/parse_tree_visitor.rb +1 -1
- data/lib/rley/parser/error_reason.rb +4 -5
- data/lib/rley/parser/gfg_chart.rb +20 -22
- data/lib/rley/parser/gfg_parsing.rb +16 -30
- data/lib/rley/parser/parse_entry.rb +25 -31
- data/lib/rley/parser/parse_entry_set.rb +18 -15
- data/lib/rley/parser/parse_entry_tracker.rb +4 -4
- data/lib/rley/parser/parse_state.rb +16 -21
- data/lib/rley/parser/parse_state_tracker.rb +4 -4
- data/lib/rley/parser/parse_tracer.rb +13 -13
- data/lib/rley/parser/parse_walker_factory.rb +23 -28
- data/lib/rley/parser/state_set.rb +9 -10
- data/lib/rley/ptree/non_terminal_node.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +3 -3
- data/lib/rley/ptree/parse_tree_node.rb +5 -5
- data/lib/rley/ptree/terminal_node.rb +7 -7
- data/lib/rley/rley_error.rb +12 -12
- data/lib/rley/sppf/alternative_node.rb +6 -6
- data/lib/rley/sppf/composite_node.rb +7 -7
- data/lib/rley/sppf/epsilon_node.rb +3 -3
- data/lib/rley/sppf/leaf_node.rb +3 -3
- data/lib/rley/sppf/parse_forest.rb +16 -16
- data/lib/rley/sppf/sppf_node.rb +7 -8
- data/lib/rley/sppf/token_node.rb +3 -3
- data/lib/rley/syntax/grammar.rb +5 -5
- data/lib/rley/syntax/grammar_builder.rb +9 -9
- data/lib/rley/syntax/grm_symbol.rb +6 -6
- data/lib/rley/syntax/non_terminal.rb +9 -15
- data/lib/rley/syntax/production.rb +10 -10
- data/lib/rley/syntax/symbol_seq.rb +7 -9
- data/lib/rley/syntax/terminal.rb +4 -5
- data/lib/rley/syntax/verbatim_symbol.rb +3 -3
- data/lib/support/base_tokenizer.rb +19 -18
- data/spec/rley/base/dotted_item_spec.rb +2 -2
- data/spec/rley/engine_spec.rb +17 -15
- data/spec/rley/formatter/asciitree_spec.rb +7 -7
- data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
- data/spec/rley/formatter/json_spec.rb +1 -1
- data/spec/rley/gfg/end_vertex_spec.rb +5 -5
- data/spec/rley/gfg/item_vertex_spec.rb +10 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
- data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
- data/spec/rley/gfg/start_vertex_spec.rb +5 -5
- data/spec/rley/gfg/vertex_spec.rb +3 -3
- data/spec/rley/lexical/token_range_spec.rb +16 -16
- data/spec/rley/lexical/token_spec.rb +2 -2
- data/spec/rley/parse_forest_visitor_spec.rb +165 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +6 -6
- data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
- data/spec/rley/parse_rep/groucho_spec.rb +21 -21
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +26 -26
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +6 -6
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +2 -2
- data/spec/rley/parse_tree_visitor_spec.rb +10 -8
- data/spec/rley/parser/error_reason_spec.rb +6 -6
- data/spec/rley/parser/gfg_earley_parser_spec.rb +4 -2
- data/spec/rley/parser/gfg_parsing_spec.rb +4 -8
- data/spec/rley/parser/parse_entry_spec.rb +19 -19
- data/spec/rley/parser/parse_state_spec.rb +5 -5
- data/spec/rley/parser/parse_walker_factory_spec.rb +1 -1
- data/spec/rley/parser/state_set_spec.rb +22 -22
- data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
- data/spec/rley/ptree/terminal_node_spec.rb +6 -6
- data/spec/rley/sppf/alternative_node_spec.rb +6 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
- data/spec/rley/sppf/token_node_spec.rb +4 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +3 -4
- data/spec/rley/support/grammar_abc_helper.rb +2 -4
- data/spec/rley/support/grammar_ambig01_helper.rb +4 -5
- data/spec/rley/support/grammar_arr_int_helper.rb +4 -5
- data/spec/rley/support/grammar_b_expr_helper.rb +4 -5
- data/spec/rley/support/grammar_l0_helper.rb +10 -11
- data/spec/rley/support/grammar_pb_helper.rb +6 -5
- data/spec/rley/support/grammar_sppf_helper.rb +1 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +5 -5
- data/spec/rley/syntax/grammar_spec.rb +6 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
- data/spec/rley/syntax/non_terminal_spec.rb +8 -8
- data/spec/rley/syntax/production_spec.rb +13 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
- data/spec/rley/syntax/terminal_spec.rb +5 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -12
- data/spec/support/base_tokenizer_spec.rb +7 -2
- metadata +21 -62
- data/.simplecov +0 -8
data/examples/general/left.rb
CHANGED
@@ -8,10 +8,10 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
8
8
|
# The grammar defines a language that consists of a sequence
|
9
9
|
# of 0 or more dots...
|
10
10
|
add_terminals('DOT')
|
11
|
-
|
11
|
+
|
12
12
|
# Grammar with left recursive rule.
|
13
13
|
rule 'l_dots' => []
|
14
|
-
rule 'l_dots' => %w[l_dots DOT]
|
14
|
+
rule 'l_dots' => %w[l_dots DOT]
|
15
15
|
end
|
16
16
|
|
17
17
|
# And now, let's build the grammar...
|
data/examples/general/right.rb
CHANGED
@@ -8,10 +8,10 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
8
8
|
# The grammar defines a language that consists of a sequence
|
9
9
|
# of 0 or more dots...
|
10
10
|
add_terminals('DOT')
|
11
|
-
|
11
|
+
|
12
12
|
# Grammar with right recursive rule.
|
13
13
|
rule 'r_dots' => []
|
14
|
-
rule 'r_dots' => %w[DOT r_dots]
|
14
|
+
rule 'r_dots' => %w[DOT r_dots]
|
15
15
|
end
|
16
16
|
|
17
17
|
# And now, let's build the grammar...
|
@@ -19,14 +19,14 @@ module Rley # This module is used as a namespace
|
|
19
19
|
class DottedItem
|
20
20
|
# Production rule
|
21
21
|
# @return [Syntax::Production]
|
22
|
-
attr_reader
|
22
|
+
attr_reader :production
|
23
23
|
|
24
24
|
# Index of the next symbol (from the rhs) after the 'dot'.
|
25
25
|
# If the dot is at the end of the rhs (i.e.) there is no next
|
26
26
|
# symbol, then the position takes the value -1.
|
27
27
|
# If the rhs is empty, then the position is -2
|
28
28
|
# @return [Integer]
|
29
|
-
attr_reader
|
29
|
+
attr_reader :position
|
30
30
|
|
31
31
|
# @param aProduction [Syntax::Production]
|
32
32
|
# @param aPosition [Integer] Position of the dot in rhs of production.
|
@@ -37,7 +37,7 @@ module Rley # This module is used as a namespace
|
|
37
37
|
|
38
38
|
# Return a String representation of the dotted item.
|
39
39
|
# @return [String]
|
40
|
-
def to_s
|
40
|
+
def to_s
|
41
41
|
prefix = "#{production.lhs} => "
|
42
42
|
text_values = production.rhs.map(&:to_s)
|
43
43
|
if position.negative?
|
@@ -47,13 +47,13 @@ module Rley # This module is used as a namespace
|
|
47
47
|
end
|
48
48
|
suffix = text_values.join(' ')
|
49
49
|
|
50
|
-
|
50
|
+
prefix + suffix
|
51
51
|
end
|
52
52
|
|
53
53
|
# Return true if the dot position is at the start of the rhs.
|
54
54
|
# @return [Boolean]
|
55
|
-
def at_start?
|
56
|
-
|
55
|
+
def at_start?
|
56
|
+
position.zero? || position == -2
|
57
57
|
end
|
58
58
|
|
59
59
|
# An item with the dot at the beginning is called
|
@@ -62,41 +62,35 @@ module Rley # This module is used as a namespace
|
|
62
62
|
|
63
63
|
# A dotted item is called a reduce item if the dot is at the end.
|
64
64
|
# @return [Boolean]
|
65
|
-
def reduce_item?
|
66
|
-
|
65
|
+
def reduce_item?
|
66
|
+
position.negative? # Either -1 or -2
|
67
67
|
end
|
68
68
|
|
69
69
|
# The non-terminal symbol that is on the left-side of the production
|
70
70
|
# @return [Syntax::NonTerminal]
|
71
|
-
def lhs
|
72
|
-
|
71
|
+
def lhs
|
72
|
+
production.lhs
|
73
73
|
end
|
74
74
|
|
75
75
|
# Return the symbol before the dot.
|
76
76
|
# nil is returned if the dot is at the start of the rhs
|
77
77
|
# @return [Syntax::GrmSymbol, NilClass]
|
78
|
-
def prev_symbol
|
78
|
+
def prev_symbol
|
79
79
|
before_position = prev_position
|
80
|
-
|
81
|
-
nil
|
82
|
-
else
|
83
|
-
production.rhs[before_position]
|
84
|
-
end
|
85
|
-
|
86
|
-
return result
|
80
|
+
before_position.nil? ? nil : production.rhs[before_position]
|
87
81
|
end
|
88
82
|
|
89
83
|
# Return the symbol after the dot.
|
90
84
|
# nil is returned if the dot is at the end
|
91
85
|
# @return [Syntax::GrmSymbol, NilClass]
|
92
|
-
def next_symbol
|
93
|
-
|
86
|
+
def next_symbol
|
87
|
+
position.negative? ? nil : production.rhs[position]
|
94
88
|
end
|
95
89
|
|
96
90
|
# Calculate the position of the dot if it were moved by
|
97
91
|
# one step on the left.
|
98
92
|
# @return [Integer]
|
99
|
-
def prev_position
|
93
|
+
def prev_position
|
100
94
|
unless @k_prev_position
|
101
95
|
case position
|
102
96
|
when -2, 0
|
@@ -122,7 +116,7 @@ module Rley # This module is used as a namespace
|
|
122
116
|
to_the_left = prev_position
|
123
117
|
return false if to_the_left.nil?
|
124
118
|
|
125
|
-
|
119
|
+
to_the_left == another.position
|
126
120
|
end
|
127
121
|
|
128
122
|
|
@@ -135,15 +129,13 @@ module Rley # This module is used as a namespace
|
|
135
129
|
raise StandardError, 'Out of bound index'
|
136
130
|
end
|
137
131
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
return index
|
132
|
+
if rhs_size.zero?
|
133
|
+
-2 # Minus 2 at start/end of empty production
|
134
|
+
elsif aPosition == rhs_size
|
135
|
+
-1 # Minus 1 at end of non-empty production
|
136
|
+
else
|
137
|
+
aPosition
|
138
|
+
end
|
147
139
|
end
|
148
140
|
end # class
|
149
141
|
end # module
|
data/lib/rley/constants.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
module Rley # Module used as a namespace
|
7
7
|
# The version number of the gem.
|
8
|
-
Version = '0.7.
|
8
|
+
Version = '0.7.08'
|
9
9
|
|
10
10
|
# Brief description of the gem.
|
11
11
|
Description = "Ruby implementation of the Earley's parsing algorithm"
|
@@ -20,7 +20,7 @@ module Rley # Module used as a namespace
|
|
20
20
|
RootDir = begin
|
21
21
|
require 'pathname' # Load Pathname class from standard library
|
22
22
|
startdir = Pathname(__FILE__).dirname.parent.parent.expand_path
|
23
|
-
startdir
|
23
|
+
"#{startdir}/" # Append trailing slash character to it
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end # module
|
data/lib/rley/engine.rb
CHANGED
@@ -15,15 +15,14 @@ module Rley # This module is used as a namespace
|
|
15
15
|
# @return [Symbol] allowed values are: :parse_tree, :parse_forest
|
16
16
|
:parse_repr,
|
17
17
|
:repr_builder,
|
18
|
-
:diagnose
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
end
|
18
|
+
:diagnose) do
|
19
|
+
# Constructor with default initialization.
|
20
|
+
def initialize
|
21
|
+
super()
|
22
|
+
self.parse_repr = :parse_tree
|
23
|
+
self.repr_builder = :default
|
24
|
+
self.diagnose = false
|
25
|
+
end
|
27
26
|
end
|
28
27
|
|
29
28
|
# Implementation of the GoF Facade design pattern.
|
@@ -43,7 +42,7 @@ module Rley # This module is used as a namespace
|
|
43
42
|
# Engine.new do |config|
|
44
43
|
# config.parse_repr = :parse_forest
|
45
44
|
# end
|
46
|
-
def initialize
|
45
|
+
def initialize
|
47
46
|
@configuration = EngineConfig.new
|
48
47
|
yield configuration if block_given?
|
49
48
|
end
|
@@ -91,7 +90,7 @@ module Rley # This module is used as a namespace
|
|
91
90
|
result = parser.parse(tokens)
|
92
91
|
result.tidy_up!
|
93
92
|
|
94
|
-
|
93
|
+
result
|
95
94
|
end
|
96
95
|
|
97
96
|
# Convert raw parse result into a more convenient representation
|
@@ -99,14 +98,12 @@ module Rley # This module is used as a namespace
|
|
99
98
|
# @param aRawParse [Parser::GFGParsing]
|
100
99
|
# @return [Rley::PTree::ParseTree, Rley::SPPF::ParseForest]
|
101
100
|
def convert(aRawParse)
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
return result
|
101
|
+
case configuration.parse_repr
|
102
|
+
when :parse_tree
|
103
|
+
to_ptree(aRawParse)
|
104
|
+
when :parse_forest
|
105
|
+
to_pforest(aRawParse)
|
106
|
+
end
|
110
107
|
end
|
111
108
|
|
112
109
|
# Convert raw parse result into a parse tree representation
|
@@ -120,7 +117,7 @@ module Rley # This module is used as a namespace
|
|
120
117
|
result = factory.create(configuration.repr_builder)
|
121
118
|
end
|
122
119
|
|
123
|
-
|
120
|
+
result
|
124
121
|
end
|
125
122
|
|
126
123
|
# Convert raw parse result into a parse forest representation
|
@@ -134,7 +131,7 @@ module Rley # This module is used as a namespace
|
|
134
131
|
result = factory.create(configuration.repr_builder)
|
135
132
|
end
|
136
133
|
|
137
|
-
|
134
|
+
result
|
138
135
|
end
|
139
136
|
|
140
137
|
# Build a visitor for the given parse tree
|
@@ -148,13 +145,13 @@ module Rley # This module is used as a namespace
|
|
148
145
|
# @param aPForest [SPPF::ParseForest]
|
149
146
|
# @return [ParseForestVisitor]
|
150
147
|
def pforest_visitor(aPForest)
|
151
|
-
|
148
|
+
ParseForestVisitor.new(aPForest)
|
152
149
|
end
|
153
150
|
|
154
151
|
protected
|
155
152
|
|
156
153
|
def build_parser(aGrammar)
|
157
|
-
|
154
|
+
Parser::GFGEarleyParser.new(aGrammar)
|
158
155
|
end
|
159
156
|
end # class
|
160
157
|
end # module
|
@@ -15,7 +15,7 @@ module Rley # This module is used as a namespace
|
|
15
15
|
# Allowed string values are: 'first', 'last', 'first_and_last', 'other'
|
16
16
|
attr_reader(:ranks)
|
17
17
|
|
18
|
-
# @return [String] The character pattern used for rendering
|
18
|
+
# @return [String] The character pattern used for rendering
|
19
19
|
# a parent - child nesting
|
20
20
|
attr_reader(:nesting_prefix)
|
21
21
|
|
@@ -99,7 +99,7 @@ module Rley # This module is used as a namespace
|
|
99
99
|
end
|
100
100
|
|
101
101
|
# 'root', 'first', 'first_and_last', 'last', 'other'
|
102
|
-
def path_prefix
|
102
|
+
def path_prefix
|
103
103
|
return '' if ranks.empty?
|
104
104
|
|
105
105
|
prefix = +''
|
@@ -116,7 +116,7 @@ module Rley # This module is used as a namespace
|
|
116
116
|
end
|
117
117
|
|
118
118
|
prefix << nesting_prefix
|
119
|
-
|
119
|
+
prefix
|
120
120
|
end
|
121
121
|
|
122
122
|
def emit(aNode, aSuffix = '')
|
@@ -13,13 +13,6 @@ module Rley # This module is used as a namespace
|
|
13
13
|
# For Ruby developers, there is RSyntaxTree by Yoichiro Hasebe.
|
14
14
|
# (accessible via: http://yohasebe.com/rsyntaxtree/)
|
15
15
|
class BracketNotation < BaseFormatter
|
16
|
-
# Constructor.
|
17
|
-
# @param anIO [IO] The output stream to which the rendered grammar
|
18
|
-
# is written.
|
19
|
-
def initialize(anIO)
|
20
|
-
super(anIO)
|
21
|
-
end
|
22
|
-
|
23
16
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
24
17
|
# Notification of a visit event: the visitor is about to visit
|
25
18
|
# a non-terminal node
|
@@ -44,7 +37,7 @@ module Rley # This module is used as a namespace
|
|
44
37
|
# Escape all opening and closing square brackets
|
45
38
|
escape_lbrackets = aTerm.token.lexeme.gsub(/\[/, '\[')
|
46
39
|
escaped = escape_lbrackets.gsub(/\]/, '\]')
|
47
|
-
write(escaped
|
40
|
+
write("#{escaped}]")
|
48
41
|
end
|
49
42
|
|
50
43
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
data/lib/rley/formatter/debug.rb
CHANGED
@@ -19,17 +19,17 @@ module Rley # This module is used as a namespace
|
|
19
19
|
super(anIO)
|
20
20
|
@indentation = 0
|
21
21
|
end
|
22
|
-
|
22
|
+
|
23
23
|
# Indicates that this formatter accepts all visit events
|
24
24
|
# provided their names start with 'before_' or 'after_'
|
25
25
|
# @return [Boolean]
|
26
26
|
def accept_all
|
27
27
|
return true
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
# Ghost method pattern.
|
31
|
-
def method_missing(mth, *args)
|
32
|
-
mth_name = mth.to_s
|
31
|
+
def method_missing(mth, *args)
|
32
|
+
mth_name = mth.to_s
|
33
33
|
case mth_name
|
34
34
|
when /^before_/
|
35
35
|
output_event(mth_name, indentation)
|
@@ -44,11 +44,11 @@ module Rley # This module is used as a namespace
|
|
44
44
|
|
45
45
|
private
|
46
46
|
|
47
|
-
def indent
|
47
|
+
def indent
|
48
48
|
@indentation += 1
|
49
49
|
end
|
50
50
|
|
51
|
-
def dedent
|
51
|
+
def dedent
|
52
52
|
@indentation -= 1
|
53
53
|
end
|
54
54
|
|
data/lib/rley/formatter/json.rb
CHANGED
data/lib/rley/gfg/call_edge.rb
CHANGED
@@ -16,7 +16,7 @@ module Rley # This module is used as a namespace
|
|
16
16
|
# Pre-condition: theSuccessor is a StartVertex
|
17
17
|
def initialize(thePredecessor, theSuccessor)
|
18
18
|
super(thePredecessor, theSuccessor)
|
19
|
-
do_set_key(thePredecessor, theSuccessor)
|
19
|
+
do_set_key(thePredecessor, theSuccessor)
|
20
20
|
end
|
21
21
|
|
22
22
|
private
|
data/lib/rley/gfg/edge.rb
CHANGED
@@ -14,18 +14,18 @@ module Rley # This module is used as a namespace
|
|
14
14
|
# @param theSuccessor [Vertex]
|
15
15
|
def initialize(thePredecessor, theSuccessor)
|
16
16
|
@successor = theSuccessor
|
17
|
-
thePredecessor
|
17
|
+
thePredecessor&.add_edge(self)
|
18
18
|
end
|
19
19
|
|
20
20
|
# @return [String]
|
21
|
-
def to_s
|
21
|
+
def to_s
|
22
22
|
" --> #{successor.label}"
|
23
23
|
end
|
24
|
-
|
25
|
-
# Returns a string containing a human-readable representation of the
|
24
|
+
|
25
|
+
# Returns a string containing a human-readable representation of the
|
26
26
|
# production.
|
27
27
|
# @return [String]
|
28
|
-
def inspect
|
28
|
+
def inspect
|
29
29
|
to_s
|
30
30
|
end
|
31
31
|
end # class
|
data/lib/rley/gfg/end_vertex.rb
CHANGED
@@ -10,12 +10,8 @@ module Rley # This module is used as a namespace
|
|
10
10
|
# Responsibilities (in addition to inherited ones):
|
11
11
|
# - Know its related non-terminal symbol
|
12
12
|
class EndVertex < NonTerminalVertex
|
13
|
-
def
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
def label()
|
18
|
-
return "#{non_terminal}."
|
13
|
+
def label
|
14
|
+
"#{non_terminal}."
|
19
15
|
end
|
20
16
|
end # class
|
21
17
|
end # module
|
@@ -9,12 +9,8 @@ module Rley # This module is used as a namespace
|
|
9
9
|
# Responsibilities:
|
10
10
|
# - To know the successor vertex
|
11
11
|
class EpsilonEdge < Edge
|
12
|
-
# The destination vertex of the edge
|
12
|
+
# The destination vertex of the edge.
|
13
13
|
attr_reader :successor
|
14
|
-
|
15
|
-
def initialize(thePredecessor, theSuccessor)
|
16
|
-
super(thePredecessor, theSuccessor)
|
17
|
-
end
|
18
14
|
end # class
|
19
15
|
end # module
|
20
16
|
end # module
|
@@ -13,26 +13,26 @@ require_relative 'shortcut_edge'
|
|
13
13
|
module Rley # This module is used as a namespace
|
14
14
|
module GFG # This module is used as a namespace
|
15
15
|
# A Grammar Flow Graph (GFG) represents the parsing states of productions
|
16
|
-
# rules from a context-free grammar. This representation is based on a
|
17
|
-
# directed graph structure. The parsing process can then be re-formulated
|
16
|
+
# rules from a context-free grammar. This representation is based on a
|
17
|
+
# directed graph structure. The parsing process can then be re-formulated
|
18
18
|
# as a path problem in the graph. The theory behind GFGs can be found in
|
19
19
|
# papers. The first article on GFG can be found here:
|
20
20
|
# https://apps.cs.utexas.edu/tech_reports/reports/tr/TR-2102.pdf
|
21
|
-
# There are three types of vertex in a GFG:
|
21
|
+
# There are three types of vertex in a GFG:
|
22
22
|
# start vertex, end vertex and item vertex.
|
23
23
|
# For each non-terminal symbol N of the grammar, there is:
|
24
24
|
# a start vertex with label '.N'
|
25
25
|
# an end vertex with label 'N.'
|
26
26
|
# For each production rule of the grammar:
|
27
27
|
# N => s1 s2 s3 (...) sk
|
28
|
-
# I.e. a rule with k grammar symbols in its right-handed side.
|
28
|
+
# I.e. a rule with k grammar symbols in its right-handed side.
|
29
29
|
# For such a rule there will be k + 1 item vertices. By convention,
|
30
30
|
# the first item vertex is labelled as 'N => . s1 s2 s3 (...) sk'
|
31
31
|
# the second item vertex is labelled as 'N => s1 . s2 s3 (...) sk'
|
32
32
|
# the third item vertex is labelled as 'N => s1 s2 . s3 (...) sk'
|
33
33
|
# and so on. In other words, the labels are obtained by moving a dot
|
34
|
-
# in successive positions in the rhs. The dot represents the
|
35
|
-
# parse progress for the production rule. Symbols on the left of the
|
34
|
+
# in successive positions in the rhs. The dot represents the
|
35
|
+
# parse progress for the production rule. Symbols on the left of the
|
36
36
|
# dot represent the symbols that were successfully matched in the input.
|
37
37
|
# A GFG has three types of directed edges linking the vertices.
|
38
38
|
# call edge, return edge and scan edge.
|
@@ -60,24 +60,24 @@ module Rley # This module is used as a namespace
|
|
60
60
|
|
61
61
|
build_graph(theDottedItems)
|
62
62
|
end
|
63
|
-
|
64
|
-
# Returns a string containing a human-readable representation of the
|
63
|
+
|
64
|
+
# Returns a string containing a human-readable representation of the
|
65
65
|
# production.
|
66
66
|
# @return [String]
|
67
|
-
def inspect
|
67
|
+
def inspect
|
68
68
|
result = +"#<#{self.class.name}:#{object_id}"
|
69
69
|
result << ' @vertices=['
|
70
70
|
list = vertices.map { |v| "#<#{v.selfie}>" }
|
71
71
|
result << list.join(', ')
|
72
72
|
result << '] '
|
73
73
|
edges = []
|
74
|
-
vertices.each do |v|
|
74
|
+
vertices.each do |v|
|
75
75
|
edges << v.edges do |e|
|
76
76
|
result << "#{v.object_id} #{e.inspect}"
|
77
77
|
end
|
78
78
|
end
|
79
79
|
result << "edges=[#{edges.join(",\n ")}]>"
|
80
|
-
|
80
|
+
result
|
81
81
|
end
|
82
82
|
|
83
83
|
# Retrieve the vertex with given vertex label.
|
@@ -92,7 +92,7 @@ module Rley # This module is used as a namespace
|
|
92
92
|
# If one wants to remove useless rules, then do first:
|
93
93
|
# elimination of non-generating symbols
|
94
94
|
# then elimination of unreachable symbols
|
95
|
-
def diagnose
|
95
|
+
def diagnose
|
96
96
|
mark_unreachable_symbols
|
97
97
|
end
|
98
98
|
|
@@ -121,6 +121,7 @@ module Rley # This module is used as a namespace
|
|
121
121
|
# @param aStartVertex [StartVertex] the depth-first traversal begins
|
122
122
|
# from here
|
123
123
|
# @param _visitAction [Proc] block called when a new graph vertex is found
|
124
|
+
# rubocop: disable Lint/Loop
|
124
125
|
def traverse_df(aStartVertex, &_visitAction)
|
125
126
|
visited = Set.new
|
126
127
|
stack = []
|
@@ -129,13 +130,13 @@ module Rley # This module is used as a namespace
|
|
129
130
|
|
130
131
|
begin
|
131
132
|
# print_vertex( 'Traversing', visitee)
|
132
|
-
|
133
|
+
|
133
134
|
first_time = !visited.include?(visitee)
|
134
135
|
if first_time
|
135
136
|
yield(visitee)
|
136
137
|
visited << visitee
|
137
|
-
end
|
138
|
-
|
138
|
+
end
|
139
|
+
|
139
140
|
case visitee
|
140
141
|
when Rley::GFG::StartVertex
|
141
142
|
if first_time
|
@@ -155,12 +156,12 @@ module Rley # This module is used as a namespace
|
|
155
156
|
if stack.last.done?
|
156
157
|
popped = stack.pop
|
157
158
|
break if stack.empty?
|
158
|
-
|
159
|
+
|
159
160
|
# puts "Popped!"
|
160
161
|
return_key = popped.in_edge.key.sub(/^CALL/, 'RET')
|
161
162
|
curr_edge = visitee.edges.find { |e| e.key == return_key }
|
162
163
|
else
|
163
|
-
curr_edge = stack.last.next_edge
|
164
|
+
curr_edge = stack.last.next_edge
|
164
165
|
end
|
165
166
|
|
166
167
|
else
|
@@ -173,6 +174,7 @@ module Rley # This module is used as a namespace
|
|
173
174
|
last_one = end_vertex_for[aStartVertex.non_terminal]
|
174
175
|
yield(last_one) unless visited.include?(last_one)
|
175
176
|
end
|
177
|
+
# rubocop: enable Lint/Loop
|
176
178
|
|
177
179
|
private
|
178
180
|
|
@@ -183,16 +185,16 @@ module Rley # This module is used as a namespace
|
|
183
185
|
@start_vertex = aVertex if vertices.empty?
|
184
186
|
vertices << aVertex
|
185
187
|
end
|
186
|
-
|
188
|
+
|
187
189
|
# For debugging purposes
|
188
190
|
def print_vertex(aText, aVertex)
|
189
|
-
print aText
|
191
|
+
print "#{aText} "
|
190
192
|
if aVertex.kind_of?(NonTerminalVertex)
|
191
193
|
puts "#{aVertex.class} #{aVertex.non_terminal.name}"
|
192
194
|
else
|
193
195
|
p(aVertex.label)
|
194
|
-
end
|
195
|
-
end
|
196
|
+
end
|
197
|
+
end
|
196
198
|
|
197
199
|
def build_graph(theDottedItems)
|
198
200
|
build_all_starts_ends(theDottedItems)
|
@@ -200,7 +202,7 @@ module Rley # This module is used as a namespace
|
|
200
202
|
curr_prod = nil
|
201
203
|
theDottedItems.each_with_index do |d_item, index_item|
|
202
204
|
next unless curr_prod.nil? || curr_prod != d_item.production
|
203
|
-
|
205
|
+
|
204
206
|
# Another production found...
|
205
207
|
curr_prod = d_item.production
|
206
208
|
if curr_prod.empty?
|
@@ -268,6 +270,7 @@ module Rley # This module is used as a namespace
|
|
268
270
|
# add a shortcut edge:
|
269
271
|
# ( N => α[1] .A α[n] ) -> ( N => α[1] A. α[n] )
|
270
272
|
def augment_graph(theDottedItems, firstItemPos)
|
273
|
+
# rubocop: disable Lint/RedundantSafeNavigation
|
271
274
|
production = theDottedItems[firstItemPos].production
|
272
275
|
max_index = production.rhs.size + 1
|
273
276
|
prev_vertex = nil
|
@@ -298,6 +301,7 @@ module Rley # This module is used as a namespace
|
|
298
301
|
prev_vertex = new_vertex
|
299
302
|
end
|
300
303
|
end
|
304
|
+
# rubocop: enable Lint/RedundantSafeNavigation
|
301
305
|
|
302
306
|
# Create an entry edge for the given vertex
|
303
307
|
def build_entry_edge(theVertex)
|
@@ -359,7 +363,7 @@ module Rley # This module is used as a namespace
|
|
359
363
|
# Mark non-terminal symbols that cannot be derived from the start symbol.
|
360
364
|
# In a GFG, a non-terminal symbol N is unreachable if there is no path
|
361
365
|
# from the start symbol to the start node .N
|
362
|
-
def mark_unreachable_symbols
|
366
|
+
def mark_unreachable_symbols
|
363
367
|
# Mark all non-terminals as unreachable
|
364
368
|
start_vertex_for.each_value do |a_vertex|
|
365
369
|
a_vertex.non_terminal.unreachable = true
|