rley 0.7.07 → 0.7.08
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +348 -54
- data/LICENSE.txt +1 -1
- data/README.md +3 -2
- data/examples/NLP/engtagger.rb +193 -190
- data/examples/NLP/nano_eng/nano_grammar.rb +5 -5
- data/examples/data_formats/JSON/cli_options.rb +1 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +12 -9
- data/examples/data_formats/JSON/json_ast_nodes.rb +12 -21
- data/examples/data_formats/JSON/json_grammar.rb +2 -2
- data/examples/data_formats/JSON/json_lexer.rb +8 -8
- data/examples/data_formats/JSON/json_minifier.rb +1 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +13 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +23 -37
- data/examples/general/calc_iter1/calc_grammar.rb +2 -2
- data/examples/general/calc_iter1/calc_lexer.rb +6 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +5 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +5 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +27 -43
- data/examples/general/calc_iter2/calc_grammar.rb +3 -3
- data/examples/general/calc_iter2/calc_lexer.rb +11 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +26 -26
- data/examples/general/left.rb +2 -2
- data/examples/general/right.rb +2 -2
- data/lib/rley/base/dotted_item.rb +23 -31
- data/lib/rley/constants.rb +2 -2
- data/lib/rley/engine.rb +20 -23
- data/lib/rley/formatter/asciitree.rb +3 -3
- data/lib/rley/formatter/bracket_notation.rb +1 -8
- data/lib/rley/formatter/debug.rb +6 -6
- data/lib/rley/formatter/json.rb +2 -2
- data/lib/rley/gfg/call_edge.rb +1 -1
- data/lib/rley/gfg/edge.rb +5 -5
- data/lib/rley/gfg/end_vertex.rb +2 -6
- data/lib/rley/gfg/epsilon_edge.rb +1 -5
- data/lib/rley/gfg/grm_flow_graph.rb +27 -23
- data/lib/rley/gfg/item_vertex.rb +10 -10
- data/lib/rley/gfg/non_terminal_vertex.rb +4 -4
- data/lib/rley/gfg/scan_edge.rb +1 -1
- data/lib/rley/gfg/shortcut_edge.rb +2 -2
- data/lib/rley/gfg/start_vertex.rb +4 -8
- data/lib/rley/gfg/vertex.rb +43 -39
- data/lib/rley/lexical/token_range.rb +6 -6
- data/lib/rley/parse_forest_visitor.rb +5 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +9 -11
- data/lib/rley/parse_rep/cst_builder.rb +5 -6
- data/lib/rley/parse_rep/parse_forest_builder.rb +20 -18
- data/lib/rley/parse_rep/parse_forest_factory.rb +3 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +11 -13
- data/lib/rley/parse_rep/parse_tree_builder.rb +4 -4
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -27
- data/lib/rley/parse_tree_visitor.rb +1 -1
- data/lib/rley/parser/error_reason.rb +4 -5
- data/lib/rley/parser/gfg_chart.rb +20 -22
- data/lib/rley/parser/gfg_parsing.rb +16 -30
- data/lib/rley/parser/parse_entry.rb +25 -31
- data/lib/rley/parser/parse_entry_set.rb +18 -15
- data/lib/rley/parser/parse_entry_tracker.rb +4 -4
- data/lib/rley/parser/parse_state.rb +16 -21
- data/lib/rley/parser/parse_state_tracker.rb +4 -4
- data/lib/rley/parser/parse_tracer.rb +13 -13
- data/lib/rley/parser/parse_walker_factory.rb +23 -28
- data/lib/rley/parser/state_set.rb +9 -10
- data/lib/rley/ptree/non_terminal_node.rb +7 -5
- data/lib/rley/ptree/parse_tree.rb +3 -3
- data/lib/rley/ptree/parse_tree_node.rb +5 -5
- data/lib/rley/ptree/terminal_node.rb +7 -7
- data/lib/rley/rley_error.rb +12 -12
- data/lib/rley/sppf/alternative_node.rb +6 -6
- data/lib/rley/sppf/composite_node.rb +7 -7
- data/lib/rley/sppf/epsilon_node.rb +3 -3
- data/lib/rley/sppf/leaf_node.rb +3 -3
- data/lib/rley/sppf/parse_forest.rb +16 -16
- data/lib/rley/sppf/sppf_node.rb +7 -8
- data/lib/rley/sppf/token_node.rb +3 -3
- data/lib/rley/syntax/grammar.rb +5 -5
- data/lib/rley/syntax/grammar_builder.rb +9 -9
- data/lib/rley/syntax/grm_symbol.rb +6 -6
- data/lib/rley/syntax/non_terminal.rb +9 -15
- data/lib/rley/syntax/production.rb +10 -10
- data/lib/rley/syntax/symbol_seq.rb +7 -9
- data/lib/rley/syntax/terminal.rb +4 -5
- data/lib/rley/syntax/verbatim_symbol.rb +3 -3
- data/lib/support/base_tokenizer.rb +19 -18
- data/spec/rley/base/dotted_item_spec.rb +2 -2
- data/spec/rley/engine_spec.rb +17 -15
- data/spec/rley/formatter/asciitree_spec.rb +7 -7
- data/spec/rley/formatter/bracket_notation_spec.rb +13 -13
- data/spec/rley/formatter/json_spec.rb +1 -1
- data/spec/rley/gfg/end_vertex_spec.rb +5 -5
- data/spec/rley/gfg/item_vertex_spec.rb +10 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +3 -3
- data/spec/rley/gfg/shortcut_edge_spec.rb +1 -1
- data/spec/rley/gfg/start_vertex_spec.rb +5 -5
- data/spec/rley/gfg/vertex_spec.rb +3 -3
- data/spec/rley/lexical/token_range_spec.rb +16 -16
- data/spec/rley/lexical/token_spec.rb +2 -2
- data/spec/rley/parse_forest_visitor_spec.rb +165 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +44 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +6 -6
- data/spec/rley/parse_rep/cst_builder_spec.rb +5 -5
- data/spec/rley/parse_rep/groucho_spec.rb +21 -21
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +26 -26
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +6 -6
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +2 -2
- data/spec/rley/parse_tree_visitor_spec.rb +10 -8
- data/spec/rley/parser/error_reason_spec.rb +6 -6
- data/spec/rley/parser/gfg_earley_parser_spec.rb +4 -2
- data/spec/rley/parser/gfg_parsing_spec.rb +4 -8
- data/spec/rley/parser/parse_entry_spec.rb +19 -19
- data/spec/rley/parser/parse_state_spec.rb +5 -5
- data/spec/rley/parser/parse_walker_factory_spec.rb +1 -1
- data/spec/rley/parser/state_set_spec.rb +22 -22
- data/spec/rley/ptree/non_terminal_node_spec.rb +5 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +4 -4
- data/spec/rley/ptree/terminal_node_spec.rb +6 -6
- data/spec/rley/sppf/alternative_node_spec.rb +6 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +3 -3
- data/spec/rley/sppf/token_node_spec.rb +4 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +3 -4
- data/spec/rley/support/grammar_abc_helper.rb +2 -4
- data/spec/rley/support/grammar_ambig01_helper.rb +4 -5
- data/spec/rley/support/grammar_arr_int_helper.rb +4 -5
- data/spec/rley/support/grammar_b_expr_helper.rb +4 -5
- data/spec/rley/support/grammar_l0_helper.rb +10 -11
- data/spec/rley/support/grammar_pb_helper.rb +6 -5
- data/spec/rley/support/grammar_sppf_helper.rb +1 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +5 -5
- data/spec/rley/syntax/grammar_spec.rb +6 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +1 -1
- data/spec/rley/syntax/non_terminal_spec.rb +8 -8
- data/spec/rley/syntax/production_spec.rb +13 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +2 -2
- data/spec/rley/syntax/terminal_spec.rb +5 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +1 -1
- data/spec/spec_helper.rb +0 -12
- data/spec/support/base_tokenizer_spec.rb +7 -2
- metadata +21 -62
- data/.simplecov +0 -8
data/examples/general/left.rb
CHANGED
@@ -8,10 +8,10 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
8
8
|
# The grammar defines a language that consists of a sequence
|
9
9
|
# of 0 or more dots...
|
10
10
|
add_terminals('DOT')
|
11
|
-
|
11
|
+
|
12
12
|
# Grammar with left recursive rule.
|
13
13
|
rule 'l_dots' => []
|
14
|
-
rule 'l_dots' => %w[l_dots DOT]
|
14
|
+
rule 'l_dots' => %w[l_dots DOT]
|
15
15
|
end
|
16
16
|
|
17
17
|
# And now, let's build the grammar...
|
data/examples/general/right.rb
CHANGED
@@ -8,10 +8,10 @@ builder = Rley::Syntax::GrammarBuilder.new do
|
|
8
8
|
# The grammar defines a language that consists of a sequence
|
9
9
|
# of 0 or more dots...
|
10
10
|
add_terminals('DOT')
|
11
|
-
|
11
|
+
|
12
12
|
# Grammar with right recursive rule.
|
13
13
|
rule 'r_dots' => []
|
14
|
-
rule 'r_dots' => %w[DOT r_dots]
|
14
|
+
rule 'r_dots' => %w[DOT r_dots]
|
15
15
|
end
|
16
16
|
|
17
17
|
# And now, let's build the grammar...
|
@@ -19,14 +19,14 @@ module Rley # This module is used as a namespace
|
|
19
19
|
class DottedItem
|
20
20
|
# Production rule
|
21
21
|
# @return [Syntax::Production]
|
22
|
-
attr_reader
|
22
|
+
attr_reader :production
|
23
23
|
|
24
24
|
# Index of the next symbol (from the rhs) after the 'dot'.
|
25
25
|
# If the dot is at the end of the rhs (i.e.) there is no next
|
26
26
|
# symbol, then the position takes the value -1.
|
27
27
|
# If the rhs is empty, then the position is -2
|
28
28
|
# @return [Integer]
|
29
|
-
attr_reader
|
29
|
+
attr_reader :position
|
30
30
|
|
31
31
|
# @param aProduction [Syntax::Production]
|
32
32
|
# @param aPosition [Integer] Position of the dot in rhs of production.
|
@@ -37,7 +37,7 @@ module Rley # This module is used as a namespace
|
|
37
37
|
|
38
38
|
# Return a String representation of the dotted item.
|
39
39
|
# @return [String]
|
40
|
-
def to_s
|
40
|
+
def to_s
|
41
41
|
prefix = "#{production.lhs} => "
|
42
42
|
text_values = production.rhs.map(&:to_s)
|
43
43
|
if position.negative?
|
@@ -47,13 +47,13 @@ module Rley # This module is used as a namespace
|
|
47
47
|
end
|
48
48
|
suffix = text_values.join(' ')
|
49
49
|
|
50
|
-
|
50
|
+
prefix + suffix
|
51
51
|
end
|
52
52
|
|
53
53
|
# Return true if the dot position is at the start of the rhs.
|
54
54
|
# @return [Boolean]
|
55
|
-
def at_start?
|
56
|
-
|
55
|
+
def at_start?
|
56
|
+
position.zero? || position == -2
|
57
57
|
end
|
58
58
|
|
59
59
|
# An item with the dot at the beginning is called
|
@@ -62,41 +62,35 @@ module Rley # This module is used as a namespace
|
|
62
62
|
|
63
63
|
# A dotted item is called a reduce item if the dot is at the end.
|
64
64
|
# @return [Boolean]
|
65
|
-
def reduce_item?
|
66
|
-
|
65
|
+
def reduce_item?
|
66
|
+
position.negative? # Either -1 or -2
|
67
67
|
end
|
68
68
|
|
69
69
|
# The non-terminal symbol that is on the left-side of the production
|
70
70
|
# @return [Syntax::NonTerminal]
|
71
|
-
def lhs
|
72
|
-
|
71
|
+
def lhs
|
72
|
+
production.lhs
|
73
73
|
end
|
74
74
|
|
75
75
|
# Return the symbol before the dot.
|
76
76
|
# nil is returned if the dot is at the start of the rhs
|
77
77
|
# @return [Syntax::GrmSymbol, NilClass]
|
78
|
-
def prev_symbol
|
78
|
+
def prev_symbol
|
79
79
|
before_position = prev_position
|
80
|
-
|
81
|
-
nil
|
82
|
-
else
|
83
|
-
production.rhs[before_position]
|
84
|
-
end
|
85
|
-
|
86
|
-
return result
|
80
|
+
before_position.nil? ? nil : production.rhs[before_position]
|
87
81
|
end
|
88
82
|
|
89
83
|
# Return the symbol after the dot.
|
90
84
|
# nil is returned if the dot is at the end
|
91
85
|
# @return [Syntax::GrmSymbol, NilClass]
|
92
|
-
def next_symbol
|
93
|
-
|
86
|
+
def next_symbol
|
87
|
+
position.negative? ? nil : production.rhs[position]
|
94
88
|
end
|
95
89
|
|
96
90
|
# Calculate the position of the dot if it were moved by
|
97
91
|
# one step on the left.
|
98
92
|
# @return [Integer]
|
99
|
-
def prev_position
|
93
|
+
def prev_position
|
100
94
|
unless @k_prev_position
|
101
95
|
case position
|
102
96
|
when -2, 0
|
@@ -122,7 +116,7 @@ module Rley # This module is used as a namespace
|
|
122
116
|
to_the_left = prev_position
|
123
117
|
return false if to_the_left.nil?
|
124
118
|
|
125
|
-
|
119
|
+
to_the_left == another.position
|
126
120
|
end
|
127
121
|
|
128
122
|
|
@@ -135,15 +129,13 @@ module Rley # This module is used as a namespace
|
|
135
129
|
raise StandardError, 'Out of bound index'
|
136
130
|
end
|
137
131
|
|
138
|
-
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
return index
|
132
|
+
if rhs_size.zero?
|
133
|
+
-2 # Minus 2 at start/end of empty production
|
134
|
+
elsif aPosition == rhs_size
|
135
|
+
-1 # Minus 1 at end of non-empty production
|
136
|
+
else
|
137
|
+
aPosition
|
138
|
+
end
|
147
139
|
end
|
148
140
|
end # class
|
149
141
|
end # module
|
data/lib/rley/constants.rb
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
module Rley # Module used as a namespace
|
7
7
|
# The version number of the gem.
|
8
|
-
Version = '0.7.
|
8
|
+
Version = '0.7.08'
|
9
9
|
|
10
10
|
# Brief description of the gem.
|
11
11
|
Description = "Ruby implementation of the Earley's parsing algorithm"
|
@@ -20,7 +20,7 @@ module Rley # Module used as a namespace
|
|
20
20
|
RootDir = begin
|
21
21
|
require 'pathname' # Load Pathname class from standard library
|
22
22
|
startdir = Pathname(__FILE__).dirname.parent.parent.expand_path
|
23
|
-
startdir
|
23
|
+
"#{startdir}/" # Append trailing slash character to it
|
24
24
|
end
|
25
25
|
end
|
26
26
|
end # module
|
data/lib/rley/engine.rb
CHANGED
@@ -15,15 +15,14 @@ module Rley # This module is used as a namespace
|
|
15
15
|
# @return [Symbol] allowed values are: :parse_tree, :parse_forest
|
16
16
|
:parse_repr,
|
17
17
|
:repr_builder,
|
18
|
-
:diagnose
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
end
|
18
|
+
:diagnose) do
|
19
|
+
# Constructor with default initialization.
|
20
|
+
def initialize
|
21
|
+
super()
|
22
|
+
self.parse_repr = :parse_tree
|
23
|
+
self.repr_builder = :default
|
24
|
+
self.diagnose = false
|
25
|
+
end
|
27
26
|
end
|
28
27
|
|
29
28
|
# Implementation of the GoF Facade design pattern.
|
@@ -43,7 +42,7 @@ module Rley # This module is used as a namespace
|
|
43
42
|
# Engine.new do |config|
|
44
43
|
# config.parse_repr = :parse_forest
|
45
44
|
# end
|
46
|
-
def initialize
|
45
|
+
def initialize
|
47
46
|
@configuration = EngineConfig.new
|
48
47
|
yield configuration if block_given?
|
49
48
|
end
|
@@ -91,7 +90,7 @@ module Rley # This module is used as a namespace
|
|
91
90
|
result = parser.parse(tokens)
|
92
91
|
result.tidy_up!
|
93
92
|
|
94
|
-
|
93
|
+
result
|
95
94
|
end
|
96
95
|
|
97
96
|
# Convert raw parse result into a more convenient representation
|
@@ -99,14 +98,12 @@ module Rley # This module is used as a namespace
|
|
99
98
|
# @param aRawParse [Parser::GFGParsing]
|
100
99
|
# @return [Rley::PTree::ParseTree, Rley::SPPF::ParseForest]
|
101
100
|
def convert(aRawParse)
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
return result
|
101
|
+
case configuration.parse_repr
|
102
|
+
when :parse_tree
|
103
|
+
to_ptree(aRawParse)
|
104
|
+
when :parse_forest
|
105
|
+
to_pforest(aRawParse)
|
106
|
+
end
|
110
107
|
end
|
111
108
|
|
112
109
|
# Convert raw parse result into a parse tree representation
|
@@ -120,7 +117,7 @@ module Rley # This module is used as a namespace
|
|
120
117
|
result = factory.create(configuration.repr_builder)
|
121
118
|
end
|
122
119
|
|
123
|
-
|
120
|
+
result
|
124
121
|
end
|
125
122
|
|
126
123
|
# Convert raw parse result into a parse forest representation
|
@@ -134,7 +131,7 @@ module Rley # This module is used as a namespace
|
|
134
131
|
result = factory.create(configuration.repr_builder)
|
135
132
|
end
|
136
133
|
|
137
|
-
|
134
|
+
result
|
138
135
|
end
|
139
136
|
|
140
137
|
# Build a visitor for the given parse tree
|
@@ -148,13 +145,13 @@ module Rley # This module is used as a namespace
|
|
148
145
|
# @param aPForest [SPPF::ParseForest]
|
149
146
|
# @return [ParseForestVisitor]
|
150
147
|
def pforest_visitor(aPForest)
|
151
|
-
|
148
|
+
ParseForestVisitor.new(aPForest)
|
152
149
|
end
|
153
150
|
|
154
151
|
protected
|
155
152
|
|
156
153
|
def build_parser(aGrammar)
|
157
|
-
|
154
|
+
Parser::GFGEarleyParser.new(aGrammar)
|
158
155
|
end
|
159
156
|
end # class
|
160
157
|
end # module
|
@@ -15,7 +15,7 @@ module Rley # This module is used as a namespace
|
|
15
15
|
# Allowed string values are: 'first', 'last', 'first_and_last', 'other'
|
16
16
|
attr_reader(:ranks)
|
17
17
|
|
18
|
-
# @return [String] The character pattern used for rendering
|
18
|
+
# @return [String] The character pattern used for rendering
|
19
19
|
# a parent - child nesting
|
20
20
|
attr_reader(:nesting_prefix)
|
21
21
|
|
@@ -99,7 +99,7 @@ module Rley # This module is used as a namespace
|
|
99
99
|
end
|
100
100
|
|
101
101
|
# 'root', 'first', 'first_and_last', 'last', 'other'
|
102
|
-
def path_prefix
|
102
|
+
def path_prefix
|
103
103
|
return '' if ranks.empty?
|
104
104
|
|
105
105
|
prefix = +''
|
@@ -116,7 +116,7 @@ module Rley # This module is used as a namespace
|
|
116
116
|
end
|
117
117
|
|
118
118
|
prefix << nesting_prefix
|
119
|
-
|
119
|
+
prefix
|
120
120
|
end
|
121
121
|
|
122
122
|
def emit(aNode, aSuffix = '')
|
@@ -13,13 +13,6 @@ module Rley # This module is used as a namespace
|
|
13
13
|
# For Ruby developers, there is RSyntaxTree by Yoichiro Hasebe.
|
14
14
|
# (accessible via: http://yohasebe.com/rsyntaxtree/)
|
15
15
|
class BracketNotation < BaseFormatter
|
16
|
-
# Constructor.
|
17
|
-
# @param anIO [IO] The output stream to which the rendered grammar
|
18
|
-
# is written.
|
19
|
-
def initialize(anIO)
|
20
|
-
super(anIO)
|
21
|
-
end
|
22
|
-
|
23
16
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
24
17
|
# Notification of a visit event: the visitor is about to visit
|
25
18
|
# a non-terminal node
|
@@ -44,7 +37,7 @@ module Rley # This module is used as a namespace
|
|
44
37
|
# Escape all opening and closing square brackets
|
45
38
|
escape_lbrackets = aTerm.token.lexeme.gsub(/\[/, '\[')
|
46
39
|
escaped = escape_lbrackets.gsub(/\]/, '\]')
|
47
|
-
write(escaped
|
40
|
+
write("#{escaped}]")
|
48
41
|
end
|
49
42
|
|
50
43
|
# Method called by a ParseTreeVisitor to which the formatter subscribed.
|
data/lib/rley/formatter/debug.rb
CHANGED
@@ -19,17 +19,17 @@ module Rley # This module is used as a namespace
|
|
19
19
|
super(anIO)
|
20
20
|
@indentation = 0
|
21
21
|
end
|
22
|
-
|
22
|
+
|
23
23
|
# Indicates that this formatter accepts all visit events
|
24
24
|
# provided their names start with 'before_' or 'after_'
|
25
25
|
# @return [Boolean]
|
26
26
|
def accept_all
|
27
27
|
return true
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
# Ghost method pattern.
|
31
|
-
def method_missing(mth, *args)
|
32
|
-
mth_name = mth.to_s
|
31
|
+
def method_missing(mth, *args)
|
32
|
+
mth_name = mth.to_s
|
33
33
|
case mth_name
|
34
34
|
when /^before_/
|
35
35
|
output_event(mth_name, indentation)
|
@@ -44,11 +44,11 @@ module Rley # This module is used as a namespace
|
|
44
44
|
|
45
45
|
private
|
46
46
|
|
47
|
-
def indent
|
47
|
+
def indent
|
48
48
|
@indentation += 1
|
49
49
|
end
|
50
50
|
|
51
|
-
def dedent
|
51
|
+
def dedent
|
52
52
|
@indentation -= 1
|
53
53
|
end
|
54
54
|
|
data/lib/rley/formatter/json.rb
CHANGED
data/lib/rley/gfg/call_edge.rb
CHANGED
@@ -16,7 +16,7 @@ module Rley # This module is used as a namespace
|
|
16
16
|
# Pre-condition: theSuccessor is a StartVertex
|
17
17
|
def initialize(thePredecessor, theSuccessor)
|
18
18
|
super(thePredecessor, theSuccessor)
|
19
|
-
do_set_key(thePredecessor, theSuccessor)
|
19
|
+
do_set_key(thePredecessor, theSuccessor)
|
20
20
|
end
|
21
21
|
|
22
22
|
private
|
data/lib/rley/gfg/edge.rb
CHANGED
@@ -14,18 +14,18 @@ module Rley # This module is used as a namespace
|
|
14
14
|
# @param theSuccessor [Vertex]
|
15
15
|
def initialize(thePredecessor, theSuccessor)
|
16
16
|
@successor = theSuccessor
|
17
|
-
thePredecessor
|
17
|
+
thePredecessor&.add_edge(self)
|
18
18
|
end
|
19
19
|
|
20
20
|
# @return [String]
|
21
|
-
def to_s
|
21
|
+
def to_s
|
22
22
|
" --> #{successor.label}"
|
23
23
|
end
|
24
|
-
|
25
|
-
# Returns a string containing a human-readable representation of the
|
24
|
+
|
25
|
+
# Returns a string containing a human-readable representation of the
|
26
26
|
# production.
|
27
27
|
# @return [String]
|
28
|
-
def inspect
|
28
|
+
def inspect
|
29
29
|
to_s
|
30
30
|
end
|
31
31
|
end # class
|
data/lib/rley/gfg/end_vertex.rb
CHANGED
@@ -10,12 +10,8 @@ module Rley # This module is used as a namespace
|
|
10
10
|
# Responsibilities (in addition to inherited ones):
|
11
11
|
# - Know its related non-terminal symbol
|
12
12
|
class EndVertex < NonTerminalVertex
|
13
|
-
def
|
14
|
-
|
15
|
-
end
|
16
|
-
|
17
|
-
def label()
|
18
|
-
return "#{non_terminal}."
|
13
|
+
def label
|
14
|
+
"#{non_terminal}."
|
19
15
|
end
|
20
16
|
end # class
|
21
17
|
end # module
|
@@ -9,12 +9,8 @@ module Rley # This module is used as a namespace
|
|
9
9
|
# Responsibilities:
|
10
10
|
# - To know the successor vertex
|
11
11
|
class EpsilonEdge < Edge
|
12
|
-
# The destination vertex of the edge
|
12
|
+
# The destination vertex of the edge.
|
13
13
|
attr_reader :successor
|
14
|
-
|
15
|
-
def initialize(thePredecessor, theSuccessor)
|
16
|
-
super(thePredecessor, theSuccessor)
|
17
|
-
end
|
18
14
|
end # class
|
19
15
|
end # module
|
20
16
|
end # module
|
@@ -13,26 +13,26 @@ require_relative 'shortcut_edge'
|
|
13
13
|
module Rley # This module is used as a namespace
|
14
14
|
module GFG # This module is used as a namespace
|
15
15
|
# A Grammar Flow Graph (GFG) represents the parsing states of productions
|
16
|
-
# rules from a context-free grammar. This representation is based on a
|
17
|
-
# directed graph structure. The parsing process can then be re-formulated
|
16
|
+
# rules from a context-free grammar. This representation is based on a
|
17
|
+
# directed graph structure. The parsing process can then be re-formulated
|
18
18
|
# as a path problem in the graph. The theory behind GFGs can be found in
|
19
19
|
# papers. The first article on GFG can be found here:
|
20
20
|
# https://apps.cs.utexas.edu/tech_reports/reports/tr/TR-2102.pdf
|
21
|
-
# There are three types of vertex in a GFG:
|
21
|
+
# There are three types of vertex in a GFG:
|
22
22
|
# start vertex, end vertex and item vertex.
|
23
23
|
# For each non-terminal symbol N of the grammar, there is:
|
24
24
|
# a start vertex with label '.N'
|
25
25
|
# an end vertex with label 'N.'
|
26
26
|
# For each production rule of the grammar:
|
27
27
|
# N => s1 s2 s3 (...) sk
|
28
|
-
# I.e. a rule with k grammar symbols in its right-handed side.
|
28
|
+
# I.e. a rule with k grammar symbols in its right-handed side.
|
29
29
|
# For such a rule there will be k + 1 item vertices. By convention,
|
30
30
|
# the first item vertex is labelled as 'N => . s1 s2 s3 (...) sk'
|
31
31
|
# the second item vertex is labelled as 'N => s1 . s2 s3 (...) sk'
|
32
32
|
# the third item vertex is labelled as 'N => s1 s2 . s3 (...) sk'
|
33
33
|
# and so on. In other words, the labels are obtained by moving a dot
|
34
|
-
# in successive positions in the rhs. The dot represents the
|
35
|
-
# parse progress for the production rule. Symbols on the left of the
|
34
|
+
# in successive positions in the rhs. The dot represents the
|
35
|
+
# parse progress for the production rule. Symbols on the left of the
|
36
36
|
# dot represent the symbols that were successfully matched in the input.
|
37
37
|
# A GFG has three types of directed edges linking the vertices.
|
38
38
|
# call edge, return edge and scan edge.
|
@@ -60,24 +60,24 @@ module Rley # This module is used as a namespace
|
|
60
60
|
|
61
61
|
build_graph(theDottedItems)
|
62
62
|
end
|
63
|
-
|
64
|
-
# Returns a string containing a human-readable representation of the
|
63
|
+
|
64
|
+
# Returns a string containing a human-readable representation of the
|
65
65
|
# production.
|
66
66
|
# @return [String]
|
67
|
-
def inspect
|
67
|
+
def inspect
|
68
68
|
result = +"#<#{self.class.name}:#{object_id}"
|
69
69
|
result << ' @vertices=['
|
70
70
|
list = vertices.map { |v| "#<#{v.selfie}>" }
|
71
71
|
result << list.join(', ')
|
72
72
|
result << '] '
|
73
73
|
edges = []
|
74
|
-
vertices.each do |v|
|
74
|
+
vertices.each do |v|
|
75
75
|
edges << v.edges do |e|
|
76
76
|
result << "#{v.object_id} #{e.inspect}"
|
77
77
|
end
|
78
78
|
end
|
79
79
|
result << "edges=[#{edges.join(",\n ")}]>"
|
80
|
-
|
80
|
+
result
|
81
81
|
end
|
82
82
|
|
83
83
|
# Retrieve the vertex with given vertex label.
|
@@ -92,7 +92,7 @@ module Rley # This module is used as a namespace
|
|
92
92
|
# If one wants to remove useless rules, then do first:
|
93
93
|
# elimination of non-generating symbols
|
94
94
|
# then elimination of unreachable symbols
|
95
|
-
def diagnose
|
95
|
+
def diagnose
|
96
96
|
mark_unreachable_symbols
|
97
97
|
end
|
98
98
|
|
@@ -121,6 +121,7 @@ module Rley # This module is used as a namespace
|
|
121
121
|
# @param aStartVertex [StartVertex] the depth-first traversal begins
|
122
122
|
# from here
|
123
123
|
# @param _visitAction [Proc] block called when a new graph vertex is found
|
124
|
+
# rubocop: disable Lint/Loop
|
124
125
|
def traverse_df(aStartVertex, &_visitAction)
|
125
126
|
visited = Set.new
|
126
127
|
stack = []
|
@@ -129,13 +130,13 @@ module Rley # This module is used as a namespace
|
|
129
130
|
|
130
131
|
begin
|
131
132
|
# print_vertex( 'Traversing', visitee)
|
132
|
-
|
133
|
+
|
133
134
|
first_time = !visited.include?(visitee)
|
134
135
|
if first_time
|
135
136
|
yield(visitee)
|
136
137
|
visited << visitee
|
137
|
-
end
|
138
|
-
|
138
|
+
end
|
139
|
+
|
139
140
|
case visitee
|
140
141
|
when Rley::GFG::StartVertex
|
141
142
|
if first_time
|
@@ -155,12 +156,12 @@ module Rley # This module is used as a namespace
|
|
155
156
|
if stack.last.done?
|
156
157
|
popped = stack.pop
|
157
158
|
break if stack.empty?
|
158
|
-
|
159
|
+
|
159
160
|
# puts "Popped!"
|
160
161
|
return_key = popped.in_edge.key.sub(/^CALL/, 'RET')
|
161
162
|
curr_edge = visitee.edges.find { |e| e.key == return_key }
|
162
163
|
else
|
163
|
-
curr_edge = stack.last.next_edge
|
164
|
+
curr_edge = stack.last.next_edge
|
164
165
|
end
|
165
166
|
|
166
167
|
else
|
@@ -173,6 +174,7 @@ module Rley # This module is used as a namespace
|
|
173
174
|
last_one = end_vertex_for[aStartVertex.non_terminal]
|
174
175
|
yield(last_one) unless visited.include?(last_one)
|
175
176
|
end
|
177
|
+
# rubocop: enable Lint/Loop
|
176
178
|
|
177
179
|
private
|
178
180
|
|
@@ -183,16 +185,16 @@ module Rley # This module is used as a namespace
|
|
183
185
|
@start_vertex = aVertex if vertices.empty?
|
184
186
|
vertices << aVertex
|
185
187
|
end
|
186
|
-
|
188
|
+
|
187
189
|
# For debugging purposes
|
188
190
|
def print_vertex(aText, aVertex)
|
189
|
-
print aText
|
191
|
+
print "#{aText} "
|
190
192
|
if aVertex.kind_of?(NonTerminalVertex)
|
191
193
|
puts "#{aVertex.class} #{aVertex.non_terminal.name}"
|
192
194
|
else
|
193
195
|
p(aVertex.label)
|
194
|
-
end
|
195
|
-
end
|
196
|
+
end
|
197
|
+
end
|
196
198
|
|
197
199
|
def build_graph(theDottedItems)
|
198
200
|
build_all_starts_ends(theDottedItems)
|
@@ -200,7 +202,7 @@ module Rley # This module is used as a namespace
|
|
200
202
|
curr_prod = nil
|
201
203
|
theDottedItems.each_with_index do |d_item, index_item|
|
202
204
|
next unless curr_prod.nil? || curr_prod != d_item.production
|
203
|
-
|
205
|
+
|
204
206
|
# Another production found...
|
205
207
|
curr_prod = d_item.production
|
206
208
|
if curr_prod.empty?
|
@@ -268,6 +270,7 @@ module Rley # This module is used as a namespace
|
|
268
270
|
# add a shortcut edge:
|
269
271
|
# ( N => α[1] .A α[n] ) -> ( N => α[1] A. α[n] )
|
270
272
|
def augment_graph(theDottedItems, firstItemPos)
|
273
|
+
# rubocop: disable Lint/RedundantSafeNavigation
|
271
274
|
production = theDottedItems[firstItemPos].production
|
272
275
|
max_index = production.rhs.size + 1
|
273
276
|
prev_vertex = nil
|
@@ -298,6 +301,7 @@ module Rley # This module is used as a namespace
|
|
298
301
|
prev_vertex = new_vertex
|
299
302
|
end
|
300
303
|
end
|
304
|
+
# rubocop: enable Lint/RedundantSafeNavigation
|
301
305
|
|
302
306
|
# Create an entry edge for the given vertex
|
303
307
|
def build_entry_edge(theVertex)
|
@@ -359,7 +363,7 @@ module Rley # This module is used as a namespace
|
|
359
363
|
# Mark non-terminal symbols that cannot be derived from the start symbol.
|
360
364
|
# In a GFG, a non-terminal symbol N is unreachable if there is no path
|
361
365
|
# from the start symbol to the start node .N
|
362
|
-
def mark_unreachable_symbols
|
366
|
+
def mark_unreachable_symbols
|
363
367
|
# Mark all non-terminals as unreachable
|
364
368
|
start_vertex_for.each_value do |a_vertex|
|
365
369
|
a_vertex.non_terminal.unreachable = true
|