rley 0.7.03 → 0.7.08
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +362 -62
- data/.travis.yml +6 -7
- data/CHANGELOG.md +20 -1
- data/LICENSE.txt +1 -1
- data/README.md +6 -7
- data/Rakefile +2 -0
- data/appveyor.yml +2 -4
- data/examples/NLP/benchmark_pico_en.rb +2 -0
- data/examples/NLP/engtagger.rb +193 -188
- data/examples/NLP/nano_eng/nano_en_demo.rb +2 -0
- data/examples/NLP/nano_eng/nano_grammar.rb +7 -5
- data/examples/NLP/pico_en_demo.rb +2 -0
- data/examples/data_formats/JSON/cli_options.rb +3 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +14 -9
- data/examples/data_formats/JSON/json_ast_nodes.rb +14 -21
- data/examples/data_formats/JSON/json_demo.rb +2 -0
- data/examples/data_formats/JSON/json_grammar.rb +4 -2
- data/examples/data_formats/JSON/json_lexer.rb +10 -8
- data/examples/data_formats/JSON/json_minifier.rb +3 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +15 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +25 -37
- data/examples/general/calc_iter1/calc_demo.rb +2 -0
- data/examples/general/calc_iter1/calc_grammar.rb +4 -2
- data/examples/general/calc_iter1/calc_lexer.rb +8 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +7 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +7 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +29 -43
- data/examples/general/calc_iter2/calc_demo.rb +2 -0
- data/examples/general/calc_iter2/calc_grammar.rb +5 -3
- data/examples/general/calc_iter2/calc_lexer.rb +13 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +28 -26
- data/examples/general/left.rb +4 -2
- data/examples/general/right.rb +4 -2
- data/lib/rley.rb +2 -0
- data/lib/rley/base/base_parser.rb +2 -0
- data/lib/rley/base/dotted_item.rb +38 -41
- data/lib/rley/base/grm_items_builder.rb +2 -0
- data/lib/rley/constants.rb +5 -3
- data/lib/rley/engine.rb +22 -24
- data/lib/rley/formatter/asciitree.rb +6 -4
- data/lib/rley/formatter/base_formatter.rb +2 -0
- data/lib/rley/formatter/bracket_notation.rb +3 -8
- data/lib/rley/formatter/debug.rb +8 -6
- data/lib/rley/formatter/json.rb +4 -2
- data/lib/rley/gfg/call_edge.rb +3 -1
- data/lib/rley/gfg/edge.rb +7 -5
- data/lib/rley/gfg/end_vertex.rb +4 -6
- data/lib/rley/gfg/epsilon_edge.rb +3 -5
- data/lib/rley/gfg/grm_flow_graph.rb +31 -25
- data/lib/rley/gfg/item_vertex.rb +12 -22
- data/lib/rley/gfg/non_terminal_vertex.rb +6 -4
- data/lib/rley/gfg/return_edge.rb +2 -0
- data/lib/rley/gfg/scan_edge.rb +3 -1
- data/lib/rley/gfg/shortcut_edge.rb +4 -2
- data/lib/rley/gfg/start_vertex.rb +6 -8
- data/lib/rley/gfg/vertex.rb +47 -41
- data/lib/rley/lexical/token.rb +3 -1
- data/lib/rley/lexical/token_range.rb +8 -6
- data/lib/rley/parse_forest_visitor.rb +7 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +11 -11
- data/lib/rley/parse_rep/cst_builder.rb +7 -4
- data/lib/rley/parse_rep/parse_forest_builder.rb +36 -25
- data/lib/rley/parse_rep/parse_forest_factory.rb +5 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +18 -13
- data/lib/rley/parse_rep/parse_tree_builder.rb +15 -15
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -25
- data/lib/rley/parse_tree_visitor.rb +3 -1
- data/lib/rley/parser/error_reason.rb +9 -8
- data/lib/rley/parser/gfg_chart.rb +54 -22
- data/lib/rley/parser/gfg_earley_parser.rb +3 -1
- data/lib/rley/parser/gfg_parsing.rb +51 -31
- data/lib/rley/parser/parse_entry.rb +29 -33
- data/lib/rley/parser/parse_entry_set.rb +32 -27
- data/lib/rley/parser/parse_entry_tracker.rb +6 -4
- data/lib/rley/parser/parse_state.rb +18 -21
- data/lib/rley/parser/parse_state_tracker.rb +6 -4
- data/lib/rley/parser/parse_tracer.rb +15 -13
- data/lib/rley/parser/parse_walker_factory.rb +28 -29
- data/lib/rley/parser/state_set.rb +11 -10
- data/lib/rley/ptree/non_terminal_node.rb +10 -6
- data/lib/rley/ptree/parse_tree.rb +6 -4
- data/lib/rley/ptree/parse_tree_node.rb +7 -5
- data/lib/rley/ptree/terminal_node.rb +9 -7
- data/lib/rley/rley_error.rb +12 -10
- data/lib/rley/sppf/alternative_node.rb +8 -6
- data/lib/rley/sppf/composite_node.rb +9 -7
- data/lib/rley/sppf/epsilon_node.rb +5 -3
- data/lib/rley/sppf/leaf_node.rb +5 -3
- data/lib/rley/sppf/non_terminal_node.rb +2 -0
- data/lib/rley/sppf/parse_forest.rb +19 -17
- data/lib/rley/sppf/sppf_node.rb +9 -8
- data/lib/rley/sppf/token_node.rb +5 -3
- data/lib/rley/syntax/grammar.rb +7 -5
- data/lib/rley/syntax/grammar_builder.rb +11 -9
- data/lib/rley/syntax/grm_symbol.rb +8 -6
- data/lib/rley/syntax/literal.rb +2 -0
- data/lib/rley/syntax/non_terminal.rb +11 -15
- data/lib/rley/syntax/production.rb +13 -11
- data/lib/rley/syntax/symbol_seq.rb +10 -10
- data/lib/rley/syntax/terminal.rb +6 -5
- data/lib/rley/syntax/verbatim_symbol.rb +5 -3
- data/lib/support/base_tokenizer.rb +23 -20
- data/spec/rley/base/dotted_item_spec.rb +4 -2
- data/spec/rley/base/grm_items_builder_spec.rb +2 -0
- data/spec/rley/engine_spec.rb +47 -9
- data/spec/rley/formatter/asciitree_spec.rb +11 -9
- data/spec/rley/formatter/bracket_notation_spec.rb +16 -14
- data/spec/rley/formatter/debug_spec.rb +4 -2
- data/spec/rley/formatter/json_spec.rb +5 -3
- data/spec/rley/gfg/call_edge_spec.rb +2 -0
- data/spec/rley/gfg/edge_spec.rb +2 -0
- data/spec/rley/gfg/end_vertex_spec.rb +7 -5
- data/spec/rley/gfg/epsilon_edge_spec.rb +2 -0
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -0
- data/spec/rley/gfg/item_vertex_spec.rb +12 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +5 -3
- data/spec/rley/gfg/return_edge_spec.rb +2 -0
- data/spec/rley/gfg/scan_edge_spec.rb +2 -0
- data/spec/rley/gfg/shortcut_edge_spec.rb +3 -1
- data/spec/rley/gfg/start_vertex_spec.rb +7 -5
- data/spec/rley/gfg/vertex_spec.rb +5 -3
- data/spec/rley/lexical/token_range_spec.rb +18 -16
- data/spec/rley/lexical/token_spec.rb +4 -2
- data/spec/rley/parse_forest_visitor_spec.rb +167 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +46 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +8 -6
- data/spec/rley/parse_rep/cst_builder_spec.rb +7 -5
- data/spec/rley/parse_rep/groucho_spec.rb +25 -25
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +28 -26
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -6
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +4 -2
- data/spec/rley/parse_tree_visitor_spec.rb +12 -8
- data/spec/rley/parser/error_reason_spec.rb +8 -6
- data/spec/rley/parser/gfg_chart_spec.rb +17 -4
- data/spec/rley/parser/gfg_earley_parser_spec.rb +16 -11
- data/spec/rley/parser/gfg_parsing_spec.rb +41 -252
- data/spec/rley/parser/parse_entry_set_spec.rb +2 -0
- data/spec/rley/parser/parse_entry_spec.rb +21 -19
- data/spec/rley/parser/parse_state_spec.rb +7 -5
- data/spec/rley/parser/parse_tracer_spec.rb +16 -14
- data/spec/rley/parser/parse_walker_factory_spec.rb +10 -8
- data/spec/rley/parser/state_set_spec.rb +24 -22
- data/spec/rley/ptree/non_terminal_node_spec.rb +7 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +6 -4
- data/spec/rley/ptree/parse_tree_spec.rb +2 -0
- data/spec/rley/ptree/terminal_node_spec.rb +8 -6
- data/spec/rley/sppf/alternative_node_spec.rb +8 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +5 -3
- data/spec/rley/sppf/token_node_spec.rb +6 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +5 -4
- data/spec/rley/support/expectation_helper.rb +2 -0
- data/spec/rley/support/grammar_abc_helper.rb +4 -4
- data/spec/rley/support/grammar_ambig01_helper.rb +6 -5
- data/spec/rley/support/grammar_arr_int_helper.rb +6 -5
- data/spec/rley/support/grammar_b_expr_helper.rb +6 -5
- data/spec/rley/support/grammar_helper.rb +2 -0
- data/spec/rley/support/grammar_l0_helper.rb +15 -16
- data/spec/rley/support/grammar_pb_helper.rb +8 -5
- data/spec/rley/support/grammar_sppf_helper.rb +3 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +7 -5
- data/spec/rley/syntax/grammar_spec.rb +8 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +3 -1
- data/spec/rley/syntax/literal_spec.rb +2 -0
- data/spec/rley/syntax/non_terminal_spec.rb +10 -8
- data/spec/rley/syntax/production_spec.rb +15 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +4 -2
- data/spec/rley/syntax/terminal_spec.rb +7 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +3 -1
- data/spec/spec_helper.rb +2 -12
- data/spec/support/base_tokenizer_spec.rb +9 -2
- metadata +21 -63
- data/.simplecov +0 -7
- data/Gemfile +0 -8
data/lib/rley/syntax/grammar.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require_relative '../rley_error'
|
3
5
|
|
@@ -46,13 +48,13 @@ module Rley # This module is used as a namespace
|
|
46
48
|
end
|
47
49
|
|
48
50
|
# @return [Array] The list of non-terminals in the grammar.
|
49
|
-
def non_terminals
|
51
|
+
def non_terminals
|
50
52
|
@non_terminals ||= symbols.select { |s| s.kind_of?(NonTerminal) }
|
51
53
|
end
|
52
54
|
|
53
55
|
# @return [Production] The start production of the grammar (i.e.
|
54
56
|
# the rule that specifies the syntax for the start symbol).
|
55
|
-
def start_production
|
57
|
+
def start_production
|
56
58
|
return rules[0]
|
57
59
|
end
|
58
60
|
|
@@ -97,7 +99,7 @@ module Rley # This module is used as a namespace
|
|
97
99
|
end
|
98
100
|
|
99
101
|
# Perform some check of the grammar.
|
100
|
-
def diagnose
|
102
|
+
def diagnose
|
101
103
|
mark_undefined
|
102
104
|
mark_generative
|
103
105
|
compute_nullable
|
@@ -191,7 +193,7 @@ module Rley # This module is used as a namespace
|
|
191
193
|
|
192
194
|
# For each non-terminal determine whether it is nullable or not.
|
193
195
|
# A nullable nonterminal is a nonterminal that can match an empty string.
|
194
|
-
def compute_nullable
|
196
|
+
def compute_nullable
|
195
197
|
non_terminals.each { |nterm| nterm.nullable = false }
|
196
198
|
nullable_sets = [direct_nullable]
|
197
199
|
|
@@ -234,7 +236,7 @@ module Rley # This module is used as a namespace
|
|
234
236
|
nullables << prod.lhs
|
235
237
|
end
|
236
238
|
|
237
|
-
|
239
|
+
nullables
|
238
240
|
end
|
239
241
|
|
240
242
|
# For each production determine whether it is nullable or not.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require_relative 'terminal'
|
3
5
|
require_relative 'non_terminal'
|
@@ -16,12 +18,12 @@ module Rley # This module is used as a namespace
|
|
16
18
|
# to the matching grammar symbol object.
|
17
19
|
attr_reader(:symbols)
|
18
20
|
|
19
|
-
# @return [Array<Production>] The list of production rules for
|
21
|
+
# @return [Array<Production>] The list of production rules for
|
20
22
|
# the grammar to build.
|
21
23
|
attr_reader(:productions)
|
22
24
|
|
23
25
|
# Creates a new grammar builder.
|
24
|
-
# @param aBlock [Proc] code block used to build the grammar.
|
26
|
+
# @param aBlock [Proc] code block used to build the grammar.
|
25
27
|
# @example Building a tiny English grammar
|
26
28
|
# builder = Rley::Syntax::GrammarBuilder.new do
|
27
29
|
# add_terminals('n', 'v', 'adj', 'det')
|
@@ -43,7 +45,7 @@ module Rley # This module is used as a namespace
|
|
43
45
|
# @param aSymbolName [String] the name of a grammar symbol.
|
44
46
|
# @return [GrmSymbol] the retrieved symbol object.
|
45
47
|
def [](aSymbolName)
|
46
|
-
|
48
|
+
symbols[aSymbolName]
|
47
49
|
end
|
48
50
|
|
49
51
|
# Add the given terminal symbols to the grammar of the language
|
@@ -65,7 +67,7 @@ module Rley # This module is used as a namespace
|
|
65
67
|
# builder.rule('A' => ['a', 'A', 'c']) # 'rule' is a synonym
|
66
68
|
# builder.rule('A' => %w[a A c]) # Use %w syntax for Array of String
|
67
69
|
# builder.rule 'A' => %w[a A c] # Call parentheses are optional
|
68
|
-
# @param aProductionRepr [Hash{String, Array<String>}]
|
70
|
+
# @param aProductionRepr [Hash{String, Array<String>}]
|
69
71
|
# A Hash-based representation of a production.
|
70
72
|
# @return [Production] The created Production instance
|
71
73
|
def add_production(aProductionRepr)
|
@@ -83,14 +85,14 @@ module Rley # This module is used as a namespace
|
|
83
85
|
new_prod = Production.new(lhs, rhs_members)
|
84
86
|
productions << new_prod
|
85
87
|
end
|
86
|
-
|
88
|
+
|
87
89
|
return productions.last
|
88
90
|
end
|
89
91
|
|
90
92
|
# Given the grammar symbols and productions added to the builder,
|
91
93
|
# build the resulting grammar (if not yet done).
|
92
|
-
# @return [Grammar] the created grammar object.
|
93
|
-
def grammar
|
94
|
+
# @return [Grammar] the created grammar object.
|
95
|
+
def grammar
|
94
96
|
unless @grammar
|
95
97
|
raise StandardError, 'No symbol found for grammar' if symbols.empty?
|
96
98
|
if productions.empty?
|
@@ -111,7 +113,7 @@ module Rley # This module is used as a namespace
|
|
111
113
|
unused = all_terminals.reject { |a_term| in_use.include?(a_term) }
|
112
114
|
unless unused.empty?
|
113
115
|
suffix = "#{unused.map(&:name).join(', ')}."
|
114
|
-
raise StandardError,
|
116
|
+
raise StandardError, "Useless terminal symbol(s): #{suffix}"
|
115
117
|
end
|
116
118
|
|
117
119
|
@grammar = Grammar.new(productions.dup)
|
@@ -164,7 +166,7 @@ module Rley # This module is used as a namespace
|
|
164
166
|
unless symbols.include? aSymbolName
|
165
167
|
symbols[aSymbolName] = NonTerminal.new(aSymbolName)
|
166
168
|
end
|
167
|
-
|
169
|
+
symbols[aSymbolName]
|
168
170
|
end
|
169
171
|
end # class
|
170
172
|
end # module
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rley # This module is used as a namespace
|
2
4
|
module Syntax # This module is used as a namespace
|
3
5
|
# Abstract class for grammar symbols.
|
@@ -21,19 +23,19 @@ module Rley # This module is used as a namespace
|
|
21
23
|
|
22
24
|
# The String representation of the grammar symbol
|
23
25
|
# @return [String]
|
24
|
-
def to_s
|
25
|
-
|
26
|
+
def to_s
|
27
|
+
name.to_s
|
26
28
|
end
|
27
29
|
|
28
30
|
# @return [Boolean] true iff the symbol is a terminal
|
29
|
-
def terminal?
|
31
|
+
def terminal?
|
30
32
|
# Default implementation to override if necessary
|
31
|
-
|
33
|
+
false
|
32
34
|
end
|
33
35
|
|
34
36
|
# @return [Boolean] true iff the symbol is generative.
|
35
|
-
def generative?
|
36
|
-
|
37
|
+
def generative?
|
38
|
+
@generative
|
37
39
|
end
|
38
40
|
end # class
|
39
41
|
end # module
|
data/lib/rley/syntax/literal.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'grm_symbol' # Load superclass
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
@@ -7,41 +9,35 @@ module Rley # This module is used as a namespace
|
|
7
9
|
class NonTerminal < GrmSymbol
|
8
10
|
# A non-terminal symbol is nullable if it can match an empty string.
|
9
11
|
attr_writer(:nullable)
|
10
|
-
|
12
|
+
|
11
13
|
# A non-terminal symbol is undefined if no production rule in the grammar
|
12
14
|
# has that non-terminal symbol in its left-hand side.
|
13
15
|
attr_writer(:undefined)
|
14
|
-
|
15
|
-
# A non-terminal symbol is unreachable if it cannot be reached (derived)
|
16
|
+
|
17
|
+
# A non-terminal symbol is unreachable if it cannot be reached (derived)
|
16
18
|
# from the start symbol.
|
17
19
|
attr_writer(:unreachable)
|
18
20
|
|
19
|
-
# Constructor.
|
20
|
-
# @param aName [String] The name of the grammar symbol.
|
21
|
-
def initialize(aName)
|
22
|
-
super(aName)
|
23
|
-
end
|
24
|
-
|
25
21
|
# @return [false/true] Return true if the symbol derives
|
26
22
|
# the empty string. A non-terminal symbol is nullable when it
|
27
23
|
# can match zero input tokens.
|
28
24
|
# The "nullability" of a non-terminal can practically be determined once
|
29
25
|
# all the production rules of the grammar are specified.
|
30
|
-
def nullable?
|
26
|
+
def nullable?
|
31
27
|
return @nullable
|
32
28
|
end
|
33
|
-
|
29
|
+
|
34
30
|
# @return [false/true] Return true if the symbol doesn't appear
|
35
31
|
# on the left-hand side of any production rule.
|
36
|
-
def undefined?
|
32
|
+
def undefined?
|
37
33
|
return @undefined
|
38
34
|
end
|
39
|
-
|
35
|
+
|
40
36
|
# @return [false/true] Return true if the symbol cannot be derived
|
41
37
|
# from the start symbol.
|
42
|
-
def unreachable?
|
38
|
+
def unreachable?
|
43
39
|
return @unreachable
|
44
|
-
end
|
40
|
+
end
|
45
41
|
end # class
|
46
42
|
end # module
|
47
43
|
end # module
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'symbol_seq'
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
@@ -44,33 +46,33 @@ module Rley # This module is used as a namespace
|
|
44
46
|
|
45
47
|
# Is the rhs empty?
|
46
48
|
# @return [Boolean] true if the rhs has no members.
|
47
|
-
def empty?
|
48
|
-
|
49
|
+
def empty?
|
50
|
+
rhs.empty?
|
49
51
|
end
|
50
52
|
|
51
53
|
# Return true iff the production is generative
|
52
|
-
def generative?
|
53
|
-
if @generative.nil?
|
54
|
-
end
|
54
|
+
def generative?
|
55
|
+
# if @generative.nil?
|
56
|
+
# end
|
55
57
|
|
56
|
-
|
58
|
+
@generative
|
57
59
|
end
|
58
60
|
|
59
61
|
# @return [Boolean] true iff the production is nullable
|
60
|
-
def nullable?
|
61
|
-
|
62
|
+
def nullable?
|
63
|
+
@nullable
|
62
64
|
end
|
63
65
|
|
64
66
|
# Returns a string containing a human-readable representation of the
|
65
67
|
# production.
|
66
68
|
# @return [String]
|
67
|
-
def inspect
|
68
|
-
result = "#<#{self.class.name}:#{object_id}"
|
69
|
+
def inspect
|
70
|
+
result = +"#<#{self.class.name}:#{object_id}"
|
69
71
|
result << " @name=\"#{name}\""
|
70
72
|
result << " @lhs=#{lhs.name}"
|
71
73
|
result << " @rhs=#{rhs.inspect}"
|
72
74
|
result << " @generative=#{@generative}>"
|
73
|
-
|
75
|
+
result
|
74
76
|
end
|
75
77
|
|
76
78
|
# A setter for the production name
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'forwardable'
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
@@ -10,9 +12,9 @@ module Rley # This module is used as a namespace
|
|
10
12
|
# @return [Array<GrmSymbol>] The sequence of symbols
|
11
13
|
attr_reader(:members)
|
12
14
|
|
13
|
-
# Create a sequence of grammar symbols (as in right-hand side of
|
15
|
+
# Create a sequence of grammar symbols (as in right-hand side of
|
14
16
|
# a production rule).
|
15
|
-
# @param theSymbols [Array<GrmSymbol>] An array of symbols.
|
17
|
+
# @param theSymbols [Array<GrmSymbol>] An array of symbols.
|
16
18
|
def initialize(theSymbols)
|
17
19
|
@members = theSymbols.dup
|
18
20
|
end
|
@@ -31,20 +33,18 @@ module Rley # This module is used as a namespace
|
|
31
33
|
raise StandardError, msg
|
32
34
|
end
|
33
35
|
|
34
|
-
|
36
|
+
result
|
35
37
|
end
|
36
|
-
|
37
|
-
# Returns a string containing a human-readable representation of the
|
38
|
+
|
39
|
+
# Returns a string containing a human-readable representation of the
|
38
40
|
# sequence of symbols.
|
39
41
|
# @return [String]
|
40
|
-
def inspect
|
41
|
-
result = "#<#{self.class.name}:#{object_id}"
|
42
|
+
def inspect
|
43
|
+
result = +"#<#{self.class.name}:#{object_id}"
|
42
44
|
symbol_names = members.map(&:name)
|
43
45
|
result << " @members=#{symbol_names}>"
|
44
|
-
|
46
|
+
result
|
45
47
|
end
|
46
|
-
|
47
|
-
|
48
48
|
end # class
|
49
49
|
end # module
|
50
50
|
end # module
|
data/lib/rley/syntax/terminal.rb
CHANGED
@@ -1,30 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'grm_symbol' # Load superclass
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
4
6
|
module Syntax # This module is used as a namespace
|
5
|
-
# A terminal symbol represents a class of words in the language
|
7
|
+
# A terminal symbol represents a class of words in the language
|
6
8
|
# defined by the grammar.
|
7
9
|
class Terminal < GrmSymbol
|
8
|
-
|
9
10
|
# Constructor.
|
10
11
|
# @param aName [String] The name of the grammar symbol.
|
11
12
|
def initialize(aName)
|
12
13
|
super(aName)
|
13
14
|
self.generative = true
|
14
15
|
end
|
15
|
-
|
16
|
+
|
16
17
|
# Return true iff the symbol is a terminal
|
17
18
|
def terminal?
|
18
19
|
return true
|
19
20
|
end
|
20
|
-
|
21
|
+
|
21
22
|
# @return [false] Return true if the symbol derives
|
22
23
|
# the empty string. As a terminal symbol corresponds to an input token,
|
23
24
|
# it is by definition non-nullable.
|
24
25
|
def nullable?
|
25
26
|
false
|
26
27
|
end
|
27
|
-
|
28
|
+
|
28
29
|
def to_s
|
29
30
|
name
|
30
31
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'terminal' # Load superclass
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
@@ -12,11 +14,11 @@ module Rley # This module is used as a namespace
|
|
12
14
|
super(aText) # Do we need to separate the text from the name?
|
13
15
|
@text = aText.dup
|
14
16
|
end
|
15
|
-
|
17
|
+
|
16
18
|
# The String representation of the verbatim symbol
|
17
19
|
# @return [String]
|
18
|
-
def to_s
|
19
|
-
|
20
|
+
def to_s
|
21
|
+
"'#{text}'"
|
20
22
|
end
|
21
23
|
end # class
|
22
24
|
end # module
|
@@ -1,21 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'strscan'
|
2
4
|
require_relative '../rley/lexical/token'
|
3
5
|
|
6
|
+
# Simplistic tokenizer used mostly for testing purposes
|
4
7
|
class BaseTokenizer
|
8
|
+
# @return [StringScanner]
|
5
9
|
attr_reader(:scanner)
|
10
|
+
|
11
|
+
# @return [Integer] current line number
|
6
12
|
attr_reader(:lineno)
|
13
|
+
|
14
|
+
# @return [Integer] position of start of current line in source text
|
7
15
|
attr_reader(:line_start)
|
8
|
-
|
16
|
+
|
9
17
|
class ScanError < StandardError; end
|
10
18
|
|
11
|
-
# Constructor. Initialize a tokenizer
|
19
|
+
# Constructor. Initialize a tokenizer.
|
12
20
|
# @param source [String] Input text to tokenize.
|
13
21
|
def initialize(source)
|
14
22
|
@scanner = StringScanner.new('')
|
15
23
|
restart(source)
|
16
24
|
end
|
17
25
|
|
18
|
-
# @param source [String]
|
26
|
+
# @param source [String] input text to tokenize.
|
19
27
|
def restart(source)
|
20
28
|
@scanner.string = source
|
21
29
|
@lineno = 1
|
@@ -32,13 +40,13 @@ class BaseTokenizer
|
|
32
40
|
|
33
41
|
return tok_sequence
|
34
42
|
end
|
35
|
-
|
43
|
+
|
36
44
|
protected
|
37
|
-
|
45
|
+
|
38
46
|
# Patterns:
|
39
47
|
# Unambiguous single character
|
40
48
|
# Conditional single character:
|
41
|
-
# (e.g. '+' operator, '+' prefix for positive numbers)
|
49
|
+
# (e.g. '+' operator, '+' prefix for positive numbers)
|
42
50
|
def _next_token
|
43
51
|
skip_whitespaces
|
44
52
|
curr_ch = scanner.peek(1)
|
@@ -55,29 +63,29 @@ class BaseTokenizer
|
|
55
63
|
|
56
64
|
return token
|
57
65
|
end
|
58
|
-
|
66
|
+
|
59
67
|
def recognize_token
|
60
68
|
raise NotImplementedError
|
61
69
|
end
|
62
|
-
|
70
|
+
|
63
71
|
def build_token(aSymbolName, aLexeme, aFormat = :default)
|
64
72
|
begin
|
65
73
|
value = convert_to(aLexeme, aSymbolName, aFormat)
|
66
74
|
col = scanner.pos - aLexeme.size - @line_start + 1
|
67
75
|
pos = Rley::Lexical::Position.new(@lineno, col)
|
68
76
|
token = Rley::Lexical::Token.new(value, aSymbolName, pos)
|
69
|
-
rescue StandardError =>
|
77
|
+
rescue StandardError => e
|
70
78
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
71
|
-
raise
|
79
|
+
raise e
|
72
80
|
end
|
73
81
|
|
74
82
|
return token
|
75
83
|
end
|
76
|
-
|
84
|
+
|
77
85
|
def convert_to(aLexeme, _symbol_name, _format)
|
78
86
|
return aLexeme
|
79
87
|
end
|
80
|
-
|
88
|
+
|
81
89
|
def skip_whitespaces
|
82
90
|
pre_pos = scanner.pos
|
83
91
|
|
@@ -91,21 +99,16 @@ class BaseTokenizer
|
|
91
99
|
ws_found = true
|
92
100
|
next_line
|
93
101
|
end
|
94
|
-
|
95
|
-
# if next_ch == ';'
|
96
|
-
# cmt_found = true
|
97
|
-
# scanner.skip(/;[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
|
98
|
-
# next_line
|
99
|
-
# end
|
102
|
+
|
100
103
|
break unless ws_found || cmt_found
|
101
104
|
end
|
102
105
|
|
103
106
|
curr_pos = scanner.pos
|
104
107
|
return if curr_pos == pre_pos
|
105
108
|
end
|
106
|
-
|
109
|
+
|
107
110
|
def next_line
|
108
111
|
@lineno += 1
|
109
112
|
@line_start = scanner.pos
|
110
|
-
end
|
113
|
+
end
|
111
114
|
end # class
|