rley 0.7.03 → 0.7.08
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +362 -62
- data/.travis.yml +6 -7
- data/CHANGELOG.md +20 -1
- data/LICENSE.txt +1 -1
- data/README.md +6 -7
- data/Rakefile +2 -0
- data/appveyor.yml +2 -4
- data/examples/NLP/benchmark_pico_en.rb +2 -0
- data/examples/NLP/engtagger.rb +193 -188
- data/examples/NLP/nano_eng/nano_en_demo.rb +2 -0
- data/examples/NLP/nano_eng/nano_grammar.rb +7 -5
- data/examples/NLP/pico_en_demo.rb +2 -0
- data/examples/data_formats/JSON/cli_options.rb +3 -1
- data/examples/data_formats/JSON/json_ast_builder.rb +14 -9
- data/examples/data_formats/JSON/json_ast_nodes.rb +14 -21
- data/examples/data_formats/JSON/json_demo.rb +2 -0
- data/examples/data_formats/JSON/json_grammar.rb +4 -2
- data/examples/data_formats/JSON/json_lexer.rb +10 -8
- data/examples/data_formats/JSON/json_minifier.rb +3 -1
- data/examples/general/calc_iter1/calc_ast_builder.rb +15 -10
- data/examples/general/calc_iter1/calc_ast_nodes.rb +25 -37
- data/examples/general/calc_iter1/calc_demo.rb +2 -0
- data/examples/general/calc_iter1/calc_grammar.rb +4 -2
- data/examples/general/calc_iter1/calc_lexer.rb +8 -4
- data/examples/general/calc_iter1/spec/calculator_spec.rb +7 -5
- data/examples/general/calc_iter2/calc_ast_builder.rb +7 -3
- data/examples/general/calc_iter2/calc_ast_nodes.rb +29 -43
- data/examples/general/calc_iter2/calc_demo.rb +2 -0
- data/examples/general/calc_iter2/calc_grammar.rb +5 -3
- data/examples/general/calc_iter2/calc_lexer.rb +13 -10
- data/examples/general/calc_iter2/spec/calculator_spec.rb +28 -26
- data/examples/general/left.rb +4 -2
- data/examples/general/right.rb +4 -2
- data/lib/rley.rb +2 -0
- data/lib/rley/base/base_parser.rb +2 -0
- data/lib/rley/base/dotted_item.rb +38 -41
- data/lib/rley/base/grm_items_builder.rb +2 -0
- data/lib/rley/constants.rb +5 -3
- data/lib/rley/engine.rb +22 -24
- data/lib/rley/formatter/asciitree.rb +6 -4
- data/lib/rley/formatter/base_formatter.rb +2 -0
- data/lib/rley/formatter/bracket_notation.rb +3 -8
- data/lib/rley/formatter/debug.rb +8 -6
- data/lib/rley/formatter/json.rb +4 -2
- data/lib/rley/gfg/call_edge.rb +3 -1
- data/lib/rley/gfg/edge.rb +7 -5
- data/lib/rley/gfg/end_vertex.rb +4 -6
- data/lib/rley/gfg/epsilon_edge.rb +3 -5
- data/lib/rley/gfg/grm_flow_graph.rb +31 -25
- data/lib/rley/gfg/item_vertex.rb +12 -22
- data/lib/rley/gfg/non_terminal_vertex.rb +6 -4
- data/lib/rley/gfg/return_edge.rb +2 -0
- data/lib/rley/gfg/scan_edge.rb +3 -1
- data/lib/rley/gfg/shortcut_edge.rb +4 -2
- data/lib/rley/gfg/start_vertex.rb +6 -8
- data/lib/rley/gfg/vertex.rb +47 -41
- data/lib/rley/lexical/token.rb +3 -1
- data/lib/rley/lexical/token_range.rb +8 -6
- data/lib/rley/parse_forest_visitor.rb +7 -5
- data/lib/rley/parse_rep/ast_base_builder.rb +11 -11
- data/lib/rley/parse_rep/cst_builder.rb +7 -4
- data/lib/rley/parse_rep/parse_forest_builder.rb +36 -25
- data/lib/rley/parse_rep/parse_forest_factory.rb +5 -3
- data/lib/rley/parse_rep/parse_rep_creator.rb +18 -13
- data/lib/rley/parse_rep/parse_tree_builder.rb +15 -15
- data/lib/rley/parse_rep/parse_tree_factory.rb +27 -25
- data/lib/rley/parse_tree_visitor.rb +3 -1
- data/lib/rley/parser/error_reason.rb +9 -8
- data/lib/rley/parser/gfg_chart.rb +54 -22
- data/lib/rley/parser/gfg_earley_parser.rb +3 -1
- data/lib/rley/parser/gfg_parsing.rb +51 -31
- data/lib/rley/parser/parse_entry.rb +29 -33
- data/lib/rley/parser/parse_entry_set.rb +32 -27
- data/lib/rley/parser/parse_entry_tracker.rb +6 -4
- data/lib/rley/parser/parse_state.rb +18 -21
- data/lib/rley/parser/parse_state_tracker.rb +6 -4
- data/lib/rley/parser/parse_tracer.rb +15 -13
- data/lib/rley/parser/parse_walker_factory.rb +28 -29
- data/lib/rley/parser/state_set.rb +11 -10
- data/lib/rley/ptree/non_terminal_node.rb +10 -6
- data/lib/rley/ptree/parse_tree.rb +6 -4
- data/lib/rley/ptree/parse_tree_node.rb +7 -5
- data/lib/rley/ptree/terminal_node.rb +9 -7
- data/lib/rley/rley_error.rb +12 -10
- data/lib/rley/sppf/alternative_node.rb +8 -6
- data/lib/rley/sppf/composite_node.rb +9 -7
- data/lib/rley/sppf/epsilon_node.rb +5 -3
- data/lib/rley/sppf/leaf_node.rb +5 -3
- data/lib/rley/sppf/non_terminal_node.rb +2 -0
- data/lib/rley/sppf/parse_forest.rb +19 -17
- data/lib/rley/sppf/sppf_node.rb +9 -8
- data/lib/rley/sppf/token_node.rb +5 -3
- data/lib/rley/syntax/grammar.rb +7 -5
- data/lib/rley/syntax/grammar_builder.rb +11 -9
- data/lib/rley/syntax/grm_symbol.rb +8 -6
- data/lib/rley/syntax/literal.rb +2 -0
- data/lib/rley/syntax/non_terminal.rb +11 -15
- data/lib/rley/syntax/production.rb +13 -11
- data/lib/rley/syntax/symbol_seq.rb +10 -10
- data/lib/rley/syntax/terminal.rb +6 -5
- data/lib/rley/syntax/verbatim_symbol.rb +5 -3
- data/lib/support/base_tokenizer.rb +23 -20
- data/spec/rley/base/dotted_item_spec.rb +4 -2
- data/spec/rley/base/grm_items_builder_spec.rb +2 -0
- data/spec/rley/engine_spec.rb +47 -9
- data/spec/rley/formatter/asciitree_spec.rb +11 -9
- data/spec/rley/formatter/bracket_notation_spec.rb +16 -14
- data/spec/rley/formatter/debug_spec.rb +4 -2
- data/spec/rley/formatter/json_spec.rb +5 -3
- data/spec/rley/gfg/call_edge_spec.rb +2 -0
- data/spec/rley/gfg/edge_spec.rb +2 -0
- data/spec/rley/gfg/end_vertex_spec.rb +7 -5
- data/spec/rley/gfg/epsilon_edge_spec.rb +2 -0
- data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -0
- data/spec/rley/gfg/item_vertex_spec.rb +12 -10
- data/spec/rley/gfg/non_terminal_vertex_spec.rb +5 -3
- data/spec/rley/gfg/return_edge_spec.rb +2 -0
- data/spec/rley/gfg/scan_edge_spec.rb +2 -0
- data/spec/rley/gfg/shortcut_edge_spec.rb +3 -1
- data/spec/rley/gfg/start_vertex_spec.rb +7 -5
- data/spec/rley/gfg/vertex_spec.rb +5 -3
- data/spec/rley/lexical/token_range_spec.rb +18 -16
- data/spec/rley/lexical/token_spec.rb +4 -2
- data/spec/rley/parse_forest_visitor_spec.rb +167 -163
- data/spec/rley/parse_rep/ambiguous_parse_spec.rb +46 -44
- data/spec/rley/parse_rep/ast_builder_spec.rb +8 -6
- data/spec/rley/parse_rep/cst_builder_spec.rb +7 -5
- data/spec/rley/parse_rep/groucho_spec.rb +25 -25
- data/spec/rley/parse_rep/parse_forest_builder_spec.rb +28 -26
- data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -6
- data/spec/rley/parse_rep/parse_tree_factory_spec.rb +4 -2
- data/spec/rley/parse_tree_visitor_spec.rb +12 -8
- data/spec/rley/parser/error_reason_spec.rb +8 -6
- data/spec/rley/parser/gfg_chart_spec.rb +17 -4
- data/spec/rley/parser/gfg_earley_parser_spec.rb +16 -11
- data/spec/rley/parser/gfg_parsing_spec.rb +41 -252
- data/spec/rley/parser/parse_entry_set_spec.rb +2 -0
- data/spec/rley/parser/parse_entry_spec.rb +21 -19
- data/spec/rley/parser/parse_state_spec.rb +7 -5
- data/spec/rley/parser/parse_tracer_spec.rb +16 -14
- data/spec/rley/parser/parse_walker_factory_spec.rb +10 -8
- data/spec/rley/parser/state_set_spec.rb +24 -22
- data/spec/rley/ptree/non_terminal_node_spec.rb +7 -3
- data/spec/rley/ptree/parse_tree_node_spec.rb +6 -4
- data/spec/rley/ptree/parse_tree_spec.rb +2 -0
- data/spec/rley/ptree/terminal_node_spec.rb +8 -6
- data/spec/rley/sppf/alternative_node_spec.rb +8 -6
- data/spec/rley/sppf/non_terminal_node_spec.rb +5 -3
- data/spec/rley/sppf/token_node_spec.rb +6 -4
- data/spec/rley/support/ambiguous_grammar_helper.rb +5 -4
- data/spec/rley/support/expectation_helper.rb +2 -0
- data/spec/rley/support/grammar_abc_helper.rb +4 -4
- data/spec/rley/support/grammar_ambig01_helper.rb +6 -5
- data/spec/rley/support/grammar_arr_int_helper.rb +6 -5
- data/spec/rley/support/grammar_b_expr_helper.rb +6 -5
- data/spec/rley/support/grammar_helper.rb +2 -0
- data/spec/rley/support/grammar_l0_helper.rb +15 -16
- data/spec/rley/support/grammar_pb_helper.rb +8 -5
- data/spec/rley/support/grammar_sppf_helper.rb +3 -1
- data/spec/rley/syntax/grammar_builder_spec.rb +7 -5
- data/spec/rley/syntax/grammar_spec.rb +8 -6
- data/spec/rley/syntax/grm_symbol_spec.rb +3 -1
- data/spec/rley/syntax/literal_spec.rb +2 -0
- data/spec/rley/syntax/non_terminal_spec.rb +10 -8
- data/spec/rley/syntax/production_spec.rb +15 -13
- data/spec/rley/syntax/symbol_seq_spec.rb +4 -2
- data/spec/rley/syntax/terminal_spec.rb +7 -5
- data/spec/rley/syntax/verbatim_symbol_spec.rb +3 -1
- data/spec/spec_helper.rb +2 -12
- data/spec/support/base_tokenizer_spec.rb +9 -2
- metadata +21 -63
- data/.simplecov +0 -7
- data/Gemfile +0 -8
data/lib/rley/syntax/grammar.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require_relative '../rley_error'
|
3
5
|
|
@@ -46,13 +48,13 @@ module Rley # This module is used as a namespace
|
|
46
48
|
end
|
47
49
|
|
48
50
|
# @return [Array] The list of non-terminals in the grammar.
|
49
|
-
def non_terminals
|
51
|
+
def non_terminals
|
50
52
|
@non_terminals ||= symbols.select { |s| s.kind_of?(NonTerminal) }
|
51
53
|
end
|
52
54
|
|
53
55
|
# @return [Production] The start production of the grammar (i.e.
|
54
56
|
# the rule that specifies the syntax for the start symbol).
|
55
|
-
def start_production
|
57
|
+
def start_production
|
56
58
|
return rules[0]
|
57
59
|
end
|
58
60
|
|
@@ -97,7 +99,7 @@ module Rley # This module is used as a namespace
|
|
97
99
|
end
|
98
100
|
|
99
101
|
# Perform some check of the grammar.
|
100
|
-
def diagnose
|
102
|
+
def diagnose
|
101
103
|
mark_undefined
|
102
104
|
mark_generative
|
103
105
|
compute_nullable
|
@@ -191,7 +193,7 @@ module Rley # This module is used as a namespace
|
|
191
193
|
|
192
194
|
# For each non-terminal determine whether it is nullable or not.
|
193
195
|
# A nullable nonterminal is a nonterminal that can match an empty string.
|
194
|
-
def compute_nullable
|
196
|
+
def compute_nullable
|
195
197
|
non_terminals.each { |nterm| nterm.nullable = false }
|
196
198
|
nullable_sets = [direct_nullable]
|
197
199
|
|
@@ -234,7 +236,7 @@ module Rley # This module is used as a namespace
|
|
234
236
|
nullables << prod.lhs
|
235
237
|
end
|
236
238
|
|
237
|
-
|
239
|
+
nullables
|
238
240
|
end
|
239
241
|
|
240
242
|
# For each production determine whether it is nullable or not.
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'set'
|
2
4
|
require_relative 'terminal'
|
3
5
|
require_relative 'non_terminal'
|
@@ -16,12 +18,12 @@ module Rley # This module is used as a namespace
|
|
16
18
|
# to the matching grammar symbol object.
|
17
19
|
attr_reader(:symbols)
|
18
20
|
|
19
|
-
# @return [Array<Production>] The list of production rules for
|
21
|
+
# @return [Array<Production>] The list of production rules for
|
20
22
|
# the grammar to build.
|
21
23
|
attr_reader(:productions)
|
22
24
|
|
23
25
|
# Creates a new grammar builder.
|
24
|
-
# @param aBlock [Proc] code block used to build the grammar.
|
26
|
+
# @param aBlock [Proc] code block used to build the grammar.
|
25
27
|
# @example Building a tiny English grammar
|
26
28
|
# builder = Rley::Syntax::GrammarBuilder.new do
|
27
29
|
# add_terminals('n', 'v', 'adj', 'det')
|
@@ -43,7 +45,7 @@ module Rley # This module is used as a namespace
|
|
43
45
|
# @param aSymbolName [String] the name of a grammar symbol.
|
44
46
|
# @return [GrmSymbol] the retrieved symbol object.
|
45
47
|
def [](aSymbolName)
|
46
|
-
|
48
|
+
symbols[aSymbolName]
|
47
49
|
end
|
48
50
|
|
49
51
|
# Add the given terminal symbols to the grammar of the language
|
@@ -65,7 +67,7 @@ module Rley # This module is used as a namespace
|
|
65
67
|
# builder.rule('A' => ['a', 'A', 'c']) # 'rule' is a synonym
|
66
68
|
# builder.rule('A' => %w[a A c]) # Use %w syntax for Array of String
|
67
69
|
# builder.rule 'A' => %w[a A c] # Call parentheses are optional
|
68
|
-
# @param aProductionRepr [Hash{String, Array<String>}]
|
70
|
+
# @param aProductionRepr [Hash{String, Array<String>}]
|
69
71
|
# A Hash-based representation of a production.
|
70
72
|
# @return [Production] The created Production instance
|
71
73
|
def add_production(aProductionRepr)
|
@@ -83,14 +85,14 @@ module Rley # This module is used as a namespace
|
|
83
85
|
new_prod = Production.new(lhs, rhs_members)
|
84
86
|
productions << new_prod
|
85
87
|
end
|
86
|
-
|
88
|
+
|
87
89
|
return productions.last
|
88
90
|
end
|
89
91
|
|
90
92
|
# Given the grammar symbols and productions added to the builder,
|
91
93
|
# build the resulting grammar (if not yet done).
|
92
|
-
# @return [Grammar] the created grammar object.
|
93
|
-
def grammar
|
94
|
+
# @return [Grammar] the created grammar object.
|
95
|
+
def grammar
|
94
96
|
unless @grammar
|
95
97
|
raise StandardError, 'No symbol found for grammar' if symbols.empty?
|
96
98
|
if productions.empty?
|
@@ -111,7 +113,7 @@ module Rley # This module is used as a namespace
|
|
111
113
|
unused = all_terminals.reject { |a_term| in_use.include?(a_term) }
|
112
114
|
unless unused.empty?
|
113
115
|
suffix = "#{unused.map(&:name).join(', ')}."
|
114
|
-
raise StandardError,
|
116
|
+
raise StandardError, "Useless terminal symbol(s): #{suffix}"
|
115
117
|
end
|
116
118
|
|
117
119
|
@grammar = Grammar.new(productions.dup)
|
@@ -164,7 +166,7 @@ module Rley # This module is used as a namespace
|
|
164
166
|
unless symbols.include? aSymbolName
|
165
167
|
symbols[aSymbolName] = NonTerminal.new(aSymbolName)
|
166
168
|
end
|
167
|
-
|
169
|
+
symbols[aSymbolName]
|
168
170
|
end
|
169
171
|
end # class
|
170
172
|
end # module
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Rley # This module is used as a namespace
|
2
4
|
module Syntax # This module is used as a namespace
|
3
5
|
# Abstract class for grammar symbols.
|
@@ -21,19 +23,19 @@ module Rley # This module is used as a namespace
|
|
21
23
|
|
22
24
|
# The String representation of the grammar symbol
|
23
25
|
# @return [String]
|
24
|
-
def to_s
|
25
|
-
|
26
|
+
def to_s
|
27
|
+
name.to_s
|
26
28
|
end
|
27
29
|
|
28
30
|
# @return [Boolean] true iff the symbol is a terminal
|
29
|
-
def terminal?
|
31
|
+
def terminal?
|
30
32
|
# Default implementation to override if necessary
|
31
|
-
|
33
|
+
false
|
32
34
|
end
|
33
35
|
|
34
36
|
# @return [Boolean] true iff the symbol is generative.
|
35
|
-
def generative?
|
36
|
-
|
37
|
+
def generative?
|
38
|
+
@generative
|
37
39
|
end
|
38
40
|
end # class
|
39
41
|
end # module
|
data/lib/rley/syntax/literal.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'grm_symbol' # Load superclass
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
@@ -7,41 +9,35 @@ module Rley # This module is used as a namespace
|
|
7
9
|
class NonTerminal < GrmSymbol
|
8
10
|
# A non-terminal symbol is nullable if it can match an empty string.
|
9
11
|
attr_writer(:nullable)
|
10
|
-
|
12
|
+
|
11
13
|
# A non-terminal symbol is undefined if no production rule in the grammar
|
12
14
|
# has that non-terminal symbol in its left-hand side.
|
13
15
|
attr_writer(:undefined)
|
14
|
-
|
15
|
-
# A non-terminal symbol is unreachable if it cannot be reached (derived)
|
16
|
+
|
17
|
+
# A non-terminal symbol is unreachable if it cannot be reached (derived)
|
16
18
|
# from the start symbol.
|
17
19
|
attr_writer(:unreachable)
|
18
20
|
|
19
|
-
# Constructor.
|
20
|
-
# @param aName [String] The name of the grammar symbol.
|
21
|
-
def initialize(aName)
|
22
|
-
super(aName)
|
23
|
-
end
|
24
|
-
|
25
21
|
# @return [false/true] Return true if the symbol derives
|
26
22
|
# the empty string. A non-terminal symbol is nullable when it can
|
27
23
|
# match zero input tokens.
|
28
24
|
# The "nullability" of a non-terminal can practically be determined once
|
29
25
|
# all the production rules of the grammar are specified.
|
30
|
-
def nullable?
|
26
|
+
def nullable?
|
31
27
|
return @nullable
|
32
28
|
end
|
33
|
-
|
29
|
+
|
34
30
|
# @return [false/true] Return true if the symbol doesn't appear
|
35
31
|
# on the left-hand side of any production rule.
|
36
|
-
def undefined?
|
32
|
+
def undefined?
|
37
33
|
return @undefined
|
38
34
|
end
|
39
|
-
|
35
|
+
|
40
36
|
# @return [false/true] Return true if the symbol cannot be derived
|
41
37
|
# from the start symbol.
|
42
|
-
def unreachable?
|
38
|
+
def unreachable?
|
43
39
|
return @unreachable
|
44
|
-
end
|
40
|
+
end
|
45
41
|
end # class
|
46
42
|
end # module
|
47
43
|
end # module
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'symbol_seq'
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
@@ -44,33 +46,33 @@ module Rley # This module is used as a namespace
|
|
44
46
|
|
45
47
|
# Is the rhs empty?
|
46
48
|
# @return [Boolean] true if the rhs has no members.
|
47
|
-
def empty?
|
48
|
-
|
49
|
+
def empty?
|
50
|
+
rhs.empty?
|
49
51
|
end
|
50
52
|
|
51
53
|
# Return true iff the production is generative
|
52
|
-
def generative?
|
53
|
-
if @generative.nil?
|
54
|
-
end
|
54
|
+
def generative?
|
55
|
+
# if @generative.nil?
|
56
|
+
# end
|
55
57
|
|
56
|
-
|
58
|
+
@generative
|
57
59
|
end
|
58
60
|
|
59
61
|
# @return [Boolean] true iff the production is nullable
|
60
|
-
def nullable?
|
61
|
-
|
62
|
+
def nullable?
|
63
|
+
@nullable
|
62
64
|
end
|
63
65
|
|
64
66
|
# Returns a string containing a human-readable representation of the
|
65
67
|
# production.
|
66
68
|
# @return [String]
|
67
|
-
def inspect
|
68
|
-
result = "#<#{self.class.name}:#{object_id}"
|
69
|
+
def inspect
|
70
|
+
result = +"#<#{self.class.name}:#{object_id}"
|
69
71
|
result << " @name=\"#{name}\""
|
70
72
|
result << " @lhs=#{lhs.name}"
|
71
73
|
result << " @rhs=#{rhs.inspect}"
|
72
74
|
result << " @generative=#{@generative}>"
|
73
|
-
|
75
|
+
result
|
74
76
|
end
|
75
77
|
|
76
78
|
# A setter for the production name
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'forwardable'
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
@@ -10,9 +12,9 @@ module Rley # This module is used as a namespace
|
|
10
12
|
# @return [Array<GrmSymbol>] The sequence of symbols
|
11
13
|
attr_reader(:members)
|
12
14
|
|
13
|
-
# Create a sequence of grammar symbols (as in right-hand side of
|
15
|
+
# Create a sequence of grammar symbols (as in right-hand side of
|
14
16
|
# a production rule).
|
15
|
-
# @param theSymbols [Array<GrmSymbol>] An array of symbols.
|
17
|
+
# @param theSymbols [Array<GrmSymbol>] An array of symbols.
|
16
18
|
def initialize(theSymbols)
|
17
19
|
@members = theSymbols.dup
|
18
20
|
end
|
@@ -31,20 +33,18 @@ module Rley # This module is used as a namespace
|
|
31
33
|
raise StandardError, msg
|
32
34
|
end
|
33
35
|
|
34
|
-
|
36
|
+
result
|
35
37
|
end
|
36
|
-
|
37
|
-
# Returns a string containing a human-readable representation of the
|
38
|
+
|
39
|
+
# Returns a string containing a human-readable representation of the
|
38
40
|
# sequence of symbols.
|
39
41
|
# @return [String]
|
40
|
-
def inspect
|
41
|
-
result = "#<#{self.class.name}:#{object_id}"
|
42
|
+
def inspect
|
43
|
+
result = +"#<#{self.class.name}:#{object_id}"
|
42
44
|
symbol_names = members.map(&:name)
|
43
45
|
result << " @members=#{symbol_names}>"
|
44
|
-
|
46
|
+
result
|
45
47
|
end
|
46
|
-
|
47
|
-
|
48
48
|
end # class
|
49
49
|
end # module
|
50
50
|
end # module
|
data/lib/rley/syntax/terminal.rb
CHANGED
@@ -1,30 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'grm_symbol' # Load superclass
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
4
6
|
module Syntax # This module is used as a namespace
|
5
|
-
# A terminal symbol represents a class of words in the language
|
7
|
+
# A terminal symbol represents a class of words in the language
|
6
8
|
# defined by the grammar.
|
7
9
|
class Terminal < GrmSymbol
|
8
|
-
|
9
10
|
# Constructor.
|
10
11
|
# @param aName [String] The name of the grammar symbol.
|
11
12
|
def initialize(aName)
|
12
13
|
super(aName)
|
13
14
|
self.generative = true
|
14
15
|
end
|
15
|
-
|
16
|
+
|
16
17
|
# Return true iff the symbol is a terminal
|
17
18
|
def terminal?
|
18
19
|
return true
|
19
20
|
end
|
20
|
-
|
21
|
+
|
21
22
|
# @return [false] Return true if the symbol derives
|
22
23
|
# the empty string. As a terminal symbol corresponds to an input token
|
23
24
|
# it is by definition non-nullable.
|
24
25
|
def nullable?
|
25
26
|
false
|
26
27
|
end
|
27
|
-
|
28
|
+
|
28
29
|
def to_s
|
29
30
|
name
|
30
31
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require_relative 'terminal' # Load superclass
|
2
4
|
|
3
5
|
module Rley # This module is used as a namespace
|
@@ -12,11 +14,11 @@ module Rley # This module is used as a namespace
|
|
12
14
|
super(aText) # Do we need to separate the text from the name?
|
13
15
|
@text = aText.dup
|
14
16
|
end
|
15
|
-
|
17
|
+
|
16
18
|
# The String representation of the verbatim symbol
|
17
19
|
# @return [String]
|
18
|
-
def to_s
|
19
|
-
|
20
|
+
def to_s
|
21
|
+
"'#{text}'"
|
20
22
|
end
|
21
23
|
end # class
|
22
24
|
end # module
|
@@ -1,21 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'strscan'
|
2
4
|
require_relative '../rley/lexical/token'
|
3
5
|
|
6
|
+
# Simplistic tokenizer used mostly for testing purposes
|
4
7
|
class BaseTokenizer
|
8
|
+
# @return [StringScanner]
|
5
9
|
attr_reader(:scanner)
|
10
|
+
|
11
|
+
# @return [Integer] current line number
|
6
12
|
attr_reader(:lineno)
|
13
|
+
|
14
|
+
# @return [Integer] position of start of current line in source text
|
7
15
|
attr_reader(:line_start)
|
8
|
-
|
16
|
+
|
9
17
|
class ScanError < StandardError; end
|
10
18
|
|
11
|
-
# Constructor. Initialize a tokenizer
|
19
|
+
# Constructor. Initialize a tokenizer.
|
12
20
|
# @param source [String] Text to tokenize.
|
13
21
|
def initialize(source)
|
14
22
|
@scanner = StringScanner.new('')
|
15
23
|
restart(source)
|
16
24
|
end
|
17
25
|
|
18
|
-
# @param source [String]
|
26
|
+
# @param source [String] input text to tokenize.
|
19
27
|
def restart(source)
|
20
28
|
@scanner.string = source
|
21
29
|
@lineno = 1
|
@@ -32,13 +40,13 @@ class BaseTokenizer
|
|
32
40
|
|
33
41
|
return tok_sequence
|
34
42
|
end
|
35
|
-
|
43
|
+
|
36
44
|
protected
|
37
|
-
|
45
|
+
|
38
46
|
# Patterns:
|
39
47
|
# Unambiguous single character
|
40
48
|
# Conditional single character:
|
41
|
-
# (e.g. '+' operator, '+' prefix for positive numbers)
|
49
|
+
# (e.g. '+' operator, '+' prefix for positive numbers)
|
42
50
|
def _next_token
|
43
51
|
skip_whitespaces
|
44
52
|
curr_ch = scanner.peek(1)
|
@@ -55,29 +63,29 @@ class BaseTokenizer
|
|
55
63
|
|
56
64
|
return token
|
57
65
|
end
|
58
|
-
|
66
|
+
|
59
67
|
def recognize_token
|
60
68
|
raise NotImplementedError
|
61
69
|
end
|
62
|
-
|
70
|
+
|
63
71
|
def build_token(aSymbolName, aLexeme, aFormat = :default)
|
64
72
|
begin
|
65
73
|
value = convert_to(aLexeme, aSymbolName, aFormat)
|
66
74
|
col = scanner.pos - aLexeme.size - @line_start + 1
|
67
75
|
pos = Rley::Lexical::Position.new(@lineno, col)
|
68
76
|
token = Rley::Lexical::Token.new(value, aSymbolName, pos)
|
69
|
-
rescue StandardError =>
|
77
|
+
rescue StandardError => e
|
70
78
|
puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
|
71
|
-
raise
|
79
|
+
raise e
|
72
80
|
end
|
73
81
|
|
74
82
|
return token
|
75
83
|
end
|
76
|
-
|
84
|
+
|
77
85
|
def convert_to(aLexeme, _symbol_name, _format)
|
78
86
|
return aLexeme
|
79
87
|
end
|
80
|
-
|
88
|
+
|
81
89
|
def skip_whitespaces
|
82
90
|
pre_pos = scanner.pos
|
83
91
|
|
@@ -91,21 +99,16 @@ class BaseTokenizer
|
|
91
99
|
ws_found = true
|
92
100
|
next_line
|
93
101
|
end
|
94
|
-
|
95
|
-
# if next_ch == ';'
|
96
|
-
# cmt_found = true
|
97
|
-
# scanner.skip(/;[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
|
98
|
-
# next_line
|
99
|
-
# end
|
102
|
+
|
100
103
|
break unless ws_found || cmt_found
|
101
104
|
end
|
102
105
|
|
103
106
|
curr_pos = scanner.pos
|
104
107
|
return if curr_pos == pre_pos
|
105
108
|
end
|
106
|
-
|
109
|
+
|
107
110
|
def next_line
|
108
111
|
@lineno += 1
|
109
112
|
@line_start = scanner.pos
|
110
|
-
end
|
113
|
+
end
|
111
114
|
end # class
|