rley 0.7.03 → 0.7.08

Sign up to get free protection for your applications and to get access to all the features.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +362 -62
  3. data/.travis.yml +6 -7
  4. data/CHANGELOG.md +20 -1
  5. data/LICENSE.txt +1 -1
  6. data/README.md +6 -7
  7. data/Rakefile +2 -0
  8. data/appveyor.yml +2 -4
  9. data/examples/NLP/benchmark_pico_en.rb +2 -0
  10. data/examples/NLP/engtagger.rb +193 -188
  11. data/examples/NLP/nano_eng/nano_en_demo.rb +2 -0
  12. data/examples/NLP/nano_eng/nano_grammar.rb +7 -5
  13. data/examples/NLP/pico_en_demo.rb +2 -0
  14. data/examples/data_formats/JSON/cli_options.rb +3 -1
  15. data/examples/data_formats/JSON/json_ast_builder.rb +14 -9
  16. data/examples/data_formats/JSON/json_ast_nodes.rb +14 -21
  17. data/examples/data_formats/JSON/json_demo.rb +2 -0
  18. data/examples/data_formats/JSON/json_grammar.rb +4 -2
  19. data/examples/data_formats/JSON/json_lexer.rb +10 -8
  20. data/examples/data_formats/JSON/json_minifier.rb +3 -1
  21. data/examples/general/calc_iter1/calc_ast_builder.rb +15 -10
  22. data/examples/general/calc_iter1/calc_ast_nodes.rb +25 -37
  23. data/examples/general/calc_iter1/calc_demo.rb +2 -0
  24. data/examples/general/calc_iter1/calc_grammar.rb +4 -2
  25. data/examples/general/calc_iter1/calc_lexer.rb +8 -4
  26. data/examples/general/calc_iter1/spec/calculator_spec.rb +7 -5
  27. data/examples/general/calc_iter2/calc_ast_builder.rb +7 -3
  28. data/examples/general/calc_iter2/calc_ast_nodes.rb +29 -43
  29. data/examples/general/calc_iter2/calc_demo.rb +2 -0
  30. data/examples/general/calc_iter2/calc_grammar.rb +5 -3
  31. data/examples/general/calc_iter2/calc_lexer.rb +13 -10
  32. data/examples/general/calc_iter2/spec/calculator_spec.rb +28 -26
  33. data/examples/general/left.rb +4 -2
  34. data/examples/general/right.rb +4 -2
  35. data/lib/rley.rb +2 -0
  36. data/lib/rley/base/base_parser.rb +2 -0
  37. data/lib/rley/base/dotted_item.rb +38 -41
  38. data/lib/rley/base/grm_items_builder.rb +2 -0
  39. data/lib/rley/constants.rb +5 -3
  40. data/lib/rley/engine.rb +22 -24
  41. data/lib/rley/formatter/asciitree.rb +6 -4
  42. data/lib/rley/formatter/base_formatter.rb +2 -0
  43. data/lib/rley/formatter/bracket_notation.rb +3 -8
  44. data/lib/rley/formatter/debug.rb +8 -6
  45. data/lib/rley/formatter/json.rb +4 -2
  46. data/lib/rley/gfg/call_edge.rb +3 -1
  47. data/lib/rley/gfg/edge.rb +7 -5
  48. data/lib/rley/gfg/end_vertex.rb +4 -6
  49. data/lib/rley/gfg/epsilon_edge.rb +3 -5
  50. data/lib/rley/gfg/grm_flow_graph.rb +31 -25
  51. data/lib/rley/gfg/item_vertex.rb +12 -22
  52. data/lib/rley/gfg/non_terminal_vertex.rb +6 -4
  53. data/lib/rley/gfg/return_edge.rb +2 -0
  54. data/lib/rley/gfg/scan_edge.rb +3 -1
  55. data/lib/rley/gfg/shortcut_edge.rb +4 -2
  56. data/lib/rley/gfg/start_vertex.rb +6 -8
  57. data/lib/rley/gfg/vertex.rb +47 -41
  58. data/lib/rley/lexical/token.rb +3 -1
  59. data/lib/rley/lexical/token_range.rb +8 -6
  60. data/lib/rley/parse_forest_visitor.rb +7 -5
  61. data/lib/rley/parse_rep/ast_base_builder.rb +11 -11
  62. data/lib/rley/parse_rep/cst_builder.rb +7 -4
  63. data/lib/rley/parse_rep/parse_forest_builder.rb +36 -25
  64. data/lib/rley/parse_rep/parse_forest_factory.rb +5 -3
  65. data/lib/rley/parse_rep/parse_rep_creator.rb +18 -13
  66. data/lib/rley/parse_rep/parse_tree_builder.rb +15 -15
  67. data/lib/rley/parse_rep/parse_tree_factory.rb +27 -25
  68. data/lib/rley/parse_tree_visitor.rb +3 -1
  69. data/lib/rley/parser/error_reason.rb +9 -8
  70. data/lib/rley/parser/gfg_chart.rb +54 -22
  71. data/lib/rley/parser/gfg_earley_parser.rb +3 -1
  72. data/lib/rley/parser/gfg_parsing.rb +51 -31
  73. data/lib/rley/parser/parse_entry.rb +29 -33
  74. data/lib/rley/parser/parse_entry_set.rb +32 -27
  75. data/lib/rley/parser/parse_entry_tracker.rb +6 -4
  76. data/lib/rley/parser/parse_state.rb +18 -21
  77. data/lib/rley/parser/parse_state_tracker.rb +6 -4
  78. data/lib/rley/parser/parse_tracer.rb +15 -13
  79. data/lib/rley/parser/parse_walker_factory.rb +28 -29
  80. data/lib/rley/parser/state_set.rb +11 -10
  81. data/lib/rley/ptree/non_terminal_node.rb +10 -6
  82. data/lib/rley/ptree/parse_tree.rb +6 -4
  83. data/lib/rley/ptree/parse_tree_node.rb +7 -5
  84. data/lib/rley/ptree/terminal_node.rb +9 -7
  85. data/lib/rley/rley_error.rb +12 -10
  86. data/lib/rley/sppf/alternative_node.rb +8 -6
  87. data/lib/rley/sppf/composite_node.rb +9 -7
  88. data/lib/rley/sppf/epsilon_node.rb +5 -3
  89. data/lib/rley/sppf/leaf_node.rb +5 -3
  90. data/lib/rley/sppf/non_terminal_node.rb +2 -0
  91. data/lib/rley/sppf/parse_forest.rb +19 -17
  92. data/lib/rley/sppf/sppf_node.rb +9 -8
  93. data/lib/rley/sppf/token_node.rb +5 -3
  94. data/lib/rley/syntax/grammar.rb +7 -5
  95. data/lib/rley/syntax/grammar_builder.rb +11 -9
  96. data/lib/rley/syntax/grm_symbol.rb +8 -6
  97. data/lib/rley/syntax/literal.rb +2 -0
  98. data/lib/rley/syntax/non_terminal.rb +11 -15
  99. data/lib/rley/syntax/production.rb +13 -11
  100. data/lib/rley/syntax/symbol_seq.rb +10 -10
  101. data/lib/rley/syntax/terminal.rb +6 -5
  102. data/lib/rley/syntax/verbatim_symbol.rb +5 -3
  103. data/lib/support/base_tokenizer.rb +23 -20
  104. data/spec/rley/base/dotted_item_spec.rb +4 -2
  105. data/spec/rley/base/grm_items_builder_spec.rb +2 -0
  106. data/spec/rley/engine_spec.rb +47 -9
  107. data/spec/rley/formatter/asciitree_spec.rb +11 -9
  108. data/spec/rley/formatter/bracket_notation_spec.rb +16 -14
  109. data/spec/rley/formatter/debug_spec.rb +4 -2
  110. data/spec/rley/formatter/json_spec.rb +5 -3
  111. data/spec/rley/gfg/call_edge_spec.rb +2 -0
  112. data/spec/rley/gfg/edge_spec.rb +2 -0
  113. data/spec/rley/gfg/end_vertex_spec.rb +7 -5
  114. data/spec/rley/gfg/epsilon_edge_spec.rb +2 -0
  115. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -0
  116. data/spec/rley/gfg/item_vertex_spec.rb +12 -10
  117. data/spec/rley/gfg/non_terminal_vertex_spec.rb +5 -3
  118. data/spec/rley/gfg/return_edge_spec.rb +2 -0
  119. data/spec/rley/gfg/scan_edge_spec.rb +2 -0
  120. data/spec/rley/gfg/shortcut_edge_spec.rb +3 -1
  121. data/spec/rley/gfg/start_vertex_spec.rb +7 -5
  122. data/spec/rley/gfg/vertex_spec.rb +5 -3
  123. data/spec/rley/lexical/token_range_spec.rb +18 -16
  124. data/spec/rley/lexical/token_spec.rb +4 -2
  125. data/spec/rley/parse_forest_visitor_spec.rb +167 -163
  126. data/spec/rley/parse_rep/ambiguous_parse_spec.rb +46 -44
  127. data/spec/rley/parse_rep/ast_builder_spec.rb +8 -6
  128. data/spec/rley/parse_rep/cst_builder_spec.rb +7 -5
  129. data/spec/rley/parse_rep/groucho_spec.rb +25 -25
  130. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +28 -26
  131. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -6
  132. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +4 -2
  133. data/spec/rley/parse_tree_visitor_spec.rb +12 -8
  134. data/spec/rley/parser/error_reason_spec.rb +8 -6
  135. data/spec/rley/parser/gfg_chart_spec.rb +17 -4
  136. data/spec/rley/parser/gfg_earley_parser_spec.rb +16 -11
  137. data/spec/rley/parser/gfg_parsing_spec.rb +41 -252
  138. data/spec/rley/parser/parse_entry_set_spec.rb +2 -0
  139. data/spec/rley/parser/parse_entry_spec.rb +21 -19
  140. data/spec/rley/parser/parse_state_spec.rb +7 -5
  141. data/spec/rley/parser/parse_tracer_spec.rb +16 -14
  142. data/spec/rley/parser/parse_walker_factory_spec.rb +10 -8
  143. data/spec/rley/parser/state_set_spec.rb +24 -22
  144. data/spec/rley/ptree/non_terminal_node_spec.rb +7 -3
  145. data/spec/rley/ptree/parse_tree_node_spec.rb +6 -4
  146. data/spec/rley/ptree/parse_tree_spec.rb +2 -0
  147. data/spec/rley/ptree/terminal_node_spec.rb +8 -6
  148. data/spec/rley/sppf/alternative_node_spec.rb +8 -6
  149. data/spec/rley/sppf/non_terminal_node_spec.rb +5 -3
  150. data/spec/rley/sppf/token_node_spec.rb +6 -4
  151. data/spec/rley/support/ambiguous_grammar_helper.rb +5 -4
  152. data/spec/rley/support/expectation_helper.rb +2 -0
  153. data/spec/rley/support/grammar_abc_helper.rb +4 -4
  154. data/spec/rley/support/grammar_ambig01_helper.rb +6 -5
  155. data/spec/rley/support/grammar_arr_int_helper.rb +6 -5
  156. data/spec/rley/support/grammar_b_expr_helper.rb +6 -5
  157. data/spec/rley/support/grammar_helper.rb +2 -0
  158. data/spec/rley/support/grammar_l0_helper.rb +15 -16
  159. data/spec/rley/support/grammar_pb_helper.rb +8 -5
  160. data/spec/rley/support/grammar_sppf_helper.rb +3 -1
  161. data/spec/rley/syntax/grammar_builder_spec.rb +7 -5
  162. data/spec/rley/syntax/grammar_spec.rb +8 -6
  163. data/spec/rley/syntax/grm_symbol_spec.rb +3 -1
  164. data/spec/rley/syntax/literal_spec.rb +2 -0
  165. data/spec/rley/syntax/non_terminal_spec.rb +10 -8
  166. data/spec/rley/syntax/production_spec.rb +15 -13
  167. data/spec/rley/syntax/symbol_seq_spec.rb +4 -2
  168. data/spec/rley/syntax/terminal_spec.rb +7 -5
  169. data/spec/rley/syntax/verbatim_symbol_spec.rb +3 -1
  170. data/spec/spec_helper.rb +2 -12
  171. data/spec/support/base_tokenizer_spec.rb +9 -2
  172. metadata +21 -63
  173. data/.simplecov +0 -7
  174. data/Gemfile +0 -8
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
  require_relative '../rley_error'
3
5
 
@@ -46,13 +48,13 @@ module Rley # This module is used as a namespace
46
48
  end
47
49
 
48
50
  # @return [Array] The list of non-terminals in the grammar.
49
- def non_terminals()
51
+ def non_terminals
50
52
  @non_terminals ||= symbols.select { |s| s.kind_of?(NonTerminal) }
51
53
  end
52
54
 
53
55
  # @return [Production] The start production of the grammar (i.e.
54
56
  # the rule that specifies the syntax for the start symbol.
55
- def start_production()
57
+ def start_production
56
58
  return rules[0]
57
59
  end
58
60
 
@@ -97,7 +99,7 @@ module Rley # This module is used as a namespace
97
99
  end
98
100
 
99
101
  # Perform some check of the grammar.
100
- def diagnose()
102
+ def diagnose
101
103
  mark_undefined
102
104
  mark_generative
103
105
  compute_nullable
@@ -191,7 +193,7 @@ module Rley # This module is used as a namespace
191
193
 
192
194
  # For each non-terminal determine whether it is nullable or not.
193
195
  # A nullable nonterminal is a nonterminal that can match an empty string.
194
- def compute_nullable()
196
+ def compute_nullable
195
197
  non_terminals.each { |nterm| nterm.nullable = false }
196
198
  nullable_sets = [direct_nullable]
197
199
 
@@ -234,7 +236,7 @@ module Rley # This module is used as a namespace
234
236
  nullables << prod.lhs
235
237
  end
236
238
 
237
- return nullables
239
+ nullables
238
240
  end
239
241
 
240
242
  # For each prodction determine whether it is nullable or not.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
  require_relative 'terminal'
3
5
  require_relative 'non_terminal'
@@ -16,12 +18,12 @@ module Rley # This module is used as a namespace
16
18
  # to the matching grammar symbol object.
17
19
  attr_reader(:symbols)
18
20
 
19
- # @return [Array<Production>] The list of production rules for
21
+ # @return [Array<Production>] The list of production rules for
20
22
  # the grammar to build.
21
23
  attr_reader(:productions)
22
24
 
23
25
  # Creates a new grammar builder.
24
- # @param aBlock [Proc] code block used to build the grammar.
26
+ # @param aBlock [Proc] code block used to build the grammar.
25
27
  # @example Building a tiny English grammar
26
28
  # builder = Rley::Syntax::GrammarBuilder.new do
27
29
  # add_terminals('n', 'v', 'adj', 'det')
@@ -43,7 +45,7 @@ module Rley # This module is used as a namespace
43
45
  # @param aSymbolName [String] the name of a grammar symbol.
44
46
  # @return [GrmSymbol] the retrieved symbol object.
45
47
  def [](aSymbolName)
46
- return symbols[aSymbolName]
48
+ symbols[aSymbolName]
47
49
  end
48
50
 
49
51
  # Add the given terminal symbols to the grammar of the language
@@ -65,7 +67,7 @@ module Rley # This module is used as a namespace
65
67
  # builder.rule('A' => ['a', 'A', 'c']) # 'rule' is a synonym
66
68
  # builder.rule('A' => %w[a A c]) # Use %w syntax for Array of String
67
69
  # builder.rule 'A' => %w[a A c] # Call parentheses are optional
68
- # @param aProductionRepr [Hash{String, Array<String>}]
70
+ # @param aProductionRepr [Hash{String, Array<String>}]
69
71
  # A Hash-based representation of a production.
70
72
  # @return [Production] The created Production instance
71
73
  def add_production(aProductionRepr)
@@ -83,14 +85,14 @@ module Rley # This module is used as a namespace
83
85
  new_prod = Production.new(lhs, rhs_members)
84
86
  productions << new_prod
85
87
  end
86
-
88
+
87
89
  return productions.last
88
90
  end
89
91
 
90
92
  # Given the grammar symbols and productions added to the builder,
91
93
  # build the resulting grammar (if not yet done).
92
- # @return [Grammar] the created grammar object.
93
- def grammar()
94
+ # @return [Grammar] the created grammar object.
95
+ def grammar
94
96
  unless @grammar
95
97
  raise StandardError, 'No symbol found for grammar' if symbols.empty?
96
98
  if productions.empty?
@@ -111,7 +113,7 @@ module Rley # This module is used as a namespace
111
113
  unused = all_terminals.reject { |a_term| in_use.include?(a_term) }
112
114
  unless unused.empty?
113
115
  suffix = "#{unused.map(&:name).join(', ')}."
114
- raise StandardError, 'Useless terminal symbol(s): ' + suffix
116
+ raise StandardError, "Useless terminal symbol(s): #{suffix}"
115
117
  end
116
118
 
117
119
  @grammar = Grammar.new(productions.dup)
@@ -164,7 +166,7 @@ module Rley # This module is used as a namespace
164
166
  unless symbols.include? aSymbolName
165
167
  symbols[aSymbolName] = NonTerminal.new(aSymbolName)
166
168
  end
167
- return symbols[aSymbolName]
169
+ symbols[aSymbolName]
168
170
  end
169
171
  end # class
170
172
  end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rley # This module is used as a namespace
2
4
  module Syntax # This module is used as a namespace
3
5
  # Abstract class for grammar symbols.
@@ -21,19 +23,19 @@ module Rley # This module is used as a namespace
21
23
 
22
24
  # The String representation of the grammar symbol
23
25
  # @return [String]
24
- def to_s()
25
- return name.to_s
26
+ def to_s
27
+ name.to_s
26
28
  end
27
29
 
28
30
  # @return [Boolean] true iff the symbol is a terminal
29
- def terminal?()
31
+ def terminal?
30
32
  # Default implementation to override if necessary
31
- return false
33
+ false
32
34
  end
33
35
 
34
36
  # @return [Boolean] true iff the symbol is generative.
35
- def generative?()
36
- return @generative
37
+ def generative?
38
+ @generative
37
39
  end
38
40
  end # class
39
41
  end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'terminal' # Load superclass
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'grm_symbol' # Load superclass
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -7,41 +9,35 @@ module Rley # This module is used as a namespace
7
9
  class NonTerminal < GrmSymbol
8
10
  # A non-terminal symbol is nullable if it can match an empty string.
9
11
  attr_writer(:nullable)
10
-
12
+
11
13
  # A non-terminal symbol is undefined if no production rule in the grammar
12
14
  # has that non-terminal symbol in its left-hand side.
13
15
  attr_writer(:undefined)
14
-
15
- # A non-terminal symbol is unreachable if it cannot be reached (derived)
16
+
17
+ # A non-terminal symbol is unreachable if it cannot be reached (derived)
16
18
  # from the start symbol.
17
19
  attr_writer(:unreachable)
18
20
 
19
- # Constructor.
20
- # @param aName [String] The name of the grammar symbol.
21
- def initialize(aName)
22
- super(aName)
23
- end
24
-
25
21
  # @return [false/true] Return true if the symbol derives
26
22
  # the empty string. As non-terminal symbol is nullable when it can
27
23
  # can match to zero input token.
28
24
  # The "nullability" of a non-terminal can practically be determined once
29
25
  # all the production rules of the grammar are specified.
30
- def nullable?()
26
+ def nullable?
31
27
  return @nullable
32
28
  end
33
-
29
+
34
30
  # @return [false/true] Return true if the symbol doesn't appear
35
31
  # on the left-hand side of any production rule.
36
- def undefined?()
32
+ def undefined?
37
33
  return @undefined
38
34
  end
39
-
35
+
40
36
  # @return [false/true] Return true if the symbol cannot be derived
41
37
  # from the start symbol.
42
- def unreachable?()
38
+ def unreachable?
43
39
  return @unreachable
44
- end
40
+ end
45
41
  end # class
46
42
  end # module
47
43
  end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'symbol_seq'
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -44,33 +46,33 @@ module Rley # This module is used as a namespace
44
46
 
45
47
  # Is the rhs empty?
46
48
  # @return [Boolean] true if the rhs has no members.
47
- def empty?()
48
- return rhs.empty?
49
+ def empty?
50
+ rhs.empty?
49
51
  end
50
52
 
51
53
  # Return true iff the production is generative
52
- def generative?()
53
- if @generative.nil?
54
- end
54
+ def generative?
55
+ # if @generative.nil?
56
+ # end
55
57
 
56
- return @generative
58
+ @generative
57
59
  end
58
60
 
59
61
  # @return [Boolen] true iff the production is nullable
60
- def nullable?()
61
- return @nullable
62
+ def nullable?
63
+ @nullable
62
64
  end
63
65
 
64
66
  # Returns a string containing a human-readable representation of the
65
67
  # production.
66
68
  # @return [String]
67
- def inspect()
68
- result = "#<#{self.class.name}:#{object_id}"
69
+ def inspect
70
+ result = +"#<#{self.class.name}:#{object_id}"
69
71
  result << " @name=\"#{name}\""
70
72
  result << " @lhs=#{lhs.name}"
71
73
  result << " @rhs=#{rhs.inspect}"
72
74
  result << " @generative=#{@generative}>"
73
- return result
75
+ result
74
76
  end
75
77
 
76
78
  # A setter for the production name
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'forwardable'
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -10,9 +12,9 @@ module Rley # This module is used as a namespace
10
12
  # @return [Array<GrmSymbol>] The sequence of symbols
11
13
  attr_reader(:members)
12
14
 
13
- # Create a sequence of grammar symbols (as in right-hand side of
15
+ # Create a sequence of grammar symbols (as in right-hand side of
14
16
  # a production rule).
15
- # @param theSymbols [Array<GrmSymbol>] An array of symbols.
17
+ # @param theSymbols [Array<GrmSymbol>] An array of symbols.
16
18
  def initialize(theSymbols)
17
19
  @members = theSymbols.dup
18
20
  end
@@ -31,20 +33,18 @@ module Rley # This module is used as a namespace
31
33
  raise StandardError, msg
32
34
  end
33
35
 
34
- return result
36
+ result
35
37
  end
36
-
37
- # Returns a string containing a human-readable representation of the
38
+
39
+ # Returns a string containing a human-readable representation of the
38
40
  # sequence of symbols.
39
41
  # @return [String]
40
- def inspect()
41
- result = "#<#{self.class.name}:#{object_id}"
42
+ def inspect
43
+ result = +"#<#{self.class.name}:#{object_id}"
42
44
  symbol_names = members.map(&:name)
43
45
  result << " @members=#{symbol_names}>"
44
- return result
46
+ result
45
47
  end
46
-
47
-
48
48
  end # class
49
49
  end # module
50
50
  end # module
@@ -1,30 +1,31 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'grm_symbol' # Load superclass
2
4
 
3
5
  module Rley # This module is used as a namespace
4
6
  module Syntax # This module is used as a namespace
5
- # A terminal symbol represents a class of words in the language
7
+ # A terminal symbol represents a class of words in the language
6
8
  # defined the grammar.
7
9
  class Terminal < GrmSymbol
8
-
9
10
  # Constructor.
10
11
  # @param aName [String] The name of the grammar symbol.
11
12
  def initialize(aName)
12
13
  super(aName)
13
14
  self.generative = true
14
15
  end
15
-
16
+
16
17
  # Return true iff the symbol is a terminal
17
18
  def terminal?
18
19
  return true
19
20
  end
20
-
21
+
21
22
  # @return [false] Return true if the symbol derives
22
23
  # the empty string. As terminal symbol corresponds to a input token
23
24
  # it is by definition non-nullable.
24
25
  def nullable?
25
26
  false
26
27
  end
27
-
28
+
28
29
  def to_s
29
30
  name
30
31
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'terminal' # Load superclass
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -12,11 +14,11 @@ module Rley # This module is used as a namespace
12
14
  super(aText) # Do we need to separate the text from the name?
13
15
  @text = aText.dup
14
16
  end
15
-
17
+
16
18
  # The String representation of the verbatim symbol
17
19
  # @return [String]
18
- def to_s()
19
- return "'#{text}'"
20
+ def to_s
21
+ "'#{text}'"
20
22
  end
21
23
  end # class
22
24
  end # module
@@ -1,21 +1,29 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'strscan'
2
4
  require_relative '../rley/lexical/token'
3
5
 
6
+ # Simplistic tokenizer used mostly for testing purposes
4
7
  class BaseTokenizer
8
+ # @return [StringScanner]
5
9
  attr_reader(:scanner)
10
+
11
+ # @return [Integer] current line number
6
12
  attr_reader(:lineno)
13
+
14
+ # @return [Integer] position of start of current line in source text
7
15
  attr_reader(:line_start)
8
-
16
+
9
17
  class ScanError < StandardError; end
10
18
 
11
- # Constructor. Initialize a tokenizer for Skeem.
19
+ # Constructor. Initialize a tokenizer.
12
20
  # @param source [String] Skeem text to tokenize.
13
21
  def initialize(source)
14
22
  @scanner = StringScanner.new('')
15
23
  restart(source)
16
24
  end
17
25
 
18
- # @param source [String] Skeem text to tokenize.
26
+ # @param source [String] input text to tokenize.
19
27
  def restart(source)
20
28
  @scanner.string = source
21
29
  @lineno = 1
@@ -32,13 +40,13 @@ class BaseTokenizer
32
40
 
33
41
  return tok_sequence
34
42
  end
35
-
43
+
36
44
  protected
37
-
45
+
38
46
  # Patterns:
39
47
  # Unambiguous single character
40
48
  # Conditional single character:
41
- # (e.g. '+' operator, '+' prefix for positive numbers)
49
+ # (e.g. '+' operator, '+' prefix for positive numbers)
42
50
  def _next_token
43
51
  skip_whitespaces
44
52
  curr_ch = scanner.peek(1)
@@ -55,29 +63,29 @@ class BaseTokenizer
55
63
 
56
64
  return token
57
65
  end
58
-
66
+
59
67
  def recognize_token
60
68
  raise NotImplementedError
61
69
  end
62
-
70
+
63
71
  def build_token(aSymbolName, aLexeme, aFormat = :default)
64
72
  begin
65
73
  value = convert_to(aLexeme, aSymbolName, aFormat)
66
74
  col = scanner.pos - aLexeme.size - @line_start + 1
67
75
  pos = Rley::Lexical::Position.new(@lineno, col)
68
76
  token = Rley::Lexical::Token.new(value, aSymbolName, pos)
69
- rescue StandardError => exc
77
+ rescue StandardError => e
70
78
  puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
71
- raise exc
79
+ raise e
72
80
  end
73
81
 
74
82
  return token
75
83
  end
76
-
84
+
77
85
  def convert_to(aLexeme, _symbol_name, _format)
78
86
  return aLexeme
79
87
  end
80
-
88
+
81
89
  def skip_whitespaces
82
90
  pre_pos = scanner.pos
83
91
 
@@ -91,21 +99,16 @@ class BaseTokenizer
91
99
  ws_found = true
92
100
  next_line
93
101
  end
94
- # next_ch = scanner.peek(1)
95
- # if next_ch == ';'
96
- # cmt_found = true
97
- # scanner.skip(/;[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
98
- # next_line
99
- # end
102
+
100
103
  break unless ws_found || cmt_found
101
104
  end
102
105
 
103
106
  curr_pos = scanner.pos
104
107
  return if curr_pos == pre_pos
105
108
  end
106
-
109
+
107
110
  def next_line
108
111
  @lineno += 1
109
112
  @line_start = scanner.pos
110
- end
113
+ end
111
114
  end # class