rley 0.7.03 → 0.7.08

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +362 -62
  3. data/.travis.yml +6 -7
  4. data/CHANGELOG.md +20 -1
  5. data/LICENSE.txt +1 -1
  6. data/README.md +6 -7
  7. data/Rakefile +2 -0
  8. data/appveyor.yml +2 -4
  9. data/examples/NLP/benchmark_pico_en.rb +2 -0
  10. data/examples/NLP/engtagger.rb +193 -188
  11. data/examples/NLP/nano_eng/nano_en_demo.rb +2 -0
  12. data/examples/NLP/nano_eng/nano_grammar.rb +7 -5
  13. data/examples/NLP/pico_en_demo.rb +2 -0
  14. data/examples/data_formats/JSON/cli_options.rb +3 -1
  15. data/examples/data_formats/JSON/json_ast_builder.rb +14 -9
  16. data/examples/data_formats/JSON/json_ast_nodes.rb +14 -21
  17. data/examples/data_formats/JSON/json_demo.rb +2 -0
  18. data/examples/data_formats/JSON/json_grammar.rb +4 -2
  19. data/examples/data_formats/JSON/json_lexer.rb +10 -8
  20. data/examples/data_formats/JSON/json_minifier.rb +3 -1
  21. data/examples/general/calc_iter1/calc_ast_builder.rb +15 -10
  22. data/examples/general/calc_iter1/calc_ast_nodes.rb +25 -37
  23. data/examples/general/calc_iter1/calc_demo.rb +2 -0
  24. data/examples/general/calc_iter1/calc_grammar.rb +4 -2
  25. data/examples/general/calc_iter1/calc_lexer.rb +8 -4
  26. data/examples/general/calc_iter1/spec/calculator_spec.rb +7 -5
  27. data/examples/general/calc_iter2/calc_ast_builder.rb +7 -3
  28. data/examples/general/calc_iter2/calc_ast_nodes.rb +29 -43
  29. data/examples/general/calc_iter2/calc_demo.rb +2 -0
  30. data/examples/general/calc_iter2/calc_grammar.rb +5 -3
  31. data/examples/general/calc_iter2/calc_lexer.rb +13 -10
  32. data/examples/general/calc_iter2/spec/calculator_spec.rb +28 -26
  33. data/examples/general/left.rb +4 -2
  34. data/examples/general/right.rb +4 -2
  35. data/lib/rley.rb +2 -0
  36. data/lib/rley/base/base_parser.rb +2 -0
  37. data/lib/rley/base/dotted_item.rb +38 -41
  38. data/lib/rley/base/grm_items_builder.rb +2 -0
  39. data/lib/rley/constants.rb +5 -3
  40. data/lib/rley/engine.rb +22 -24
  41. data/lib/rley/formatter/asciitree.rb +6 -4
  42. data/lib/rley/formatter/base_formatter.rb +2 -0
  43. data/lib/rley/formatter/bracket_notation.rb +3 -8
  44. data/lib/rley/formatter/debug.rb +8 -6
  45. data/lib/rley/formatter/json.rb +4 -2
  46. data/lib/rley/gfg/call_edge.rb +3 -1
  47. data/lib/rley/gfg/edge.rb +7 -5
  48. data/lib/rley/gfg/end_vertex.rb +4 -6
  49. data/lib/rley/gfg/epsilon_edge.rb +3 -5
  50. data/lib/rley/gfg/grm_flow_graph.rb +31 -25
  51. data/lib/rley/gfg/item_vertex.rb +12 -22
  52. data/lib/rley/gfg/non_terminal_vertex.rb +6 -4
  53. data/lib/rley/gfg/return_edge.rb +2 -0
  54. data/lib/rley/gfg/scan_edge.rb +3 -1
  55. data/lib/rley/gfg/shortcut_edge.rb +4 -2
  56. data/lib/rley/gfg/start_vertex.rb +6 -8
  57. data/lib/rley/gfg/vertex.rb +47 -41
  58. data/lib/rley/lexical/token.rb +3 -1
  59. data/lib/rley/lexical/token_range.rb +8 -6
  60. data/lib/rley/parse_forest_visitor.rb +7 -5
  61. data/lib/rley/parse_rep/ast_base_builder.rb +11 -11
  62. data/lib/rley/parse_rep/cst_builder.rb +7 -4
  63. data/lib/rley/parse_rep/parse_forest_builder.rb +36 -25
  64. data/lib/rley/parse_rep/parse_forest_factory.rb +5 -3
  65. data/lib/rley/parse_rep/parse_rep_creator.rb +18 -13
  66. data/lib/rley/parse_rep/parse_tree_builder.rb +15 -15
  67. data/lib/rley/parse_rep/parse_tree_factory.rb +27 -25
  68. data/lib/rley/parse_tree_visitor.rb +3 -1
  69. data/lib/rley/parser/error_reason.rb +9 -8
  70. data/lib/rley/parser/gfg_chart.rb +54 -22
  71. data/lib/rley/parser/gfg_earley_parser.rb +3 -1
  72. data/lib/rley/parser/gfg_parsing.rb +51 -31
  73. data/lib/rley/parser/parse_entry.rb +29 -33
  74. data/lib/rley/parser/parse_entry_set.rb +32 -27
  75. data/lib/rley/parser/parse_entry_tracker.rb +6 -4
  76. data/lib/rley/parser/parse_state.rb +18 -21
  77. data/lib/rley/parser/parse_state_tracker.rb +6 -4
  78. data/lib/rley/parser/parse_tracer.rb +15 -13
  79. data/lib/rley/parser/parse_walker_factory.rb +28 -29
  80. data/lib/rley/parser/state_set.rb +11 -10
  81. data/lib/rley/ptree/non_terminal_node.rb +10 -6
  82. data/lib/rley/ptree/parse_tree.rb +6 -4
  83. data/lib/rley/ptree/parse_tree_node.rb +7 -5
  84. data/lib/rley/ptree/terminal_node.rb +9 -7
  85. data/lib/rley/rley_error.rb +12 -10
  86. data/lib/rley/sppf/alternative_node.rb +8 -6
  87. data/lib/rley/sppf/composite_node.rb +9 -7
  88. data/lib/rley/sppf/epsilon_node.rb +5 -3
  89. data/lib/rley/sppf/leaf_node.rb +5 -3
  90. data/lib/rley/sppf/non_terminal_node.rb +2 -0
  91. data/lib/rley/sppf/parse_forest.rb +19 -17
  92. data/lib/rley/sppf/sppf_node.rb +9 -8
  93. data/lib/rley/sppf/token_node.rb +5 -3
  94. data/lib/rley/syntax/grammar.rb +7 -5
  95. data/lib/rley/syntax/grammar_builder.rb +11 -9
  96. data/lib/rley/syntax/grm_symbol.rb +8 -6
  97. data/lib/rley/syntax/literal.rb +2 -0
  98. data/lib/rley/syntax/non_terminal.rb +11 -15
  99. data/lib/rley/syntax/production.rb +13 -11
  100. data/lib/rley/syntax/symbol_seq.rb +10 -10
  101. data/lib/rley/syntax/terminal.rb +6 -5
  102. data/lib/rley/syntax/verbatim_symbol.rb +5 -3
  103. data/lib/support/base_tokenizer.rb +23 -20
  104. data/spec/rley/base/dotted_item_spec.rb +4 -2
  105. data/spec/rley/base/grm_items_builder_spec.rb +2 -0
  106. data/spec/rley/engine_spec.rb +47 -9
  107. data/spec/rley/formatter/asciitree_spec.rb +11 -9
  108. data/spec/rley/formatter/bracket_notation_spec.rb +16 -14
  109. data/spec/rley/formatter/debug_spec.rb +4 -2
  110. data/spec/rley/formatter/json_spec.rb +5 -3
  111. data/spec/rley/gfg/call_edge_spec.rb +2 -0
  112. data/spec/rley/gfg/edge_spec.rb +2 -0
  113. data/spec/rley/gfg/end_vertex_spec.rb +7 -5
  114. data/spec/rley/gfg/epsilon_edge_spec.rb +2 -0
  115. data/spec/rley/gfg/grm_flow_graph_spec.rb +2 -0
  116. data/spec/rley/gfg/item_vertex_spec.rb +12 -10
  117. data/spec/rley/gfg/non_terminal_vertex_spec.rb +5 -3
  118. data/spec/rley/gfg/return_edge_spec.rb +2 -0
  119. data/spec/rley/gfg/scan_edge_spec.rb +2 -0
  120. data/spec/rley/gfg/shortcut_edge_spec.rb +3 -1
  121. data/spec/rley/gfg/start_vertex_spec.rb +7 -5
  122. data/spec/rley/gfg/vertex_spec.rb +5 -3
  123. data/spec/rley/lexical/token_range_spec.rb +18 -16
  124. data/spec/rley/lexical/token_spec.rb +4 -2
  125. data/spec/rley/parse_forest_visitor_spec.rb +167 -163
  126. data/spec/rley/parse_rep/ambiguous_parse_spec.rb +46 -44
  127. data/spec/rley/parse_rep/ast_builder_spec.rb +8 -6
  128. data/spec/rley/parse_rep/cst_builder_spec.rb +7 -5
  129. data/spec/rley/parse_rep/groucho_spec.rb +25 -25
  130. data/spec/rley/parse_rep/parse_forest_builder_spec.rb +28 -26
  131. data/spec/rley/parse_rep/parse_forest_factory_spec.rb +8 -6
  132. data/spec/rley/parse_rep/parse_tree_factory_spec.rb +4 -2
  133. data/spec/rley/parse_tree_visitor_spec.rb +12 -8
  134. data/spec/rley/parser/error_reason_spec.rb +8 -6
  135. data/spec/rley/parser/gfg_chart_spec.rb +17 -4
  136. data/spec/rley/parser/gfg_earley_parser_spec.rb +16 -11
  137. data/spec/rley/parser/gfg_parsing_spec.rb +41 -252
  138. data/spec/rley/parser/parse_entry_set_spec.rb +2 -0
  139. data/spec/rley/parser/parse_entry_spec.rb +21 -19
  140. data/spec/rley/parser/parse_state_spec.rb +7 -5
  141. data/spec/rley/parser/parse_tracer_spec.rb +16 -14
  142. data/spec/rley/parser/parse_walker_factory_spec.rb +10 -8
  143. data/spec/rley/parser/state_set_spec.rb +24 -22
  144. data/spec/rley/ptree/non_terminal_node_spec.rb +7 -3
  145. data/spec/rley/ptree/parse_tree_node_spec.rb +6 -4
  146. data/spec/rley/ptree/parse_tree_spec.rb +2 -0
  147. data/spec/rley/ptree/terminal_node_spec.rb +8 -6
  148. data/spec/rley/sppf/alternative_node_spec.rb +8 -6
  149. data/spec/rley/sppf/non_terminal_node_spec.rb +5 -3
  150. data/spec/rley/sppf/token_node_spec.rb +6 -4
  151. data/spec/rley/support/ambiguous_grammar_helper.rb +5 -4
  152. data/spec/rley/support/expectation_helper.rb +2 -0
  153. data/spec/rley/support/grammar_abc_helper.rb +4 -4
  154. data/spec/rley/support/grammar_ambig01_helper.rb +6 -5
  155. data/spec/rley/support/grammar_arr_int_helper.rb +6 -5
  156. data/spec/rley/support/grammar_b_expr_helper.rb +6 -5
  157. data/spec/rley/support/grammar_helper.rb +2 -0
  158. data/spec/rley/support/grammar_l0_helper.rb +15 -16
  159. data/spec/rley/support/grammar_pb_helper.rb +8 -5
  160. data/spec/rley/support/grammar_sppf_helper.rb +3 -1
  161. data/spec/rley/syntax/grammar_builder_spec.rb +7 -5
  162. data/spec/rley/syntax/grammar_spec.rb +8 -6
  163. data/spec/rley/syntax/grm_symbol_spec.rb +3 -1
  164. data/spec/rley/syntax/literal_spec.rb +2 -0
  165. data/spec/rley/syntax/non_terminal_spec.rb +10 -8
  166. data/spec/rley/syntax/production_spec.rb +15 -13
  167. data/spec/rley/syntax/symbol_seq_spec.rb +4 -2
  168. data/spec/rley/syntax/terminal_spec.rb +7 -5
  169. data/spec/rley/syntax/verbatim_symbol_spec.rb +3 -1
  170. data/spec/spec_helper.rb +2 -12
  171. data/spec/support/base_tokenizer_spec.rb +9 -2
  172. metadata +21 -63
  173. data/.simplecov +0 -7
  174. data/Gemfile +0 -8
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
  require_relative '../rley_error'
3
5
 
@@ -46,13 +48,13 @@ module Rley # This module is used as a namespace
46
48
  end
47
49
 
48
50
  # @return [Array] The list of non-terminals in the grammar.
49
- def non_terminals()
51
+ def non_terminals
50
52
  @non_terminals ||= symbols.select { |s| s.kind_of?(NonTerminal) }
51
53
  end
52
54
 
53
55
  # @return [Production] The start production of the grammar (i.e.
54
56
  # the rule that specifies the syntax for the start symbol).
55
- def start_production()
57
+ def start_production
56
58
  return rules[0]
57
59
  end
58
60
 
@@ -97,7 +99,7 @@ module Rley # This module is used as a namespace
97
99
  end
98
100
 
99
101
  # Perform some check of the grammar.
100
- def diagnose()
102
+ def diagnose
101
103
  mark_undefined
102
104
  mark_generative
103
105
  compute_nullable
@@ -191,7 +193,7 @@ module Rley # This module is used as a namespace
191
193
 
192
194
  # For each non-terminal determine whether it is nullable or not.
193
195
  # A nullable nonterminal is a nonterminal that can match an empty string.
194
- def compute_nullable()
196
+ def compute_nullable
195
197
  non_terminals.each { |nterm| nterm.nullable = false }
196
198
  nullable_sets = [direct_nullable]
197
199
 
@@ -234,7 +236,7 @@ module Rley # This module is used as a namespace
234
236
  nullables << prod.lhs
235
237
  end
236
238
 
237
- return nullables
239
+ nullables
238
240
  end
239
241
 
240
242
  # For each production determine whether it is nullable or not.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'set'
2
4
  require_relative 'terminal'
3
5
  require_relative 'non_terminal'
@@ -16,12 +18,12 @@ module Rley # This module is used as a namespace
16
18
  # to the matching grammar symbol object.
17
19
  attr_reader(:symbols)
18
20
 
19
- # @return [Array<Production>] The list of production rules for
21
+ # @return [Array<Production>] The list of production rules for
20
22
  # the grammar to build.
21
23
  attr_reader(:productions)
22
24
 
23
25
  # Creates a new grammar builder.
24
- # @param aBlock [Proc] code block used to build the grammar.
26
+ # @param aBlock [Proc] code block used to build the grammar.
25
27
  # @example Building a tiny English grammar
26
28
  # builder = Rley::Syntax::GrammarBuilder.new do
27
29
  # add_terminals('n', 'v', 'adj', 'det')
@@ -43,7 +45,7 @@ module Rley # This module is used as a namespace
43
45
  # @param aSymbolName [String] the name of a grammar symbol.
44
46
  # @return [GrmSymbol] the retrieved symbol object.
45
47
  def [](aSymbolName)
46
- return symbols[aSymbolName]
48
+ symbols[aSymbolName]
47
49
  end
48
50
 
49
51
  # Add the given terminal symbols to the grammar of the language
@@ -65,7 +67,7 @@ module Rley # This module is used as a namespace
65
67
  # builder.rule('A' => ['a', 'A', 'c']) # 'rule' is a synonym
66
68
  # builder.rule('A' => %w[a A c]) # Use %w syntax for Array of String
67
69
  # builder.rule 'A' => %w[a A c] # Call parentheses are optional
68
- # @param aProductionRepr [Hash{String, Array<String>}]
70
+ # @param aProductionRepr [Hash{String, Array<String>}]
69
71
  # A Hash-based representation of a production.
70
72
  # @return [Production] The created Production instance
71
73
  def add_production(aProductionRepr)
@@ -83,14 +85,14 @@ module Rley # This module is used as a namespace
83
85
  new_prod = Production.new(lhs, rhs_members)
84
86
  productions << new_prod
85
87
  end
86
-
88
+
87
89
  return productions.last
88
90
  end
89
91
 
90
92
  # Given the grammar symbols and productions added to the builder,
91
93
  # build the resulting grammar (if not yet done).
92
- # @return [Grammar] the created grammar object.
93
- def grammar()
94
+ # @return [Grammar] the created grammar object.
95
+ def grammar
94
96
  unless @grammar
95
97
  raise StandardError, 'No symbol found for grammar' if symbols.empty?
96
98
  if productions.empty?
@@ -111,7 +113,7 @@ module Rley # This module is used as a namespace
111
113
  unused = all_terminals.reject { |a_term| in_use.include?(a_term) }
112
114
  unless unused.empty?
113
115
  suffix = "#{unused.map(&:name).join(', ')}."
114
- raise StandardError, 'Useless terminal symbol(s): ' + suffix
116
+ raise StandardError, "Useless terminal symbol(s): #{suffix}"
115
117
  end
116
118
 
117
119
  @grammar = Grammar.new(productions.dup)
@@ -164,7 +166,7 @@ module Rley # This module is used as a namespace
164
166
  unless symbols.include? aSymbolName
165
167
  symbols[aSymbolName] = NonTerminal.new(aSymbolName)
166
168
  end
167
- return symbols[aSymbolName]
169
+ symbols[aSymbolName]
168
170
  end
169
171
  end # class
170
172
  end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Rley # This module is used as a namespace
2
4
  module Syntax # This module is used as a namespace
3
5
  # Abstract class for grammar symbols.
@@ -21,19 +23,19 @@ module Rley # This module is used as a namespace
21
23
 
22
24
  # The String representation of the grammar symbol
23
25
  # @return [String]
24
- def to_s()
25
- return name.to_s
26
+ def to_s
27
+ name.to_s
26
28
  end
27
29
 
28
30
  # @return [Boolean] true iff the symbol is a terminal
29
- def terminal?()
31
+ def terminal?
30
32
  # Default implementation to override if necessary
31
- return false
33
+ false
32
34
  end
33
35
 
34
36
  # @return [Boolean] true iff the symbol is generative.
35
- def generative?()
36
- return @generative
37
+ def generative?
38
+ @generative
37
39
  end
38
40
  end # class
39
41
  end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'terminal' # Load superclass
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'grm_symbol' # Load superclass
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -7,41 +9,35 @@ module Rley # This module is used as a namespace
7
9
  class NonTerminal < GrmSymbol
8
10
  # A non-terminal symbol is nullable if it can match an empty string.
9
11
  attr_writer(:nullable)
10
-
12
+
11
13
  # A non-terminal symbol is undefined if no production rule in the grammar
12
14
  # has that non-terminal symbol in its left-hand side.
13
15
  attr_writer(:undefined)
14
-
15
- # A non-terminal symbol is unreachable if it cannot be reached (derived)
16
+
17
+ # A non-terminal symbol is unreachable if it cannot be reached (derived)
16
18
  # from the start symbol.
17
19
  attr_writer(:unreachable)
18
20
 
19
- # Constructor.
20
- # @param aName [String] The name of the grammar symbol.
21
- def initialize(aName)
22
- super(aName)
23
- end
24
-
25
21
  # @return [false/true] Return true if the symbol derives
26
22
  # the empty string. A non-terminal symbol is nullable when it can
27
23
  # match zero input tokens.
28
24
  # The "nullability" of a non-terminal can practically be determined once
29
25
  # all the production rules of the grammar are specified.
30
- def nullable?()
26
+ def nullable?
31
27
  return @nullable
32
28
  end
33
-
29
+
34
30
  # @return [false/true] Return true if the symbol doesn't appear
35
31
  # on the left-hand side of any production rule.
36
- def undefined?()
32
+ def undefined?
37
33
  return @undefined
38
34
  end
39
-
35
+
40
36
  # @return [false/true] Return true if the symbol cannot be derived
41
37
  # from the start symbol.
42
- def unreachable?()
38
+ def unreachable?
43
39
  return @unreachable
44
- end
40
+ end
45
41
  end # class
46
42
  end # module
47
43
  end # module
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'symbol_seq'
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -44,33 +46,33 @@ module Rley # This module is used as a namespace
44
46
 
45
47
  # Is the rhs empty?
46
48
  # @return [Boolean] true if the rhs has no members.
47
- def empty?()
48
- return rhs.empty?
49
+ def empty?
50
+ rhs.empty?
49
51
  end
50
52
 
51
53
  # Return true iff the production is generative
52
- def generative?()
53
- if @generative.nil?
54
- end
54
+ def generative?
55
+ # if @generative.nil?
56
+ # end
55
57
 
56
- return @generative
58
+ @generative
57
59
  end
58
60
 
59
61
  # @return [Boolean] true iff the production is nullable
60
- def nullable?()
61
- return @nullable
62
+ def nullable?
63
+ @nullable
62
64
  end
63
65
 
64
66
  # Returns a string containing a human-readable representation of the
65
67
  # production.
66
68
  # @return [String]
67
- def inspect()
68
- result = "#<#{self.class.name}:#{object_id}"
69
+ def inspect
70
+ result = +"#<#{self.class.name}:#{object_id}"
69
71
  result << " @name=\"#{name}\""
70
72
  result << " @lhs=#{lhs.name}"
71
73
  result << " @rhs=#{rhs.inspect}"
72
74
  result << " @generative=#{@generative}>"
73
- return result
75
+ result
74
76
  end
75
77
 
76
78
  # A setter for the production name
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'forwardable'
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -10,9 +12,9 @@ module Rley # This module is used as a namespace
10
12
  # @return [Array<GrmSymbol>] The sequence of symbols
11
13
  attr_reader(:members)
12
14
 
13
- # Create a sequence of grammar symbols (as in right-hand side of
15
+ # Create a sequence of grammar symbols (as in right-hand side of
14
16
  # a production rule).
15
- # @param theSymbols [Array<GrmSymbol>] An array of symbols.
17
+ # @param theSymbols [Array<GrmSymbol>] An array of symbols.
16
18
  def initialize(theSymbols)
17
19
  @members = theSymbols.dup
18
20
  end
@@ -31,20 +33,18 @@ module Rley # This module is used as a namespace
31
33
  raise StandardError, msg
32
34
  end
33
35
 
34
- return result
36
+ result
35
37
  end
36
-
37
- # Returns a string containing a human-readable representation of the
38
+
39
+ # Returns a string containing a human-readable representation of the
38
40
  # sequence of symbols.
39
41
  # @return [String]
40
- def inspect()
41
- result = "#<#{self.class.name}:#{object_id}"
42
+ def inspect
43
+ result = +"#<#{self.class.name}:#{object_id}"
42
44
  symbol_names = members.map(&:name)
43
45
  result << " @members=#{symbol_names}>"
44
- return result
46
+ result
45
47
  end
46
-
47
-
48
48
  end # class
49
49
  end # module
50
50
  end # module
@@ -1,30 +1,31 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'grm_symbol' # Load superclass
2
4
 
3
5
  module Rley # This module is used as a namespace
4
6
  module Syntax # This module is used as a namespace
5
- # A terminal symbol represents a class of words in the language
7
+ # A terminal symbol represents a class of words in the language
6
8
  # defined by the grammar.
7
9
  class Terminal < GrmSymbol
8
-
9
10
  # Constructor.
10
11
  # @param aName [String] The name of the grammar symbol.
11
12
  def initialize(aName)
12
13
  super(aName)
13
14
  self.generative = true
14
15
  end
15
-
16
+
16
17
  # Return true iff the symbol is a terminal
17
18
  def terminal?
18
19
  return true
19
20
  end
20
-
21
+
21
22
  # @return [false] Return true if the symbol derives
22
23
  # the empty string. As a terminal symbol corresponds to an input token
23
24
  # it is by definition non-nullable.
24
25
  def nullable?
25
26
  false
26
27
  end
27
-
28
+
28
29
  def to_s
29
30
  name
30
31
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require_relative 'terminal' # Load superclass
2
4
 
3
5
  module Rley # This module is used as a namespace
@@ -12,11 +14,11 @@ module Rley # This module is used as a namespace
12
14
  super(aText) # Do we need to separate the text from the name?
13
15
  @text = aText.dup
14
16
  end
15
-
17
+
16
18
  # The String representation of the verbatim symbol
17
19
  # @return [String]
18
- def to_s()
19
- return "'#{text}'"
20
+ def to_s
21
+ "'#{text}'"
20
22
  end
21
23
  end # class
22
24
  end # module
@@ -1,21 +1,29 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'strscan'
2
4
  require_relative '../rley/lexical/token'
3
5
 
6
+ # Simplistic tokenizer used mostly for testing purposes
4
7
  class BaseTokenizer
8
+ # @return [StringScanner]
5
9
  attr_reader(:scanner)
10
+
11
+ # @return [Integer] current line number
6
12
  attr_reader(:lineno)
13
+
14
+ # @return [Integer] position of start of current line in source text
7
15
  attr_reader(:line_start)
8
-
16
+
9
17
  class ScanError < StandardError; end
10
18
 
11
- # Constructor. Initialize a tokenizer for Skeem.
19
+ # Constructor. Initialize a tokenizer.
12
20
  # @param source [String] input text to tokenize.
13
21
  def initialize(source)
14
22
  @scanner = StringScanner.new('')
15
23
  restart(source)
16
24
  end
17
25
 
18
- # @param source [String] Skeem text to tokenize.
26
+ # @param source [String] input text to tokenize.
19
27
  def restart(source)
20
28
  @scanner.string = source
21
29
  @lineno = 1
@@ -32,13 +40,13 @@ class BaseTokenizer
32
40
 
33
41
  return tok_sequence
34
42
  end
35
-
43
+
36
44
  protected
37
-
45
+
38
46
  # Patterns:
39
47
  # Unambiguous single character
40
48
  # Conditional single character:
41
- # (e.g. '+' operator, '+' prefix for positive numbers)
49
+ # (e.g. '+' operator, '+' prefix for positive numbers)
42
50
  def _next_token
43
51
  skip_whitespaces
44
52
  curr_ch = scanner.peek(1)
@@ -55,29 +63,29 @@ class BaseTokenizer
55
63
 
56
64
  return token
57
65
  end
58
-
66
+
59
67
  def recognize_token
60
68
  raise NotImplementedError
61
69
  end
62
-
70
+
63
71
  def build_token(aSymbolName, aLexeme, aFormat = :default)
64
72
  begin
65
73
  value = convert_to(aLexeme, aSymbolName, aFormat)
66
74
  col = scanner.pos - aLexeme.size - @line_start + 1
67
75
  pos = Rley::Lexical::Position.new(@lineno, col)
68
76
  token = Rley::Lexical::Token.new(value, aSymbolName, pos)
69
- rescue StandardError => exc
77
+ rescue StandardError => e
70
78
  puts "Failing with '#{aSymbolName}' and '#{aLexeme}'"
71
- raise exc
79
+ raise e
72
80
  end
73
81
 
74
82
  return token
75
83
  end
76
-
84
+
77
85
  def convert_to(aLexeme, _symbol_name, _format)
78
86
  return aLexeme
79
87
  end
80
-
88
+
81
89
  def skip_whitespaces
82
90
  pre_pos = scanner.pos
83
91
 
@@ -91,21 +99,16 @@ class BaseTokenizer
91
99
  ws_found = true
92
100
  next_line
93
101
  end
94
- # next_ch = scanner.peek(1)
95
- # if next_ch == ';'
96
- # cmt_found = true
97
- # scanner.skip(/;[^\r\n]*(?:(?:\r\n)|\r|\n)?/)
98
- # next_line
99
- # end
102
+
100
103
  break unless ws_found || cmt_found
101
104
  end
102
105
 
103
106
  curr_pos = scanner.pos
104
107
  return if curr_pos == pre_pos
105
108
  end
106
-
109
+
107
110
  def next_line
108
111
  @lineno += 1
109
112
  @line_start = scanner.pos
110
- end
113
+ end
111
114
  end # class