antlr4-runtime 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +15 -0
  3. data/.rspec +3 -0
  4. data/.travis.yml +7 -0
  5. data/CODE_OF_CONDUCT.md +74 -0
  6. data/Gemfile +4 -0
  7. data/Gemfile.lock +35 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +65 -0
  10. data/Rakefile +6 -0
  11. data/antlr4-runtime.gemspec +30 -0
  12. data/bin/console +14 -0
  13. data/bin/setup +8 -0
  14. data/ext/rumourhash/Makefile +264 -0
  15. data/ext/rumourhash/extconf.rb +3 -0
  16. data/ext/rumourhash/rumourhash.c +59 -0
  17. data/lib/antlr4/runtime.rb +37 -0
  18. data/lib/antlr4/runtime/abstract_parse_tree_visitor.rb +43 -0
  19. data/lib/antlr4/runtime/abstract_predicate_transition.rb +11 -0
  20. data/lib/antlr4/runtime/action_transition.rb +29 -0
  21. data/lib/antlr4/runtime/ambiguity_info.rb +10 -0
  22. data/lib/antlr4/runtime/antlr_error_listener.rb +15 -0
  23. data/lib/antlr4/runtime/antlr_error_strategy.rb +24 -0
  24. data/lib/antlr4/runtime/antlr_file_stream.rb +17 -0
  25. data/lib/antlr4/runtime/antlr_input_stream.rb +6 -0
  26. data/lib/antlr4/runtime/array_2d_hash_set.rb +471 -0
  27. data/lib/antlr4/runtime/array_prediction_context.rb +76 -0
  28. data/lib/antlr4/runtime/atn.rb +100 -0
  29. data/lib/antlr4/runtime/atn_config.rb +140 -0
  30. data/lib/antlr4/runtime/atn_config_set.rb +150 -0
  31. data/lib/antlr4/runtime/atn_deserialization_options.rb +48 -0
  32. data/lib/antlr4/runtime/atn_deserializer.rb +737 -0
  33. data/lib/antlr4/runtime/atn_simulator.rb +69 -0
  34. data/lib/antlr4/runtime/atn_state.rb +118 -0
  35. data/lib/antlr4/runtime/atn_type.rb +8 -0
  36. data/lib/antlr4/runtime/atom_transition.rb +27 -0
  37. data/lib/antlr4/runtime/bail_error_strategy.rb +31 -0
  38. data/lib/antlr4/runtime/base_error_listener.rb +18 -0
  39. data/lib/antlr4/runtime/basic_block_start_state.rb +12 -0
  40. data/lib/antlr4/runtime/basic_state.rb +11 -0
  41. data/lib/antlr4/runtime/bit_set.rb +54 -0
  42. data/lib/antlr4/runtime/block_end_state.rb +15 -0
  43. data/lib/antlr4/runtime/block_start_state.rb +12 -0
  44. data/lib/antlr4/runtime/buffered_token_stream.rb +335 -0
  45. data/lib/antlr4/runtime/char_stream.rb +6 -0
  46. data/lib/antlr4/runtime/char_streams.rb +12 -0
  47. data/lib/antlr4/runtime/chunk.rb +4 -0
  48. data/lib/antlr4/runtime/code_point_char_stream.rb +83 -0
  49. data/lib/antlr4/runtime/common_token.rb +125 -0
  50. data/lib/antlr4/runtime/common_token_factory.rb +30 -0
  51. data/lib/antlr4/runtime/common_token_stream.rb +63 -0
  52. data/lib/antlr4/runtime/console_error_listener.rb +12 -0
  53. data/lib/antlr4/runtime/context_sensitivity_info.rb +7 -0
  54. data/lib/antlr4/runtime/decision_event_info.rb +19 -0
  55. data/lib/antlr4/runtime/decision_info.rb +36 -0
  56. data/lib/antlr4/runtime/decision_state.rb +15 -0
  57. data/lib/antlr4/runtime/default_error_strategy.rb +314 -0
  58. data/lib/antlr4/runtime/dfa.rb +97 -0
  59. data/lib/antlr4/runtime/dfa_serializer.rb +62 -0
  60. data/lib/antlr4/runtime/dfa_state.rb +109 -0
  61. data/lib/antlr4/runtime/diagnostic_error_listener.rb +58 -0
  62. data/lib/antlr4/runtime/double_key_map.rb +49 -0
  63. data/lib/antlr4/runtime/empty_prediction_context.rb +35 -0
  64. data/lib/antlr4/runtime/epsilon_transition.rb +27 -0
  65. data/lib/antlr4/runtime/equality_comparator.rb +4 -0
  66. data/lib/antlr4/runtime/error_info.rb +7 -0
  67. data/lib/antlr4/runtime/error_node.rb +5 -0
  68. data/lib/antlr4/runtime/error_node_impl.rb +12 -0
  69. data/lib/antlr4/runtime/failed_predicate_exception.rb +33 -0
  70. data/lib/antlr4/runtime/flexible_hash_map.rb +232 -0
  71. data/lib/antlr4/runtime/input_mismatch_exception.rb +20 -0
  72. data/lib/antlr4/runtime/int_stream.rb +31 -0
  73. data/lib/antlr4/runtime/integer.rb +14 -0
  74. data/lib/antlr4/runtime/interval.rb +111 -0
  75. data/lib/antlr4/runtime/interval_set.rb +540 -0
  76. data/lib/antlr4/runtime/lexer.rb +257 -0
  77. data/lib/antlr4/runtime/lexer_action.rb +12 -0
  78. data/lib/antlr4/runtime/lexer_action_executor.rb +75 -0
  79. data/lib/antlr4/runtime/lexer_action_type.rb +12 -0
  80. data/lib/antlr4/runtime/lexer_atn_config.rb +50 -0
  81. data/lib/antlr4/runtime/lexer_atn_simulator.rb +522 -0
  82. data/lib/antlr4/runtime/lexer_channel_action.rb +51 -0
  83. data/lib/antlr4/runtime/lexer_custom_action.rb +49 -0
  84. data/lib/antlr4/runtime/lexer_dfa_serializer.rb +12 -0
  85. data/lib/antlr4/runtime/lexer_indexed_custom_action.rb +49 -0
  86. data/lib/antlr4/runtime/lexer_mode_action.rb +51 -0
  87. data/lib/antlr4/runtime/lexer_more_action.rb +41 -0
  88. data/lib/antlr4/runtime/lexer_no_viable_alt_exception.rb +4 -0
  89. data/lib/antlr4/runtime/lexer_pop_mode_action.rb +41 -0
  90. data/lib/antlr4/runtime/lexer_push_mode_action.rb +51 -0
  91. data/lib/antlr4/runtime/lexer_skip_action.rb +43 -0
  92. data/lib/antlr4/runtime/lexer_type_action.rb +51 -0
  93. data/lib/antlr4/runtime/ll1_analyzer.rb +133 -0
  94. data/lib/antlr4/runtime/lookahead_event_info.rb +10 -0
  95. data/lib/antlr4/runtime/loop_end_state.rb +15 -0
  96. data/lib/antlr4/runtime/murmur_hash.rb +99 -0
  97. data/lib/antlr4/runtime/no_viable_alt_exception.rb +7 -0
  98. data/lib/antlr4/runtime/not_set_transition.rb +20 -0
  99. data/lib/antlr4/runtime/object_equality_comparator.rb +18 -0
  100. data/lib/antlr4/runtime/ordered_atn_config_set.rb +15 -0
  101. data/lib/antlr4/runtime/parse_cancellation_exception.rb +5 -0
  102. data/lib/antlr4/runtime/parse_tree.rb +7 -0
  103. data/lib/antlr4/runtime/parse_tree_listener.rb +4 -0
  104. data/lib/antlr4/runtime/parse_tree_visitor.rb +4 -0
  105. data/lib/antlr4/runtime/parser.rb +522 -0
  106. data/lib/antlr4/runtime/parser_atn_simulator.rb +1171 -0
  107. data/lib/antlr4/runtime/parser_rule_context.rb +186 -0
  108. data/lib/antlr4/runtime/plus_block_start_state.rb +11 -0
  109. data/lib/antlr4/runtime/plus_loopback_state.rb +12 -0
  110. data/lib/antlr4/runtime/precedence_predicate_transition.rb +31 -0
  111. data/lib/antlr4/runtime/predicate.rb +6 -0
  112. data/lib/antlr4/runtime/predicate_eval_info.rb +16 -0
  113. data/lib/antlr4/runtime/predicate_transition.rb +35 -0
  114. data/lib/antlr4/runtime/prediction_context.rb +103 -0
  115. data/lib/antlr4/runtime/prediction_context_cache.rb +28 -0
  116. data/lib/antlr4/runtime/prediction_context_utils.rb +407 -0
  117. data/lib/antlr4/runtime/prediction_mode.rb +213 -0
  118. data/lib/antlr4/runtime/profiling_atn_simulator.rb +149 -0
  119. data/lib/antlr4/runtime/proxy_error_listener.rb +33 -0
  120. data/lib/antlr4/runtime/range_transition.rb +29 -0
  121. data/lib/antlr4/runtime/recognition_exception.rb +17 -0
  122. data/lib/antlr4/runtime/recognizer.rb +136 -0
  123. data/lib/antlr4/runtime/rule_context.rb +131 -0
  124. data/lib/antlr4/runtime/rule_context_with_alt_num.rb +11 -0
  125. data/lib/antlr4/runtime/rule_node.rb +8 -0
  126. data/lib/antlr4/runtime/rule_start_state.rb +17 -0
  127. data/lib/antlr4/runtime/rule_stop_state.rb +12 -0
  128. data/lib/antlr4/runtime/rule_tag_token.rb +64 -0
  129. data/lib/antlr4/runtime/rule_transition.rb +29 -0
  130. data/lib/antlr4/runtime/semantic_context.rb +313 -0
  131. data/lib/antlr4/runtime/set_transition.rb +29 -0
  132. data/lib/antlr4/runtime/singleton_prediction_context.rb +56 -0
  133. data/lib/antlr4/runtime/star_block_start_state.rb +12 -0
  134. data/lib/antlr4/runtime/star_loop_entry_state.rb +17 -0
  135. data/lib/antlr4/runtime/star_loopback_state.rb +16 -0
  136. data/lib/antlr4/runtime/syntax_tree.rb +6 -0
  137. data/lib/antlr4/runtime/tag_chunk.rb +22 -0
  138. data/lib/antlr4/runtime/terminal_node.rb +5 -0
  139. data/lib/antlr4/runtime/terminal_node_impl.rb +50 -0
  140. data/lib/antlr4/runtime/text_chunk.rb +16 -0
  141. data/lib/antlr4/runtime/token.rb +13 -0
  142. data/lib/antlr4/runtime/token_stream.rb +13 -0
  143. data/lib/antlr4/runtime/token_tag_token.rb +22 -0
  144. data/lib/antlr4/runtime/tokens_start_state.rb +14 -0
  145. data/lib/antlr4/runtime/transition.rb +51 -0
  146. data/lib/antlr4/runtime/tree.rb +4 -0
  147. data/lib/antlr4/runtime/trees.rb +195 -0
  148. data/lib/antlr4/runtime/triple.rb +40 -0
  149. data/lib/antlr4/runtime/utils.rb +117 -0
  150. data/lib/antlr4/runtime/uuid.rb +46 -0
  151. data/lib/antlr4/runtime/version.rb +5 -0
  152. data/lib/antlr4/runtime/vocabulary.rb +12 -0
  153. data/lib/antlr4/runtime/vocabulary_impl.rb +82 -0
  154. data/lib/antlr4/runtime/wildcard_transition.rb +20 -0
  155. data/lib/antlr4/runtime/writable_token.rb +7 -0
  156. metadata +243 -0
@@ -0,0 +1,257 @@
1
+ require 'antlr4/runtime/recognizer'
2
+ require 'antlr4/runtime/token'
3
+ require 'antlr4/runtime/common_token_factory'
4
+ require 'antlr4/runtime/lexer_no_viable_alt_exception'
5
+
6
+ module Antlr4::Runtime
7
+
8
# Base lexer. Reads characters from @_input and asks the ATN interpreter
# held in @_interp to recognise one token at a time; the recognised token is
# built through @_factory and kept in @_token.
#
# NOTE(review): @_interp is never assigned in this class; presumably a
# subclass (e.g. a generated lexer) sets it -- confirm.
class Lexer < Recognizer
  DEFAULT_MODE = 0
  MORE = -2
  SKIP = -3

  DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL
  HIDDEN = Token::HIDDEN_CHANNEL
  MIN_CHAR_VALUE = 0x0000
  MAX_CHAR_VALUE = 0x10FFFF

  attr_accessor :_input
  attr_accessor :_token_start_char_index
  attr_accessor :_token_start_line
  attr_accessor :_token_start_char_position_in_line
  attr_accessor :_hit_eof
  attr_accessor :_channel
  attr_accessor :_type
  attr_accessor :_mode_stack
  attr_accessor :_mode
  attr_accessor :_text

  # The token most recently emitted (nil while a match is in progress).
  # Backed by @_token, the variable every other method in this class uses;
  # the previous attr_accessor read @token, which nothing ever wrote.
  def token
    @_token
  end

  # Overrides the token that next_token will return.
  def token=(token)
    @_token = token
  end

  # Rewinds the input (when there is one) and restores every piece of
  # per-token and per-mode state to its initial value.
  def reset
    @_input.seek(0) unless @_input.nil? # rewind the input

    @_token = nil
    @_type = Token::INVALID_TYPE
    @_channel = Token::DEFAULT_CHANNEL
    @_token_start_char_index = -1
    @_token_start_char_position_in_line = -1
    @_token_start_line = -1
    @_text = nil

    @_hit_eof = false
    @_mode = DEFAULT_MODE
    @_mode_stack.clear

    @_interp.reset unless @_interp.nil?
  end

  # input - optional character stream; when given it is also recorded,
  #         together with this lexer, in the pair handed to the token factory.
  def initialize(input = nil)
    super()
    unless input.nil?
      @_input = input
      @_token_factory_source_pair = OpenStruct.new
      @_token_factory_source_pair.a = self
      @_token_factory_source_pair.b = input
    end
    @_mode_stack = [] # must exist before reset clears it
    reset
    @_factory = CommonTokenFactory.instance
  end

  # Returns the next token from the input, skipping over SKIP results.
  # Raises IllegalStateException when no input stream has been set.
  def next_token
    if @_input.nil?
      raise IllegalStateException, 'nextToken requires a non-nil input stream.'
    end

    # Mark the start location so an unbuffered stream keeps at least the
    # text of the current token.
    token_start_marker = @_input.mark
    begin
      repeat_outer = true
      repeat_outer = next_token_inner while repeat_outer
      @_token
    ensure
      # always release the marker, or an unbuffered stream keeps buffering
      @_input.release(token_start_marker)
    end
  end

  # One matching attempt. Returns true when the attempt ended in SKIP and
  # the caller should try again, false once @_token has been produced.
  def next_token_inner
    if @_hit_eof
      emit_eof
      return false
    end

    @_token = nil
    @_channel = Token::DEFAULT_CHANNEL
    @_token_start_char_index = @_input.index
    @_token_start_char_position_in_line = @_interp.char_position_in_line
    @_token_start_line = @_interp.line
    @_text = nil

    loop do
      @_type = Token::INVALID_TYPE

      begin
        ttype = @_interp.match(@_input, @_mode)
      rescue LexerNoViableAltException => e
        notify_listeners(e) # report error
        recover1(e)
        ttype = SKIP
      end
      @_hit_eof = true if @_input.la(1) == IntStream::EOF
      # keep a type that was set while matching (e.g. through skip/more)
      @_type = ttype if @_type == Token::INVALID_TYPE
      return true if @_type == SKIP
      break if @_type != MORE
    end

    emit if @_token.nil?
    false
  end

  # Marks the current match as one to be discarded.
  def skip
    @_type = SKIP
  end

  # Marks the current match as a prefix of a longer token.
  def more
    @_type = MORE
  end

  # Switches the lexer to mode m.
  def mode(m)
    @_mode = m
  end

  # Saves the current mode on the stack and switches to mode m.
  def push_mode(m)
    # interpolate: m is a mode number, and String + Integer raises TypeError
    puts("pushMode #{m}") if LexerATNSimulator.debug
    @_mode_stack.push(@_mode)
    mode(m)
  end

  # Returns to the most recently pushed mode and returns it.
  # Raises EmptyStackException when nothing was pushed.
  def pop_mode
    raise EmptyStackException if @_mode_stack.empty?

    puts("popMode back to #{@_mode_stack[-1]}") if LexerATNSimulator.debug
    mode(@_mode_stack.pop)
    @_mode
  end

  # Replaces the input stream and resets the lexer. The stream is cleared
  # before reset runs, so reset does not seek on the old stream.
  def input_stream(input)
    @_input = nil
    @_token_factory_source_pair = OpenStruct.new
    @_token_factory_source_pair.a = self
    @_token_factory_source_pair.b = @_input
    reset
    @_input = input
    @_token_factory_source_pair.b = @_input
  end

  def source_name
    @_input.get_source_name
  end

  # Records token as the current token, or builds one from the lexer's
  # current type, text, channel and start position when none is given.
  def emit(token = nil)
    @_token =
      if token.nil?
        @_factory.create(@_token_factory_source_pair, @_type, @_text, @_channel,
                         @_token_start_char_index, char_index - 1,
                         @_token_start_line, @_token_start_char_position_in_line)
      else
        token
      end
  end

  # Builds, emits and returns an EOF token positioned at the current index.
  def emit_eof
    cpos = char_position_in_line
    eof = @_factory.create(@_token_factory_source_pair, Token::EOF, nil, Token::DEFAULT_CHANNEL,
                           @_input.index, @_input.index - 1, line, cpos)
    emit(eof)
    eof
  end

  def line
    @_interp.line
  end

  def char_position_in_line
    @_interp.char_position_in_line
  end

  def set_line(line)
    @_interp.set_line(line)
  end

  def set_char_position_in_line(char_position_in_line)
    @_interp.set_char_position_in_line(char_position_in_line)
  end

  # Index of the next character to be read from the input.
  def char_index
    @_input.index
  end

  # The explicitly assigned token text, or else the text the interpreter
  # reports for the current match.
  def text
    return @_text unless @_text.nil?

    @_interp.text(@_input)
  end

  # Collects every token up to, but not including, EOF.
  def all_tokens
    tokens = []
    t = next_token
    while t.type != Token::EOF
      tokens << t
      t = next_token
    end
    tokens
  end

  # Error recovery after a failed match: skip one character unless at EOF.
  def recover1(_e)
    @_interp.consume(@_input) if @_input.la(1) != IntStream::EOF
  end

  # Reports a token recognition error to the registered error listener(s).
  def notify_listeners(e)
    bad_text = @_input.text(Interval.of(@_token_start_char_index, @_input.index))
    msg = "token recognition error at: '" + error_display(bad_text) + "'"

    listener = error_listener_dispatch
    listener.syntax_error(self, nil, @_token_start_line, @_token_start_char_position_in_line, msg, e)
  end

  # Returns s with newline, tab and carriage return spelled as escapes.
  def error_display(s)
    s.chars.map { |c| error_display_char(c) }.join
  end

  # Printable form of one character.
  #
  # c - a one-character String, an Integer code point, or Token::EOF.
  #
  # Returns "<EOF>" for Token::EOF, a backslash escape for newline, tab and
  # carriage return, and the character itself otherwise.
  def error_display_char(c)
    # checked first: appending a negative Integer to a String raises
    return '<EOF>' if c == Token::EOF

    ch = c.is_a?(Integer) ? c.chr(Encoding::UTF_8) : c.to_s
    case ch
    when "\n" # double quotes: '\n' would be a backslash followed by "n"
      '\\n'
    when "\t"
      '\\t'
    when "\r"
      '\\r'
    else
      ch.dup
    end
  end

  # error_display_char(c) wrapped in single quotes.
  def char_error_display(c)
    "'" + error_display_char(c) + "'"
  end

  # Error recovery that consumes one symbol from the input.
  def recover2(_re)
    @_input.consume
  end
end
257
+ end
@@ -0,0 +1,12 @@
1
+ module Antlr4::Runtime
2
# Common base for lexer actions. Every method here is an empty placeholder
# that returns nil; concrete actions are expected to override them.
class LexerAction
  # Kind of action. Nil in this base class.
  def action_type; end

  # Whether the action depends on the input position. Nil (falsy) here.
  def position_dependent?; end

  # Runs the action against lexer. Does nothing in this base class.
  def execute(lexer); end
end
12
+ end
@@ -0,0 +1,75 @@
1
+ require 'antlr4/runtime/lexer_indexed_custom_action'
2
+
3
+ module Antlr4::Runtime
4
+
5
# An ordered list of lexer actions together with a hash computed once from
# that list.
class LexerActionExecutor
  attr_reader :lexer_actions
  attr_reader :hash_code

  # lexer_actions - Array of actions, kept in execution order.
  def initialize(lexer_actions)
    @lexer_actions = lexer_actions
    @hash_code = MurmurHash.hash_objs(lexer_actions)
  end

  # Returns a new executor holding the actions of lexer_action_executor
  # (which may be nil) followed by lexer_action. The input executor is not
  # modified.
  def self.append(lexer_action_executor, lexer_action)
    return LexerActionExecutor.new([lexer_action]) if lexer_action_executor.nil?

    LexerActionExecutor.new(lexer_action_executor.lexer_actions + [lexer_action])
  end

  # Wraps every position-dependent action that is not already a
  # LexerIndexedCustomAction in one carrying offset.
  #
  # Returns self when nothing needed wrapping, otherwise a new executor.
  def fix_offset_before_match(offset)
    updated = nil
    @lexer_actions.each_with_index do |action, i|
      next unless action.position_dependent? && !action.is_a?(LexerIndexedCustomAction)

      updated ||= @lexer_actions.dup # copy lazily, only when a change is needed
      updated[i] = LexerIndexedCustomAction.new(offset, action)
    end

    updated.nil? ? self : LexerActionExecutor.new(updated)
  end

  # Runs every action against lexer.
  #
  # input       - stream positioned at the end of the matched token.
  # start_index - index of the token's first character.
  #
  # Indexed actions seek to start_index + their offset first; other
  # position-dependent actions seek back to the end of the token. The stream
  # is restored to its entry position afterwards if it was left elsewhere.
  def execute(lexer, input, start_index)
    requires_seek = false
    stop_index = input.index
    begin
      @lexer_actions.each do |action|
        if action.is_a?(LexerIndexedCustomAction)
          # NOTE(review): getOffset is the accessor the original code calls;
          # LexerIndexedCustomAction is not visible here -- confirm its name.
          offset = action.getOffset
          input.seek(start_index + offset)
          requires_seek = ((start_index + offset) != stop_index)
        elsif action.position_dependent?
          input.seek(stop_index)
          requires_seek = false
        end

        # Previously this ran only for non-indexed actions, so an indexed
        # action repositioned the stream and was then silently dropped.
        # NOTE(review): assumes LexerIndexedCustomAction#execute forwards to
        # the action it wraps -- confirm.
        action.execute(lexer)
      end
      nil
    ensure
      input.seek(stop_index) if requires_seek
    end
  end

  # Two executors are equal when their hash codes and action lists match.
  def eql?(obj)
    return true if equal?(obj)
    return false unless obj.is_a?(LexerActionExecutor)

    # was obj._a, which LexerActionExecutor does not define
    @hash_code == obj.hash_code && @lexer_actions == obj.lexer_actions
  end
end
75
+ end
@@ -0,0 +1,12 @@
1
+ module Antlr4::Runtime
2
# Integer codes naming each kind of lexer action.
class LexerActionType
  CHANNEL   = 0
  CUSTOM    = 1
  MODE      = 2
  MORE      = 3
  POP_MODE  = 4
  PUSH_MODE = 5
  SKIP      = 6
  TYPE      = 7
end
12
+ end
@@ -0,0 +1,50 @@
1
+ require 'antlr4/runtime/atn_config'
2
+ require 'antlr4/runtime/semantic_context'
3
+
4
+ module Antlr4::Runtime
5
+
6
# ATN configuration used by the lexer: an ATNConfig that also carries the
# lexer action executor collected so far and a flag recording whether the
# path went through a non-greedy decision.
#
# Instances are created empty with .new and then filled in by one of the
# lexer_atn_configN initialisers below.
class LexerATNConfig < ATNConfig
  attr_accessor :passed_through_non_greedy_decision
  attr_accessor :lexer_action_executor

  def initialize
    super
    @passed_through_non_greedy_decision = false
    @lexer_action_executor = nil
  end

  # Fresh configuration for (state, alt, context) with no executor.
  def lexer_atn_config1(state, alt, context)
    atn_config2(state, alt, context, SemanticContext::NONE)
    @passed_through_non_greedy_decision = false
    @lexer_action_executor = nil
  end

  # Fresh configuration for (state, alt, context) with the given executor.
  def lexer_atn_config2(state, alt, context, lexer_action_executor)
    # atn_config2 is the (state, alt, context, semantic_context) initialiser,
    # as used by lexer_atn_config1. This used to call atn_config7, which
    # every other caller in this class invokes as (c, state, context, ...).
    atn_config2(state, alt, context, SemanticContext::NONE)
    @lexer_action_executor = lexer_action_executor
    @passed_through_non_greedy_decision = false
  end

  # Copy of c moved to state, keeping c's context and executor.
  def lexer_atn_config3(c, state)
    atn_config7(c, state, c.context, c.semantic_context)
    @lexer_action_executor = c.lexer_action_executor
    @passed_through_non_greedy_decision = check_non_greedy_decision(c, state)
  end

  # Copy of c moved to state with a replacement executor.
  def lexer_atn_config4(c, state, lexer_action_executor)
    atn_config7(c, state, c.context, c.semantic_context)
    @lexer_action_executor = lexer_action_executor
    @passed_through_non_greedy_decision = check_non_greedy_decision(c, state)
  end

  # Copy of c moved to state with a replacement context.
  def lexer_atn_config5(c, state, context)
    atn_config7(c, state, context, c.semantic_context)
    @lexer_action_executor = c.lexer_action_executor
    @passed_through_non_greedy_decision = check_non_greedy_decision(c, state)
  end

  # True when source already passed a non-greedy decision, or target is a
  # DecisionState flagged non_greedy.
  def check_non_greedy_decision(source, target)
    source.passed_through_non_greedy_decision || (target.is_a?(DecisionState) && target.non_greedy)
  end
end
50
+ end
@@ -0,0 +1,522 @@
1
+ require 'antlr4/runtime/atn_simulator'
2
+ require 'antlr4/runtime/lexer_atn_config'
3
+ require 'antlr4/runtime/ordered_atn_config_set'
4
+ require 'antlr4/runtime/lexer_action_executor'
5
+
6
+ module Antlr4::Runtime
7
+
8
+ class LexerATNSimulator < ATNSimulator
9
+ MIN_DFA_EDGE = 0
10
+ MAX_DFA_EDGE = 127 # forces unicode to stay in ATN
11
+
12
+
13
# Class-wide state read by the instance methods through @@debug / @@empty.
@@debug = false
@@empty = EmptyPredictionContext.new(Integer::MAX)

class << self
  # Debug-tracing flag. Reads @@debug, the variable the instance methods
  # test; the former attr_reader exposed an unrelated, never-assigned @debug.
  def debug
    class_variable_get(:@@debug)
  end

  # The shared empty prediction context stored in @@empty.
  def empty
    class_variable_get(:@@empty)
  end
end
18
+
19
# Snapshot of the simulator at an accept state: input index, line, column
# and the DFA state that accepted.
class SimState
  attr_accessor :index
  attr_accessor :line
  attr_accessor :char_pos
  attr_accessor :dfa_state

  # Start from the cleared values instead of leaving every field nil until
  # the first explicit reset.
  def initialize
    reset
  end

  # Clears the snapshot (no accept state recorded).
  def reset
    @index = -1
    @line = 0
    @char_pos = -1
    @dfa_state = nil
  end
end
32
+
33
+ attr_reader :char_position_in_line
34
+ attr_reader :line
35
+
36
# recog                - recognizer the simulator works for; may be nil.
# atn                  - passed to the superclass with shared_context_cache.
# decision_to_dfa      - Array of DFAs, indexed by lexer mode in #match.
# shared_context_cache - passed to the superclass.
def initialize(recog, atn, decision_to_dfa, shared_context_cache)
  super(atn, shared_context_cache)

  # collaborators
  @recog = recog
  @decision_to_dfa = decision_to_dfa

  # tracing switches
  @@debug = false
  @dfa_debug = false

  # position tracking
  @start_index = -1
  @line = 1
  @char_position_in_line = 0
  @mode = Lexer::DEFAULT_MODE

  # bookkeeping
  @prev_accept = SimState.new
  @match_calls = 0
end
50
+
51
# Copies column, line, mode and start index from another simulator.
# Returns the copied start index.
def copy_state(simulator)
  %i[char_position_in_line line mode start_index].each do |field|
    instance_variable_set("@#{field}", simulator.public_send(field))
  end
  @start_index
end
57
+
58
# Matches one token from input in the given lexer mode and returns whatever
# match_atn / exec_atn return. The mark taken on entry is always released.
def match(input, mode)
  @match_calls += 1
  @mode = mode
  marker = input.mark

  begin
    @start_index = input.index
    @prev_accept.reset
    start = @decision_to_dfa[mode].s0
    # no cached start state yet: build it from the ATN, otherwise reuse it
    start.nil? ? match_atn(input) : exec_atn(input, start)
  ensure
    input.release(marker)
  end
end
76
+
77
# Returns the simulator to its initial position: no recorded accept state,
# start index -1, line 1, column 0, default lexer mode.
def reset
  @prev_accept.reset
  @start_index = -1
  @line = 1
  @char_position_in_line = 0
  # DEFAULT_MODE is a constant: Lexer.DEFAULT_MODE would be a method call
  # and raise NoMethodError.
  @mode = Lexer::DEFAULT_MODE
end
84
+
85
# Replaces every entry of @decision_to_dfa with a new DFA built from the
# ATN's decision state for that index. Returns nil.
def clear_dfa
  @decision_to_dfa.each_index do |decision|
    @decision_to_dfa[decision] = DFA.new(atn.decision_state(decision), decision)
  end
  nil
end
92
+
93
# Builds the start DFA state for the current mode from the ATN, caches it as
# the mode's s0 unless predicates were involved, then runs exec_atn from it.
# Returns exec_atn's result.
def match_atn(input)
  start_state = atn.mode_to_start_state[@mode]
  printf format("matchATN mode %d start: %s\n", @mode, start_state) if @@debug

  mode_at_entry = @mode

  s0_closure = compute_start_state(input, start_state)
  # remember whether predicates were seen, then clear the flag before the
  # set is turned into a DFA state
  suppress_edge = s0_closure.has_semantic_context
  s0_closure.has_semantic_context = false

  next_state = add_dfa_state(s0_closure)
  @decision_to_dfa[@mode].s0 = next_state unless suppress_edge

  exec_atn(input, next_state).tap do
    if @@debug
      printf format("DFA after matchATN: %s\n", @decision_to_dfa[mode_at_entry].to_lexer_string)
    end
  end
end
115
+
116
# Walks the input starting from DFA state ds0, recording the latest accept
# state in @prev_accept, until no transition exists or EOF is accepted.
# Returns the result of fail_or_accept.
def exec_atn(input, ds0)
  printf format("start state closure=%s\n", ds0.configs) if @@debug

  # an accepting start state allows zero-length tokens
  capture_sim_state(@prev_accept, input, ds0) if ds0.is_accept_state

  t = input.la(1)
  s = ds0 # current/from DFA state

  loop do
    printf format("execATN loop starting closure: %s\n", s.configs) if @@debug

    # Reuse an edge already stored on s for t when there is one; only
    # otherwise compute the reach set. Inside loops (e.g. .* in comments)
    # a character usually leads back to a state that already has its edges,
    # so the reach/closure computation is skipped.
    target = existing_target_state(s, t) || compute_target_state(input, s, t)

    break if target == @@error

    # Consume before capturing the accept state so that index, line and
    # column describe the end of the token.
    consume(input) unless t == IntStream::EOF

    accepting = target.is_accept_state
    capture_sim_state(@prev_accept, input, target) if accepting
    break if accepting && t == IntStream::EOF

    t = input.la(1)
    s = target # the target becomes the new from-state
  end

  fail_or_accept(@prev_accept, input, s.configs, t)
end
170
+
171
# Looks up the edge already stored on DFA state s for symbol t.
# Returns nil when s has no edge table, t lies outside
# MIN_DFA_EDGE..MAX_DFA_EDGE, or no edge is stored for t.
def existing_target_state(s, t)
  return nil if s.edges.nil?
  return nil unless t.between?(MIN_DFA_EDGE, MAX_DFA_EDGE)

  target = s.edges[t - MIN_DFA_EDGE]
  puts "reuse state #{s.state_number} edge to #{target.state_number}" if @@debug && !target.nil?

  target
end
181
+
182
# Computes the DFA state reached from s on symbol t, used when no edge is
# stored yet. Returns the state from add_dfa_edge_dfastate_atnconfigset, or
# @@error when nothing is reachable.
def compute_target_state(input, s, t)
  reach = OrderedATNConfigSet.new

  # fill reach from the configurations of s by following t transitions
  reachable_config_set(input, s.configs, reach, t)

  # something is reachable: add an edge from s to the state for reach
  return add_dfa_edge_dfastate_atnconfigset(s, t, reach) unless reach.empty?

  # Nothing reachable on t. Record that as an error edge so the DFA fails
  # fast next time, unless predicates were involved.
  add_dfa_edge_dfastate_dfastate(s, t, @@error) unless reach.has_semantic_context

  # stop when no more characters can be matched
  @@error
end
203
+
204
# Finishes a match attempt.
#
# With a recorded accept state: runs accept for it and returns that state's
# prediction. Without one: returns Token::EOF when EOF is the very first
# symbol, otherwise raises LexerNoViableAltException.
def fail_or_accept(prev_accept, input, _reach, t)
  accepted = prev_accept.dfa_state

  if accepted.nil?
    # no accept state and EOF is the first char: report EOF
    return Token::EOF if t == IntStream::EOF && input.index == @start_index

    raise LexerNoViableAltException, @recog
  end

  accept(input, accepted.lexer_action_executor, @start_index,
         prev_accept.index, prev_accept.line, prev_accept.char_pos)
  prev_accept.dfa_state.prediction
end
215
+
216
# Fills reach with the configurations reachable from those in closure by
# consuming symbol t.
#
# Once one configuration of an alternative has reached an accept state, the
# remaining configurations of that alternative that passed through a
# non-greedy decision are skipped (tracked through skip_alt).
def reachable_config_set(input, closure, reach, t)
  skip_alt = ATN::INVALID_ALT_NUMBER
  i = 0
  while i < closure.configs.length
    c = closure.configs[i]
    i += 1

    current_alt_reached_accept_state = (c.alt == skip_alt)
    next if current_alt_reached_accept_state && c.passed_through_non_greedy_decision

    printf format("testing %s at %s\n", token_name(t), c.to_s2(@recog, true)) if @@debug

    n = c.state.number_of_transitions
    ti = 0
    while ti < n # for each transition
      target = reachable_target(c.state.transition(ti), t)
      ti += 1
      next if target.nil?

      lexer_action_executor = c.lexer_action_executor
      unless lexer_action_executor.nil?
        # @start_index is the instance variable the rest of the class uses;
        # a bare start_index needs a reader that is not visible here.
        lexer_action_executor = lexer_action_executor.fix_offset_before_match(input.index - @start_index)
      end

      treat_eof_as_epsilon = (t == CharStream::EOF)
      cfg = LexerATNConfig.new
      cfg.lexer_atn_config4(c, target, lexer_action_executor)
      # `closure(...)` with arguments is the method, not the parameter
      next unless closure(input, cfg, reach, current_alt_reached_accept_state, true, treat_eof_as_epsilon)

      # any remaining configs for this alt have a lower priority than the
      # one that just reached an accept state
      skip_alt = c.alt
      break
    end
  end
end
258
+
259
# Commits an accepted token: moves input to index (just past the token's
# last character), adopts the given line and column, and runs the lexer
# actions when both an executor and a recognizer are present.
def accept(input, lexer_action_executor, start_index, index, line, char_pos)
  printf format("ACTION %s\n", lexer_action_executor) if @@debug

  input.seek(index)
  @line = line
  @char_position_in_line = char_pos

  return if lexer_action_executor.nil? || @recog.nil?

  lexer_action_executor.execute(@recog, input, start_index)
end
271
+
272
# Returns the target of trans when it matches symbol t within the lexer's
# character range, otherwise nil.
def reachable_target(trans, t)
  trans.matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE) ? trans.target : nil
end
279
+
280
# Builds the start configuration set for state p: one configuration per
# outgoing transition (alternatives numbered from 1), each expanded through
# closure. Returns the resulting ATNConfigSet.
def compute_start_state(input, p)
  root_context = @@empty
  configs = ATNConfigSet.new

  p.number_of_transitions.times do |i|
    target = p.transition(i).target
    seed = LexerATNConfig.new
    seed.lexer_atn_config1(target, i + 1, root_context)
    closure(input, seed, configs, false, false, false)
  end

  configs
end
293
+
294
# Adds config, and everything reachable from it without consuming input, to
# configs.
#
# Returns true when the alternative has reached an accept state (either on
# entry, via current_alt_reached_accept_state, or during this call).
def closure(input, config, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
  if config.state.is_a? RuleStopState
    if @@debug
      if !@recog.nil?
        printf format("closure at %s rule stop %s\n", @recog.rule_names[config.state.rule_index], config)
      else
        printf format("closure at rule stop %s\n", config)
      end
    end

    if config.context.nil? || config.context.empty_path?
      if config.context.nil? || config.context.empty?
        configs.add(config)
        return true
      end

      # Built with lexer_atn_config5 like the other copies in this method;
      # LexerATNConfig defines no create_from_config2 class method.
      # NOTE(review): EmptyPredictionContext::EMPTY is kept from the
      # original; the rest of this class uses @@empty -- confirm they agree.
      stop_config = LexerATNConfig.new
      stop_config.lexer_atn_config5(config, config.state, EmptyPredictionContext::EMPTY)
      configs.add(stop_config)
      current_alt_reached_accept_state = true
    end

    if !config.context.nil? && !config.context.empty?
      i = 0
      while i < config.context.size
        if config.context.get_return_state(i) != PredictionContext::EMPTY_RETURN_STATE
          new_context = config.context.get_parent(i) # "pop" return state
          return_state = atn.states[config.context.get_return_state(i)]
          c = LexerATNConfig.new
          c.lexer_atn_config5(config, return_state, new_context)
          current_alt_reached_accept_state =
            closure(input, c, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
        end
        i += 1
      end
    end

    return current_alt_reached_accept_state
  end

  # optimization: states with only epsilon transitions are not stored
  unless config.state.only_has_epsilon_transitions
    if !current_alt_reached_accept_state || !config.passed_through_non_greedy_decision
      configs.add(config)
    end
  end

  p = config.state
  i = 0
  while i < p.number_of_transitions
    t = p.transition(i)
    c = epsilon_target(input, config, t, configs, speculative, treat_eof_as_epsilon)
    unless c.nil?
      current_alt_reached_accept_state =
        closure(input, c, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
    end
    i += 1
  end

  current_alt_reached_accept_state
end
351
+
352
+ # side-effect: can alter configs.hasSemanticContext
353
+
354
# Returns the configuration reached from config over transition t without
# consuming input, or nil when t cannot be followed that way.
#
# Side effect: sets configs.has_semantic_context for predicate transitions.
# Raises UnsupportedOperationException for precedence predicates.
def epsilon_target(input, config, t, configs, speculative, treat_eof_as_epsilon)
  c = nil
  case t.serialization_type
  when Transition::RULE
    new_context = SingletonPredictionContext.new(config.context, t.follow_state.state_number)
    c = LexerATNConfig.new
    c.lexer_atn_config5(config, t.target, new_context)

  when Transition::PRECEDENCE
    raise UnsupportedOperationException, 'Precedence predicates are not supported in lexers.'

  when Transition::PREDICATE
    # interpolated: String + Integer would raise TypeError
    puts("EVAL rule #{t.rule_index}:#{t.pred_index}") if @@debug
    configs.has_semantic_context = true
    if evaluate_predicate(input, t.rule_index, t.pred_index, speculative)
      # lexer_atn_config3 is the (config, state) copy used elsewhere in this
      # method; LexerATNConfig defines no create_from_config class method.
      c = LexerATNConfig.new
      c.lexer_atn_config3(config, t.target)
    end

  when Transition::ACTION
    if config.context.nil? || config.context.empty_path?
      # execute actions anywhere in the start rule for a token.
      #
      # TODO: if the entry rule is invoked recursively, some actions may be
      # executed during the recursive call. The problem can appear when
      # empty_path? is true but empty? is false. In this case, the config
      # needs to be split into two contexts - one with just the empty path
      # and another with everything but the empty path. The current
      # algorithm cannot return two configurations from here, so more
      # changes are needed before the split can be supported.
      #
      # NOTE(review): @atn._a is kept from the original; ATN is not visible
      # here -- confirm this is the accessor for the ATN's lexer actions.
      lexer_action_executor = LexerActionExecutor.append(config.lexer_action_executor, @atn._a[t.action_index])
      c = LexerATNConfig.new
      c.lexer_atn_config4(config, t.target, lexer_action_executor)
    else # ignore actions in referenced rules
      c = LexerATNConfig.new
      c.lexer_atn_config3(config, t.target)
    end

  when Transition::EPSILON
    c = LexerATNConfig.new
    c.lexer_atn_config3(config, t.target)

  when Transition::ATOM, Transition::RANGE, Transition::SET
    # EOF, MIN_CHAR_VALUE and MAX_CHAR_VALUE are constants; the previous
    # `.`-style access was a method call and raised NoMethodError.
    if treat_eof_as_epsilon &&
       t.matches(CharStream::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)
      c = LexerATNConfig.new
      c.lexer_atn_config3(config, t.target)
    end
  end

  c
end
414
+
415
# Evaluates semantic predicate +pred_index+ of rule +rule_index+ through the
# recognizer stored in @recog.
#
# input       - character stream; only touched when +speculative+ is true
# rule_index  - forwarded to @recog.sempred
# pred_index  - forwarded to @recog.sempred
# speculative - when true, one character is consumed before the predicate
#               runs and the stream position, @line and
#               @char_position_in_line are restored afterwards
#
# Returns true when no recognizer is set, otherwise whatever
# @recog.sempred returns.
def evaluate_predicate(input, rule_index, pred_index, speculative)
  if @recog.nil?
    # assume true if no recognizer was provided
    true
  elsif !speculative
    @recog.sempred(nil, rule_index, pred_index)
  else
    # Snapshot everything consume() may change so it can be rolled back.
    column_before = @char_position_in_line
    line_before = @line
    position_before = input.index
    mark_handle = input.mark
    begin
      consume(input)
      @recog.sempred(nil, rule_index, pred_index)
    ensure
      # Runs even if the predicate raises.
      @char_position_in_line = column_before
      @line = line_before
      input.seek(position_before)
      input.release(mark_handle)
    end
  end
end
434
+
435
# Copies the simulator's current position into +settings+.
#
# settings  - object with index=, line=, char_pos= and dfa_state= writers
# input     - stream whose current index is recorded
# dfa_state - value stored in settings.dfa_state
#
# Returns dfa_state (the value of the final assignment).
def capture_sim_state(settings, input, dfa_state)
  stream_position = input.index
  settings.index = stream_position
  settings.line, settings.char_pos = @line, @char_position_in_line
  settings.dfa_state = dfa_state
end
441
+
442
# Registers the DFA state for config set +q+ and, unless +q+ was flagged as
# having a semantic context, records an edge from +from+ to it on symbol +t+.
#
# The has_semantic_context flag on +q+ is always cleared before the state is
# registered; its previous value only decides whether the edge is stored.
#
# Returns the DFA state obtained from add_dfa_state(q).
def add_dfa_edge_dfastate_atnconfigset(from, t, q)
  had_semantic_context = q.has_semantic_context
  q.has_semantic_context = false

  target_state = add_dfa_state(q)
  add_dfa_edge_dfastate_dfastate(from, t, target_state) unless had_semantic_context

  target_state
end
453
+
454
# Records an edge from DFA state +p+ to DFA state +q+ on symbol +t+.
#
# p - source state; its edges array is created on first use
# t - symbol value; ignored unless MIN_DFA_EDGE <= t <= MAX_DFA_EDGE
# q - destination state
#
# Returns nil when +t+ is out of range, otherwise +q+.
def add_dfa_edge_dfastate_dfastate(p, t, q)
  # Only track edges within the DFA bounds
  return if t < MIN_DFA_EDGE || t > MAX_DFA_EDGE

  # Built with interpolation rather than by appending to a string literal
  # with <<, so the trace also works when string literals are frozen.
  puts("EDGE #{p} -> #{q} upon #{token_name(t)}") if @@debug

  # make room for tokens 1..n and -1 masquerading as index 0
  p.edges = [] if p.edges.nil?

  # Edges are stored at an offset so that MIN_DFA_EDGE maps to slot 0.
  p.edges[t - MIN_DFA_EDGE] = q # connect
end
471
+
472
# Returns the DFA state for +configs+ in the DFA of the current mode
# (@decision_to_dfa[@mode]), creating and registering it if no equal state
# is stored yet.
#
# When +configs+ contains a config in a rule stop state, the candidate is
# marked as an accept state and takes that config's lexer action executor
# and the token type mapped from its rule index.
#
# Side effect: when a new state is registered, +configs+ is made readonly.
def add_dfa_state(configs)
  candidate = DFAState.new(configs)

  stop_config = configs.find_first_rule_stop_state
  unless stop_config.nil?
    candidate.is_accept_state = true
    candidate.lexer_action_executor = stop_config.lexer_action_executor
    candidate.prediction = atn.rule_to_token_type[stop_config.state.rule_index]
  end

  dfa = @decision_to_dfa[@mode]

  # The states table is keyed by the state itself, so an equal state that
  # was registered earlier is returned instead of the candidate.
  known = dfa.states[candidate]
  return known unless known.nil?

  # First time this state is seen: number it and store it.
  candidate.state_number = dfa.states.size
  configs.readonly = true
  candidate.configs = configs
  dfa.states[candidate] = candidate
  candidate
end
495
+
496
# Looks up the DFA stored for +mode+.
#
# mode - key/index into @decision_to_dfa
#
# Returns whatever @decision_to_dfa holds at +mode+.
def dfa(mode)
  @decision_to_dfa[mode]
end
499
+
500
# Returns the text of +input+ from start_index up to, but not including,
# the current input index.
#
# input - stream providing index and text(interval)
def text(input)
  first_char = start_index
  # input.index is the first lookahead char; don't include it.
  last_char = input.index - 1
  input.text(Interval.of(first_char, last_char))
end
503
+
504
# Advances +input+ by one symbol and keeps @line and
# @char_position_in_line in step with it.
#
# A newline (code 10) increments @line and resets the column to 0; any other
# symbol increments the column.
#
# Returns whatever input.consume returns.
def consume(input)
  newline = "\n".ord
  if input.la(1) == newline
    @line += 1
    @char_position_in_line = 0
  else
    @char_position_in_line += 1
  end
  input.consume
end
514
+
515
# Formats symbol value +t+ for debug output.
#
# Returns 'EOF' for -1, otherwise the value's to_s wrapped in single quotes.
def token_name(t)
  # if ( atn.g!=nil ) return atn.g.getTokenDisplayName(t)
  t == -1 ? 'EOF' : "'#{t}'"
end
521
+ end
522
+ end