antlr4-runtime 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/.rspec +3 -0
- data/.travis.yml +7 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +35 -0
- data/LICENSE.txt +21 -0
- data/README.md +65 -0
- data/Rakefile +6 -0
- data/antlr4-runtime.gemspec +30 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/ext/rumourhash/Makefile +264 -0
- data/ext/rumourhash/extconf.rb +3 -0
- data/ext/rumourhash/rumourhash.c +59 -0
- data/lib/antlr4/runtime.rb +37 -0
- data/lib/antlr4/runtime/abstract_parse_tree_visitor.rb +43 -0
- data/lib/antlr4/runtime/abstract_predicate_transition.rb +11 -0
- data/lib/antlr4/runtime/action_transition.rb +29 -0
- data/lib/antlr4/runtime/ambiguity_info.rb +10 -0
- data/lib/antlr4/runtime/antlr_error_listener.rb +15 -0
- data/lib/antlr4/runtime/antlr_error_strategy.rb +24 -0
- data/lib/antlr4/runtime/antlr_file_stream.rb +17 -0
- data/lib/antlr4/runtime/antlr_input_stream.rb +6 -0
- data/lib/antlr4/runtime/array_2d_hash_set.rb +471 -0
- data/lib/antlr4/runtime/array_prediction_context.rb +76 -0
- data/lib/antlr4/runtime/atn.rb +100 -0
- data/lib/antlr4/runtime/atn_config.rb +140 -0
- data/lib/antlr4/runtime/atn_config_set.rb +150 -0
- data/lib/antlr4/runtime/atn_deserialization_options.rb +48 -0
- data/lib/antlr4/runtime/atn_deserializer.rb +737 -0
- data/lib/antlr4/runtime/atn_simulator.rb +69 -0
- data/lib/antlr4/runtime/atn_state.rb +118 -0
- data/lib/antlr4/runtime/atn_type.rb +8 -0
- data/lib/antlr4/runtime/atom_transition.rb +27 -0
- data/lib/antlr4/runtime/bail_error_strategy.rb +31 -0
- data/lib/antlr4/runtime/base_error_listener.rb +18 -0
- data/lib/antlr4/runtime/basic_block_start_state.rb +12 -0
- data/lib/antlr4/runtime/basic_state.rb +11 -0
- data/lib/antlr4/runtime/bit_set.rb +54 -0
- data/lib/antlr4/runtime/block_end_state.rb +15 -0
- data/lib/antlr4/runtime/block_start_state.rb +12 -0
- data/lib/antlr4/runtime/buffered_token_stream.rb +335 -0
- data/lib/antlr4/runtime/char_stream.rb +6 -0
- data/lib/antlr4/runtime/char_streams.rb +12 -0
- data/lib/antlr4/runtime/chunk.rb +4 -0
- data/lib/antlr4/runtime/code_point_char_stream.rb +83 -0
- data/lib/antlr4/runtime/common_token.rb +125 -0
- data/lib/antlr4/runtime/common_token_factory.rb +30 -0
- data/lib/antlr4/runtime/common_token_stream.rb +63 -0
- data/lib/antlr4/runtime/console_error_listener.rb +12 -0
- data/lib/antlr4/runtime/context_sensitivity_info.rb +7 -0
- data/lib/antlr4/runtime/decision_event_info.rb +19 -0
- data/lib/antlr4/runtime/decision_info.rb +36 -0
- data/lib/antlr4/runtime/decision_state.rb +15 -0
- data/lib/antlr4/runtime/default_error_strategy.rb +314 -0
- data/lib/antlr4/runtime/dfa.rb +97 -0
- data/lib/antlr4/runtime/dfa_serializer.rb +62 -0
- data/lib/antlr4/runtime/dfa_state.rb +109 -0
- data/lib/antlr4/runtime/diagnostic_error_listener.rb +58 -0
- data/lib/antlr4/runtime/double_key_map.rb +49 -0
- data/lib/antlr4/runtime/empty_prediction_context.rb +35 -0
- data/lib/antlr4/runtime/epsilon_transition.rb +27 -0
- data/lib/antlr4/runtime/equality_comparator.rb +4 -0
- data/lib/antlr4/runtime/error_info.rb +7 -0
- data/lib/antlr4/runtime/error_node.rb +5 -0
- data/lib/antlr4/runtime/error_node_impl.rb +12 -0
- data/lib/antlr4/runtime/failed_predicate_exception.rb +33 -0
- data/lib/antlr4/runtime/flexible_hash_map.rb +232 -0
- data/lib/antlr4/runtime/input_mismatch_exception.rb +20 -0
- data/lib/antlr4/runtime/int_stream.rb +31 -0
- data/lib/antlr4/runtime/integer.rb +14 -0
- data/lib/antlr4/runtime/interval.rb +111 -0
- data/lib/antlr4/runtime/interval_set.rb +540 -0
- data/lib/antlr4/runtime/lexer.rb +257 -0
- data/lib/antlr4/runtime/lexer_action.rb +12 -0
- data/lib/antlr4/runtime/lexer_action_executor.rb +75 -0
- data/lib/antlr4/runtime/lexer_action_type.rb +12 -0
- data/lib/antlr4/runtime/lexer_atn_config.rb +50 -0
- data/lib/antlr4/runtime/lexer_atn_simulator.rb +522 -0
- data/lib/antlr4/runtime/lexer_channel_action.rb +51 -0
- data/lib/antlr4/runtime/lexer_custom_action.rb +49 -0
- data/lib/antlr4/runtime/lexer_dfa_serializer.rb +12 -0
- data/lib/antlr4/runtime/lexer_indexed_custom_action.rb +49 -0
- data/lib/antlr4/runtime/lexer_mode_action.rb +51 -0
- data/lib/antlr4/runtime/lexer_more_action.rb +41 -0
- data/lib/antlr4/runtime/lexer_no_viable_alt_exception.rb +4 -0
- data/lib/antlr4/runtime/lexer_pop_mode_action.rb +41 -0
- data/lib/antlr4/runtime/lexer_push_mode_action.rb +51 -0
- data/lib/antlr4/runtime/lexer_skip_action.rb +43 -0
- data/lib/antlr4/runtime/lexer_type_action.rb +51 -0
- data/lib/antlr4/runtime/ll1_analyzer.rb +133 -0
- data/lib/antlr4/runtime/lookahead_event_info.rb +10 -0
- data/lib/antlr4/runtime/loop_end_state.rb +15 -0
- data/lib/antlr4/runtime/murmur_hash.rb +99 -0
- data/lib/antlr4/runtime/no_viable_alt_exception.rb +7 -0
- data/lib/antlr4/runtime/not_set_transition.rb +20 -0
- data/lib/antlr4/runtime/object_equality_comparator.rb +18 -0
- data/lib/antlr4/runtime/ordered_atn_config_set.rb +15 -0
- data/lib/antlr4/runtime/parse_cancellation_exception.rb +5 -0
- data/lib/antlr4/runtime/parse_tree.rb +7 -0
- data/lib/antlr4/runtime/parse_tree_listener.rb +4 -0
- data/lib/antlr4/runtime/parse_tree_visitor.rb +4 -0
- data/lib/antlr4/runtime/parser.rb +522 -0
- data/lib/antlr4/runtime/parser_atn_simulator.rb +1171 -0
- data/lib/antlr4/runtime/parser_rule_context.rb +186 -0
- data/lib/antlr4/runtime/plus_block_start_state.rb +11 -0
- data/lib/antlr4/runtime/plus_loopback_state.rb +12 -0
- data/lib/antlr4/runtime/precedence_predicate_transition.rb +31 -0
- data/lib/antlr4/runtime/predicate.rb +6 -0
- data/lib/antlr4/runtime/predicate_eval_info.rb +16 -0
- data/lib/antlr4/runtime/predicate_transition.rb +35 -0
- data/lib/antlr4/runtime/prediction_context.rb +103 -0
- data/lib/antlr4/runtime/prediction_context_cache.rb +28 -0
- data/lib/antlr4/runtime/prediction_context_utils.rb +407 -0
- data/lib/antlr4/runtime/prediction_mode.rb +213 -0
- data/lib/antlr4/runtime/profiling_atn_simulator.rb +149 -0
- data/lib/antlr4/runtime/proxy_error_listener.rb +33 -0
- data/lib/antlr4/runtime/range_transition.rb +29 -0
- data/lib/antlr4/runtime/recognition_exception.rb +17 -0
- data/lib/antlr4/runtime/recognizer.rb +136 -0
- data/lib/antlr4/runtime/rule_context.rb +131 -0
- data/lib/antlr4/runtime/rule_context_with_alt_num.rb +11 -0
- data/lib/antlr4/runtime/rule_node.rb +8 -0
- data/lib/antlr4/runtime/rule_start_state.rb +17 -0
- data/lib/antlr4/runtime/rule_stop_state.rb +12 -0
- data/lib/antlr4/runtime/rule_tag_token.rb +64 -0
- data/lib/antlr4/runtime/rule_transition.rb +29 -0
- data/lib/antlr4/runtime/semantic_context.rb +313 -0
- data/lib/antlr4/runtime/set_transition.rb +29 -0
- data/lib/antlr4/runtime/singleton_prediction_context.rb +56 -0
- data/lib/antlr4/runtime/star_block_start_state.rb +12 -0
- data/lib/antlr4/runtime/star_loop_entry_state.rb +17 -0
- data/lib/antlr4/runtime/star_loopback_state.rb +16 -0
- data/lib/antlr4/runtime/syntax_tree.rb +6 -0
- data/lib/antlr4/runtime/tag_chunk.rb +22 -0
- data/lib/antlr4/runtime/terminal_node.rb +5 -0
- data/lib/antlr4/runtime/terminal_node_impl.rb +50 -0
- data/lib/antlr4/runtime/text_chunk.rb +16 -0
- data/lib/antlr4/runtime/token.rb +13 -0
- data/lib/antlr4/runtime/token_stream.rb +13 -0
- data/lib/antlr4/runtime/token_tag_token.rb +22 -0
- data/lib/antlr4/runtime/tokens_start_state.rb +14 -0
- data/lib/antlr4/runtime/transition.rb +51 -0
- data/lib/antlr4/runtime/tree.rb +4 -0
- data/lib/antlr4/runtime/trees.rb +195 -0
- data/lib/antlr4/runtime/triple.rb +40 -0
- data/lib/antlr4/runtime/utils.rb +117 -0
- data/lib/antlr4/runtime/uuid.rb +46 -0
- data/lib/antlr4/runtime/version.rb +5 -0
- data/lib/antlr4/runtime/vocabulary.rb +12 -0
- data/lib/antlr4/runtime/vocabulary_impl.rb +82 -0
- data/lib/antlr4/runtime/wildcard_transition.rb +20 -0
- data/lib/antlr4/runtime/writable_token.rb +7 -0
- metadata +243 -0
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
require 'antlr4/runtime/recognizer'
|
|
2
|
+
require 'antlr4/runtime/token'
|
|
3
|
+
require 'antlr4/runtime/common_token_factory'
|
|
4
|
+
require 'antlr4/runtime/lexer_no_viable_alt_exception'
|
|
5
|
+
|
|
6
|
+
module Antlr4::Runtime
|
|
7
|
+
|
|
8
|
+
class Lexer < Recognizer
|
|
9
|
+
DEFAULT_MODE = 0
|
|
10
|
+
MORE = -2
|
|
11
|
+
SKIP = -3
|
|
12
|
+
|
|
13
|
+
DEFAULT_TOKEN_CHANNEL = Token::DEFAULT_CHANNEL
|
|
14
|
+
HIDDEN = Token::HIDDEN_CHANNEL
|
|
15
|
+
MIN_CHAR_VALUE = 0x0000
|
|
16
|
+
MAX_CHAR_VALUE = 0x10FFFF
|
|
17
|
+
|
|
18
|
+
attr_accessor :_input
|
|
19
|
+
attr_accessor :token
|
|
20
|
+
attr_accessor :_token_start_char_index
|
|
21
|
+
attr_accessor :_token_start_line
|
|
22
|
+
attr_accessor :_token_start_char_position_in_line
|
|
23
|
+
attr_accessor :_hit_eof
|
|
24
|
+
attr_accessor :_channel
|
|
25
|
+
attr_accessor :_type
|
|
26
|
+
attr_accessor :_mode_stack
|
|
27
|
+
attr_accessor :_mode
|
|
28
|
+
attr_accessor :_text
|
|
29
|
+
|
|
30
|
+
# Puts the lexer back into its start-of-input state.
#
# Seeks the attached input (if any) to index 0, clears the pending token and
# the per-token bookkeeping, switches to DEFAULT_MODE with an empty mode
# stack, and finally calls +reset+ on the interpreter when one is set.
def reset
  @_input&.seek(0) # rewind the input

  # Per-token bookkeeping.
  @_token = nil
  @_text = nil
  @_type = Token::INVALID_TYPE
  @_channel = Token::DEFAULT_CHANNEL
  @_token_start_char_index = -1
  @_token_start_line = -1
  @_token_start_char_position_in_line = -1

  # Stream and mode state.
  @_hit_eof = false
  @_mode = DEFAULT_MODE
  @_mode_stack.clear

  @_interp&.reset
end
|
|
49
|
+
|
|
50
|
+
# Builds a lexer, optionally attached to +input+.
#
# When +input+ is given it is stored together with a (lexer, input) pair
# that is later handed to the token factory. The mode stack is created
# before +reset+ runs because +reset+ clears it.
def initialize(input = nil)
  super()
  unless input.nil?
    @_input = input
    @_token_factory_source_pair = OpenStruct.new(a: self, b: input)
  end
  @_mode_stack = []
  reset
  @_factory = CommonTokenFactory.instance
end
|
|
62
|
+
|
|
63
|
+
# Scans and returns the next token.
#
# Raises IllegalStateException when no input stream is attached. The input is
# marked before scanning and the marker is always released afterwards, so an
# unbuffered stream keeps at least the current token's text and does not keep
# buffering once the token is done.
def next_token
  raise IllegalStateException, 'nextToken requires a non-nil input stream.' if @_input.nil?

  token_start_marker = @_input.mark
  begin
    while next_token_inner
      # the inner scan asked to be repeated; scan again
    end
    @_token
  ensure
    @_input.release(token_start_marker)
  end
end
|
|
80
|
+
|
|
81
|
+
# Performs one scanning pass.
#
# Returns true when the matched rule resolved to SKIP (the caller should scan
# again) and false once a token has been emitted, including the EOF token.
def next_token_inner
  if @_hit_eof
    emit_eof
    return false
  end

  # Record where the upcoming token starts.
  @_token = nil
  @_text = nil
  @_channel = Token::DEFAULT_CHANNEL
  @_token_start_char_index = @_input.index
  @_token_start_char_position_in_line = @_interp.char_position_in_line
  @_token_start_line = @_interp.line

  # Keep matching while the resulting type is MORE.
  loop do
    @_type = Token::INVALID_TYPE

    ttype =
      begin
        @_interp.match(@_input, @_mode)
      rescue LexerNoViableAltException => e
        notify_listeners(e) # report error
        recover1(e)
        SKIP
      end

    @_hit_eof = true if @_input.la(1) == IntStream::EOF
    # Only take the matched type if nothing changed @_type during the match.
    @_type = ttype if @_type == Token::INVALID_TYPE
    return true if @_type == SKIP
    break unless @_type == MORE
  end

  emit if @_token.nil?
  false
end
|
|
114
|
+
|
|
115
|
+
# Marks the token being matched as SKIP; next_token_inner then returns true
# without emitting a token.
def skip
  @_type = SKIP
end
|
|
118
|
+
|
|
119
|
+
# Marks the token being matched as MORE; next_token_inner keeps matching
# while the type is MORE.
def more
  @_type = MORE
end
|
|
122
|
+
|
|
123
|
+
# Switches the lexer to mode +m+ and returns it.
def mode(m)
  @_mode = m
end
|
|
126
|
+
|
|
127
|
+
# Saves the current mode on the mode stack and switches to mode +m+.
#
# The debug message is built with interpolation: the mode is handled as a
# number elsewhere in this class (DEFAULT_MODE = 0), and String#+ with an
# Integer raises TypeError.
def push_mode(m)
  puts("pushMode #{m}") if LexerATNSimulator.debug
  @_mode_stack.push(@_mode)
  mode(m)
end
|
|
132
|
+
|
|
133
|
+
# Restores the most recently pushed mode and returns it.
#
# Raises EmptyStackException when no mode has been pushed. The debug message
# is built with interpolation because the stack holds the mode values pushed
# by push_mode, and String#+ with an Integer raises TypeError.
def pop_mode
  raise EmptyStackException if @_mode_stack.empty?

  puts("popMode back to #{@_mode_stack.last}") if LexerATNSimulator.debug
  mode(@_mode_stack.pop)
  @_mode
end
|
|
140
|
+
|
|
141
|
+
# Attaches a new input stream to the lexer.
#
# The old input is detached first so that +reset+ does not seek on it; after
# the reset the new +input+ is stored and the (lexer, input) pair used by the
# token factory is pointed at it.
def input_stream(input)
  @_input = nil
  @_token_factory_source_pair = OpenStruct.new(a: self, b: @_input)
  reset

  @_input = input
  @_token_factory_source_pair.a = self
  @_token_factory_source_pair.b = @_input
end
|
|
151
|
+
|
|
152
|
+
# Returns the source name reported by the attached input stream.
def source_name
  @_input.get_source_name
end
|
|
155
|
+
|
|
156
|
+
# Sets and returns the current token.
#
# With an explicit +token+ that token is used as-is. Otherwise a token is
# created through the factory from the recorded type, text, channel and start
# position; its stop index is the character before the current input index.
def emit(token = nil)
  @_token =
    if token.nil?
      @_factory.create(@_token_factory_source_pair, @_type, @_text, @_channel,
                       @_token_start_char_index, char_index - 1,
                       @_token_start_line, @_token_start_char_position_in_line)
    else
      token
    end
end
|
|
163
|
+
|
|
164
|
+
# Creates an EOF token at the current input position, emits it and returns it.
#
# The token starts at the current input index and stops one before it, and
# carries the current line and char position in line.
def emit_eof
  column = char_position_in_line
  position = @_input.index
  eof_token = @_factory.create(@_token_factory_source_pair, Token::EOF, nil, Token::DEFAULT_CHANNEL,
                               position, @_input.index - 1, line, column)
  emit(eof_token)
  eof_token
end
|
|
170
|
+
|
|
171
|
+
# Returns the line number tracked by the interpreter.
def line
  @_interp.line
end
|
|
174
|
+
|
|
175
|
+
# Returns the char position in line tracked by the interpreter.
def char_position_in_line
  @_interp.char_position_in_line
end
|
|
178
|
+
|
|
179
|
+
# Forwards +line+ to the interpreter's set_line.
def set_line(line)
  @_interp.set_line(line)
end
|
|
182
|
+
|
|
183
|
+
# Forwards +char_position_in_line+ to the interpreter's
# set_char_position_in_line.
def set_char_position_in_line(char_position_in_line)
  @_interp.set_char_position_in_line(char_position_in_line)
end
|
|
186
|
+
|
|
187
|
+
# Returns the current index of the input stream.
def char_index
  @_input.index
end
|
|
190
|
+
|
|
191
|
+
# Returns the text of the current token: the explicit override in @_text when
# one is set, otherwise whatever the interpreter reports for the input.
def text
  @_text.nil? ? @_interp.text(@_input) : @_text
end
|
|
196
|
+
|
|
197
|
+
# Returns every token up to, but not including, the first EOF token.
def all_tokens
  collected = []
  until (current = next_token).type == Token::EOF
    collected << current
  end
  collected
end
|
|
206
|
+
|
|
207
|
+
# Recovers from a token recognition error by letting the interpreter consume
# one character, unless the input is already at EOF.
def recover1(_e)
  # skip a char and begin again
  @_interp.consume(@_input) unless @_input.la(1) == IntStream::EOF
end
|
|
213
|
+
|
|
214
|
+
# Reports a token recognition error to the error listener dispatch.
#
# The message quotes the input text from the token start index up to the
# current input index, passed through error_display.
def notify_listeners(e)
  offending = @_input.text(Interval.of(@_token_start_char_index, @_input.index))
  msg = "token recognition error at: '#{error_display(offending)}'"

  error_listener_dispatch.syntax_error(self, nil, @_token_start_line, @_token_start_char_position_in_line, msg, e)
end
|
|
221
|
+
|
|
222
|
+
# Returns +s+ with every character passed through error_display_char.
def error_display(s)
  s.chars.each_with_object('') { |c, shown| shown << error_display_char(c) }
end
|
|
229
|
+
|
|
230
|
+
# Returns a printable form of +c+ for error messages.
#
# +c+ may be a one-character String, an Integer code point, or Token::EOF.
# EOF becomes '<EOF>'; newline, tab and carriage return become the
# two-character escapes \n, \t and \r; anything else is returned as a String.
#
# EOF is checked before appending, because appending a negative Integer to a
# String raises RangeError. The control characters are matched with
# double-quoted literals: in single quotes, '\n' is a backslash followed by
# "n" and never equals a real newline.
def error_display_char(c)
  return '<EOF>' if c == Token::EOF

  s = ''.dup
  s << c # String#<< also accepts an Integer code point

  case s
  when "\n" then '\\n'
  when "\t" then '\\t'
  when "\r" then '\\r'
  else s
  end
end
|
|
247
|
+
|
|
248
|
+
# Returns the error_display_char form of +c+ wrapped in single quotes.
def char_error_display(c)
  "'#{error_display_char(c)}'"
end
|
|
252
|
+
|
|
253
|
+
# Recovers from a recognition error by consuming one symbol directly from the
# input stream; the exception argument is ignored.
def recover2(_re)
  @_input.consume
end
|
|
256
|
+
end
|
|
257
|
+
end
|
|
@@ -0,0 +1,75 @@
|
|
|
1
|
+
require 'antlr4/runtime/lexer_indexed_custom_action'
|
|
2
|
+
|
|
3
|
+
module Antlr4::Runtime
|
|
4
|
+
|
|
5
|
+
# An ordered list of lexer actions together with a hash code computed once
# from that list at construction time.
class LexerActionExecutor
  attr_reader :lexer_actions
  attr_reader :hash_code

  # lexer_actions - array of actions, executed in order by #execute.
  def initialize(lexer_actions)
    @lexer_actions = lexer_actions
    @hash_code = MurmurHash.hash_objs(lexer_actions)
  end

  # Returns a new executor that runs the actions of +lexer_action_executor+
  # (which may be nil) followed by +lexer_action+. The given executor is not
  # modified.
  def self.append(lexer_action_executor, lexer_action)
    return LexerActionExecutor.new([lexer_action]) if lexer_action_executor.nil?

    LexerActionExecutor.new(lexer_action_executor.lexer_actions + [lexer_action])
  end

  # Returns an executor in which every position-dependent action that is not
  # already a LexerIndexedCustomAction is wrapped in one carrying +offset+.
  # Returns self when nothing had to be wrapped; the receiver's action list is
  # never modified.
  def fix_offset_before_match(offset)
    updated_lexer_actions = nil

    @lexer_actions.each_with_index do |action, i|
      next unless action.position_dependent? && !action.is_a?(LexerIndexedCustomAction)

      updated_lexer_actions ||= @lexer_actions.dup # copy lazily, on first change
      updated_lexer_actions[i] = LexerIndexedCustomAction.new(offset, action)
    end

    return self if updated_lexer_actions.nil?

    LexerActionExecutor.new(updated_lexer_actions)
  end

  # Runs every action against +lexer+.
  #
  # +input.index+ on entry is taken as the stop index. Before an indexed
  # action runs, the input is moved to start_index + its offset; before any
  # other position-dependent action it is moved to the stop index. If the
  # input was left somewhere else, it is moved back to the stop index on the
  # way out, even when an action raises.
  #
  # Indexed actions are executed too (the previous code only seeked for them).
  # NOTE(review): this assumes LexerIndexedCustomAction#execute runs the
  # wrapped action, and that it exposes its offset as getOffset - confirm
  # against lexer_indexed_custom_action.rb.
  def execute(lexer, input, start_index)
    requires_seek = false
    stop_index = input.index
    begin
      @lexer_actions.each do |lexer_action|
        if lexer_action.is_a? LexerIndexedCustomAction
          offset = lexer_action.getOffset
          input.seek(start_index + offset)
          requires_seek = ((start_index + offset) != stop_index)
        elsif lexer_action.position_dependent?
          input.seek(stop_index)
          requires_seek = false
        end

        lexer_action.execute(lexer)
      end
    ensure
      input.seek(stop_index) if requires_seek
    end
  end

  # Two executors are eql? when they are the same object, or when both the
  # hash codes and the action lists are equal.
  def eql?(obj)
    return true if equal?(obj)
    return false unless obj.is_a? LexerActionExecutor

    @hash_code == obj.hash_code && @lexer_actions == obj.lexer_actions
  end
end
|
|
75
|
+
end
|
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
require 'antlr4/runtime/atn_config'
|
|
2
|
+
require 'antlr4/runtime/semantic_context'
|
|
3
|
+
|
|
4
|
+
module Antlr4::Runtime
|
|
5
|
+
|
|
6
|
+
# ATN configuration used by the lexer. On top of what ATNConfig stores it
# carries the lexer action executor collected so far and a flag recording
# whether the configuration passed through a non-greedy decision.
#
# The lexer_atn_configN methods initialise an already constructed instance.
# They delegate to atn_config2 / atn_config7, which this file does not
# define. NOTE(review): judging by the call sites here, atn_config2 takes
# (state, alt, context, semantic_context) and atn_config7 takes
# (config, state, context, semantic_context) - confirm against ATNConfig.
class LexerATNConfig < ATNConfig
  attr_accessor :passed_through_non_greedy_decision
  attr_accessor :lexer_action_executor

  def initialize
    super
    @passed_through_non_greedy_decision = false
    @lexer_action_executor = nil
  end

  # Initialise from a state, alternative and context, with no executor.
  def lexer_atn_config1(state, alt, context)
    atn_config2(state, alt, context, SemanticContext::NONE)
    @passed_through_non_greedy_decision = false
    @lexer_action_executor = nil
  end

  # Initialise from a state, alternative and context, with an executor.
  #
  # Delegates to atn_config2 like lexer_atn_config1 does: the arguments are
  # (state, alt, context, ...), not the (config, state, ...) shape that every
  # other caller in this class passes to atn_config7.
  def lexer_atn_config2(state, alt, context, lexer_action_executor)
    atn_config2(state, alt, context, SemanticContext::NONE)
    @lexer_action_executor = lexer_action_executor
    @passed_through_non_greedy_decision = false
  end

  # Initialise from config +c+ moved to +state+, keeping c's context,
  # semantic context and executor.
  def lexer_atn_config3(c, state)
    atn_config7(c, state, c.context, c.semantic_context)
    @lexer_action_executor = c.lexer_action_executor
    @passed_through_non_greedy_decision = check_non_greedy_decision(c, state)
  end

  # Like lexer_atn_config3, but with an explicit executor.
  def lexer_atn_config4(c, state, lexer_action_executor)
    atn_config7(c, state, c.context, c.semantic_context)
    @lexer_action_executor = lexer_action_executor
    @passed_through_non_greedy_decision = check_non_greedy_decision(c, state)
  end

  # Like lexer_atn_config3, but with an explicit context.
  def lexer_atn_config5(c, state, context)
    atn_config7(c, state, context, c.semantic_context)
    @lexer_action_executor = c.lexer_action_executor
    @passed_through_non_greedy_decision = check_non_greedy_decision(c, state)
  end

  # Truthy when +source+ already passed through a non-greedy decision, or
  # when +target+ is a DecisionState whose non_greedy is set.
  def check_non_greedy_decision(source, target)
    source.passed_through_non_greedy_decision || (target.is_a?(DecisionState) && target.non_greedy)
  end
end
|
|
50
|
+
end
|
|
@@ -0,0 +1,522 @@
|
|
|
1
|
+
require 'antlr4/runtime/atn_simulator'
|
|
2
|
+
require 'antlr4/runtime/lexer_atn_config'
|
|
3
|
+
require 'antlr4/runtime/ordered_atn_config_set'
|
|
4
|
+
require 'antlr4/runtime/lexer_action_executor'
|
|
5
|
+
|
|
6
|
+
module Antlr4::Runtime
|
|
7
|
+
|
|
8
|
+
class LexerATNSimulator < ATNSimulator
|
|
9
|
+
MIN_DFA_EDGE = 0
|
|
10
|
+
MAX_DFA_EDGE = 127 # forces unicode to stay in ATN
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
# Class-wide state: the debug flag and the shared empty prediction context.
# Both live in class variables because the instance methods of this class
# read them as @@debug / @@empty.
@@debug = false
@@empty = EmptyPredictionContext.new(Integer::MAX)

class << self
  # Readers for the class variables above. A plain attr_reader here would read
  # class-level *instance* variables (@debug / @empty), which nothing assigns,
  # so it would always return nil.
  def debug
    class_variable_get(:@@debug)
  end

  def empty
    class_variable_get(:@@empty)
  end
end
|
|
18
|
+
|
|
19
|
+
# Snapshot of a simulation position: an input index, a line, a char position
# and a DFA state. Fields are unset until +reset+ or a writer is called.
class SimState
  attr_accessor :index, :line, :char_pos, :dfa_state

  # Puts the snapshot into its "nothing recorded" state.
  def reset
    @index = -1
    @char_pos = -1
    @line = 0
    @dfa_state = nil
  end
end
|
|
32
|
+
|
|
33
|
+
attr_reader :char_position_in_line
|
|
34
|
+
attr_reader :line
|
|
35
|
+
|
|
36
|
+
# recog                - the recognizer (lexer) this simulator works for
# atn                  - passed to the superclass together with the cache
# decision_to_dfa      - array of DFAs, indexed by lexer mode in #match
# shared_context_cache - passed to the superclass
def initialize(recog, atn, decision_to_dfa, shared_context_cache)
  super(atn, shared_context_cache)

  # Debug switches; note that @@debug is class-wide and is set back to false
  # by every new instance.
  @@debug = false
  @dfa_debug = false

  @recog = recog
  @decision_to_dfa = decision_to_dfa

  # Position and mode tracking.
  @start_index = -1
  @line = 1
  @char_position_in_line = 0
  @mode = Lexer::DEFAULT_MODE

  @prev_accept = SimState.new
  @match_calls = 0
end
|
|
50
|
+
|
|
51
|
+
# Copies line, char position in line, mode and start index from +simulator+.
def copy_state(simulator)
  @line = simulator.line
  @char_position_in_line = simulator.char_position_in_line
  @mode = simulator.mode
  @start_index = simulator.start_index
end
|
|
57
|
+
|
|
58
|
+
# Matches one token from +input+ in lexer mode +mode+ and returns the result
# of match_atn or exec_atn.
#
# The DFA for the mode is used when it already has a start state (s0);
# otherwise matching starts from the ATN. The input is marked for the
# duration of the match and the marker is always released.
def match(input, mode)
  @match_calls += 1
  @mode = mode
  mark = input.mark

  begin
    @start_index = input.index
    @prev_accept.reset
    dfa = @decision_to_dfa[mode]
    dfa.s0.nil? ? match_atn(input) : exec_atn(input, dfa.s0)
  ensure
    input.release(mark)
  end
end
|
|
76
|
+
|
|
77
|
+
# Puts the simulator back into its initial state: clears the previous-accept
# snapshot and restores the same start index, line, char position and mode
# that the constructor sets.
#
# The mode constant is read with Lexer::DEFAULT_MODE; `Lexer.DEFAULT_MODE`
# is a method call and raises NoMethodError.
def reset
  @prev_accept.reset
  @start_index = -1
  @line = 1
  @char_position_in_line = 0
  @mode = Lexer::DEFAULT_MODE
end
|
|
84
|
+
|
|
85
|
+
# Replaces every entry of the decision-to-DFA table with a fresh DFA built
# from the ATN's decision state for that index.
def clear_dfa
  @decision_to_dfa.each_index do |decision|
    @decision_to_dfa[decision] = DFA.new(atn.decision_state(decision), decision)
  end
  nil
end
|
|
92
|
+
|
|
93
|
+
# Matches starting from the ATN start state of the current mode and returns
# the result of exec_atn.
#
# The start-state closure is turned into a DFA state. It is stored as the
# mode's DFA start state only when the closure carried no semantic context.
#
# Debug output passes the format string straight to printf; formatting first
# and then printing with printf would interpret the rendered text as a format
# string again and fail on a literal "%".
def match_atn(input)
  start_state = atn.mode_to_start_state[@mode]
  printf("matchATN mode %d start: %s\n", @mode, start_state) if @@debug

  old_mode = @mode

  s0_closure = compute_start_state(input, start_state)
  # Remember the flag, then clear it before the closure becomes a DFA state.
  suppress_edge = s0_closure.has_semantic_context
  s0_closure.has_semantic_context = false

  next_state = add_dfa_state(s0_closure)
  @decision_to_dfa[@mode].s0 = next_state unless suppress_edge

  predict = exec_atn(input, next_state)

  printf("DFA after matchATN: %s\n", @decision_to_dfa[old_mode].to_lexer_string) if @@debug

  predict
end
|
|
115
|
+
|
|
116
|
+
# Runs the simulation from DFA state +ds0+ over +input+ and returns the
# result of fail_or_accept for the last accept state seen.
#
# Debug output passes the format string straight to printf; formatting first
# and then printing with printf would interpret the rendered text as a format
# string again and fail on a literal "%".
def exec_atn(input, ds0)
  printf("start state closure=%s\n", ds0.configs) if @@debug

  # allow zero-length tokens
  capture_sim_state(@prev_accept, input, ds0) if ds0.is_accept_state

  t = input.la(1)
  s = ds0 # s is current/from DFA state

  loop do # while more work
    printf("execATN loop starting closure: %s\n", s.configs) if @@debug

    # Reuse an existing DFA edge for t when there is one; only compute the
    # target (reach/closure) when the DFA has no edge yet. This pays off for
    # loops in the DFA, where a character leads back to a state that already
    # has many outgoing edges, e.g. .* in comments.
    target = existing_target_state(s, t)
    target = compute_target_state(input, s, t) if target.nil?

    break if target == @@error

    # Consume before capturing the accept state so that the input index,
    # line and char position reflect the end of the token.
    consume(input) if t != IntStream::EOF

    if target.is_accept_state
      capture_sim_state(@prev_accept, input, target)
      break if t == IntStream::EOF
    end

    t = input.la(1)
    s = target # flip: current DFA target becomes new src/from state
  end

  fail_or_accept(@prev_accept, input, s.configs, t)
end
|
|
170
|
+
|
|
171
|
+
# Returns the DFA state already recorded on +s+ for symbol +t+, or nil when
# +s+ has no edge table, +t+ is outside MIN_DFA_EDGE..MAX_DFA_EDGE, or no
# edge has been stored for +t+.
def existing_target_state(s, t)
  return nil if s.edges.nil?
  return nil if t < MIN_DFA_EDGE || t > MAX_DFA_EDGE

  target = s.edges[t - MIN_DFA_EDGE]
  puts "reuse state #{s.state_number} edge to #{target.state_number}" if @@debug && !target.nil?

  target
end
|
|
181
|
+
|
|
182
|
+
# Computes the DFA target reached from +s+ on symbol +t+ when no edge exists
# yet.
#
# Fills a fresh config set from s.configs by following t transitions. A
# non-empty set is linked to +s+ through a new edge and that call's result is
# returned. An empty set yields @@error; the error edge is recorded on +s+
# unless the set reports a semantic context.
def compute_target_state(input, s, t)
  reach = OrderedATNConfigSet.new
  reachable_config_set(input, s.configs, reach, t)

  # Add an edge from s to target DFA found/created for reach
  return add_dfa_edge_dfastate_atnconfigset(s, t, reach) unless reach.empty?

  # We got nowhere on t from s. Keep that knowledge in the DFA; dropping it
  # would cause a failover from the DFA later.
  add_dfa_edge_dfastate_dfastate(s, t, @@error) unless reach.has_semantic_context

  # stop when we can't match any more char
  @@error
end
|
|
203
|
+
|
|
204
|
+
# Finishes a match.
#
# When +prev_accept+ holds a DFA state, the input is accepted at the recorded
# position and that state's prediction is returned. Without one, Token::EOF
# is returned if EOF is the very first symbol; otherwise
# LexerNoViableAltException is raised.
def fail_or_accept(prev_accept, input, _reach, t)
  accepted = prev_accept.dfa_state

  if accepted.nil?
    # if no accept and EOF is first char, return EOF
    return Token::EOF if t == IntStream::EOF && input.index == @start_index

    raise LexerNoViableAltException, @recog
  end

  accept(input, accepted.lexer_action_executor, @start_index,
         prev_accept.index, prev_accept.line, prev_accept.char_pos)
  prev_accept.dfa_state.prediction
end
|
|
215
|
+
|
|
216
|
+
# Adds to +reach+ every configuration reachable from the configurations of
# +closure+ on symbol +t+.
#
# skip_alt is used to skip processing for configs which have a lower priority
# than a config that already reached an accept state for the same rule.
#
# The offset handed to fix_offset_before_match is computed from @start_index,
# the instance variable set in #match; the bare name start_index has no
# accessor defined in the visible part of this class.
#
# Debug output passes the format string straight to printf; formatting first
# and then printing with printf would interpret the rendered text as a format
# string again and fail on a literal "%".
def reachable_config_set(input, closure, reach, t)
  skip_alt = ATN::INVALID_ALT_NUMBER

  closure.configs.each do |c|
    current_alt_reached_accept_state = (c.alt == skip_alt)
    next if current_alt_reached_accept_state && c.passed_through_non_greedy_decision

    printf("testing %s at %s\n", token_name(t), c.to_s2(@recog, true)) if @@debug

    c.state.number_of_transitions.times do |ti| # for each transition
      target = reachable_target(c.state.transition(ti), t)
      next if target.nil?

      lexer_action_executor = c.lexer_action_executor
      unless lexer_action_executor.nil?
        lexer_action_executor = lexer_action_executor.fix_offset_before_match(input.index - @start_index)
      end

      treat_eof_as_epsilon = (t == CharStream::EOF)
      cfg = LexerATNConfig.new
      cfg.lexer_atn_config4(c, target, lexer_action_executor)

      # `closure(...)` with arguments calls the closure method, not the
      # +closure+ parameter.
      next unless closure(input, cfg, reach, current_alt_reached_accept_state, true, treat_eof_as_epsilon)

      # any remaining configs for this alt have a lower priority than
      # the one that just reached an accept state.
      skip_alt = c.alt
      break
    end
  end

  nil
end
|
|
258
|
+
|
|
259
|
+
# Accepts a token: moves +input+ to +index+, records +line+ and +char_pos+ as
# the current position, and runs +lexer_action_executor+ against the
# recognizer when both are present.
#
# Debug output passes the format string straight to printf; formatting first
# and then printing with printf would interpret the rendered text as a format
# string again and fail on a literal "%".
def accept(input, lexer_action_executor, start_index, index, line, char_pos)
  printf("ACTION %s\n", lexer_action_executor) if @@debug

  # seek to after last char in token
  input.seek(index)
  @line = line
  @char_position_in_line = char_pos

  return if lexer_action_executor.nil? || @recog.nil?

  lexer_action_executor.execute(@recog, input, start_index)
end
|
|
271
|
+
|
|
272
|
+
# Returns the target of +trans+ when it matches symbol +t+ within the lexer's
# character range, otherwise nil.
def reachable_target(trans, t)
  trans.matches(t, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE) ? trans.target : nil
end
|
|
279
|
+
|
|
280
|
+
# Builds the start configuration set for ATN state +p+.
#
# Each outgoing transition of +p+ becomes one alternative, numbered from 1,
# seeded with the shared empty prediction context and expanded through
# closure into the returned ATNConfigSet.
def compute_start_state(input, p)
  initial_context = @@empty
  start_configs = ATNConfigSet.new

  (0...p.number_of_transitions).each do |alt_index|
    cfg = LexerATNConfig.new
    cfg.lexer_atn_config1(p.transition(alt_index).target, alt_index + 1, initial_context)
    closure(input, cfg, start_configs, false, false, false)
  end

  start_configs
end
|
|
293
|
+
|
|
294
|
+
# Expands +config+ through every transition that #epsilon_target can follow,
# adding the configurations it settles on to +configs+.
#
# input                            - character stream, forwarded to #epsilon_target.
# config                           - configuration to expand.
# configs                          - collector; must respond to #add.
# current_alt_reached_accept_state - whether the current alternative has
#                                    already hit a rule stop state.
# speculative, treat_eof_as_epsilon - forwarded unchanged to #epsilon_target.
#
# Returns the updated "current alternative reached an accept state" flag.
def closure(input, config, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
  if config.state.is_a? RuleStopState
    if @@debug
      # Format exactly once; passing a pre-formatted string to printf would
      # re-interpret any '%' contained in the rendered config.
      if @recog.nil?
        printf("closure at rule stop %s\n", config)
      else
        printf("closure at %s rule stop %s\n", @recog.rule_names[config.state.rule_index], config)
      end
    end

    context = config.context

    if context.nil? || context.empty_path?
      if context.nil? || context.empty?
        configs.add(config)
        return true
      end

      # The context has an empty path but is not itself empty: record the
      # stop state under the empty context and keep following the other paths.
      configs.add(LexerATNConfig.create_from_config2(config, config.state, EmptyPredictionContext::EMPTY))
      current_alt_reached_accept_state = true
    end

    unless context.nil? || context.empty?
      context.size.times do |i|
        return_state_number = context.get_return_state(i)
        next if return_state_number == PredictionContext::EMPTY_RETURN_STATE

        parent_context = context.get_parent(i) # "pop" return state
        return_state = atn.states[return_state_number]
        popped = LexerATNConfig.new
        popped.lexer_atn_config5(config, return_state, parent_context)
        current_alt_reached_accept_state =
          closure(input, popped, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
      end
    end

    return current_alt_reached_accept_state
  end

  state = config.state

  # optimization: a state with only epsilon transitions is never recorded; any
  # other state is skipped only when the alternative already reached an accept
  # state and the config passed through a non-greedy decision.
  if !state.only_has_epsilon_transitions &&
     (!current_alt_reached_accept_state || !config.passed_through_non_greedy_decision)
    configs.add(config)
  end

  state.number_of_transitions.times do |i|
    successor = epsilon_target(input, config, state.transition(i), configs, speculative, treat_eof_as_epsilon)
    next if successor.nil?

    current_alt_reached_accept_state =
      closure(input, successor, configs, current_alt_reached_accept_state, speculative, treat_eof_as_epsilon)
  end

  current_alt_reached_accept_state
end
|
|
351
|
+
|
|
352
|
+
# side-effect: can alter configs.hasSemanticContext
|
|
353
|
+
|
|
354
|
+
# Builds the configuration reached from +config+ by following transition +t+
# without consuming a character, or returns nil when +t+ cannot be followed
# that way.
#
# input                - character stream, forwarded to #evaluate_predicate.
# config               - configuration the transition leaves from.
# t                    - transition to follow; dispatched on #serialization_type.
# configs              - config set; has_semantic_context is set to true when
#                        +t+ is a predicate transition.
# speculative          - forwarded to #evaluate_predicate.
# treat_eof_as_epsilon - when true, atom/range/set transitions that match EOF
#                        are followed as well.
#
# Raises UnsupportedOperationException for precedence transitions.
def epsilon_target(input, config, t, configs, speculative, treat_eof_as_epsilon)
  c = nil

  case t.serialization_type
  when Transition::RULE
    # Entering a rule: push the follow state so #closure can return to it.
    new_context = SingletonPredictionContext.new(config.context, t.follow_state.state_number)
    c = LexerATNConfig.new
    c.lexer_atn_config5(config, t.target, new_context)

  when Transition::PRECEDENCE
    raise UnsupportedOperationException, 'Precedence predicates are not supported in lexers.'

  when Transition::PREDICATE
    # Interpolate so non-String indices cannot raise a TypeError in the trace.
    puts("EVAL rule #{t.rule_index}:#{t.pred_index}") if @@debug
    configs.has_semantic_context = true
    if evaluate_predicate(input, t.rule_index, t.pred_index, speculative)
      c = LexerATNConfig.create_from_config(config, t.target)
    end

  when Transition::ACTION
    if config.context.nil? || config.context.empty_path?
      # execute actions anywhere in the start rule for a token.
      #
      # TODO: if the entry rule is invoked recursively, some
      # actions may be executed during the recursive call. The
      # problem can appear when empty_path? is true but empty?
      # is false. In this case, the config needs to be split
      # into two contexts - one with just the empty path and
      # another with everything but the empty path.
      # Unfortunately, the current algorithm does not allow
      # epsilon_target to return two configurations, so
      # additional modifications are needed before we can support
      # the split operation.
      lexer_action_executor = LexerActionExecutor.append(config.lexer_action_executor, @atn._a[t.action_index])
      c = LexerATNConfig.new
      c.lexer_atn_config4(config, t.target, lexer_action_executor)
    else # ignore actions in referenced rules
      c = LexerATNConfig.new
      c.lexer_atn_config3(config, t.target)
    end

  when Transition::EPSILON
    c = LexerATNConfig.new
    c.lexer_atn_config3(config, t.target)

  when Transition::ATOM, Transition::RANGE, Transition::SET
    # Constants must be resolved with '::'; 'CharStream.EOF' is a method call
    # and raises NoMethodError.
    if treat_eof_as_epsilon && t.matches(CharStream::EOF, Lexer::MIN_CHAR_VALUE, Lexer::MAX_CHAR_VALUE)
      c = LexerATNConfig.create_from_config(config, t.target)
    end
  end

  c
end
|
|
414
|
+
|
|
415
|
+
# Evaluates semantic predicate +pred_index+ of rule +rule_index+ through the
# recognizer's #sempred.
#
# With no recognizer the predicate is assumed to hold. When +speculative+ is
# true, one character is consumed before the predicate is evaluated, and the
# line, column and stream position are restored afterwards even if evaluation
# raises.
#
# Returns true when @recog is nil, otherwise the result of #sempred.
def evaluate_predicate(input, rule_index, pred_index, speculative)
  # assume true if no recognizer was provided
  return true if @recog.nil?

  if speculative
    prior_column, prior_line = @char_position_in_line, @line
    resume_at = input.index
    mark = input.mark
    begin
      consume(input)
      @recog.sempred(nil, rule_index, pred_index)
    ensure
      # Undo the look-ahead performed above.
      @char_position_in_line = prior_column
      @line = prior_line
      input.seek(resume_at)
      input.release(mark)
    end
  else
    @recog.sempred(nil, rule_index, pred_index)
  end
end
|
|
434
|
+
|
|
435
|
+
# Copies the current simulation position into +settings+: the stream index,
# the tracked line and column, and the given DFA state.
#
# Returns +dfa_state+ (the value of the final assignment).
def capture_sim_state(settings, input, dfa_state)
  stream_position = input.index
  settings.index = stream_position
  settings.line = @line
  settings.char_pos = @char_position_in_line
  settings.dfa_state = dfa_state
end
|
|
441
|
+
|
|
442
|
+
# Registers the DFA state for config set +q+ and, unless +q+ was flagged as
# having a semantic context, links it from +from+ on symbol +t+.
#
# The has_semantic_context flag of +q+ is read and then cleared before the
# state is registered.
#
# Returns the DFA state obtained from #add_dfa_state.
def add_dfa_edge_dfastate_atnconfigset(from, t, q)
  had_semantic_context = q.has_semantic_context
  q.has_semantic_context = false

  target_state = add_dfa_state(q)
  add_dfa_edge_dfastate_dfastate(from, t, target_state) unless had_semantic_context

  target_state
end
|
|
453
|
+
|
|
454
|
+
# Records the edge p --t--> q in the edge table of +p+.
#
# Symbols outside MIN_DFA_EDGE..MAX_DFA_EDGE are not tracked; in that case
# nothing is modified and nil is returned.
#
# Returns +q+ when the edge was stored.
def add_dfa_edge_dfastate_dfastate(p, t, q)
  # Only track edges within the DFA bounds
  return unless t.between?(MIN_DFA_EDGE, MAX_DFA_EDGE)

  # Interpolation builds a new String; appending to a literal with << raises
  # FrozenError when string literals are frozen.
  puts("EDGE #{p} -> #{q} upon #{token_name(t)}") if @@debug

  # make room for tokens 1..n and -1 masquerading as index 0
  p.edges ||= []
  p.edges[t - MIN_DFA_EDGE] = q # connect
end
|
|
471
|
+
|
|
472
|
+
# Returns the DFA state for +configs+ in the DFA of the current mode, creating
# and registering it when no equal state is stored yet.
#
# When +configs+ contains a rule stop state config, the state is marked as an
# accept state and inherits that config's lexer action executor and the token
# type mapped from its rule index. On registration +configs+ is flagged
# readonly and the new state receives the next state number.
def add_dfa_state(configs)
  candidate = DFAState.new(configs)

  stop_config = configs.find_first_rule_stop_state
  if !stop_config.nil?
    candidate.is_accept_state = true
    candidate.lexer_action_executor = stop_config.lexer_action_executor
    candidate.prediction = atn.rule_to_token_type[stop_config.state.rule_index]
  end

  mode_dfa = @decision_to_dfa[@mode]

  known = mode_dfa.states[candidate]
  return known unless known.nil?

  # Not registered yet: number the candidate, freeze its configs and store it
  # keyed by itself so later lookups find it.
  candidate.state_number = mode_dfa.states.size
  configs.readonly = true
  candidate.configs = configs
  mode_dfa.states[candidate] = candidate
  candidate
end
|
|
495
|
+
|
|
496
|
+
# Returns the entry of @decision_to_dfa stored at index +mode+.
def dfa(mode)
  dfa_table = @decision_to_dfa
  dfa_table[mode]
end
|
|
499
|
+
|
|
500
|
+
# Returns the text of +input+ from start_index up to, but not including, the
# current stream position.
def text(input)
  first = start_index
  # index is the first lookahead char, don't include it.
  last = input.index - 1
  input.text(Interval.of(first, last))
end
|
|
503
|
+
|
|
504
|
+
# Advances +input+ by one character while keeping the line/column counters in
# step: a newline bumps @line and resets the column, anything else moves the
# column one to the right.
#
# Returns the result of input.consume.
def consume(input)
  at_newline = input.la(1) == "\n".ord
  @line += 1 if at_newline
  @char_position_in_line = at_newline ? 0 : @char_position_in_line + 1
  input.consume
end
|
|
514
|
+
|
|
515
|
+
# Renders symbol +t+ for debug output: 'EOF' for -1, otherwise the value
# wrapped in single quotes.
def token_name(t)
  # if ( atn.g!=nil ) return atn.g.getTokenDisplayName(t)
  t == -1 ? 'EOF' : "'#{t}'"
end
|
|
521
|
+
end
|
|
522
|
+
end
|