tree-sitter-zsh 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +78 -0
- package/binding.gyp +30 -0
- package/bindings/node/binding.cc +20 -0
- package/bindings/node/binding_test.js +9 -0
- package/bindings/node/index.d.ts +28 -0
- package/bindings/node/index.js +11 -0
- package/grammar.js +1904 -0
- package/package.json +54 -0
- package/prebuilds/darwin-arm64/tree-sitter-zsh.node +0 -0
- package/prebuilds/darwin-x64/tree-sitter-zsh.node +0 -0
- package/prebuilds/linux-arm64/tree-sitter-zsh.node +0 -0
- package/prebuilds/linux-x64/tree-sitter-zsh.node +0 -0
- package/prebuilds/win32-arm64/tree-sitter-zsh.node +0 -0
- package/prebuilds/win32-x64/tree-sitter-zsh.node +0 -0
- package/queries/highlights.scm +59 -0
- package/src/grammar.json +11444 -0
- package/src/node-types.json +4386 -0
- package/src/parser.c +929980 -0
- package/src/scanner.c +2416 -0
- package/src/tree_sitter/alloc.h +54 -0
- package/src/tree_sitter/array.h +291 -0
- package/src/tree_sitter/parser.h +286 -0
- package/tree-sitter-zsh.wasm +0 -0
- package/tree-sitter.json +42 -0
package/src/scanner.c
ADDED
|
@@ -0,0 +1,2416 @@
|
|
|
1
|
+
#include "tree_sitter/array.h"
|
|
2
|
+
#include "tree_sitter/parser.h"
|
|
3
|
+
|
|
4
|
+
#include <assert.h>
|
|
5
|
+
#include <ctype.h>
|
|
6
|
+
#include <stdint.h>
|
|
7
|
+
#include <stdio.h>
|
|
8
|
+
#include <string.h>
|
|
9
|
+
#include <wctype.h>
|
|
10
|
+
|
|
11
|
+
#define DEBUG 0
|
|
12
|
+
|
|
13
|
+
/*
 * External token kinds produced by this scanner. The numeric value of each
 * enumerator is used as an index into `valid_symbols`, so entries must not
 * be reordered.
 * NOTE(review): presumably the order must mirror the `externals` list in
 * grammar.js -- confirm before inserting new entries.
 */
enum TokenType {
    /* heredoc pieces */
    HEREDOC_START,
    SIMPLE_HEREDOC_BODY,
    HEREDOC_BODY_BEGINNING,
    HEREDOC_CONTENT,
    HEREDOC_END,

    /* words, variables and operators */
    FILE_DESCRIPTOR,
    EMPTY_VALUE,
    CONCAT,
    VARIABLE_NAME,
    SIMPLE_VARIABLE_NAME,
    SPECIAL_VARIABLE_NAME,
    TEST_OPERATOR,
    REGEX,
    REGEX_NO_SLASH,
    REGEX_NO_SPACE,
    EXPANSION_WORD,
    EXTGLOB_PATTERN,

    /* dollar handling */
    RAW_DOLLAR,       // Consumes spaces, only if $ alone for strings / commands
    BARE_DOLLAR,      // Consumes spaces
    PEEK_BARE_DOLLAR, // Just determines if immediate $ is present

    /* braces, arrays and closers */
    BRACE_START,
    BRACE_EXPR_START,
    IMMEDIATE_DOUBLE_HASH,
    ARRAY_STAR_TOKEN,
    ARRAY_AT_TOKEN,
    CLOSING_BRACE,
    CLOSING_BRACKET,
    CLOSING_PAREN,
    CLOSING_DOUBLE_PAREN,
    HEREDOC_ARROW,
    HEREDOC_ARROW_DASH,

    /* parameter-expansion patterns */
    HASH_PATTERN,         // #pattern
    DOUBLE_HASH_PATTERN,  // ##pattern
    ENTER_PATTERN,        // implicit / etc
    PATTERN_START,        // After pattern operators, before pattern content
    PATTERN_SUFFIX_START, // After # operators, before pattern content

    /* structural tokens */
    NEWLINE,
    OPENING_PAREN,
    DOUBLE_OPENING_PAREN,
    OPENING_BRACKET,
    TEST_COMMAND_START, // [[
    TEST_COMMAND_END,   // ]]
    ESAC,
    ZSH_EXTENDED_GLOB_FLAGS,
    DOUBLE_QUOTE,
    BACKTICK,

    /* checked via valid_symbols[ERROR_RECOVERY] by in_error_recovery() */
    ERROR_RECOVERY,
};
|
|
62
|
+
|
|
63
|
+
#if DEBUG
/*
 * Printable name for every TokenType value; only compiled in DEBUG builds,
 * where scan() prints the currently valid symbols. Each entry is keyed by
 * its enumerator so the table stays aligned with the enum.
 */
const char *TokenNames[] = {
    [HEREDOC_START] = "HEREDOC_START",
    [SIMPLE_HEREDOC_BODY] = "SIMPLE_HEREDOC_BODY",
    [HEREDOC_BODY_BEGINNING] = "HEREDOC_BODY_BEGINNING",
    [HEREDOC_CONTENT] = "HEREDOC_CONTENT",
    [HEREDOC_END] = "HEREDOC_END",
    [FILE_DESCRIPTOR] = "FILE_DESCRIPTOR",
    [EMPTY_VALUE] = "EMPTY_VALUE",
    [CONCAT] = "CONCAT",
    [VARIABLE_NAME] = "VARIABLE_NAME",
    [SIMPLE_VARIABLE_NAME] = "SIMPLE_VARIABLE_NAME",
    [SPECIAL_VARIABLE_NAME] = "SPECIAL_VARIABLE_NAME",
    [TEST_OPERATOR] = "TEST_OPERATOR",
    [REGEX] = "REGEX",
    [REGEX_NO_SLASH] = "REGEX_NO_SLASH",
    [REGEX_NO_SPACE] = "REGEX_NO_SPACE",
    [EXPANSION_WORD] = "EXPANSION_WORD",
    [EXTGLOB_PATTERN] = "EXTGLOB_PATTERN",
    [RAW_DOLLAR] = "RAW_DOLLAR",
    [BARE_DOLLAR] = "BARE_DOLLAR",
    [PEEK_BARE_DOLLAR] = "PEEK_BARE_DOLLAR",
    [BRACE_START] = "BRACE_START",
    [BRACE_EXPR_START] = "BRACE_EXPR_START",
    [IMMEDIATE_DOUBLE_HASH] = "IMMEDIATE_DOUBLE_HASH",
    [ARRAY_STAR_TOKEN] = "ARRAY_STAR_TOKEN",
    [ARRAY_AT_TOKEN] = "ARRAY_AT_TOKEN",
    [CLOSING_BRACE] = "CLOSING_BRACE",
    [CLOSING_BRACKET] = "CLOSING_BRACKET",
    [CLOSING_PAREN] = "CLOSING_PAREN",
    [CLOSING_DOUBLE_PAREN] = "CLOSING_DOUBLE_PAREN",
    [HEREDOC_ARROW] = "HEREDOC_ARROW",
    [HEREDOC_ARROW_DASH] = "HEREDOC_ARROW_DASH",
    [HASH_PATTERN] = "HASH_PATTERN",               // #pattern
    [DOUBLE_HASH_PATTERN] = "DOUBLE_HASH_PATTERN", // ##pattern
    [ENTER_PATTERN] = "ENTER_PATTERN",
    [PATTERN_START] = "PATTERN_START",
    [PATTERN_SUFFIX_START] = "PATTERN_SUFFIX_START",
    [NEWLINE] = "NEWLINE",
    [OPENING_PAREN] = "OPENING_PAREN",
    [DOUBLE_OPENING_PAREN] = "DOUBLE_OPENING_PAREN",
    [OPENING_BRACKET] = "OPENING_BRACKET",
    [TEST_COMMAND_START] = "TEST_COMMAND_START",
    [TEST_COMMAND_END] = "TEST_COMMAND_END",
    [ESAC] = "ESAC",
    [ZSH_EXTENDED_GLOB_FLAGS] = "ZSH_EXTENDED_GLOB_FLAGS",
    [DOUBLE_QUOTE] = "DOUBLE_QUOTE",
    [BACKTICK] = "BACKTICK",
    [ERROR_RECOVERY] = "ERROR_RECOVERY",
};
#endif
|
|
114
|
+
|
|
115
|
+
// Growable byte buffer built on the Array() macro from tree_sitter/array.h;
// used for heredoc delimiters and the leading word of a heredoc line.
typedef Array(char) String;
|
|
116
|
+
|
|
117
|
+
// Context types for nested expansion tracking
|
|
118
|
+
/*
 * Kinds of syntactic context tracked on Scanner.context_stack.
 * The numeric values are written into the serialized scanner state as one
 * byte each and index ContextNames, so they must stay stable.
 */
typedef enum {
    CTX_NONE = 0,                         // empty stack / no context
    CTX_PARAMETER = 1,                    // ${...}
    CTX_ARITHMETIC = 2,                   // $((...))
    CTX_COMMAND = 3,                      // $(...)
    CTX_TEST = 4,                         // [[ ... ]]
    CTX_BRACE_EXPANSION = 5,              // {a..b} and {a..b..c}
    CTX_PARAMETER_PATTERN_SUFFIX = 6,     // ${var%pattern} and ${var#pattern}
                                          // - suffix/prefix removal
    CTX_PARAMETER_PATTERN_SUBSTITUTE = 7, // ${var/pattern/replacement}
                                          // - substitution
    CTX_STRING = 8,                       // "..." string context
    CTX_COMPOUND = 9,                     // "{ x; y; z; }"
    CTX_BACKTICK = 10                     // `a b c`
} context_type_t;
|
|
133
|
+
|
|
134
|
+
/*
 * Printable names for context_type_t values, indexed by the enumerator's
 * numeric value. Referenced only from DEBUG-guarded fprintf calls in the
 * visible part of this file.
 */
const char *ContextNames[] = {
    /*  0 */ "CTX_NONE",
    /*  1 */ "CTX_PARAMETER",                    // ${...}
    /*  2 */ "CTX_ARITHMETIC",                   // $((...))
    /*  3 */ "CTX_COMMAND",                      // $(...)
    /*  4 */ "CTX_TEST",                         // [[ ... ]]
    /*  5 */ "CTX_BRACE_EXPANSION",              // {a..b} and {a..b..c}
    /*  6 */ "CTX_PARAMETER_PATTERN_SUFFIX",     // ${var%pattern}, ${var#pattern}
    /*  7 */ "CTX_PARAMETER_PATTERN_SUBSTITUTE", // ${var/pattern/replacement}
    /*  8 */ "CTX_STRING",                       // "..." string context
    /*  9 */ "CTX_COMPOUND",                     // "{ x; y; z; }"
    /* 10 */ "CTX_BACKTICK"                      // `a b c`
};
|
|
149
|
+
|
|
150
|
+
typedef struct {
|
|
151
|
+
bool is_raw;
|
|
152
|
+
bool started;
|
|
153
|
+
bool allows_indent;
|
|
154
|
+
String delimiter;
|
|
155
|
+
String current_leading_word;
|
|
156
|
+
} Heredoc;
|
|
157
|
+
|
|
158
|
+
// Brace initializer for an empty Heredoc (all flags false, both strings
// empty). The expansion ends in its own ';', so `Heredoc h = heredoc_new();`
// yields an additional, harmless empty statement.
#define heredoc_new()                                                          \
    {                                                                          \
        .is_raw = false,                                                       \
        .started = false,                                                      \
        .allows_indent = false,                                                \
        .delimiter = array_new(),                                              \
        .current_leading_word = array_new(),                                   \
    };
|
|
166
|
+
|
|
167
|
+
typedef struct {
|
|
168
|
+
uint8_t last_glob_paren_depth;
|
|
169
|
+
bool ext_was_in_double_quote;
|
|
170
|
+
bool ext_saw_outside_quote;
|
|
171
|
+
Array(context_type_t) context_stack; // Proper context stack
|
|
172
|
+
bool just_returned_variable_name; // Track if we just returned VARIABLE_NAME
|
|
173
|
+
bool just_returned_bare_dollar; // Track if we just returned BARE_DOLLAR
|
|
174
|
+
bool just_exited_string; // Track if we just exited a string context
|
|
175
|
+
bool just_newline; // Track if we just handled newline
|
|
176
|
+
Array(Heredoc) heredocs;
|
|
177
|
+
} Scanner;
|
|
178
|
+
|
|
179
|
+
// Consume the lookahead character and keep it as part of the current token.
static inline void advance(TSLexer *lexer) {
    const bool discard = false;
    lexer->advance(lexer, discard);
}
|
|
180
|
+
|
|
181
|
+
// Context management functions using proper stack
|
|
182
|
+
// Returns the innermost context on the stack, or CTX_NONE when the stack is
// empty.
static inline context_type_t get_current_context(Scanner *scanner) {
    return scanner->context_stack.size == 0
               ? CTX_NONE
               : *array_back(&scanner->context_stack);
}
|
|
188
|
+
|
|
189
|
+
// True when the innermost context is any flavour of ${...}: the plain
// parameter context or one of its two pattern sub-contexts.
static inline bool in_parameter_expansion(Scanner *scanner) {
    switch (get_current_context(scanner)) {
    case CTX_PARAMETER:
    case CTX_PARAMETER_PATTERN_SUFFIX:
    case CTX_PARAMETER_PATTERN_SUBSTITUTE:
        return true;
    default:
        return false;
    }
}
|
|
194
|
+
|
|
195
|
+
// Helper to determine if we should stop at pattern operators
|
|
196
|
+
// True for exactly the same contexts as in_parameter_expansion(): the plain
// ${...} context and both of its pattern sub-contexts.
static inline bool should_stop_at_pattern_operators(Scanner *scanner) {
    switch (get_current_context(scanner)) {
    case CTX_PARAMETER:
    case CTX_PARAMETER_PATTERN_SUFFIX:
    case CTX_PARAMETER_PATTERN_SUBSTITUTE:
        return true;
    default:
        return false;
    }
}
|
|
201
|
+
|
|
202
|
+
// True only while the innermost context is the ${var/pattern/replacement}
// substitution context.
static inline bool should_stop_at_pattern_slash(Scanner *scanner) {
    const context_type_t innermost = get_current_context(scanner);
    if (innermost == CTX_PARAMETER_PATTERN_SUBSTITUTE) {
        return true;
    }
    return false;
}
|
|
206
|
+
|
|
207
|
+
// Helper to check if we're in parameter expansion context (for tokenization
|
|
208
|
+
// decisions)
|
|
209
|
+
// Alias of in_parameter_expansion(); it simply forwards the query.
static inline bool in_parameter_expansion_context(Scanner *scanner) {
    const bool inside = in_parameter_expansion(scanner);
    return inside;
}
|
|
212
|
+
|
|
213
|
+
// Helper to check if we should break on '/' in EXPANSION_WORD
|
|
214
|
+
// True only while the innermost context is the ${var/pattern/replacement}
// substitution context (same predicate as should_stop_at_pattern_slash).
static inline bool should_break_on_slash(Scanner *scanner) {
    const context_type_t innermost = get_current_context(scanner);
    if (innermost == CTX_PARAMETER_PATTERN_SUBSTITUTE) {
        return true;
    }
    return false;
}
|
|
218
|
+
// Pushes `context` onto the context stack. In DEBUG builds the new context
// and the stack contents prior to the push are logged to stderr first.
static inline void enter_context(Scanner *scanner, context_type_t context) {
#if DEBUG
    fprintf(stderr, "DEBUG: Entering context %s\n", ContextNames[context]);
    for (int i = 0; i < scanner->context_stack.size; ++i) {
        context_type_t entry = *array_get(&scanner->context_stack, i);
        fprintf(stderr, "  DEBUG: context_stack %d= %s\n", i,
                ContextNames[entry]);
    }
#endif
    array_push(&scanner->context_stack, context);
}
|
|
228
|
+
|
|
229
|
+
static inline void exit_context(Scanner *scanner,
|
|
230
|
+
context_type_t expected_context) {
|
|
231
|
+
if (scanner->context_stack.size > 0) {
|
|
232
|
+
context_type_t current = *array_back(&scanner->context_stack);
|
|
233
|
+
// Verify we're exiting the expected context (for debugging)
|
|
234
|
+
if (current == expected_context) {
|
|
235
|
+
#if DEBUG
|
|
236
|
+
fprintf(stderr, "DEBUG: Exiting matching context %s\n",
|
|
237
|
+
ContextNames[current]);
|
|
238
|
+
#endif
|
|
239
|
+
array_pop(&scanner->context_stack);
|
|
240
|
+
} else {
|
|
241
|
+
#if DEBUG
|
|
242
|
+
fprintf(stderr,
|
|
243
|
+
"DEBUG: Exiting mismatching context %s, wanted %s\n",
|
|
244
|
+
ContextNames[current], ContextNames[expected_context]);
|
|
245
|
+
#endif
|
|
246
|
+
// Gracefully handle mismatched contexts by popping anyway
|
|
247
|
+
array_pop(&scanner->context_stack);
|
|
248
|
+
}
|
|
249
|
+
#if DEBUG
|
|
250
|
+
for (int i = 0; i < scanner->context_stack.size; ++i) {
|
|
251
|
+
fprintf(stderr, " DEBUG: context_stack %d= %s\n", i,
|
|
252
|
+
ContextNames[*array_get(&scanner->context_stack, i)]);
|
|
253
|
+
}
|
|
254
|
+
#endif
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
// Helper functions for checking contexts
|
|
259
|
+
// True when the innermost context is ${...}, $((...)) or $(...). The two
// parameter pattern sub-contexts are not included.
static inline bool in_expansion_context(Scanner *scanner) {
    switch (get_current_context(scanner)) {
    case CTX_PARAMETER:
    case CTX_ARITHMETIC:
    case CTX_COMMAND:
        return true;
    default:
        return false;
    }
}
|
|
263
|
+
|
|
264
|
+
// True when the innermost context is one of the two parameter pattern
// sub-contexts (suffix/prefix removal or substitution).
static inline bool in_pattern_context(Scanner *scanner) {
    switch (get_current_context(scanner)) {
    case CTX_PARAMETER_PATTERN_SUFFIX:
    case CTX_PARAMETER_PATTERN_SUBSTITUTE:
        return true;
    default:
        return false;
    }
}
|
|
269
|
+
|
|
270
|
+
// True when the innermost context is a [[ ... ]] test command.
static inline bool in_test_command(Scanner *scanner) {
    return get_current_context(scanner) == CTX_TEST;
}
|
|
274
|
+
|
|
275
|
+
// Consume the lookahead character without including it in the current token.
static inline void skip(TSLexer *lexer) {
    const bool discard = true;
    lexer->advance(lexer, discard);
}
|
|
276
|
+
|
|
277
|
+
// Skips whitespace up to, but not including, the next newline or the end of
// input.
static inline void skip_ws(TSLexer *lexer) {
    for (;;) {
        if (!iswspace(lexer->lookahead) || lexer->lookahead == '\n' ||
            lexer->eof(lexer)) {
            break;
        }
#if DEBUG
        fprintf(stderr, "WARNING skip_ws skipping space");
#endif
        skip(lexer);
    }
}
|
|
286
|
+
// Skips all whitespace, newlines included, until a non-space character or
// the end of input.
static inline void skip_wsnl(TSLexer *lexer) {
    for (;;) {
        if (!iswspace(lexer->lookahead) || lexer->eof(lexer)) {
            break;
        }
#if DEBUG
        fprintf(stderr, "WARNING skip_wsnl skipping space");
#endif
        skip(lexer);
    }
}
|
|
294
|
+
|
|
295
|
+
static inline bool in_error_recovery(const bool *valid_symbols) {
|
|
296
|
+
return valid_symbols[ERROR_RECOVERY];
|
|
297
|
+
}
|
|
298
|
+
|
|
299
|
+
// Empties `string`: zeroes the bytes currently in use, then clears the
// array. An already-empty string is left untouched.
static inline void reset_string(String *string) {
    if (string->size == 0) {
        return;
    }
    memset(string->contents, 0, string->size);
    array_clear(string);
}
|
|
305
|
+
|
|
306
|
+
// Returns a heredoc to its initial state: delimiter emptied and all flags
// cleared. `current_leading_word` is left alone here.
static inline void reset_heredoc(Heredoc *heredoc) {
    reset_string(&heredoc->delimiter);
    heredoc->allows_indent = false;
    heredoc->started = false;
    heredoc->is_raw = false;
}
|
|
312
|
+
|
|
313
|
+
// Restores the scanner to its initial state: all flags cleared, the context
// stack emptied, and every existing heredoc slot reset in place (the
// heredocs array keeps its current size).
static inline void reset(Scanner *scanner) {
#if DEBUG
    fprintf(stderr, "DEBUG: Reset called - heredocs.size before=%u %u\n",
            scanner->heredocs.size, scanner->context_stack.size);
#endif
    // Glob / quote tracking.
    scanner->last_glob_paren_depth = 0;
    scanner->ext_was_in_double_quote = false;
    scanner->ext_saw_outside_quote = false;

    // Clear context stack (storage is kept for reuse).
    scanner->context_stack.size = 0;

    // One-shot "previous token" flags.
    scanner->just_returned_variable_name = false;
    scanner->just_returned_bare_dollar = false;
    scanner->just_exited_string = false;
    scanner->just_newline = false;

    for (uint32_t idx = 0; idx < scanner->heredocs.size; ++idx) {
        Heredoc *slot = array_get(&scanner->heredocs, idx);
        reset_heredoc(slot);
    }
#if DEBUG
    fprintf(stderr, "DEBUG: Reset done - heredocs.size after=%u %u\n",
            scanner->heredocs.size, scanner->context_stack.size);
#endif
}
|
|
334
|
+
|
|
335
|
+
/*
 * Writes the scanner state into `buffer` and returns the number of bytes
 * used, or 0 when the state cannot be represented.
 *
 * Layout:
 *   [0]  last_glob_paren_depth
 *   [1]  ext_was_in_double_quote
 *   [2]  ext_saw_outside_quote
 *   [3]  context stack size   (one byte)
 *   [4]  heredoc count        (one byte)
 *   [5]  just_returned_variable_name
 *   [6]  just_returned_bare_dollar
 *   [7]  just_exited_string
 *   [8]  just_newline
 *   then one byte per context stack entry,
 *   then per heredoc: is_raw, started, allows_indent, a uint32_t delimiter
 *   size (host byte order) and the delimiter bytes.
 *
 * `buffer` is assumed to hold TREE_SITTER_SERIALIZATION_BUFFER_SIZE bytes.
 */
static unsigned serialize(Scanner *scanner, char *buffer) {
    // Both counts are stored in a single byte. A larger value would wrap and
    // produce a stream that deserialize() misinterprets, so refuse instead
    // (same "return 0" policy as the buffer-overflow checks below).
    if (scanner->context_stack.size > UINT8_MAX ||
        scanner->heredocs.size > UINT8_MAX) {
        return 0;
    }

    uint32_t size = 0;

    buffer[size++] = (char)scanner->last_glob_paren_depth;
    buffer[size++] = (char)scanner->ext_was_in_double_quote;
    buffer[size++] = (char)scanner->ext_saw_outside_quote;
    buffer[size++] = (char)scanner->context_stack.size;
    buffer[size++] = (char)scanner->heredocs.size;
    buffer[size++] = (char)scanner->just_returned_variable_name;
    buffer[size++] = (char)scanner->just_returned_bare_dollar;
    buffer[size++] = (char)scanner->just_exited_string;
    buffer[size++] = (char)scanner->just_newline;

    // Serialize context stack, one byte per entry.
    for (uint32_t i = 0; i < scanner->context_stack.size; i++) {
        if (size >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
            return 0;
        }
        context_type_t *ctx = array_get(&scanner->context_stack, i);
        buffer[size++] = (char)*ctx;
    }

    // Serialize heredocs; give up if a record would not fit.
    for (uint32_t i = 0; i < scanner->heredocs.size; i++) {
        Heredoc *heredoc = array_get(&scanner->heredocs, i);
        if (size + 3 + sizeof(uint32_t) + heredoc->delimiter.size >=
            TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
            return 0;
        }

        buffer[size++] = (char)heredoc->is_raw;
        buffer[size++] = (char)heredoc->started;
        buffer[size++] = (char)heredoc->allows_indent;

        memcpy(&buffer[size], &heredoc->delimiter.size, sizeof(uint32_t));
        size += sizeof(uint32_t);
        if (heredoc->delimiter.size > 0) {
            memcpy(&buffer[size], heredoc->delimiter.contents,
                   heredoc->delimiter.size);
            size += heredoc->delimiter.size;
        }
    }
    return size;
}
|
|
378
|
+
|
|
379
|
+
/*
 * Restores scanner state from a buffer previously produced by serialize().
 *
 * A buffer too short to contain the fixed header (which includes the empty
 * buffer) resets the scanner. Otherwise the header, the context stack and
 * the heredoc records are read back in the order serialize() wrote them.
 *
 * Differences from a naive read-back:
 *   - every heredoc record is bounds-checked against `length` before it is
 *     read, so a truncated buffer cannot cause reads past its end;
 *   - heredoc slots beyond the serialized count are released, so the
 *     restored scanner holds exactly the heredocs that were serialized.
 */
static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
    // 3 glob/quote bytes + 2 count bytes + 4 one-shot flag bytes.
    enum { SERIALIZED_HEADER_SIZE = 9 };
#if DEBUG
    fprintf(stderr,
            "DEBUG: Deserialize called - length=%u, before heredocs.size =%u "
            "ctx_stack=%u\n",
            length, scanner->heredocs.size, scanner->context_stack.size);
#endif
    if (length < SERIALIZED_HEADER_SIZE) {
        reset(scanner);
    } else {
        uint32_t size = 0;
        scanner->last_glob_paren_depth = buffer[size++];
        scanner->ext_was_in_double_quote = buffer[size++];
        scanner->ext_saw_outside_quote = buffer[size++];
        uint32_t context_stack_size = (unsigned char)buffer[size++];
        uint32_t heredoc_count = (unsigned char)buffer[size++];
#if DEBUG
        fprintf(stderr,
                "DEBUG: Deserialize - heredoc_count=%u context_stack_size=%u\n",
                heredoc_count, context_stack_size);
#endif
        scanner->just_returned_variable_name = buffer[size++];
        scanner->just_returned_bare_dollar = buffer[size++];
        scanner->just_exited_string = buffer[size++];
        scanner->just_newline = buffer[size++];

        // Deserialize context stack
        scanner->context_stack.size = 0;
        for (uint32_t i = 0; i < context_stack_size; i++) {
            if (size >= length)
                break;
            context_type_t ctx = (context_type_t)buffer[size++];
            array_push(&scanner->context_stack, ctx);
        }

        for (uint32_t i = 0; i < heredoc_count; i++) {
            // Fixed part of a record: three flag bytes plus the size field.
            if ((size_t)size + 3 + sizeof(uint32_t) > length) {
                break;
            }

            // Reuse an existing slot when there is one, else append.
            Heredoc *heredoc = NULL;
            if (i < scanner->heredocs.size) {
                heredoc = array_get(&scanner->heredocs, i);
            } else {
                Heredoc new_heredoc = heredoc_new();
                array_push(&scanner->heredocs, new_heredoc);
                heredoc = array_back(&scanner->heredocs);
            }

            heredoc->is_raw = buffer[size++];
            heredoc->started = buffer[size++];
            heredoc->allows_indent = buffer[size++];

            uint32_t delimiter_size = 0;
            memcpy(&delimiter_size, &buffer[size], sizeof(uint32_t));
            size += sizeof(uint32_t);

            // Delimiter bytes must lie entirely inside the buffer.
            if (delimiter_size > length - size) {
                heredoc->delimiter.size = 0;
                break;
            }

            array_reserve(&heredoc->delimiter, delimiter_size);
            heredoc->delimiter.size = delimiter_size;
            if (delimiter_size > 0) {
                memcpy(heredoc->delimiter.contents, &buffer[size],
                       delimiter_size);
                size += delimiter_size;
            }
        }

        // Drop heredocs left over from the scanner's previous state so the
        // restored state matches what was serialized.
        while (scanner->heredocs.size > heredoc_count) {
            Heredoc *stale = array_back(&scanner->heredocs);
            array_delete(&stale->delimiter);
            array_delete(&stale->current_leading_word);
            (void)array_pop(&scanner->heredocs);
        }

        assert(size == length);
    }
#if DEBUG
    fprintf(stderr, "DEBUG: Deserialize done - heredocs.size after=%u %u\n",
            scanner->heredocs.size, scanner->context_stack.size);
#endif
}
|
|
445
|
+
|
|
446
|
+
/**
|
|
447
|
+
* Consume a "word" in POSIX parlance, and returns it unquoted.
|
|
448
|
+
*
|
|
449
|
+
* This is an approximate implementation that doesn't deal with any
|
|
450
|
+
* POSIX-mandated substitution, and assumes the default value for
|
|
451
|
+
* IFS.
|
|
452
|
+
*/
|
|
453
|
+
static bool advance_word(TSLexer *lexer, String *unquoted_word) {
    bool found_any = false;

    // An opening ' or " switches to quoted mode, where the word runs to the
    // matching quote (or end of line) instead of to the next whitespace.
    int32_t quote = 0;
    if (lexer->lookahead == '\'' || lexer->lookahead == '"') {
        quote = lexer->lookahead;
        advance(lexer);
    }

    for (;;) {
        if (!lexer->lookahead) {
            break;
        }
        if (quote) {
            if (lexer->lookahead == quote || lexer->lookahead == '\r' ||
                lexer->lookahead == '\n') {
                break;
            }
        } else if (iswspace(lexer->lookahead)) {
            break;
        }

        // A backslash is dropped and the character after it is taken
        // literally; a backslash followed by end of input fails the word.
        if (lexer->lookahead == '\\') {
            advance(lexer);
            if (!lexer->lookahead) {
                return false;
            }
        }

        found_any = true;
        array_push(unquoted_word, lexer->lookahead);
        advance(lexer);
    }

    // Always NUL-terminate so the result can be compared with strcmp().
    array_push(unquoted_word, '\0');

    // Consume the closing quote when one is present.
    if (quote && lexer->lookahead == quote) {
        advance(lexer);
    }

    return found_any;
}
|
|
484
|
+
|
|
485
|
+
// Skips same-line whitespace and, if a '$' follows, consumes it and reports
// whether it stands alone (followed by whitespace, end of input or '"').
// The result symbol is set to BARE_DOLLAR as soon as '$' is seen, even when
// the function then returns false. `valid_symbols` is not consulted.
// NOTE(review): the token emitted is BARE_DOLLAR although the function is
// named after RAW_DOLLAR -- confirm against the caller.
static inline bool scan_raw_dollar(TSLexer *lexer, const bool *valid_symbols) {
    skip_ws(lexer);

    if (lexer->lookahead != '$') {
        return false;
    }

    advance(lexer);
    lexer->result_symbol = BARE_DOLLAR;
    lexer->mark_end(lexer);

    if (iswspace(lexer->lookahead)) {
        return true;
    }
    if (lexer->eof(lexer)) {
        return true;
    }
    return lexer->lookahead == '"';
}
|
|
498
|
+
|
|
499
|
+
// Reads the delimiter word that follows a heredoc operator into
// `heredoc->delimiter` and sets the result symbol to HEREDOC_START.
// Leading whitespace (newlines included) is skipped. The heredoc is marked
// raw when the delimiter starts with a quote or a backslash. Returns false,
// with the delimiter emptied again, when no word could be read.
static bool scan_heredoc_start(Heredoc *heredoc, TSLexer *lexer) {
    while (iswspace(lexer->lookahead)) {
        skip(lexer);
    }

    lexer->result_symbol = HEREDOC_START;

    switch (lexer->lookahead) {
    case '\'':
    case '"':
    case '\\':
        heredoc->is_raw = true;
        break;
    default:
        heredoc->is_raw = false;
        break;
    }

    if (advance_word(lexer, &heredoc->delimiter)) {
        return true;
    }
    reset_string(&heredoc->delimiter);
    return false;
}
|
|
515
|
+
|
|
516
|
+
// Consumes characters from the current position for as long as they match
// the heredoc delimiter, collecting them in `heredoc->current_leading_word`,
// and returns true when the collected text equals the whole delimiter.
// Always returns false for an empty delimiter. Matching stops at a newline,
// at a NUL lookahead, or at the first mismatching character.
static bool scan_heredoc_end_identifier(Heredoc *heredoc, TSLexer *lexer) {
    reset_string(&heredoc->current_leading_word);

    if (heredoc->delimiter.size > 0) {
        // The bound is tested before the delimiter is indexed, so the read
        // stays in range even if the delimiter lacks its usual trailing NUL.
        uint32_t matched = 0;
        while (matched < heredoc->delimiter.size &&
               lexer->lookahead != '\0' && lexer->lookahead != '\n' &&
               (int32_t)*array_get(&heredoc->delimiter, matched) ==
                   lexer->lookahead) {
            array_push(&heredoc->current_leading_word, lexer->lookahead);
            advance(lexer);
            matched++;
        }
    }

    // NUL-terminate so the two buffers can be compared as C strings.
    array_push(&heredoc->current_leading_word, '\0');

    if (heredoc->delimiter.size == 0) {
        return false;
    }
    return strcmp(heredoc->current_leading_word.contents,
                  heredoc->delimiter.contents) == 0;
}
|
|
537
|
+
|
|
538
|
+
/*
 * Scans the body of the innermost pending heredoc (the last element of
 * scanner->heredocs).
 *
 * `middle_type` is emitted for a chunk that is followed by more heredoc
 * material (for example before a '$' expansion); `end_type` is emitted for
 * the chunk that finishes the body. Returns true when a token was produced,
 * with lexer->result_symbol set accordingly.
 */
static bool scan_heredoc_content(Scanner *scanner, TSLexer *lexer,
                                 enum TokenType middle_type,
                                 enum TokenType end_type) {
    Heredoc *heredoc = array_back(&scanner->heredocs);
    bool consumed = false; // true once anything has been advanced/skipped

    for (;;) {
        const int32_t ch = lexer->lookahead;

        if (ch == '\0') {
            // End of input closes the heredoc only if something was read.
            if (lexer->eof(lexer) && consumed) {
                reset_heredoc(heredoc);
                lexer->result_symbol = end_type;
                return true;
            }
            return false;
        }

        if (ch == '\\') {
            // Backslash plus the character it escapes are plain content.
            consumed = true;
            advance(lexer);
            advance(lexer);
            continue;
        }

        if (ch == '$') {
            if (heredoc->is_raw) {
                // Raw heredoc: '$' is ordinary text.
                consumed = true;
                advance(lexer);
                continue;
            }
            if (consumed) {
                // End the token before '$'; emit it only when the '$'
                // actually starts an expansion, otherwise keep scanning.
                lexer->mark_end(lexer);
                lexer->result_symbol = middle_type;
                heredoc->started = true;
                advance(lexer);
                if (iswalpha(lexer->lookahead) || lexer->lookahead == '{' ||
                    lexer->lookahead == '(') {
                    return true;
                }
                continue;
            }
            if (middle_type == HEREDOC_BODY_BEGINNING &&
                lexer->get_column(lexer) == 0) {
                // Body begins directly with '$': emit an empty beginning.
                lexer->mark_end(lexer);
                lexer->result_symbol = middle_type;
                heredoc->started = true;
                return true;
            }
            return false;
        }

        if (ch == '\n') {
            if (consumed) {
                advance(lexer);
            } else {
                skip(lexer);
            }
            consumed = true;

            if (heredoc->allows_indent) {
                while (iswspace(lexer->lookahead)) {
                    advance(lexer);
                }
            }

            lexer->result_symbol = heredoc->started ? middle_type : end_type;
            lexer->mark_end(lexer);
            if (scan_heredoc_end_identifier(heredoc, lexer)) {
                if (lexer->result_symbol == HEREDOC_END) {
                    (void)array_pop(&scanner->heredocs);
                }
                return true;
            }
            continue;
        }

        // Any other character.
        if (lexer->get_column(lexer) == 0) {
            // an alternative is to check the starting column of the
            // heredoc body and track that statefully
            while (iswspace(lexer->lookahead)) {
                if (consumed) {
                    advance(lexer);
                } else {
                    skip(lexer);
                }
            }

            if (end_type == SIMPLE_HEREDOC_BODY) {
                lexer->result_symbol = end_type;
                lexer->mark_end(lexer);
                if (scan_heredoc_end_identifier(heredoc, lexer)) {
                    return true;
                }
            } else {
                lexer->result_symbol = middle_type;
                if (scan_heredoc_end_identifier(heredoc, lexer)) {
                    return true;
                }
            }
        }
        consumed = true;
        advance(lexer);
    }
}
|
|
644
|
+
|
|
645
|
+
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
|
|
646
|
+
#if DEBUG
|
|
647
|
+
fprintf(stderr, "SCANNER: invoked lookahead='%c'\n", lexer->lookahead);
|
|
648
|
+
for (int i = 0; i <= ERROR_RECOVERY; i++) {
|
|
649
|
+
if (valid_symbols[i]) {
|
|
650
|
+
fprintf(stderr, "SCANNER: valid symbol: %s\n", TokenNames[i]);
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
#endif
|
|
654
|
+
|
|
655
|
+
// Clear flag at start and capture its previous value
|
|
656
|
+
bool was_just_variable_name = scanner->just_returned_variable_name;
|
|
657
|
+
scanner->just_returned_variable_name = false;
|
|
658
|
+
|
|
659
|
+
bool was_just_bare_dollar = scanner->just_returned_bare_dollar;
|
|
660
|
+
scanner->just_returned_bare_dollar = false;
|
|
661
|
+
|
|
662
|
+
// Clear string exit flag at start and capture its previous value
|
|
663
|
+
bool was_just_exited_string = scanner->just_exited_string;
|
|
664
|
+
scanner->just_exited_string = false;
|
|
665
|
+
|
|
666
|
+
// FIXME: newline handling and exited string handling should go
|
|
667
|
+
bool was_just_newline = scanner->just_newline;
|
|
668
|
+
scanner->just_newline = false;
|
|
669
|
+
|
|
670
|
+
if (valid_symbols[CONCAT] && !in_error_recovery(valid_symbols)) {
|
|
671
|
+
context_type_t ctx = get_current_context(scanner);
|
|
672
|
+
#if DEBUG
|
|
673
|
+
fprintf(stderr,
|
|
674
|
+
"SCANNER: CONCAT handler lookeahead=%c "
|
|
675
|
+
"was_just_exited_string=%d was_just_newline=%d\n",
|
|
676
|
+
lexer->lookahead, was_just_exited_string, was_just_newline);
|
|
677
|
+
#endif
|
|
678
|
+
|
|
679
|
+
if (!(lexer->lookahead == 0 || iswspace(lexer->lookahead) ||
|
|
680
|
+
lexer->lookahead == '>' || lexer->lookahead == '<' ||
|
|
681
|
+
(lexer->lookahead == ')' &&
|
|
682
|
+
(valid_symbols[CLOSING_PAREN] ||
|
|
683
|
+
valid_symbols[CLOSING_DOUBLE_PAREN])) ||
|
|
684
|
+
lexer->lookahead == '(' || lexer->lookahead == ';' ||
|
|
685
|
+
lexer->lookahead == '&' || lexer->lookahead == '|' ||
|
|
686
|
+
lexer->lookahead == '{' ||
|
|
687
|
+
// prevent concat over newline after string ends
|
|
688
|
+
(was_just_exited_string && lexer->lookahead == '\n') ||
|
|
689
|
+
(lexer->lookahead == '"' && ctx == CTX_STRING) ||
|
|
690
|
+
(was_just_newline) ||
|
|
691
|
+
// Prevent recursion on / pattern
|
|
692
|
+
(lexer->lookahead == '/' &&
|
|
693
|
+
ctx == CTX_PARAMETER_PATTERN_SUBSTITUTE) ||
|
|
694
|
+
(lexer->lookahead == '}' && in_parameter_expansion(scanner)) ||
|
|
695
|
+
// Split subscript out
|
|
696
|
+
(lexer->lookahead == ']' && valid_symbols[CLOSING_BRACKET]) ||
|
|
697
|
+
(lexer->lookahead == '[' &&
|
|
698
|
+
was_just_variable_name) || // Suppress CONCAT after $var when [
|
|
699
|
+
(lexer->lookahead == ':' &&
|
|
700
|
+
was_just_variable_name) || // Suppress CONCAT after $var when :
|
|
701
|
+
(lexer->lookahead == '`' && ctx == CTX_BACKTICK)
|
|
702
|
+
)) {
|
|
703
|
+
// follows
|
|
704
|
+
#if DEBUG
|
|
705
|
+
fprintf(stderr, "SCANNER: CONCAT\n");
|
|
706
|
+
#endif
|
|
707
|
+
|
|
708
|
+
// So for a`b`, we want to return a concat. We check if the
|
|
709
|
+
// 2nd backtick has whitespace after it, and if it does we
|
|
710
|
+
// return concat.
|
|
711
|
+
if (lexer->lookahead == '`' && ctx != CTX_BACKTICK) {
|
|
712
|
+
lexer->mark_end(lexer);
|
|
713
|
+
advance(lexer);
|
|
714
|
+
bool was_escape = false;
|
|
715
|
+
while ((lexer->lookahead != '`' || was_escape) && !lexer->eof(lexer)) {
|
|
716
|
+
advance(lexer);
|
|
717
|
+
was_escape = false;
|
|
718
|
+
if (lexer->lookahead == '\\') {
|
|
719
|
+
was_escape = true;
|
|
720
|
+
}
|
|
721
|
+
}
|
|
722
|
+
if (lexer->eof(lexer)) {
|
|
723
|
+
return false;
|
|
724
|
+
}
|
|
725
|
+
if (lexer->lookahead == '`') {
|
|
726
|
+
advance(lexer);
|
|
727
|
+
}
|
|
728
|
+
if ((iswspace(lexer->lookahead) &&
|
|
729
|
+
lexer->lookahead != '\n' // HACK
|
|
730
|
+
) ||
|
|
731
|
+
lexer->eof(lexer)) {
|
|
732
|
+
lexer->result_symbol = CONCAT;
|
|
733
|
+
return true;
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
// strings w/ expansions that contains escaped quotes or
|
|
737
|
+
// backslashes need this to return a concat
|
|
738
|
+
if (lexer->lookahead == '\\') {
|
|
739
|
+
lexer->mark_end(lexer);
|
|
740
|
+
advance(lexer);
|
|
741
|
+
if (lexer->lookahead == '"' || lexer->lookahead == '\'' ||
|
|
742
|
+
lexer->lookahead == '\\') {
|
|
743
|
+
lexer->result_symbol = CONCAT;
|
|
744
|
+
return true;
|
|
745
|
+
}
|
|
746
|
+
if (lexer->eof(lexer)) {
|
|
747
|
+
return false;
|
|
748
|
+
}
|
|
749
|
+
} else {
|
|
750
|
+
lexer->mark_end(lexer);
|
|
751
|
+
lexer->result_symbol = CONCAT;
|
|
752
|
+
return true;
|
|
753
|
+
}
|
|
754
|
+
}
|
|
755
|
+
}
|
|
756
|
+
|
|
757
|
+
// Handle string context tracking
|
|
758
|
+
if (valid_symbols[DOUBLE_QUOTE]) {
|
|
759
|
+
if (get_current_context(scanner) != CTX_STRING) {
|
|
760
|
+
skip_ws(lexer);
|
|
761
|
+
|
|
762
|
+
if (lexer->lookahead == '"') {
|
|
763
|
+
// Entering a string context
|
|
764
|
+
enter_context(scanner, CTX_STRING);
|
|
765
|
+
#if DEBUG
|
|
766
|
+
fprintf(stderr, "SCANNER: Entering string context\n");
|
|
767
|
+
#endif
|
|
768
|
+
advance(lexer);
|
|
769
|
+
lexer->mark_end(lexer);
|
|
770
|
+
lexer->result_symbol = DOUBLE_QUOTE;
|
|
771
|
+
return true;
|
|
772
|
+
}
|
|
773
|
+
} else if (lexer->lookahead == '"') {
|
|
774
|
+
// Exiting a string context
|
|
775
|
+
exit_context(scanner, CTX_STRING);
|
|
776
|
+
// Set the flag to indicate we just exited a string
|
|
777
|
+
was_just_exited_string = scanner->just_exited_string = true;
|
|
778
|
+
|
|
779
|
+
#if DEBUG
|
|
780
|
+
fprintf(stderr, "SCANNER: Exiting string context\n");
|
|
781
|
+
#endif
|
|
782
|
+
|
|
783
|
+
advance(lexer);
|
|
784
|
+
lexer->mark_end(lexer);
|
|
785
|
+
lexer->result_symbol = DOUBLE_QUOTE;
|
|
786
|
+
return true;
|
|
787
|
+
}
|
|
788
|
+
}
|
|
789
|
+
|
|
790
|
+
// Handle string context tracking
|
|
791
|
+
if (valid_symbols[BACKTICK]) {
|
|
792
|
+
if (get_current_context(scanner) != CTX_BACKTICK) {
|
|
793
|
+
skip_ws(lexer);
|
|
794
|
+
|
|
795
|
+
if (lexer->lookahead == '`') {
|
|
796
|
+
// Entering a string context
|
|
797
|
+
enter_context(scanner, CTX_BACKTICK);
|
|
798
|
+
#if DEBUG
|
|
799
|
+
fprintf(stderr, "SCANNER: Entering backtick context\n");
|
|
800
|
+
#endif
|
|
801
|
+
advance(lexer);
|
|
802
|
+
lexer->mark_end(lexer);
|
|
803
|
+
lexer->result_symbol = BACKTICK;
|
|
804
|
+
return true;
|
|
805
|
+
}
|
|
806
|
+
} else if (lexer->lookahead == '`') {
|
|
807
|
+
// Exiting a backtick context
|
|
808
|
+
exit_context(scanner, CTX_BACKTICK);
|
|
809
|
+
|
|
810
|
+
#if DEBUG
|
|
811
|
+
fprintf(stderr, "SCANNER: Exiting backtick context\n");
|
|
812
|
+
#endif
|
|
813
|
+
|
|
814
|
+
advance(lexer);
|
|
815
|
+
lexer->mark_end(lexer);
|
|
816
|
+
lexer->result_symbol = BACKTICK;
|
|
817
|
+
return true;
|
|
818
|
+
}
|
|
819
|
+
}
|
|
820
|
+
|
|
821
|
+
#if DEBUG
|
|
822
|
+
fprintf(stderr,
|
|
823
|
+
"DEBUG: scan() start - was_just_bare_dollar=%s, lookahead='%c'\n",
|
|
824
|
+
was_just_bare_dollar ? "true" : "false", lexer->lookahead);
|
|
825
|
+
#endif
|
|
826
|
+
|
|
827
|
+
// Resolve and absorb newlines when requested
|
|
828
|
+
if (valid_symbols[NEWLINE] && !in_error_recovery(valid_symbols)) {
|
|
829
|
+
#if DEBUG
|
|
830
|
+
fprintf(stderr, "SCANNER: NEWLINE handler, lookahead='%c'\n",
|
|
831
|
+
lexer->lookahead);
|
|
832
|
+
#endif
|
|
833
|
+
skip_ws(lexer);
|
|
834
|
+
if (lexer->lookahead == '\n') {
|
|
835
|
+
while (iswspace(lexer->lookahead)) {
|
|
836
|
+
skip(lexer);
|
|
837
|
+
}
|
|
838
|
+
was_just_newline = scanner->just_newline = true;
|
|
839
|
+
lexer->mark_end(lexer);
|
|
840
|
+
lexer->result_symbol = NEWLINE;
|
|
841
|
+
return true;
|
|
842
|
+
}
|
|
843
|
+
else if (lexer->lookahead == '\\') {
|
|
844
|
+
lexer->mark_end(lexer);
|
|
845
|
+
skip(lexer);
|
|
846
|
+
if (lexer->lookahead == '\n') {
|
|
847
|
+
// Just ignore the newline
|
|
848
|
+
skip(lexer);
|
|
849
|
+
skip_ws(lexer);
|
|
850
|
+
} else {
|
|
851
|
+
// we consumed things we should not have
|
|
852
|
+
lexer->mark_end(lexer);
|
|
853
|
+
}
|
|
854
|
+
}
|
|
855
|
+
}
|
|
856
|
+
|
|
857
|
+
// Dedicated context-aware brace handler - handles closing braces for
|
|
858
|
+
// different contexts
|
|
859
|
+
if (valid_symbols[CLOSING_BRACE] && !in_error_recovery(valid_symbols)) {
|
|
860
|
+
context_type_t active = get_current_context(scanner);
|
|
861
|
+
|
|
862
|
+
skip_wsnl(lexer);
|
|
863
|
+
if (lexer->lookahead == '}') {
|
|
864
|
+
#if DEBUG
|
|
865
|
+
fprintf(stderr,
|
|
866
|
+
"SCANNER: Detected } closing brace, active "
|
|
867
|
+
"context=%d\n",
|
|
868
|
+
active);
|
|
869
|
+
#endif
|
|
870
|
+
if (active == CTX_PARAMETER ||
|
|
871
|
+
active == CTX_PARAMETER_PATTERN_SUFFIX ||
|
|
872
|
+
active == CTX_PARAMETER_PATTERN_SUBSTITUTE) {
|
|
873
|
+
#if DEBUG
|
|
874
|
+
fprintf(stderr,
|
|
875
|
+
"SCANNER: Exiting parameter expansion context on }\n");
|
|
876
|
+
#endif
|
|
877
|
+
exit_context(scanner, active);
|
|
878
|
+
lexer->result_symbol = CLOSING_BRACE;
|
|
879
|
+
advance(lexer);
|
|
880
|
+
lexer->mark_end(lexer);
|
|
881
|
+
return true;
|
|
882
|
+
} else if (active == CTX_BRACE_EXPANSION) {
|
|
883
|
+
#if DEBUG
|
|
884
|
+
fprintf(stderr,
|
|
885
|
+
"SCANNER: Exiting brace expression context on }\n");
|
|
886
|
+
#endif
|
|
887
|
+
exit_context(scanner, active);
|
|
888
|
+
lexer->result_symbol = CLOSING_BRACE;
|
|
889
|
+
advance(lexer);
|
|
890
|
+
lexer->mark_end(lexer);
|
|
891
|
+
return true;
|
|
892
|
+
} else if (active == CTX_COMPOUND) {
|
|
893
|
+
#if DEBUG
|
|
894
|
+
fprintf(stderr,
|
|
895
|
+
"SCANNER: Exiting compound expression context on }\n");
|
|
896
|
+
#endif
|
|
897
|
+
exit_context(scanner, active);
|
|
898
|
+
lexer->result_symbol = CLOSING_BRACE;
|
|
899
|
+
advance(lexer);
|
|
900
|
+
lexer->mark_end(lexer);
|
|
901
|
+
return true;
|
|
902
|
+
}
|
|
903
|
+
|
|
904
|
+
// Note: CTX_BRACE_EXPANSION closing braces are handled by grammar
|
|
905
|
+
// as token.immediate('}')
|
|
906
|
+
}
|
|
907
|
+
}
|
|
908
|
+
|
|
909
|
+
// Handle BARE_DOLLAR: emitted for a $ unless it is immediately followed by a double quote
|
|
910
|
+
if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols)) {
|
|
911
|
+
#if DEBUG
|
|
912
|
+
fprintf(stderr,
|
|
913
|
+
"SCANNER: Entering BARE_DOLLAR handler, lookahead='%c'\n",
|
|
914
|
+
lexer->lookahead);
|
|
915
|
+
#endif
|
|
916
|
+
|
|
917
|
+
// Only skip whitespace if we're starting with whitespace
|
|
918
|
+
// This preserves whitespace significance for concatenation
|
|
919
|
+
#if DEBUG
|
|
920
|
+
fprintf(stderr,
|
|
921
|
+
"SCANNER: BARE_DOLLAR whitespace check: "
|
|
922
|
+
"valid_symbols[CONCAT]=%d, lookahead='%c'\n",
|
|
923
|
+
valid_symbols[CONCAT], lexer->lookahead);
|
|
924
|
+
#endif
|
|
925
|
+
if (!valid_symbols[CONCAT] &&
|
|
926
|
+
(lexer->lookahead == ' ' || lexer->lookahead == '\t')) {
|
|
927
|
+
#if DEBUG
|
|
928
|
+
fprintf(stderr, "SCANNER: BARE_DOLLAR skipping whitespace\n");
|
|
929
|
+
#endif
|
|
930
|
+
while ((lexer->lookahead == ' ' || lexer->lookahead == '\t') &&
|
|
931
|
+
!lexer->eof(lexer)) {
|
|
932
|
+
skip(lexer);
|
|
933
|
+
}
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
if (lexer->lookahead == '$') {
|
|
937
|
+
#if DEBUG
|
|
938
|
+
fprintf(stderr, "SCANNER: Found $ character\n");
|
|
939
|
+
#endif
|
|
940
|
+
advance(lexer);
|
|
941
|
+
if (lexer->lookahead != '\"') {
|
|
942
|
+
lexer->mark_end(lexer);
|
|
943
|
+
lexer->result_symbol = BARE_DOLLAR;
|
|
944
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
945
|
+
true;
|
|
946
|
+
return true;
|
|
947
|
+
}
|
|
948
|
+
#if DEBUG
|
|
949
|
+
fprintf(stderr, "SCANNER: Not ${...} pattern, returning false\n");
|
|
950
|
+
#endif
|
|
951
|
+
}
|
|
952
|
+
#if DEBUG
|
|
953
|
+
fprintf(stderr, "SCANNER: No $ character found, continuing\n");
|
|
954
|
+
#endif
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
// Must be after BARE_DOLLAR to avoid conflict
|
|
958
|
+
// Handle PEEK_BARE_DOLLAR for concatenation: check if next non-whitespace
|
|
959
|
+
// token is $ without consuming
|
|
960
|
+
if (valid_symbols[PEEK_BARE_DOLLAR] && !in_error_recovery(valid_symbols)) {
|
|
961
|
+
#if DEBUG
|
|
962
|
+
fprintf(stderr,
|
|
963
|
+
"SCANNER: Entering PEEK_BARE_DOLLAR handler, lookahead='%c'\n",
|
|
964
|
+
lexer->lookahead);
|
|
965
|
+
#endif
|
|
966
|
+
|
|
967
|
+
if (lexer->lookahead == '$') {
|
|
968
|
+
#if DEBUG
|
|
969
|
+
fprintf(stderr, "SCANNER: PEEK found $ character, returning "
|
|
970
|
+
"PEEK_BARE_DOLLAR\n");
|
|
971
|
+
#endif
|
|
972
|
+
lexer->result_symbol = PEEK_BARE_DOLLAR;
|
|
973
|
+
return true;
|
|
974
|
+
}
|
|
975
|
+
|
|
976
|
+
#if DEBUG
|
|
977
|
+
fprintf(stderr, "SCANNER: PEEK did not find $ character\n");
|
|
978
|
+
#endif
|
|
979
|
+
}
|
|
980
|
+
|
|
981
|
+
// Handle BRACE_START - if we're in parameter expansion context, this is
|
|
982
|
+
// part of ${
|
|
983
|
+
if (valid_symbols[BRACE_START] && !in_error_recovery(valid_symbols)) {
|
|
984
|
+
if (lexer->lookahead == '{') {
|
|
985
|
+
if (was_just_bare_dollar) {
|
|
986
|
+
advance(lexer);
|
|
987
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
988
|
+
false; // Reset flag
|
|
989
|
+
lexer->result_symbol = BRACE_START;
|
|
990
|
+
lexer->mark_end(lexer);
|
|
991
|
+
// This is ${...} - increment expansion depth
|
|
992
|
+
enter_context(scanner, CTX_PARAMETER);
|
|
993
|
+
return true;
|
|
994
|
+
}
|
|
995
|
+
}
|
|
996
|
+
// If not after $, we may need to consume spaces
|
|
997
|
+
skip_ws(lexer);
|
|
998
|
+
if (lexer->lookahead == '{') {
|
|
999
|
+
advance(lexer);
|
|
1000
|
+
lexer->result_symbol = BRACE_START;
|
|
1001
|
+
lexer->mark_end(lexer);
|
|
1002
|
+
// This is a bare { (not directly after $) - enter compound context
|
|
1003
|
+
enter_context(scanner, CTX_COMPOUND);
|
|
1004
|
+
return true;
|
|
1005
|
+
}
|
|
1006
|
+
}
|
|
1007
|
+
|
|
1008
|
+
// Handle OPENING_PAREN after BARE_DOLLAR
|
|
1009
|
+
if ((valid_symbols[OPENING_PAREN] || valid_symbols[DOUBLE_OPENING_PAREN] ||
|
|
1010
|
+
valid_symbols[ZSH_EXTENDED_GLOB_FLAGS]) &&
|
|
1011
|
+
!in_error_recovery(valid_symbols)) {
|
|
1012
|
+
skip_ws(lexer);
|
|
1013
|
+
if (lexer->lookahead == '(') {
|
|
1014
|
+
advance(lexer);
|
|
1015
|
+
lexer->mark_end(lexer);
|
|
1016
|
+
|
|
1017
|
+
if (was_just_bare_dollar) {
|
|
1018
|
+
#if DEBUG
|
|
1019
|
+
fprintf(stderr, "SCANNER: Detected OPENING_PAREN after "
|
|
1020
|
+
"BARE_DOLLAR\n");
|
|
1021
|
+
#endif
|
|
1022
|
+
if (lexer->lookahead == '(' &&
|
|
1023
|
+
valid_symbols[DOUBLE_OPENING_PAREN]) {
|
|
1024
|
+
advance(lexer);
|
|
1025
|
+
lexer->mark_end(lexer);
|
|
1026
|
+
// This is $((...)) - increment arithmetic depth
|
|
1027
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1028
|
+
false; // Reset flag
|
|
1029
|
+
enter_context(scanner, CTX_ARITHMETIC);
|
|
1030
|
+
lexer->result_symbol = DOUBLE_OPENING_PAREN;
|
|
1031
|
+
return true;
|
|
1032
|
+
} else if (valid_symbols[OPENING_PAREN]) {
|
|
1033
|
+
// This is $(...) - increment command_depth
|
|
1034
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1035
|
+
false; // Reset flag
|
|
1036
|
+
enter_context(scanner, CTX_COMMAND);
|
|
1037
|
+
lexer->result_symbol = OPENING_PAREN;
|
|
1038
|
+
return true;
|
|
1039
|
+
}
|
|
1040
|
+
} else if (lexer->lookahead == '(' &&
|
|
1041
|
+
valid_symbols[DOUBLE_OPENING_PAREN]) {
|
|
1042
|
+
advance(lexer);
|
|
1043
|
+
lexer->mark_end(lexer);
|
|
1044
|
+
// This is ((...)) - increment arithmetic depth
|
|
1045
|
+
enter_context(scanner, CTX_ARITHMETIC);
|
|
1046
|
+
lexer->result_symbol = DOUBLE_OPENING_PAREN;
|
|
1047
|
+
return true;
|
|
1048
|
+
} else if (valid_symbols[OPENING_PAREN] ||
|
|
1049
|
+
valid_symbols[ZSH_EXTENDED_GLOB_FLAGS]) {
|
|
1050
|
+
// Handle ZSH_EXTENDED_GLOB_FLAGS - (#flags) patterns
|
|
1051
|
+
if (lexer->lookahead == '#' &&
|
|
1052
|
+
valid_symbols[ZSH_EXTENDED_GLOB_FLAGS]) {
|
|
1053
|
+
advance(lexer);
|
|
1054
|
+
|
|
1055
|
+
// Check for valid flag characters
|
|
1056
|
+
bool found_flags = false;
|
|
1057
|
+
while (
|
|
1058
|
+
lexer->lookahead &&
|
|
1059
|
+
(iswalnum(lexer->lookahead) ||
|
|
1060
|
+
lexer->lookahead == '.' || lexer->lookahead == 'i' ||
|
|
1061
|
+
lexer->lookahead == 'q' || lexer->lookahead == 'b' ||
|
|
1062
|
+
lexer->lookahead == 'm' || lexer->lookahead == 'n' ||
|
|
1063
|
+
lexer->lookahead == 's' || lexer->lookahead == 'B' ||
|
|
1064
|
+
lexer->lookahead == 'I' || lexer->lookahead == 'N' ||
|
|
1065
|
+
lexer->lookahead == 'U' || lexer->lookahead == 'X' ||
|
|
1066
|
+
lexer->lookahead == 'c' || lexer->lookahead == 'e' ||
|
|
1067
|
+
lexer->lookahead == 'l' || lexer->lookahead == 'f' ||
|
|
1068
|
+
lexer->lookahead == 'a' || lexer->lookahead == 'C' ||
|
|
1069
|
+
lexer->lookahead == 'o')) {
|
|
1070
|
+
found_flags = true;
|
|
1071
|
+
advance(lexer);
|
|
1072
|
+
}
|
|
1073
|
+
|
|
1074
|
+
if (found_flags && lexer->lookahead == ')') {
|
|
1075
|
+
advance(lexer);
|
|
1076
|
+
lexer->mark_end(lexer);
|
|
1077
|
+
lexer->result_symbol = ZSH_EXTENDED_GLOB_FLAGS;
|
|
1078
|
+
return true;
|
|
1079
|
+
}
|
|
1080
|
+
|
|
1081
|
+
// If we get here, it's not a valid glob flags pattern
|
|
1082
|
+
// Reset and let other tokens handle it
|
|
1083
|
+
return false;
|
|
1084
|
+
}
|
|
1085
|
+
if (valid_symbols[OPENING_PAREN]) {
|
|
1086
|
+
#if DEBUG
|
|
1087
|
+
fprintf(stderr, "SCANNER: Detected OPENING_PAREN NOT AFTER "
|
|
1088
|
+
"BARE_DOLLAR\n");
|
|
1089
|
+
#endif
|
|
1090
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1091
|
+
false; // Reset flag
|
|
1092
|
+
lexer->mark_end(lexer);
|
|
1093
|
+
enter_context(scanner, CTX_COMMAND);
|
|
1094
|
+
lexer->result_symbol = OPENING_PAREN;
|
|
1095
|
+
return true;
|
|
1096
|
+
}
|
|
1097
|
+
}
|
|
1098
|
+
}
|
|
1099
|
+
}
|
|
1100
|
+
|
|
1101
|
+
if ((valid_symbols[OPENING_BRACKET] || valid_symbols[TEST_COMMAND_START]) &&
|
|
1102
|
+
!in_error_recovery(valid_symbols)) {
|
|
1103
|
+
#if DEBUG
|
|
1104
|
+
fprintf(stderr,
|
|
1105
|
+
"DEBUG: CHECKING TEST_COMMAND_START=%d OPENING_BRACKET=%d "
|
|
1106
|
+
"lookahead=%c\n",
|
|
1107
|
+
valid_symbols[TEST_COMMAND_START],
|
|
1108
|
+
valid_symbols[OPENING_BRACKET], lexer->lookahead);
|
|
1109
|
+
#endif
|
|
1110
|
+
skip_wsnl(lexer);
|
|
1111
|
+
#if DEBUG
|
|
1112
|
+
fprintf(stderr,
|
|
1113
|
+
"DEBUG: CHECKING TEST_COMMAND_START=%d OPENING_BRACKET=%d "
|
|
1114
|
+
"lookahead=%c\n",
|
|
1115
|
+
valid_symbols[TEST_COMMAND_START],
|
|
1116
|
+
valid_symbols[OPENING_BRACKET], lexer->lookahead);
|
|
1117
|
+
#endif
|
|
1118
|
+
if (lexer->lookahead == '[') {
|
|
1119
|
+
advance(lexer);
|
|
1120
|
+
|
|
1121
|
+
if (lexer->lookahead == '[' && valid_symbols[TEST_COMMAND_START]) {
|
|
1122
|
+
advance(lexer);
|
|
1123
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1124
|
+
false; // Reset flag
|
|
1125
|
+
lexer->result_symbol = TEST_COMMAND_START;
|
|
1126
|
+
lexer->mark_end(lexer);
|
|
1127
|
+
|
|
1128
|
+
#if DEBUG
|
|
1129
|
+
fprintf(stderr, "DEBUG: Detected TEST_COMMAND_START [[\n");
|
|
1130
|
+
#endif
|
|
1131
|
+
// Enter test command context
|
|
1132
|
+
enter_context(scanner, CTX_TEST);
|
|
1133
|
+
return true;
|
|
1134
|
+
} else if (was_just_bare_dollar && valid_symbols[OPENING_BRACKET]) {
|
|
1135
|
+
// This is $[
|
|
1136
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1137
|
+
false; // Reset flag
|
|
1138
|
+
lexer->result_symbol = OPENING_BRACKET;
|
|
1139
|
+
lexer->mark_end(lexer);
|
|
1140
|
+
|
|
1141
|
+
#if DEBUG
|
|
1142
|
+
fprintf(stderr, "DEBUG: Detected OPENING_BRACKET $[\n");
|
|
1143
|
+
#endif
|
|
1144
|
+
enter_context(scanner, CTX_ARITHMETIC);
|
|
1145
|
+
return true;
|
|
1146
|
+
} else if (valid_symbols[OPENING_BRACKET]) {
|
|
1147
|
+
// This is single [
|
|
1148
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1149
|
+
false; // Reset flag
|
|
1150
|
+
lexer->result_symbol = OPENING_BRACKET;
|
|
1151
|
+
lexer->mark_end(lexer);
|
|
1152
|
+
|
|
1153
|
+
#if DEBUG
|
|
1154
|
+
fprintf(stderr, "DEBUG: Detected OPENING_BRACKET [\n");
|
|
1155
|
+
#endif
|
|
1156
|
+
return true;
|
|
1157
|
+
}
|
|
1158
|
+
}
|
|
1159
|
+
}
|
|
1160
|
+
|
|
1161
|
+
// Handle TEST_COMMAND_END ]] and single CLOSING_BRACKET ]
|
|
1162
|
+
if ((valid_symbols[TEST_COMMAND_END] || valid_symbols[CLOSING_BRACKET]) &&
|
|
1163
|
+
!in_error_recovery(valid_symbols)) {
|
|
1164
|
+
skip_ws(lexer);
|
|
1165
|
+
if (lexer->lookahead == ']') {
|
|
1166
|
+
advance(lexer);
|
|
1167
|
+
if (lexer->lookahead == ']' && valid_symbols[TEST_COMMAND_END]) {
|
|
1168
|
+
advance(lexer);
|
|
1169
|
+
lexer->result_symbol = TEST_COMMAND_END;
|
|
1170
|
+
lexer->mark_end(lexer);
|
|
1171
|
+
|
|
1172
|
+
#if DEBUG
|
|
1173
|
+
fprintf(stderr, "DEBUG: Detected TEST_COMMAND_END ]]\n");
|
|
1174
|
+
#endif
|
|
1175
|
+
// Exit test command context
|
|
1176
|
+
exit_context(scanner, CTX_TEST);
|
|
1177
|
+
return true;
|
|
1178
|
+
} else if (valid_symbols[CLOSING_BRACKET]) {
|
|
1179
|
+
// This is single ]
|
|
1180
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1181
|
+
false; // Reset flag
|
|
1182
|
+
lexer->result_symbol = CLOSING_BRACKET;
|
|
1183
|
+
lexer->mark_end(lexer);
|
|
1184
|
+
|
|
1185
|
+
#if DEBUG
|
|
1186
|
+
fprintf(stderr, "DEBUG: Detected CLOSING_BRACKET ]\n");
|
|
1187
|
+
#endif
|
|
1188
|
+
return true;
|
|
1189
|
+
}
|
|
1190
|
+
// If only one ], don't consume it - let normal parsing handle it
|
|
1191
|
+
return false;
|
|
1192
|
+
}
|
|
1193
|
+
}
|
|
1194
|
+
|
|
1195
|
+
if ((valid_symbols[CLOSING_PAREN] || valid_symbols[CLOSING_DOUBLE_PAREN]) &&
|
|
1196
|
+
!in_error_recovery(valid_symbols)) {
|
|
1197
|
+
skip_ws(lexer);
|
|
1198
|
+
if (lexer->lookahead == ')') {
|
|
1199
|
+
advance(lexer);
|
|
1200
|
+
|
|
1201
|
+
if (lexer->lookahead == ')' &&
|
|
1202
|
+
valid_symbols[CLOSING_DOUBLE_PAREN]) {
|
|
1203
|
+
advance(lexer);
|
|
1204
|
+
lexer->result_symbol = CLOSING_DOUBLE_PAREN;
|
|
1205
|
+
lexer->mark_end(lexer);
|
|
1206
|
+
|
|
1207
|
+
// Exit arithmetic context
|
|
1208
|
+
exit_context(scanner, CTX_ARITHMETIC);
|
|
1209
|
+
return true;
|
|
1210
|
+
} else if (valid_symbols[CLOSING_PAREN]) {
|
|
1211
|
+
// This is single )
|
|
1212
|
+
lexer->result_symbol = CLOSING_PAREN;
|
|
1213
|
+
lexer->mark_end(lexer);
|
|
1214
|
+
|
|
1215
|
+
// Exit relevant context
|
|
1216
|
+
|
|
1217
|
+
if (get_current_context(scanner) == CTX_COMMAND) {
|
|
1218
|
+
exit_context(scanner, CTX_COMMAND);
|
|
1219
|
+
} else if (get_current_context(scanner) == CTX_ARITHMETIC) {
|
|
1220
|
+
exit_context(scanner, CTX_ARITHMETIC);
|
|
1221
|
+
}
|
|
1222
|
+
return true;
|
|
1223
|
+
}
|
|
1224
|
+
// A single ) when only )) is valid here - return false and let normal parsing handle it
|
|
1225
|
+
return false;
|
|
1226
|
+
}
|
|
1227
|
+
}
|
|
1228
|
+
|
|
1229
|
+
// Handle PATTERN_START - emitted after pattern operators in parameter
|
|
1230
|
+
// expansions
|
|
1231
|
+
if (valid_symbols[PATTERN_START] && !in_error_recovery(valid_symbols)) {
|
|
1232
|
+
if (get_current_context(scanner) == CTX_PARAMETER &&
|
|
1233
|
+
lexer->lookahead !=
|
|
1234
|
+
'}') { // Don't emit if expansion is about to end
|
|
1235
|
+
|
|
1236
|
+
// Determine pattern context based on what type of pattern we're
|
|
1237
|
+
// entering
|
|
1238
|
+
#if DEBUG
|
|
1239
|
+
fprintf(stderr,
|
|
1240
|
+
"DEBUG: PATTERN_START emitting for substitution, "
|
|
1241
|
+
"lookahead='%c'\n",
|
|
1242
|
+
lexer->lookahead);
|
|
1243
|
+
#endif
|
|
1244
|
+
exit_context(scanner, CTX_PARAMETER);
|
|
1245
|
+
enter_context(scanner, CTX_PARAMETER_PATTERN_SUBSTITUTE);
|
|
1246
|
+
#if DEBUG
|
|
1247
|
+
fprintf(stderr, "DEBUG: Context after transition: %d\n",
|
|
1248
|
+
get_current_context(scanner));
|
|
1249
|
+
#endif
|
|
1250
|
+
lexer->result_symbol = PATTERN_START;
|
|
1251
|
+
lexer->mark_end(lexer);
|
|
1252
|
+
return true;
|
|
1253
|
+
}
|
|
1254
|
+
}
|
|
1255
|
+
|
|
1256
|
+
// Handle PATTERN_SUFFIX_START - emitted after pattern operators in
|
|
1257
|
+
// parameter expansions
|
|
1258
|
+
if (valid_symbols[PATTERN_SUFFIX_START] &&
|
|
1259
|
+
!in_error_recovery(valid_symbols)) {
|
|
1260
|
+
if (get_current_context(scanner) == CTX_PARAMETER &&
|
|
1261
|
+
lexer->lookahead !=
|
|
1262
|
+
'}') { // Don't emit if expansion is about to end
|
|
1263
|
+
|
|
1264
|
+
// Determine pattern context based on what type of pattern we're
|
|
1265
|
+
// entering % # patterns are suffix/prefix removal
|
|
1266
|
+
#if DEBUG
|
|
1267
|
+
fprintf(stderr,
|
|
1268
|
+
"DEBUG: PATTERN_START emitting for suffix/prefix, "
|
|
1269
|
+
"lookahead='%c'\n",
|
|
1270
|
+
lexer->lookahead);
|
|
1271
|
+
#endif
|
|
1272
|
+
|
|
1273
|
+
exit_context(scanner, CTX_PARAMETER);
|
|
1274
|
+
enter_context(scanner, CTX_PARAMETER_PATTERN_SUFFIX);
|
|
1275
|
+
#if DEBUG
|
|
1276
|
+
fprintf(stderr, "DEBUG: Context after transition: %d\n",
|
|
1277
|
+
get_current_context(scanner));
|
|
1278
|
+
#endif
|
|
1279
|
+
lexer->result_symbol = PATTERN_SUFFIX_START;
|
|
1280
|
+
lexer->mark_end(lexer);
|
|
1281
|
+
return true;
|
|
1282
|
+
}
|
|
1283
|
+
}
|
|
1284
|
+
|
|
1285
|
+
// Handle hash operations in parameter expansion context
|
|
1286
|
+
if (in_parameter_expansion(scanner) && lexer->lookahead == '#' &&
|
|
1287
|
+
(valid_symbols[HASH_PATTERN] || valid_symbols[DOUBLE_HASH_PATTERN]) &&
|
|
1288
|
+
!in_error_recovery(valid_symbols)) {
|
|
1289
|
+
#if DEBUG
|
|
1290
|
+
fprintf(stderr, "SCANNER: Hash operation detected\n");
|
|
1291
|
+
#endif
|
|
1292
|
+
advance(lexer); // consume first #
|
|
1293
|
+
|
|
1294
|
+
if (lexer->lookahead == '#') {
|
|
1295
|
+
// Double hash: ##pattern
|
|
1296
|
+
if (valid_symbols[DOUBLE_HASH_PATTERN]) {
|
|
1297
|
+
#if DEBUG
|
|
1298
|
+
fprintf(stderr, "SCANNER: Returning DOUBLE_HASH_PATTERN\n");
|
|
1299
|
+
#endif
|
|
1300
|
+
advance(lexer); // consume second #
|
|
1301
|
+
lexer->result_symbol = DOUBLE_HASH_PATTERN;
|
|
1302
|
+
lexer->mark_end(lexer);
|
|
1303
|
+
return true;
|
|
1304
|
+
}
|
|
1305
|
+
} else {
|
|
1306
|
+
// Single hash: #pattern
|
|
1307
|
+
if (valid_symbols[HASH_PATTERN]) {
|
|
1308
|
+
#if DEBUG
|
|
1309
|
+
fprintf(stderr, "SCANNER: Returning HASH_PATTERN\n");
|
|
1310
|
+
#endif
|
|
1311
|
+
lexer->result_symbol = HASH_PATTERN;
|
|
1312
|
+
lexer->mark_end(lexer);
|
|
1313
|
+
return true;
|
|
1314
|
+
}
|
|
1315
|
+
}
|
|
1316
|
+
#if DEBUG
|
|
1317
|
+
fprintf(stderr, "SCANNER: Hash operation not matched\n");
|
|
1318
|
+
#endif
|
|
1319
|
+
return false;
|
|
1320
|
+
}
|
|
1321
|
+
|
|
1322
|
+
// Array operators: ${var[*]} and ${var[@]}
|
|
1323
|
+
if ((valid_symbols[ARRAY_STAR_TOKEN] || valid_symbols[ARRAY_AT_TOKEN]) &&
|
|
1324
|
+
!in_error_recovery(valid_symbols)) {
|
|
1325
|
+
if (lexer->lookahead == '*' && valid_symbols[ARRAY_STAR_TOKEN] &&
|
|
1326
|
+
!valid_symbols[REGEX] && !valid_symbols[REGEX_NO_SLASH] &&
|
|
1327
|
+
!valid_symbols[REGEX_NO_SPACE]) {
|
|
1328
|
+
lexer->result_symbol = ARRAY_STAR_TOKEN;
|
|
1329
|
+
advance(lexer);
|
|
1330
|
+
lexer->mark_end(lexer);
|
|
1331
|
+
return true;
|
|
1332
|
+
}
|
|
1333
|
+
if (lexer->lookahead == '@' && valid_symbols[ARRAY_AT_TOKEN]) {
|
|
1334
|
+
lexer->result_symbol = ARRAY_AT_TOKEN;
|
|
1335
|
+
advance(lexer);
|
|
1336
|
+
lexer->mark_end(lexer);
|
|
1337
|
+
return true;
|
|
1338
|
+
}
|
|
1339
|
+
}
|
|
1340
|
+
|
|
1341
|
+
if (valid_symbols[EMPTY_VALUE]) {
|
|
1342
|
+
if (iswspace(lexer->lookahead) || lexer->eof(lexer) ||
|
|
1343
|
+
lexer->lookahead == ';' || lexer->lookahead == '&' ||
|
|
1344
|
+
lexer->lookahead == '}') {
|
|
1345
|
+
lexer->mark_end(lexer);
|
|
1346
|
+
lexer->result_symbol = EMPTY_VALUE;
|
|
1347
|
+
return true;
|
|
1348
|
+
}
|
|
1349
|
+
}
|
|
1350
|
+
|
|
1351
|
+
if ((valid_symbols[HEREDOC_BODY_BEGINNING] ||
|
|
1352
|
+
valid_symbols[SIMPLE_HEREDOC_BODY]) &&
|
|
1353
|
+
scanner->heredocs.size > 0 &&
|
|
1354
|
+
!array_back(&scanner->heredocs)->started &&
|
|
1355
|
+
!in_error_recovery(valid_symbols)) {
|
|
1356
|
+
return scan_heredoc_content(scanner, lexer, HEREDOC_BODY_BEGINNING,
|
|
1357
|
+
SIMPLE_HEREDOC_BODY);
|
|
1358
|
+
}
|
|
1359
|
+
|
|
1360
|
+
if (valid_symbols[HEREDOC_END] && scanner->heredocs.size > 0) {
|
|
1361
|
+
Heredoc *heredoc = array_back(&scanner->heredocs);
|
|
1362
|
+
if (scan_heredoc_end_identifier(heredoc, lexer)) {
|
|
1363
|
+
array_delete(&heredoc->current_leading_word);
|
|
1364
|
+
array_delete(&heredoc->delimiter);
|
|
1365
|
+
array_pop(&scanner->heredocs);
|
|
1366
|
+
lexer->result_symbol = HEREDOC_END;
|
|
1367
|
+
return true;
|
|
1368
|
+
}
|
|
1369
|
+
}
|
|
1370
|
+
|
|
1371
|
+
if (valid_symbols[HEREDOC_CONTENT] && scanner->heredocs.size > 0 &&
|
|
1372
|
+
array_back(&scanner->heredocs)->started &&
|
|
1373
|
+
!in_error_recovery(valid_symbols)) {
|
|
1374
|
+
return scan_heredoc_content(scanner, lexer, HEREDOC_CONTENT,
|
|
1375
|
+
HEREDOC_END);
|
|
1376
|
+
}
|
|
1377
|
+
|
|
1378
|
+
if (valid_symbols[HEREDOC_START] && !in_error_recovery(valid_symbols) &&
|
|
1379
|
+
scanner->heredocs.size > 0) {
|
|
1380
|
+
#if DEBUG
|
|
1381
|
+
fprintf(stderr,
|
|
1382
|
+
"DEBUG: HEREDOC_START check - heredocs.size=%u, "
|
|
1383
|
+
"in_error_recovery=%s\n",
|
|
1384
|
+
scanner->heredocs.size,
|
|
1385
|
+
in_error_recovery(valid_symbols) ? "true" : "false");
|
|
1386
|
+
#endif
|
|
1387
|
+
return scan_heredoc_start(array_back(&scanner->heredocs), lexer);
|
|
1388
|
+
}
|
|
1389
|
+
|
|
1390
|
+
if (valid_symbols[TEST_OPERATOR] && !valid_symbols[EXPANSION_WORD]) {
|
|
1391
|
+
skip_ws(lexer);
|
|
1392
|
+
if (lexer->lookahead == '\\') {
|
|
1393
|
+
if (valid_symbols[EXTGLOB_PATTERN]) {
|
|
1394
|
+
goto extglob_pattern;
|
|
1395
|
+
}
|
|
1396
|
+
if (valid_symbols[REGEX_NO_SPACE]) {
|
|
1397
|
+
goto regex;
|
|
1398
|
+
}
|
|
1399
|
+
skip(lexer);
|
|
1400
|
+
|
|
1401
|
+
if (lexer->eof(lexer)) {
|
|
1402
|
+
return false;
|
|
1403
|
+
}
|
|
1404
|
+
|
|
1405
|
+
if (lexer->lookahead == '\r') {
|
|
1406
|
+
skip(lexer);
|
|
1407
|
+
if (lexer->lookahead == '\n') {
|
|
1408
|
+
skip(lexer);
|
|
1409
|
+
}
|
|
1410
|
+
} else if (lexer->lookahead == '\n') {
|
|
1411
|
+
skip(lexer);
|
|
1412
|
+
} else {
|
|
1413
|
+
return false;
|
|
1414
|
+
}
|
|
1415
|
+
|
|
1416
|
+
while (iswspace(lexer->lookahead)) {
|
|
1417
|
+
skip(lexer);
|
|
1418
|
+
}
|
|
1419
|
+
}
|
|
1420
|
+
|
|
1421
|
+
if (lexer->lookahead == '\n' && !valid_symbols[NEWLINE]) {
|
|
1422
|
+
skip(lexer);
|
|
1423
|
+
|
|
1424
|
+
while (iswspace(lexer->lookahead)) {
|
|
1425
|
+
skip(lexer);
|
|
1426
|
+
}
|
|
1427
|
+
}
|
|
1428
|
+
|
|
1429
|
+
if (lexer->lookahead == '-') {
|
|
1430
|
+
advance(lexer);
|
|
1431
|
+
|
|
1432
|
+
bool advanced_once = false;
|
|
1433
|
+
while (iswalpha(lexer->lookahead)) {
|
|
1434
|
+
advanced_once = true;
|
|
1435
|
+
advance(lexer);
|
|
1436
|
+
}
|
|
1437
|
+
|
|
1438
|
+
if (iswspace(lexer->lookahead) && advanced_once) {
|
|
1439
|
+
lexer->mark_end(lexer);
|
|
1440
|
+
advance(lexer);
|
|
1441
|
+
context_type_t ctx = get_current_context(scanner);
|
|
1442
|
+
if (lexer->lookahead == '}' &&
|
|
1443
|
+
(ctx == CTX_PARAMETER ||
|
|
1444
|
+
ctx == CTX_PARAMETER_PATTERN_SUFFIX ||
|
|
1445
|
+
ctx == CTX_PARAMETER_PATTERN_SUBSTITUTE)) {
|
|
1446
|
+
if (valid_symbols[EXPANSION_WORD]) {
|
|
1447
|
+
lexer->mark_end(lexer);
|
|
1448
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
1449
|
+
return true;
|
|
1450
|
+
}
|
|
1451
|
+
return false;
|
|
1452
|
+
}
|
|
1453
|
+
lexer->result_symbol = TEST_OPERATOR;
|
|
1454
|
+
return true;
|
|
1455
|
+
}
|
|
1456
|
+
if (iswspace(lexer->lookahead) && valid_symbols[EXTGLOB_PATTERN]) {
|
|
1457
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
1458
|
+
return true;
|
|
1459
|
+
}
|
|
1460
|
+
}
|
|
1461
|
+
|
|
1462
|
+
if (valid_symbols[RAW_DOLLAR] && !in_error_recovery(valid_symbols) &&
|
|
1463
|
+
scan_raw_dollar(lexer, valid_symbols)) {
|
|
1464
|
+
return true;
|
|
1465
|
+
}
|
|
1466
|
+
}
|
|
1467
|
+
|
|
1468
|
+
if (valid_symbols[SIMPLE_VARIABLE_NAME] &&
|
|
1469
|
+
!in_error_recovery(valid_symbols)) {
|
|
1470
|
+
bool in_param_expand = in_parameter_expansion_context(scanner);
|
|
1471
|
+
|
|
1472
|
+
#if DEBUG
|
|
1473
|
+
fprintf(stderr, "SCANNER: trying SIMPLE_VARIABLE_NAME\n");
|
|
1474
|
+
#endif
|
|
1475
|
+
|
|
1476
|
+
skip_ws(lexer);
|
|
1477
|
+
if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
|
1478
|
+
int consumed = 0;
|
|
1479
|
+
while (iswalnum(lexer->lookahead) || lexer->lookahead == '_') {
|
|
1480
|
+
advance(lexer);
|
|
1481
|
+
consumed++;
|
|
1482
|
+
}
|
|
1483
|
+
|
|
1484
|
+
if (consumed > 0) {
|
|
1485
|
+
lexer->mark_end(lexer);
|
|
1486
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1487
|
+
false;
|
|
1488
|
+
scanner->just_returned_variable_name = true;
|
|
1489
|
+
lexer->result_symbol = SIMPLE_VARIABLE_NAME;
|
|
1490
|
+
#if DEBUG
|
|
1491
|
+
fprintf(stderr, "SCANNER: SIMPLE_VARIABLE_NAME found\n");
|
|
1492
|
+
#endif
|
|
1493
|
+
return true;
|
|
1494
|
+
}
|
|
1495
|
+
}
|
|
1496
|
+
}
|
|
1497
|
+
|
|
1498
|
+
if (valid_symbols[SPECIAL_VARIABLE_NAME] &&
|
|
1499
|
+
!in_error_recovery(valid_symbols)) {
|
|
1500
|
+
// '*', '@', '?', '!', '#', '-', '$', '0', '_'
|
|
1501
|
+
skip_ws(lexer);
|
|
1502
|
+
bool in_param_expand = in_parameter_expansion_context(scanner);
|
|
1503
|
+
#if DEBUG
|
|
1504
|
+
if (in_param_expand) {
|
|
1505
|
+
if (lexer->lookahead == '!' || lexer->lookahead == '#')
|
|
1506
|
+
fprintf(stderr, "SCANNER: skipping flag chars as part of "
|
|
1507
|
+
"SPECIAL_VARIABLE_NAME\n");
|
|
1508
|
+
}
|
|
1509
|
+
#endif
|
|
1510
|
+
if (lexer->lookahead == '*' || lexer->lookahead == '@' ||
|
|
1511
|
+
lexer->lookahead == '?' || lexer->lookahead == '-' ||
|
|
1512
|
+
(lexer->lookahead == '!' && !in_param_expand) ||
|
|
1513
|
+
(lexer->lookahead == '#' && !in_param_expand) ||
|
|
1514
|
+
lexer->lookahead == '$' || lexer->lookahead == '_' ||
|
|
1515
|
+
iswdigit(lexer->lookahead)) {
|
|
1516
|
+
advance(lexer);
|
|
1517
|
+
lexer->mark_end(lexer);
|
|
1518
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar = false;
|
|
1519
|
+
was_just_variable_name = scanner->just_returned_variable_name =
|
|
1520
|
+
true;
|
|
1521
|
+
lexer->result_symbol = SPECIAL_VARIABLE_NAME;
|
|
1522
|
+
return true;
|
|
1523
|
+
}
|
|
1524
|
+
}
|
|
1525
|
+
|
|
1526
|
+
if ((valid_symbols[VARIABLE_NAME] || valid_symbols[FILE_DESCRIPTOR] ||
|
|
1527
|
+
valid_symbols[HEREDOC_ARROW]) &&
|
|
1528
|
+
!valid_symbols[REGEX_NO_SLASH] && !in_error_recovery(valid_symbols)) {
|
|
1529
|
+
for (;;) {
|
|
1530
|
+
if ((lexer->lookahead == ' ' || lexer->lookahead == '\t' ||
|
|
1531
|
+
lexer->lookahead == '\r' ||
|
|
1532
|
+
(lexer->lookahead == '\n' && !valid_symbols[NEWLINE])) &&
|
|
1533
|
+
!valid_symbols[EXPANSION_WORD] && !valid_symbols[CONCAT]) {
|
|
1534
|
+
#if DEBUG
|
|
1535
|
+
fprintf(stderr, "SCANNER: VARIABLE_NAME skipped ws\n");
|
|
1536
|
+
#endif
|
|
1537
|
+
// Only skip whitespace if CONCAT is not valid
|
|
1538
|
+
skip(lexer);
|
|
1539
|
+
} else if (lexer->lookahead == '\\') {
|
|
1540
|
+
skip(lexer);
|
|
1541
|
+
|
|
1542
|
+
if (lexer->eof(lexer)) {
|
|
1543
|
+
lexer->mark_end(lexer);
|
|
1544
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1545
|
+
false; // Reset flag
|
|
1546
|
+
#if DEBUG
|
|
1547
|
+
fprintf(stderr, "SCANNER: VARIABLE_NAME after \\\n");
|
|
1548
|
+
#endif
|
|
1549
|
+
lexer->result_symbol = VARIABLE_NAME;
|
|
1550
|
+
scanner->just_returned_variable_name = true;
|
|
1551
|
+
return true;
|
|
1552
|
+
}
|
|
1553
|
+
|
|
1554
|
+
if (lexer->lookahead == '\r') {
|
|
1555
|
+
skip(lexer);
|
|
1556
|
+
}
|
|
1557
|
+
if (lexer->lookahead == '\n') {
|
|
1558
|
+
skip(lexer);
|
|
1559
|
+
} else {
|
|
1560
|
+
if (lexer->lookahead == '\\' &&
|
|
1561
|
+
valid_symbols[EXPANSION_WORD]) {
|
|
1562
|
+
goto expansion_word;
|
|
1563
|
+
}
|
|
1564
|
+
return false;
|
|
1565
|
+
}
|
|
1566
|
+
} else {
|
|
1567
|
+
break;
|
|
1568
|
+
}
|
|
1569
|
+
}
|
|
1570
|
+
|
|
1571
|
+
// no '*', '@', '?', '-', '$', '0', '_', '#'
|
|
1572
|
+
if (!valid_symbols[EXPANSION_WORD] &&
|
|
1573
|
+
(lexer->lookahead == '*' || lexer->lookahead == '@' ||
|
|
1574
|
+
lexer->lookahead == '?' || lexer->lookahead == '-' ||
|
|
1575
|
+
lexer->lookahead == '0' || lexer->lookahead == '_' ||
|
|
1576
|
+
lexer->lookahead == '#')) {
|
|
1577
|
+
lexer->mark_end(lexer);
|
|
1578
|
+
advance(lexer);
|
|
1579
|
+
if (lexer->lookahead == '=' || lexer->lookahead == '[' ||
|
|
1580
|
+
lexer->lookahead == ':' || lexer->lookahead == '-' ||
|
|
1581
|
+
lexer->lookahead == '%' || lexer->lookahead == '/') {
|
|
1582
|
+
return false;
|
|
1583
|
+
}
|
|
1584
|
+
if (valid_symbols[EXTGLOB_PATTERN] && iswspace(lexer->lookahead)) {
|
|
1585
|
+
lexer->mark_end(lexer);
|
|
1586
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
1587
|
+
return true;
|
|
1588
|
+
}
|
|
1589
|
+
}
|
|
1590
|
+
|
|
1591
|
+
if (valid_symbols[HEREDOC_ARROW] && lexer->lookahead == '<') {
|
|
1592
|
+
advance(lexer);
|
|
1593
|
+
if (lexer->lookahead == '<') {
|
|
1594
|
+
advance(lexer);
|
|
1595
|
+
if (lexer->lookahead == '-') {
|
|
1596
|
+
advance(lexer);
|
|
1597
|
+
Heredoc heredoc = heredoc_new();
|
|
1598
|
+
heredoc.allows_indent = true;
|
|
1599
|
+
array_push(&scanner->heredocs, heredoc);
|
|
1600
|
+
#if DEBUG
|
|
1601
|
+
fprintf(stderr,
|
|
1602
|
+
"DEBUG: HEREDOC_ARROW_DASH - added heredoc, size "
|
|
1603
|
+
"now=%u\n",
|
|
1604
|
+
scanner->heredocs.size);
|
|
1605
|
+
#endif
|
|
1606
|
+
lexer->result_symbol = HEREDOC_ARROW_DASH;
|
|
1607
|
+
} else if (lexer->lookahead == '<' || lexer->lookahead == '=') {
|
|
1608
|
+
return false;
|
|
1609
|
+
} else {
|
|
1610
|
+
Heredoc heredoc = heredoc_new();
|
|
1611
|
+
array_push(&scanner->heredocs, heredoc);
|
|
1612
|
+
#if DEBUG
|
|
1613
|
+
fprintf(
|
|
1614
|
+
stderr,
|
|
1615
|
+
"DEBUG: HEREDOC_ARROW - added heredoc, size now=%u\n",
|
|
1616
|
+
scanner->heredocs.size);
|
|
1617
|
+
#endif
|
|
1618
|
+
lexer->result_symbol = HEREDOC_ARROW;
|
|
1619
|
+
}
|
|
1620
|
+
return true;
|
|
1621
|
+
}
|
|
1622
|
+
return false;
|
|
1623
|
+
}
|
|
1624
|
+
|
|
1625
|
+
bool is_number = true;
|
|
1626
|
+
if (iswdigit(lexer->lookahead)) {
|
|
1627
|
+
advance(lexer);
|
|
1628
|
+
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
|
1629
|
+
is_number = false;
|
|
1630
|
+
advance(lexer);
|
|
1631
|
+
} else {
|
|
1632
|
+
if (lexer->lookahead == '{') {
|
|
1633
|
+
goto brace_start;
|
|
1634
|
+
}
|
|
1635
|
+
if (valid_symbols[EXPANSION_WORD]) {
|
|
1636
|
+
goto expansion_word;
|
|
1637
|
+
}
|
|
1638
|
+
if (valid_symbols[EXTGLOB_PATTERN]) {
|
|
1639
|
+
goto extglob_pattern;
|
|
1640
|
+
}
|
|
1641
|
+
return false;
|
|
1642
|
+
}
|
|
1643
|
+
|
|
1644
|
+
for (;;) {
|
|
1645
|
+
if (iswdigit(lexer->lookahead)) {
|
|
1646
|
+
advance(lexer);
|
|
1647
|
+
} else if (iswalpha(lexer->lookahead) || lexer->lookahead == '_') {
|
|
1648
|
+
is_number = false;
|
|
1649
|
+
advance(lexer);
|
|
1650
|
+
} else {
|
|
1651
|
+
break;
|
|
1652
|
+
}
|
|
1653
|
+
}
|
|
1654
|
+
|
|
1655
|
+
if (is_number && valid_symbols[FILE_DESCRIPTOR] &&
|
|
1656
|
+
(lexer->lookahead == '>' || lexer->lookahead == '<')) {
|
|
1657
|
+
lexer->result_symbol = FILE_DESCRIPTOR;
|
|
1658
|
+
return true;
|
|
1659
|
+
}
|
|
1660
|
+
|
|
1661
|
+
if (valid_symbols[VARIABLE_NAME]) {
|
|
1662
|
+
if (lexer->lookahead == '+') {
|
|
1663
|
+
lexer->mark_end(lexer);
|
|
1664
|
+
advance(lexer);
|
|
1665
|
+
context_type_t ctx = get_current_context(scanner);
|
|
1666
|
+
if (lexer->lookahead == '=' || lexer->lookahead == ':' ||
|
|
1667
|
+
(ctx == CTX_PARAMETER ||
|
|
1668
|
+
ctx == CTX_PARAMETER_PATTERN_SUFFIX ||
|
|
1669
|
+
ctx == CTX_PARAMETER_PATTERN_SUBSTITUTE)) {
|
|
1670
|
+
#if DEBUG
|
|
1671
|
+
fprintf(stderr,
|
|
1672
|
+
"SCANNER: VARIABLE_NAME after + operator\n");
|
|
1673
|
+
#endif
|
|
1674
|
+
lexer->result_symbol = VARIABLE_NAME;
|
|
1675
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1676
|
+
false;
|
|
1677
|
+
scanner->just_returned_variable_name = true;
|
|
1678
|
+
return true;
|
|
1679
|
+
}
|
|
1680
|
+
return false;
|
|
1681
|
+
}
|
|
1682
|
+
if (lexer->lookahead == '/') {
|
|
1683
|
+
return false;
|
|
1684
|
+
}
|
|
1685
|
+
context_type_t ctx = get_current_context(scanner);
|
|
1686
|
+
if (lexer->lookahead == '=' || lexer->lookahead == '[' ||
|
|
1687
|
+
lexer->lookahead == '%' ||
|
|
1688
|
+
(lexer->lookahead == '#' && !is_number) ||
|
|
1689
|
+
lexer->lookahead == '@' ||
|
|
1690
|
+
(lexer->lookahead == '-' &&
|
|
1691
|
+
(ctx == CTX_PARAMETER || ctx == CTX_PARAMETER_PATTERN_SUFFIX ||
|
|
1692
|
+
ctx == CTX_PARAMETER_PATTERN_SUBSTITUTE))) {
|
|
1693
|
+
lexer->mark_end(lexer);
|
|
1694
|
+
#if DEBUG
|
|
1695
|
+
fprintf(stderr, "SCANNER: VARIABLE_NAME after =\n");
|
|
1696
|
+
#endif
|
|
1697
|
+
lexer->result_symbol = VARIABLE_NAME;
|
|
1698
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1699
|
+
false;
|
|
1700
|
+
scanner->just_returned_variable_name = true;
|
|
1701
|
+
return true;
|
|
1702
|
+
}
|
|
1703
|
+
|
|
1704
|
+
if (lexer->lookahead == '?') {
|
|
1705
|
+
lexer->mark_end(lexer);
|
|
1706
|
+
advance(lexer);
|
|
1707
|
+
lexer->result_symbol = VARIABLE_NAME;
|
|
1708
|
+
was_just_bare_dollar = scanner->just_returned_bare_dollar =
|
|
1709
|
+
false;
|
|
1710
|
+
#if DEBUG
|
|
1711
|
+
fprintf(stderr, "SCANNER: VARIABLE_NAME after ?\n");
|
|
1712
|
+
#endif
|
|
1713
|
+
scanner->just_returned_variable_name = true;
|
|
1714
|
+
return iswalpha(lexer->lookahead);
|
|
1715
|
+
}
|
|
1716
|
+
}
|
|
1717
|
+
|
|
1718
|
+
#if DEBUG
|
|
1719
|
+
fprintf(stderr, "DEBUG: expansion word not valud returning false\n");
|
|
1720
|
+
#endif
|
|
1721
|
+
return false;
|
|
1722
|
+
}
|
|
1723
|
+
|
|
1724
|
+
if (valid_symbols[BARE_DOLLAR] && !in_error_recovery(valid_symbols) &&
|
|
1725
|
+
scan_raw_dollar(lexer, valid_symbols)) {
|
|
1726
|
+
return true;
|
|
1727
|
+
}
|
|
1728
|
+
|
|
1729
|
+
regex:
|
|
1730
|
+
if ((valid_symbols[REGEX] || valid_symbols[REGEX_NO_SLASH] ||
|
|
1731
|
+
valid_symbols[REGEX_NO_SPACE]) &&
|
|
1732
|
+
!in_error_recovery(valid_symbols)) {
|
|
1733
|
+
if (valid_symbols[REGEX] || valid_symbols[REGEX_NO_SPACE]) {
|
|
1734
|
+
while (iswspace(lexer->lookahead)) {
|
|
1735
|
+
skip(lexer);
|
|
1736
|
+
}
|
|
1737
|
+
}
|
|
1738
|
+
|
|
1739
|
+
if ((lexer->lookahead != '"' && lexer->lookahead != '\'') ||
|
|
1740
|
+
((lexer->lookahead == '$' || lexer->lookahead == '\'') &&
|
|
1741
|
+
valid_symbols[REGEX_NO_SLASH]) ||
|
|
1742
|
+
(lexer->lookahead == '\'' && valid_symbols[REGEX_NO_SPACE])) {
|
|
1743
|
+
typedef struct {
|
|
1744
|
+
bool done;
|
|
1745
|
+
bool advanced_once;
|
|
1746
|
+
bool found_non_alnumdollarunderdash;
|
|
1747
|
+
bool last_was_escape;
|
|
1748
|
+
bool in_single_quote;
|
|
1749
|
+
uint32_t paren_depth;
|
|
1750
|
+
uint32_t bracket_depth;
|
|
1751
|
+
uint32_t brace_depth;
|
|
1752
|
+
} State;
|
|
1753
|
+
|
|
1754
|
+
if (lexer->lookahead == '$' && valid_symbols[REGEX_NO_SLASH]) {
|
|
1755
|
+
lexer->mark_end(lexer);
|
|
1756
|
+
advance(lexer);
|
|
1757
|
+
if (lexer->lookahead == '(') {
|
|
1758
|
+
return false;
|
|
1759
|
+
}
|
|
1760
|
+
}
|
|
1761
|
+
|
|
1762
|
+
lexer->mark_end(lexer);
|
|
1763
|
+
|
|
1764
|
+
State state = {false, false, false, false, false, 0, 0, 0};
|
|
1765
|
+
while (!state.done) {
|
|
1766
|
+
if (state.in_single_quote) {
|
|
1767
|
+
if (lexer->lookahead == '\'') {
|
|
1768
|
+
state.in_single_quote = false;
|
|
1769
|
+
advance(lexer);
|
|
1770
|
+
lexer->mark_end(lexer);
|
|
1771
|
+
|
|
1772
|
+
// Track entering parameter expansion context
|
|
1773
|
+
enter_context(scanner, CTX_PARAMETER);
|
|
1774
|
+
}
|
|
1775
|
+
}
|
|
1776
|
+
switch (lexer->lookahead) {
|
|
1777
|
+
case '\\':
|
|
1778
|
+
state.last_was_escape = true;
|
|
1779
|
+
break;
|
|
1780
|
+
case '\0':
|
|
1781
|
+
return false;
|
|
1782
|
+
case '(':
|
|
1783
|
+
state.paren_depth++;
|
|
1784
|
+
state.last_was_escape = false;
|
|
1785
|
+
break;
|
|
1786
|
+
case '[':
|
|
1787
|
+
state.bracket_depth++;
|
|
1788
|
+
state.last_was_escape = false;
|
|
1789
|
+
break;
|
|
1790
|
+
case '{':
|
|
1791
|
+
if (!state.last_was_escape) {
|
|
1792
|
+
state.brace_depth++;
|
|
1793
|
+
}
|
|
1794
|
+
state.last_was_escape = false;
|
|
1795
|
+
break;
|
|
1796
|
+
case ')':
|
|
1797
|
+
if (state.paren_depth == 0) {
|
|
1798
|
+
state.done = true;
|
|
1799
|
+
}
|
|
1800
|
+
state.paren_depth--;
|
|
1801
|
+
state.last_was_escape = false;
|
|
1802
|
+
break;
|
|
1803
|
+
case ']':
|
|
1804
|
+
if (state.bracket_depth == 0) {
|
|
1805
|
+
state.done = true;
|
|
1806
|
+
}
|
|
1807
|
+
state.bracket_depth--;
|
|
1808
|
+
state.last_was_escape = false;
|
|
1809
|
+
break;
|
|
1810
|
+
case '}':
|
|
1811
|
+
if (state.brace_depth == 0) {
|
|
1812
|
+
state.done = true;
|
|
1813
|
+
}
|
|
1814
|
+
state.brace_depth--;
|
|
1815
|
+
state.last_was_escape = false;
|
|
1816
|
+
break;
|
|
1817
|
+
case '\'':
|
|
1818
|
+
// Enter or exit a single-quoted string.
|
|
1819
|
+
state.in_single_quote = !state.in_single_quote;
|
|
1820
|
+
advance(lexer);
|
|
1821
|
+
state.advanced_once = true;
|
|
1822
|
+
state.last_was_escape = false;
|
|
1823
|
+
continue;
|
|
1824
|
+
default:
|
|
1825
|
+
state.last_was_escape = false;
|
|
1826
|
+
break;
|
|
1827
|
+
}
|
|
1828
|
+
|
|
1829
|
+
if (!state.done) {
|
|
1830
|
+
if (valid_symbols[REGEX]) {
|
|
1831
|
+
bool was_space = !state.in_single_quote &&
|
|
1832
|
+
iswspace(lexer->lookahead);
|
|
1833
|
+
advance(lexer);
|
|
1834
|
+
state.advanced_once = true;
|
|
1835
|
+
if (!was_space || state.paren_depth > 0) {
|
|
1836
|
+
lexer->mark_end(lexer);
|
|
1837
|
+
}
|
|
1838
|
+
} else if (valid_symbols[REGEX_NO_SLASH]) {
|
|
1839
|
+
if (lexer->lookahead == '/') {
|
|
1840
|
+
lexer->mark_end(lexer);
|
|
1841
|
+
lexer->result_symbol = REGEX_NO_SLASH;
|
|
1842
|
+
return state.advanced_once;
|
|
1843
|
+
}
|
|
1844
|
+
if (lexer->lookahead == '\\') {
|
|
1845
|
+
advance(lexer);
|
|
1846
|
+
state.advanced_once = true;
|
|
1847
|
+
if (!lexer->eof(lexer) && lexer->lookahead != '[' &&
|
|
1848
|
+
lexer->lookahead != '/') {
|
|
1849
|
+
advance(lexer);
|
|
1850
|
+
lexer->mark_end(lexer);
|
|
1851
|
+
}
|
|
1852
|
+
} else {
|
|
1853
|
+
bool was_space = !state.in_single_quote &&
|
|
1854
|
+
iswspace(lexer->lookahead);
|
|
1855
|
+
advance(lexer);
|
|
1856
|
+
state.advanced_once = true;
|
|
1857
|
+
if (!was_space) {
|
|
1858
|
+
lexer->mark_end(lexer);
|
|
1859
|
+
}
|
|
1860
|
+
}
|
|
1861
|
+
} else if (valid_symbols[REGEX_NO_SPACE]) {
|
|
1862
|
+
if (lexer->lookahead == '\\') {
|
|
1863
|
+
state.found_non_alnumdollarunderdash = true;
|
|
1864
|
+
advance(lexer);
|
|
1865
|
+
if (!lexer->eof(lexer)) {
|
|
1866
|
+
advance(lexer);
|
|
1867
|
+
}
|
|
1868
|
+
} else if (lexer->lookahead == '$') {
|
|
1869
|
+
lexer->mark_end(lexer);
|
|
1870
|
+
advance(lexer);
|
|
1871
|
+
// do not parse a command
|
|
1872
|
+
// substitution
|
|
1873
|
+
if (lexer->lookahead == '(') {
|
|
1874
|
+
return false;
|
|
1875
|
+
}
|
|
1876
|
+
// end $ always means regex, e.g.
|
|
1877
|
+
// 99999999$
|
|
1878
|
+
if (iswspace(lexer->lookahead)) {
|
|
1879
|
+
lexer->result_symbol = REGEX_NO_SPACE;
|
|
1880
|
+
lexer->mark_end(lexer);
|
|
1881
|
+
return true;
|
|
1882
|
+
}
|
|
1883
|
+
} else {
|
|
1884
|
+
bool was_space = !state.in_single_quote &&
|
|
1885
|
+
iswspace(lexer->lookahead);
|
|
1886
|
+
if (was_space && state.paren_depth == 0) {
|
|
1887
|
+
lexer->mark_end(lexer);
|
|
1888
|
+
lexer->result_symbol = REGEX_NO_SPACE;
|
|
1889
|
+
return state.found_non_alnumdollarunderdash;
|
|
1890
|
+
}
|
|
1891
|
+
if (!iswalnum(lexer->lookahead) &&
|
|
1892
|
+
lexer->lookahead != '$' &&
|
|
1893
|
+
lexer->lookahead != '-' &&
|
|
1894
|
+
lexer->lookahead != '_') {
|
|
1895
|
+
state.found_non_alnumdollarunderdash = true;
|
|
1896
|
+
}
|
|
1897
|
+
advance(lexer);
|
|
1898
|
+
}
|
|
1899
|
+
}
|
|
1900
|
+
}
|
|
1901
|
+
}
|
|
1902
|
+
|
|
1903
|
+
lexer->result_symbol =
|
|
1904
|
+
valid_symbols[REGEX_NO_SLASH] ? REGEX_NO_SLASH
|
|
1905
|
+
: valid_symbols[REGEX_NO_SPACE] ? REGEX_NO_SPACE
|
|
1906
|
+
: REGEX;
|
|
1907
|
+
if (valid_symbols[REGEX] && !state.advanced_once) {
|
|
1908
|
+
#if DEBUG
|
|
1909
|
+
fprintf(stderr, "DEBUG: regex not valid returning false\n");
|
|
1910
|
+
#endif
|
|
1911
|
+
return false;
|
|
1912
|
+
}
|
|
1913
|
+
return true;
|
|
1914
|
+
}
|
|
1915
|
+
}
|
|
1916
|
+
|
|
1917
|
+
extglob_pattern:
|
|
1918
|
+
if (valid_symbols[EXTGLOB_PATTERN] && !in_error_recovery(valid_symbols) &&
|
|
1919
|
+
!valid_symbols[REGEX] && !valid_symbols[REGEX_NO_SLASH] &&
|
|
1920
|
+
!valid_symbols[REGEX_NO_SPACE] &&
|
|
1921
|
+
!in_parameter_expansion_context(
|
|
1922
|
+
scanner) // Don't generate EXTGLOB_PATTERN inside ${...}
|
|
1923
|
+
) {
|
|
1924
|
+
// first skip ws, then check for ? * + @ !
|
|
1925
|
+
while (iswspace(lexer->lookahead)) {
|
|
1926
|
+
skip(lexer);
|
|
1927
|
+
}
|
|
1928
|
+
|
|
1929
|
+
if (lexer->lookahead == '?' || lexer->lookahead == '*' ||
|
|
1930
|
+
lexer->lookahead == '+' || lexer->lookahead == '@' ||
|
|
1931
|
+
lexer->lookahead == '!' || lexer->lookahead == '-' ||
|
|
1932
|
+
lexer->lookahead == ')' || lexer->lookahead == '\\' ||
|
|
1933
|
+
lexer->lookahead == '.' || lexer->lookahead == '[' ||
|
|
1934
|
+
(iswalpha(lexer->lookahead))) {
|
|
1935
|
+
if (lexer->lookahead == '\\') {
|
|
1936
|
+
advance(lexer);
|
|
1937
|
+
if ((iswspace(lexer->lookahead) || lexer->lookahead == '"') &&
|
|
1938
|
+
lexer->lookahead != '\r' && lexer->lookahead != '\n') {
|
|
1939
|
+
advance(lexer);
|
|
1940
|
+
} else {
|
|
1941
|
+
return false;
|
|
1942
|
+
}
|
|
1943
|
+
}
|
|
1944
|
+
|
|
1945
|
+
if (lexer->lookahead == ')' &&
|
|
1946
|
+
scanner->last_glob_paren_depth == 0) {
|
|
1947
|
+
lexer->mark_end(lexer);
|
|
1948
|
+
advance(lexer);
|
|
1949
|
+
|
|
1950
|
+
if (iswspace(lexer->lookahead)) {
|
|
1951
|
+
return false;
|
|
1952
|
+
}
|
|
1953
|
+
}
|
|
1954
|
+
|
|
1955
|
+
lexer->mark_end(lexer);
|
|
1956
|
+
bool was_non_alpha = !iswalpha(lexer->lookahead);
|
|
1957
|
+
if (lexer->lookahead != '[') {
|
|
1958
|
+
// no esac
|
|
1959
|
+
if (lexer->lookahead == 'e') {
|
|
1960
|
+
lexer->mark_end(lexer);
|
|
1961
|
+
advance(lexer);
|
|
1962
|
+
if (lexer->lookahead == 's') {
|
|
1963
|
+
advance(lexer);
|
|
1964
|
+
if (lexer->lookahead == 'a') {
|
|
1965
|
+
advance(lexer);
|
|
1966
|
+
if (lexer->lookahead == 'c') {
|
|
1967
|
+
advance(lexer);
|
|
1968
|
+
if (iswspace(lexer->lookahead)) {
|
|
1969
|
+
return false;
|
|
1970
|
+
}
|
|
1971
|
+
}
|
|
1972
|
+
}
|
|
1973
|
+
}
|
|
1974
|
+
} else {
|
|
1975
|
+
advance(lexer);
|
|
1976
|
+
}
|
|
1977
|
+
}
|
|
1978
|
+
|
|
1979
|
+
// -\w is just a word, find something else special
|
|
1980
|
+
if (lexer->lookahead == '-') {
|
|
1981
|
+
lexer->mark_end(lexer);
|
|
1982
|
+
advance(lexer);
|
|
1983
|
+
while (iswalnum(lexer->lookahead)) {
|
|
1984
|
+
advance(lexer);
|
|
1985
|
+
}
|
|
1986
|
+
|
|
1987
|
+
if (lexer->lookahead == ')' || lexer->lookahead == '\\' ||
|
|
1988
|
+
lexer->lookahead == '.') {
|
|
1989
|
+
return false;
|
|
1990
|
+
}
|
|
1991
|
+
lexer->mark_end(lexer);
|
|
1992
|
+
}
|
|
1993
|
+
|
|
1994
|
+
// case item -) or *)
|
|
1995
|
+
if (lexer->lookahead == ')' &&
|
|
1996
|
+
scanner->last_glob_paren_depth == 0) {
|
|
1997
|
+
lexer->mark_end(lexer);
|
|
1998
|
+
advance(lexer);
|
|
1999
|
+
if (iswspace(lexer->lookahead)) {
|
|
2000
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2001
|
+
return was_non_alpha;
|
|
2002
|
+
}
|
|
2003
|
+
}
|
|
2004
|
+
|
|
2005
|
+
if (iswspace(lexer->lookahead)) {
|
|
2006
|
+
lexer->mark_end(lexer);
|
|
2007
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2008
|
+
scanner->last_glob_paren_depth = 0;
|
|
2009
|
+
return true;
|
|
2010
|
+
}
|
|
2011
|
+
|
|
2012
|
+
if (lexer->lookahead == '$') {
|
|
2013
|
+
lexer->mark_end(lexer);
|
|
2014
|
+
advance(lexer);
|
|
2015
|
+
if (lexer->lookahead == '{' || lexer->lookahead == '(') {
|
|
2016
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2017
|
+
return true;
|
|
2018
|
+
}
|
|
2019
|
+
}
|
|
2020
|
+
|
|
2021
|
+
if (lexer->lookahead == '|') {
|
|
2022
|
+
lexer->mark_end(lexer);
|
|
2023
|
+
advance(lexer);
|
|
2024
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2025
|
+
return true;
|
|
2026
|
+
}
|
|
2027
|
+
|
|
2028
|
+
if (!iswalnum(lexer->lookahead) && lexer->lookahead != '(' &&
|
|
2029
|
+
lexer->lookahead != '"' && lexer->lookahead != '[' &&
|
|
2030
|
+
lexer->lookahead != '?' && lexer->lookahead != '/' &&
|
|
2031
|
+
lexer->lookahead != '\\' && lexer->lookahead != '_' &&
|
|
2032
|
+
lexer->lookahead != '*') {
|
|
2033
|
+
return false;
|
|
2034
|
+
}
|
|
2035
|
+
|
|
2036
|
+
typedef struct {
|
|
2037
|
+
bool done;
|
|
2038
|
+
bool saw_non_alphadot;
|
|
2039
|
+
uint32_t paren_depth;
|
|
2040
|
+
uint32_t bracket_depth;
|
|
2041
|
+
uint32_t brace_depth;
|
|
2042
|
+
} State;
|
|
2043
|
+
|
|
2044
|
+
State state = {false, was_non_alpha, scanner->last_glob_paren_depth,
|
|
2045
|
+
0, 0};
|
|
2046
|
+
while (!state.done) {
|
|
2047
|
+
switch (lexer->lookahead) {
|
|
2048
|
+
case '\0':
|
|
2049
|
+
return false;
|
|
2050
|
+
case '(':
|
|
2051
|
+
state.paren_depth++;
|
|
2052
|
+
break;
|
|
2053
|
+
case '[':
|
|
2054
|
+
state.bracket_depth++;
|
|
2055
|
+
break;
|
|
2056
|
+
case '{':
|
|
2057
|
+
state.brace_depth++;
|
|
2058
|
+
break;
|
|
2059
|
+
case ')':
|
|
2060
|
+
if (state.paren_depth == 0) {
|
|
2061
|
+
state.done = true;
|
|
2062
|
+
}
|
|
2063
|
+
state.paren_depth--;
|
|
2064
|
+
break;
|
|
2065
|
+
case ']':
|
|
2066
|
+
if (state.bracket_depth == 0) {
|
|
2067
|
+
state.done = true;
|
|
2068
|
+
}
|
|
2069
|
+
state.bracket_depth--;
|
|
2070
|
+
break;
|
|
2071
|
+
case '}':
|
|
2072
|
+
if (state.brace_depth == 0) {
|
|
2073
|
+
state.done = true;
|
|
2074
|
+
}
|
|
2075
|
+
state.brace_depth--;
|
|
2076
|
+
break;
|
|
2077
|
+
}
|
|
2078
|
+
|
|
2079
|
+
if (lexer->lookahead == '|') {
|
|
2080
|
+
lexer->mark_end(lexer);
|
|
2081
|
+
advance(lexer);
|
|
2082
|
+
if (state.paren_depth == 0 && state.bracket_depth == 0 &&
|
|
2083
|
+
state.brace_depth == 0) {
|
|
2084
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2085
|
+
return true;
|
|
2086
|
+
}
|
|
2087
|
+
}
|
|
2088
|
+
|
|
2089
|
+
if (!state.done) {
|
|
2090
|
+
bool was_space = iswspace(lexer->lookahead);
|
|
2091
|
+
if (lexer->lookahead == '$') {
|
|
2092
|
+
lexer->mark_end(lexer);
|
|
2093
|
+
if (!iswalpha(lexer->lookahead) &&
|
|
2094
|
+
lexer->lookahead != '.' &&
|
|
2095
|
+
lexer->lookahead != '\\') {
|
|
2096
|
+
state.saw_non_alphadot = true;
|
|
2097
|
+
}
|
|
2098
|
+
advance(lexer);
|
|
2099
|
+
if (lexer->lookahead == '(' ||
|
|
2100
|
+
lexer->lookahead == '{') {
|
|
2101
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2102
|
+
scanner->last_glob_paren_depth = state.paren_depth;
|
|
2103
|
+
return state.saw_non_alphadot;
|
|
2104
|
+
}
|
|
2105
|
+
}
|
|
2106
|
+
if (was_space) {
|
|
2107
|
+
lexer->mark_end(lexer);
|
|
2108
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2109
|
+
scanner->last_glob_paren_depth = 0;
|
|
2110
|
+
return state.saw_non_alphadot;
|
|
2111
|
+
}
|
|
2112
|
+
if (lexer->lookahead == '"') {
|
|
2113
|
+
lexer->mark_end(lexer);
|
|
2114
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2115
|
+
scanner->last_glob_paren_depth = 0;
|
|
2116
|
+
return state.saw_non_alphadot;
|
|
2117
|
+
}
|
|
2118
|
+
if (lexer->lookahead == '\\') {
|
|
2119
|
+
if (!iswalpha(lexer->lookahead) &&
|
|
2120
|
+
lexer->lookahead != '.' &&
|
|
2121
|
+
lexer->lookahead != '\\') {
|
|
2122
|
+
state.saw_non_alphadot = true;
|
|
2123
|
+
}
|
|
2124
|
+
advance(lexer);
|
|
2125
|
+
if (iswspace(lexer->lookahead) ||
|
|
2126
|
+
lexer->lookahead == '"') {
|
|
2127
|
+
advance(lexer);
|
|
2128
|
+
}
|
|
2129
|
+
} else {
|
|
2130
|
+
if (!iswalpha(lexer->lookahead) &&
|
|
2131
|
+
lexer->lookahead != '.' &&
|
|
2132
|
+
lexer->lookahead != '\\') {
|
|
2133
|
+
state.saw_non_alphadot = true;
|
|
2134
|
+
}
|
|
2135
|
+
advance(lexer);
|
|
2136
|
+
}
|
|
2137
|
+
if (!was_space) {
|
|
2138
|
+
lexer->mark_end(lexer);
|
|
2139
|
+
}
|
|
2140
|
+
}
|
|
2141
|
+
}
|
|
2142
|
+
|
|
2143
|
+
lexer->result_symbol = EXTGLOB_PATTERN;
|
|
2144
|
+
scanner->last_glob_paren_depth = 0;
|
|
2145
|
+
return state.saw_non_alphadot;
|
|
2146
|
+
}
|
|
2147
|
+
scanner->last_glob_paren_depth = 0;
|
|
2148
|
+
#if DEBUG
|
|
2149
|
+
fprintf(stderr, "DEBUG: EXTGLOB not valid returning false\n");
|
|
2150
|
+
#endif
|
|
2151
|
+
return false;
|
|
2152
|
+
}
|
|
2153
|
+
|
|
2154
|
+
expansion_word:
|
|
2155
|
+
if (valid_symbols[EXPANSION_WORD]) {
|
|
2156
|
+
#if DEBUG
|
|
2157
|
+
fprintf(stderr,
|
|
2158
|
+
"DEBUG: EXPANSION_WORD handler called, context=%d, "
|
|
2159
|
+
"lookahead='%c'\n",
|
|
2160
|
+
get_current_context(scanner), lexer->lookahead);
|
|
2161
|
+
#endif
|
|
2162
|
+
// If we just returned a variable name and encounter # or %,
|
|
2163
|
+
// don't consume them as expansion word - let them be operator tokens
|
|
2164
|
+
if (was_just_variable_name &&
|
|
2165
|
+
(lexer->lookahead == '#' || lexer->lookahead == '%')) {
|
|
2166
|
+
#if DEBUG
|
|
2167
|
+
fprintf(stderr, "DEBUG: EXPANSION_WORD early return due to "
|
|
2168
|
+
"variable_name + operator\n");
|
|
2169
|
+
#endif
|
|
2170
|
+
return false;
|
|
2171
|
+
}
|
|
2172
|
+
bool advanced_once = false;
|
|
2173
|
+
bool advance_once_space = false;
|
|
2174
|
+
for (;;) {
|
|
2175
|
+
if (lexer->lookahead == '\"') {
|
|
2176
|
+
return false;
|
|
2177
|
+
}
|
|
2178
|
+
if (lexer->lookahead == '$') {
|
|
2179
|
+
lexer->mark_end(lexer);
|
|
2180
|
+
advance(lexer);
|
|
2181
|
+
if (lexer->lookahead == '{' || lexer->lookahead == '(' ||
|
|
2182
|
+
lexer->lookahead == '\'' || iswalnum(lexer->lookahead)) {
|
|
2183
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2184
|
+
return true;
|
|
2185
|
+
}
|
|
2186
|
+
advanced_once = true;
|
|
2187
|
+
}
|
|
2188
|
+
|
|
2189
|
+
if (lexer->lookahead == '/' &&
|
|
2190
|
+
should_stop_at_pattern_slash(scanner)) {
|
|
2191
|
+
lexer->mark_end(lexer);
|
|
2192
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2193
|
+
return true;
|
|
2194
|
+
}
|
|
2195
|
+
|
|
2196
|
+
if (lexer->lookahead == '}' && in_parameter_expansion(scanner)) {
|
|
2197
|
+
// Track exiting parameter expansion context
|
|
2198
|
+
lexer->mark_end(lexer);
|
|
2199
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2200
|
+
return true;
|
|
2201
|
+
}
|
|
2202
|
+
|
|
2203
|
+
if (lexer->lookahead == '(' &&
|
|
2204
|
+
!(advanced_once || advance_once_space)) {
|
|
2205
|
+
lexer->mark_end(lexer);
|
|
2206
|
+
advance(lexer);
|
|
2207
|
+
while (lexer->lookahead != ')' && !lexer->eof(lexer)) {
|
|
2208
|
+
// if we find a $( or ${ assume this is valid and is
|
|
2209
|
+
// a garbage concatenation of some weird word + an
|
|
2210
|
+
// expansion
|
|
2211
|
+
// I wonder where this can fail
|
|
2212
|
+
if (lexer->lookahead == '$') {
|
|
2213
|
+
lexer->mark_end(lexer);
|
|
2214
|
+
advance(lexer);
|
|
2215
|
+
if (lexer->lookahead == '{' ||
|
|
2216
|
+
lexer->lookahead == '(' ||
|
|
2217
|
+
lexer->lookahead == '\'' ||
|
|
2218
|
+
iswalnum(lexer->lookahead)) {
|
|
2219
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2220
|
+
return true;
|
|
2221
|
+
}
|
|
2222
|
+
advanced_once = true;
|
|
2223
|
+
} else {
|
|
2224
|
+
|
|
2225
|
+
// In parameter expansion, handle subscript boundaries
|
|
2226
|
+
// and operators properly
|
|
2227
|
+
if (should_stop_at_pattern_operators(scanner)) {
|
|
2228
|
+
if (lexer->lookahead == ']') {
|
|
2229
|
+
// Stop at ] to let it be handled as subscript
|
|
2230
|
+
// terminator
|
|
2231
|
+
lexer->mark_end(lexer);
|
|
2232
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2233
|
+
return true;
|
|
2234
|
+
}
|
|
2235
|
+
if (lexer->lookahead == '#' ||
|
|
2236
|
+
lexer->lookahead == '%') {
|
|
2237
|
+
// Stop at operators to let them be handled
|
|
2238
|
+
// separately
|
|
2239
|
+
lexer->mark_end(lexer);
|
|
2240
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2241
|
+
return true;
|
|
2242
|
+
}
|
|
2243
|
+
if (lexer->lookahead == ':') {
|
|
2244
|
+
// Stop at colon to let it be handled separately
|
|
2245
|
+
// for colon-based operations
|
|
2246
|
+
lexer->mark_end(lexer);
|
|
2247
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2248
|
+
return true;
|
|
2249
|
+
}
|
|
2250
|
+
}
|
|
2251
|
+
advanced_once =
|
|
2252
|
+
advanced_once || !iswspace(lexer->lookahead);
|
|
2253
|
+
advance_once_space =
|
|
2254
|
+
advance_once_space || iswspace(lexer->lookahead);
|
|
2255
|
+
advance(lexer);
|
|
2256
|
+
}
|
|
2257
|
+
}
|
|
2258
|
+
lexer->mark_end(lexer);
|
|
2259
|
+
if (lexer->lookahead == ')') {
|
|
2260
|
+
advanced_once = true;
|
|
2261
|
+
advance(lexer);
|
|
2262
|
+
lexer->mark_end(lexer);
|
|
2263
|
+
} else {
|
|
2264
|
+
return false;
|
|
2265
|
+
}
|
|
2266
|
+
}
|
|
2267
|
+
|
|
2268
|
+
if (lexer->lookahead == '\'') {
|
|
2269
|
+
return false;
|
|
2270
|
+
}
|
|
2271
|
+
|
|
2272
|
+
if (lexer->eof(lexer)) {
|
|
2273
|
+
return false;
|
|
2274
|
+
}
|
|
2275
|
+
|
|
2276
|
+
// In parameter expansion, handle subscript boundaries and operators
|
|
2277
|
+
// properly
|
|
2278
|
+
if (should_stop_at_pattern_operators(scanner)) {
|
|
2279
|
+
#if DEBUG
|
|
2280
|
+
fprintf(stderr,
|
|
2281
|
+
"DEBUG: EXPANSION_WORD checking pattern operators, "
|
|
2282
|
+
"lookahead='%c'\n",
|
|
2283
|
+
lexer->lookahead);
|
|
2284
|
+
#endif
|
|
2285
|
+
if (lexer->lookahead == ']') {
|
|
2286
|
+
// Stop at ] to let it be handled as subscript terminator
|
|
2287
|
+
#if DEBUG
|
|
2288
|
+
fprintf(stderr, "DEBUG: EXPANSION_WORD stopping at ]\n");
|
|
2289
|
+
#endif
|
|
2290
|
+
lexer->mark_end(lexer);
|
|
2291
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2292
|
+
return true;
|
|
2293
|
+
}
|
|
2294
|
+
if (lexer->lookahead == '#' || lexer->lookahead == '%' ||
|
|
2295
|
+
lexer->lookahead == '/') {
|
|
2296
|
+
context_type_t ctx = get_current_context(scanner);
|
|
2297
|
+
if (lexer->lookahead == '/' &&
|
|
2298
|
+
ctx == CTX_PARAMETER_PATTERN_SUBSTITUTE &&
|
|
2299
|
+
// should_stop_at_pattern_operators(scanner) &&
|
|
2300
|
+
!advanced_once) {
|
|
2301
|
+
// Stop at operators to let them be handled separately
|
|
2302
|
+
#if DEBUG
|
|
2303
|
+
fprintf(
|
|
2304
|
+
stderr,
|
|
2305
|
+
"DEBUG: EXPANSION_WORD stopping at operator '%c'\n",
|
|
2306
|
+
lexer->lookahead);
|
|
2307
|
+
#endif
|
|
2308
|
+
lexer->mark_end(lexer);
|
|
2309
|
+
lexer->result_symbol = EXPANSION_WORD;
|
|
2310
|
+
return true;
|
|
2311
|
+
}
|
|
2312
|
+
}
|
|
2313
|
+
}
|
|
2314
|
+
|
|
2315
|
+
advanced_once = advanced_once || !iswspace(lexer->lookahead);
|
|
2316
|
+
advance_once_space =
|
|
2317
|
+
advance_once_space || iswspace(lexer->lookahead);
|
|
2318
|
+
advance(lexer);
|
|
2319
|
+
}
|
|
2320
|
+
} else {
|
|
2321
|
+
#if DEBUG
|
|
2322
|
+
fprintf(stderr, "DEBUG: EXPANSION_WORD not valid, skipping\n");
|
|
2323
|
+
#endif
|
|
2324
|
+
}
|
|
2325
|
+
|
|
2326
|
+
// This handles ranges in braces
|
|
2327
|
+
brace_start:
|
|
2328
|
+
if (valid_symbols[BRACE_EXPR_START] && !in_error_recovery(valid_symbols)) {
|
|
2329
|
+
skip_ws(lexer);
|
|
2330
|
+
|
|
2331
|
+
if (lexer->lookahead == '{') {
|
|
2332
|
+
advance(lexer);
|
|
2333
|
+
lexer->mark_end(lexer);
|
|
2334
|
+
|
|
2335
|
+
// Don't enter context - brace expressions are handled by grammar
|
|
2336
|
+
// The grammar will handle the entire {1..10} pattern itself
|
|
2337
|
+
|
|
2338
|
+
while (isdigit(lexer->lookahead)) {
|
|
2339
|
+
advance(lexer);
|
|
2340
|
+
}
|
|
2341
|
+
|
|
2342
|
+
if (lexer->lookahead != '.') {
|
|
2343
|
+
return false;
|
|
2344
|
+
}
|
|
2345
|
+
advance(lexer);
|
|
2346
|
+
|
|
2347
|
+
if (lexer->lookahead != '.') {
|
|
2348
|
+
return false;
|
|
2349
|
+
}
|
|
2350
|
+
advance(lexer);
|
|
2351
|
+
|
|
2352
|
+
while (isdigit(lexer->lookahead)) {
|
|
2353
|
+
advance(lexer);
|
|
2354
|
+
}
|
|
2355
|
+
|
|
2356
|
+
if (lexer->lookahead != '}') {
|
|
2357
|
+
return false;
|
|
2358
|
+
}
|
|
2359
|
+
|
|
2360
|
+
enter_context(scanner, CTX_BRACE_EXPANSION);
|
|
2361
|
+
lexer->result_symbol = BRACE_EXPR_START;
|
|
2362
|
+
return true;
|
|
2363
|
+
}
|
|
2364
|
+
}
|
|
2365
|
+
|
|
2366
|
+
#if DEBUG
|
|
2367
|
+
fprintf(stderr, "SCANNER: scan returning false\n");
|
|
2368
|
+
#endif
|
|
2369
|
+
|
|
2370
|
+
return false;
|
|
2371
|
+
}
|
|
2372
|
+
|
|
2373
|
+
void *tree_sitter_zsh_external_scanner_create() {
|
|
2374
|
+
Scanner *scanner = calloc(1, sizeof(Scanner));
|
|
2375
|
+
UINT32_MAX; // Initialize to invalid position
|
|
2376
|
+
array_init(&scanner->heredocs);
|
|
2377
|
+
array_init(&scanner->context_stack);
|
|
2378
|
+
#if DEBUG
|
|
2379
|
+
fprintf(
|
|
2380
|
+
stderr,
|
|
2381
|
+
"DEBUG: Scanner created - heredocs.size=%u, context_stack.size=%u\n",
|
|
2382
|
+
scanner->heredocs.size, scanner->context_stack.size);
|
|
2383
|
+
#endif
|
|
2384
|
+
return scanner;
|
|
2385
|
+
}
|
|
2386
|
+
|
|
2387
|
+
/*
 * Tree-sitter entry point for recognizing one external token.
 *
 * payload       - opaque scanner state; treated as a Scanner *.
 * lexer         - lexer handle forwarded unchanged to scan().
 * valid_symbols - table forwarded unchanged to scan(); it is indexed by
 *                 TokenType there.
 *
 * Returns whatever the file-local scan() returns.
 */
bool tree_sitter_zsh_external_scanner_scan(void *payload, TSLexer *lexer,
                                           const bool *valid_symbols) {
    // Pure forwarding shim: all scanning logic lives in scan().
    return scan((Scanner *)payload, lexer, valid_symbols);
}
|
|
2392
|
+
|
|
2393
|
+
unsigned tree_sitter_zsh_external_scanner_serialize(void *payload,
|
|
2394
|
+
char *state) {
|
|
2395
|
+
Scanner *scanner = (Scanner *)payload;
|
|
2396
|
+
return serialize(scanner, state);
|
|
2397
|
+
}
|
|
2398
|
+
|
|
2399
|
+
void tree_sitter_zsh_external_scanner_deserialize(void *payload,
|
|
2400
|
+
const char *state,
|
|
2401
|
+
unsigned length) {
|
|
2402
|
+
Scanner *scanner = (Scanner *)payload;
|
|
2403
|
+
deserialize(scanner, state, length);
|
|
2404
|
+
}
|
|
2405
|
+
|
|
2406
|
+
void tree_sitter_zsh_external_scanner_destroy(void *payload) {
|
|
2407
|
+
Scanner *scanner = (Scanner *)payload;
|
|
2408
|
+
for (size_t i = 0; i < scanner->heredocs.size; i++) {
|
|
2409
|
+
Heredoc *heredoc = array_get(&scanner->heredocs, i);
|
|
2410
|
+
array_delete(&heredoc->current_leading_word);
|
|
2411
|
+
array_delete(&heredoc->delimiter);
|
|
2412
|
+
}
|
|
2413
|
+
array_delete(&scanner->heredocs);
|
|
2414
|
+
array_delete(&scanner->context_stack);
|
|
2415
|
+
free(scanner);
|
|
2416
|
+
}
|