@acristoffers/tree-sitter-matlab 1.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +19 -0
- package/README.md +97 -0
- package/binding.gyp +35 -0
- package/bindings/node/binding.cc +19 -0
- package/bindings/node/binding_test.js +9 -0
- package/bindings/node/index.d.ts +27 -0
- package/bindings/node/index.js +11 -0
- package/grammar.js +1000 -0
- package/package.json +59 -0
- package/queries/emacs/highlights.scm +176 -0
- package/queries/emacs/textobjects.scm +93 -0
- package/queries/helix/context.scm +41 -0
- package/queries/helix/folds.scm +11 -0
- package/queries/helix/highlights.scm +127 -0
- package/queries/helix/indents.scm +24 -0
- package/queries/helix/injections.scm +2 -0
- package/queries/helix/locals.scm +19 -0
- package/queries/helix/textobjects.scm +9 -0
- package/queries/neovim/context.scm +41 -0
- package/queries/neovim/folds.scm +11 -0
- package/queries/neovim/highlights.scm +157 -0
- package/queries/neovim/indents.scm +36 -0
- package/queries/neovim/injections.scm +1 -0
- package/queries/neovim/locals.scm +20 -0
- package/queries/neovim/tags.scm +10 -0
- package/queries/neovim/textobjects.scm +110 -0
- package/src/grammar.json +6184 -0
- package/src/node-types.json +3719 -0
- package/src/parser.c +113495 -0
- package/src/scanner.c +1094 -0
- package/src/tree_sitter/alloc.h +54 -0
- package/src/tree_sitter/array.h +291 -0
- package/src/tree_sitter/parser.h +286 -0
- package/tree-sitter.json +36 -0
package/src/scanner.c
ADDED
|
@@ -0,0 +1,1094 @@
|
|
|
1
|
+
#include "tree_sitter/parser.h"
|
|
2
|
+
|
|
3
|
+
#include <stddef.h>
|
|
4
|
+
#include <stdint.h>
|
|
5
|
+
#include <stdio.h>
|
|
6
|
+
#include <string.h>
|
|
7
|
+
#include <wctype.h>
|
|
8
|
+
|
|
9
|
+
// Reports whether `chr` is an ASCII punctuation character.
//
// Hand-rolled replacement for ispunct(), which is not among the libc symbols
// tree-sitter exposes to WASM builds:
// https://github.com/tree-sitter/tree-sitter/blob/master/lib/src/wasm/stdlib-symbols.txt
static inline bool is_punct_char(const uint32_t chr)
{
    // Anything outside 7-bit ASCII is never reported as punctuation.
    if (chr >= 0x80) {
        return false;
    }

    const bool bang_to_slash = chr >= '!' && chr <= '/';       // !"#$%&'()*+,-./
    const bool colon_to_at = chr >= ':' && chr <= '@';         // :;<=>?@
    const bool bracket_to_tick = chr >= '[' && chr <= '`';     // [\]^_`
    const bool brace_to_tilde = chr >= '{' && chr <= '~';      // {|}~

    return bang_to_slash || colon_to_at || bracket_to_tick || brace_to_tilde;
}
|
|
23
|
+
|
|
24
|
+
// Symbols this external scanner can hand back through lexer->result_symbol.
// NOTE(review): the order presumably has to match the `externals` list in
// grammar.js -- confirm before adding, removing or reordering entries.
enum TokenType {
    COMMENT = 0,                // `%` line comments and `%{ ... %}` block comments
    LINE_CONTINUATION,          // `...` up to the end of the line
    COMMAND_NAME,               // name in command syntax (`hold on`, `!ls`)
    COMMAND_ARGUMENT,           // one argument of a command-syntax call
    SINGLE_QUOTE_STRING_START,  // opening '
    SINGLE_QUOTE_STRING_END,    // closing '
    DOUBLE_QUOTE_STRING_START,  // opening "
    DOUBLE_QUOTE_STRING_END,    // closing "
    FORMATTING_SEQUENCE,        // `%d`, `%5.2f`, `%%` ... inside a string
    ESCAPE_SEQUENCE,            // `\n`, `\x41`, octal escapes ... inside a string
    STRING_CONTENT,             // plain text inside a string
    ENTRY_DELIMITER,            // emitted by scan_comment when asked for an entry delimiter
    MULTIOUTPUT_VARIABLE_START, // emitted by scan_multioutput_var_start
    IDENTIFIER,                 // identifier that turned out not to be a command
    CATCH_IDENTIFIER,           // identifier at end of line when CATCH_IDENTIFIER is valid
    TRANSPOSE,
    CTRANSPOSE,                 // `'` accepted by scan_comment when ctranspose is requested
    ERROR_SENTINEL,             // NOTE(review): presumably only valid during error recovery
};
|
|
44
|
+
|
|
45
|
+
// State carried between scanner invocations. It is written to and restored
// from tree-sitter's serialization buffer as four consecutive bytes, in
// field order.
typedef struct Scanner
{
    // Set by scan_command when a command name is followed by arguments;
    // cleared by scan_command_argument once the end of the line is reached.
    bool is_inside_command;
    // Set by scan_command_argument when an argument is followed by `...`.
    bool line_continuation;
    // Set when the current command was introduced with `!` (shell escape).
    bool is_shell_scape;
    // Quote character of the string being scanned, or 0 when none is open.
    char string_delimiter;
} Scanner;
|
|
52
|
+
|
|
53
|
+
// Reserved words. scan_command refuses to treat any of these as a command
// name or identifier so that the generated lexer can tokenize them instead.
static const char* const keywords[] = {
    "arguments",
    "break",
    "case",
    "catch",
    "classdef",
    "continue",
    "else",
    "elseif",
    "end",
    "enumeration",
    "events",
    "for",
    "function",
    "global",
    "if",
    "methods",
    "otherwise",
    "parfor",
    "persistent",
    "return",
    "spmd",
    "switch",
    "try",
    "while",
};
// Number of entries in `keywords`.
static const size_t keywords_size = sizeof keywords / sizeof keywords[0];
|
|
59
|
+
|
|
60
|
+
// Moves the lexer to the next character, keeping the current one as part of
// the token being built.
static inline void advance(TSLexer* lexer)
{
    const bool treat_as_whitespace = false;
    lexer->advance(lexer, treat_as_whitespace);
}
|
|
64
|
+
|
|
65
|
+
// Moves the lexer to the next character, telling tree-sitter to treat the
// current one as whitespace (the `skip` flag of TSLexer::advance is set).
static inline void skip(TSLexer* lexer)
{
    const bool treat_as_whitespace = true;
    lexer->advance(lexer, treat_as_whitespace);
}
|
|
69
|
+
|
|
70
|
+
// Consumes the lookahead character if it equals `chr`.
// Returns true when a character was consumed; otherwise the lexer is left
// untouched and false is returned.
static inline bool consume_char(char chr, TSLexer* lexer)
{
    const bool matches = lexer->lookahead == chr;
    if (matches) {
        lexer->advance(lexer, false);
    }
    return matches;
}
|
|
78
|
+
|
|
79
|
+
// Reports whether `chr` ends a statement: a line break, `,` or `;`.
static inline bool is_eol(const uint32_t chr)
{
    switch (chr) {
        case '\n':
        case '\r':
        case ',':
        case ';':
            return true;
        default:
            return false;
    }
}
|
|
83
|
+
|
|
84
|
+
// Like iswspace(), except that line breaks ('\n' and '\r') do not count:
// they are significant to the scanner and are handled separately.
static inline bool iswspace_matlab(const uint32_t chr)
{
    if (chr == '\n' || chr == '\r') {
        return false;
    }
    return iswspace(chr) != 0;
}
|
|
88
|
+
|
|
89
|
+
// Reports whether `chr` may appear in an identifier: an ASCII letter or `_`,
// plus an ASCII digit unless `start` is true (first character).
static inline bool is_identifier(const uint32_t chr, const bool start)
{
    // isalpha/isdigit were observed to SIGSEGV on some UTF-8 characters, such
    // as U+10C6BD (bytes 0xF48C9ABD); a file containing just those bytes
    // reproduces it. Non-ASCII input is therefore rejected up front.
    if (chr >= 0x80) {
        return false;
    }

    if (chr == '_') {
        return true;
    }
    if (iswalpha(chr)) {
        return true;
    }
    return !start && iswdigit(chr);
}
|
|
103
|
+
|
|
104
|
+
// Consumes an identifier from the lexer and copies it into `buffer` as a
// NUL-terminated string.
//
// `buffer` must have room for at least 256 bytes (scan_command passes a
// char[256]). Outcomes:
//   - lookahead does not start an identifier: nothing is consumed and
//     buffer[0] is set to 0;
//   - identifier of up to 255 characters: fully consumed and stored;
//   - longer identifier: buffer[0] is set to 0 and the function returns with
//     the lexer positioned inside the identifier.
//
// The terminator is always written here, so the result is a valid C string
// even when the caller did not zero the buffer and even for identifiers that
// exactly fill it.
static inline void consume_identifier(TSLexer* lexer, char* buffer)
{
    enum { BUFFER_CAPACITY = 256 };

    if (!is_identifier(lexer->lookahead, true)) {
        buffer[0] = 0;
        return;
    }

    size_t length = 0;
    buffer[length++] = (char) lexer->lookahead;
    advance(lexer);

    while (is_identifier(lexer->lookahead, false)) {
        // Always keep one byte free for the terminator.
        if (length == BUFFER_CAPACITY - 1) {
            buffer[0] = 0;
            return;
        }
        buffer[length++] = (char) lexer->lookahead;
        advance(lexer);
    }

    buffer[length] = 0;
}
|
|
122
|
+
|
|
123
|
+
// Skips every whitespace character (line breaks included) at the lexer
// position, marking each one as whitespace so it stays out of the token.
//
// Returns a bit set describing what was skipped:
//   bit 0 (1): at least one character was skipped
//   bit 1 (2): at least one line break was skipped
//   bit 2 (4): the last skipped character was a line break
static inline int skip_whitespaces(TSLexer* lexer)
{
    // Plain hex constants instead of 0b... literals, which are only a
    // compiler extension before C23.
    enum {
        SKIPPED_ANY = 0x1,
        SKIPPED_NEWLINE = 0x2,
        ENDED_ON_NEWLINE = 0x4,
    };

    int skipped = 0;
    while (!lexer->eof(lexer) && iswspace(lexer->lookahead)) {
        // Only the most recent character decides the "ended on newline" bit.
        skipped &= ~ENDED_ON_NEWLINE;
        if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
            skipped |= SKIPPED_ANY | SKIPPED_NEWLINE | ENDED_ON_NEWLINE;
        } else {
            skipped |= SKIPPED_ANY;
        }
        skip(lexer);
    }
    return skipped;
}
|
|
140
|
+
|
|
141
|
+
// Consumes every whitespace character (line breaks included) at the lexer
// position as part of the current token.
//
// Returns a bit set describing what was consumed:
//   bit 0 (1): at least one character was consumed
//   bit 1 (2): at least one line break was consumed
//   bit 2 (4): the last consumed character was a line break
static inline int consume_whitespaces(TSLexer* lexer)
{
    // Plain hex constants instead of 0b... literals, which are only a
    // compiler extension before C23.
    enum {
        CONSUMED_ANY = 0x1,
        CONSUMED_NEWLINE = 0x2,
        ENDED_ON_NEWLINE = 0x4,
    };

    int consumed = 0;
    while (iswspace(lexer->lookahead)) {
        // Only the most recent character decides the "ended on newline" bit.
        consumed &= ~ENDED_ON_NEWLINE;
        if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
            consumed |= CONSUMED_ANY | CONSUMED_NEWLINE | ENDED_ON_NEWLINE;
        } else {
            consumed |= CONSUMED_ANY;
        }
        advance(lexer);
    }
    return consumed;
}
|
|
155
|
+
|
|
156
|
+
// Consumes whitespace up to and including the first line-break character
// ('\n' or '\r'), or until a non-whitespace character is reached.
static inline void consume_whitespaces_once(TSLexer* lexer)
{
    for (;;) {
        const int32_t current = lexer->lookahead;
        if (!iswspace(current)) {
            return;
        }
        advance(lexer);
        if (current == '\n' || current == '\r') {
            return;
        }
    }
}
|
|
166
|
+
|
|
167
|
+
void* tree_sitter_matlab_external_scanner_create()
|
|
168
|
+
{
|
|
169
|
+
Scanner* scanner = calloc(1, sizeof(Scanner));
|
|
170
|
+
return scanner;
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Releases a scanner obtained from tree_sitter_matlab_external_scanner_create.
// Passing NULL is allowed: free(NULL) is defined to do nothing, so no guard
// is needed.
void tree_sitter_matlab_external_scanner_destroy(void* payload)
{
    free(payload);
}
|
|
179
|
+
|
|
180
|
+
unsigned tree_sitter_matlab_external_scanner_serialize(void* payload, char* buffer)
|
|
181
|
+
{
|
|
182
|
+
Scanner* scanner = (Scanner*) payload;
|
|
183
|
+
buffer[0] = (char) scanner->is_inside_command;
|
|
184
|
+
buffer[1] = (char) scanner->line_continuation;
|
|
185
|
+
buffer[2] = (char) scanner->is_shell_scape;
|
|
186
|
+
buffer[3] = scanner->string_delimiter;
|
|
187
|
+
return 4;
|
|
188
|
+
}
|
|
189
|
+
|
|
190
|
+
void tree_sitter_matlab_external_scanner_deserialize(
|
|
191
|
+
void* payload,
|
|
192
|
+
const char* buffer,
|
|
193
|
+
unsigned length)
|
|
194
|
+
{
|
|
195
|
+
Scanner* scanner = (Scanner*) payload;
|
|
196
|
+
if (length == 4) {
|
|
197
|
+
scanner->is_inside_command = buffer[0];
|
|
198
|
+
scanner->line_continuation = buffer[1];
|
|
199
|
+
scanner->is_shell_scape = buffer[2];
|
|
200
|
+
scanner->string_delimiter = buffer[3];
|
|
201
|
+
}
|
|
202
|
+
}
|
|
203
|
+
|
|
204
|
+
// Consumes characters up to, but not including, the next line break
// ('\n' or '\r'), stopping early at end of input.
static inline void consume_comment_line(TSLexer* lexer)
{
    while (!lexer->eof(lexer)) {
        if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
            break;
        }
        advance(lexer);
    }
}
|
|
210
|
+
|
|
211
|
+
// NOLINTNEXTLINE(*misc-no-recursion)
|
|
212
|
+
static bool scan_comment(TSLexer* lexer, bool entry_delimiter, bool ctranspose)
|
|
213
|
+
{
|
|
214
|
+
lexer->mark_end(lexer);
|
|
215
|
+
|
|
216
|
+
const bool percent = lexer->lookahead == '%';
|
|
217
|
+
const bool line_continuation = lexer->lookahead == '.' && consume_char('.', lexer)
|
|
218
|
+
&& consume_char('.', lexer) && consume_char('.', lexer);
|
|
219
|
+
const bool block = percent && consume_char('%', lexer) && consume_char('{', lexer);
|
|
220
|
+
|
|
221
|
+
// Since we cannot look multiple chars ahead in the main function, this
|
|
222
|
+
// ended up being handled here. It allows the correct detection of numbers
|
|
223
|
+
// like .5 inside matrices/cells: [0 .5].
|
|
224
|
+
if (entry_delimiter && !percent && !line_continuation) {
|
|
225
|
+
lexer->result_symbol = ENTRY_DELIMITER;
|
|
226
|
+
return iswdigit(lexer->lookahead);
|
|
227
|
+
}
|
|
228
|
+
// We are inside a matrix/cell row and there is a line continuation, like this:
|
|
229
|
+
// a = { 1 ...
|
|
230
|
+
// 2 ...
|
|
231
|
+
// }
|
|
232
|
+
|
|
233
|
+
if (entry_delimiter && line_continuation) {
|
|
234
|
+
consume_whitespaces(lexer);
|
|
235
|
+
if (lexer->lookahead == '.') {
|
|
236
|
+
lexer->mark_end(lexer);
|
|
237
|
+
advance(lexer);
|
|
238
|
+
lexer->result_symbol = iswdigit(lexer->lookahead) ? ENTRY_DELIMITER : LINE_CONTINUATION;
|
|
239
|
+
} else if (iswdigit(lexer->lookahead) || lexer->lookahead == '\'' || lexer->lookahead == '"') {
|
|
240
|
+
lexer->result_symbol = ENTRY_DELIMITER;
|
|
241
|
+
} else {
|
|
242
|
+
lexer->result_symbol = LINE_CONTINUATION;
|
|
243
|
+
lexer->mark_end(lexer);
|
|
244
|
+
}
|
|
245
|
+
return true;
|
|
246
|
+
}
|
|
247
|
+
|
|
248
|
+
if (block) {
|
|
249
|
+
while (!lexer->eof(lexer) && iswspace_matlab(lexer->lookahead)) {
|
|
250
|
+
advance(lexer);
|
|
251
|
+
}
|
|
252
|
+
|
|
253
|
+
if (!consume_char('\n', lexer) && !consume_char('\r', lexer)) {
|
|
254
|
+
consume_comment_line(lexer);
|
|
255
|
+
lexer->result_symbol = COMMENT;
|
|
256
|
+
lexer->mark_end(lexer);
|
|
257
|
+
return true;
|
|
258
|
+
}
|
|
259
|
+
|
|
260
|
+
// Empty block comment
|
|
261
|
+
if (lexer->lookahead == '%' && consume_char('%', lexer) && consume_char('}', lexer)) {
|
|
262
|
+
lexer->result_symbol = COMMENT;
|
|
263
|
+
lexer->mark_end(lexer);
|
|
264
|
+
return true;
|
|
265
|
+
}
|
|
266
|
+
|
|
267
|
+
while (!lexer->eof(lexer)) {
|
|
268
|
+
consume_comment_line(lexer);
|
|
269
|
+
advance(lexer);
|
|
270
|
+
consume_whitespaces(lexer);
|
|
271
|
+
|
|
272
|
+
if (consume_char('%', lexer) && consume_char('}', lexer)) {
|
|
273
|
+
lexer->result_symbol = COMMENT;
|
|
274
|
+
lexer->mark_end(lexer);
|
|
275
|
+
return true;
|
|
276
|
+
}
|
|
277
|
+
}
|
|
278
|
+
|
|
279
|
+
return false;
|
|
280
|
+
}
|
|
281
|
+
|
|
282
|
+
if (percent || line_continuation) {
|
|
283
|
+
consume_comment_line(lexer);
|
|
284
|
+
lexer->mark_end(lexer);
|
|
285
|
+
|
|
286
|
+
if (!line_continuation) {
|
|
287
|
+
lexer->result_symbol = COMMENT;
|
|
288
|
+
advance(lexer);
|
|
289
|
+
} else {
|
|
290
|
+
lexer->result_symbol = LINE_CONTINUATION;
|
|
291
|
+
consume_whitespaces_once(lexer);
|
|
292
|
+
lexer->mark_end(lexer);
|
|
293
|
+
return true;
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// Merges consecutive comments into one token, unless they are
|
|
297
|
+
// separated by a newline.
|
|
298
|
+
while (!lexer->eof(lexer) && (lexer->lookahead == ' ' || lexer->lookahead == '\t')) {
|
|
299
|
+
advance(lexer);
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
if (lexer->lookahead == '%') {
|
|
303
|
+
return scan_comment(lexer, false, false);
|
|
304
|
+
}
|
|
305
|
+
|
|
306
|
+
return true;
|
|
307
|
+
}
|
|
308
|
+
|
|
309
|
+
if (ctranspose && lexer->lookahead == '\'') {
|
|
310
|
+
advance(lexer);
|
|
311
|
+
lexer->mark_end(lexer);
|
|
312
|
+
lexer->result_symbol = CTRANSPOSE;
|
|
313
|
+
return true;
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
return false;
|
|
317
|
+
}
|
|
318
|
+
|
|
319
|
+
// Decides whether the identifier at the lexer position starts a
// command-syntax call (`hold on`, `pwd`, `!ls -la`) or is an ordinary
// identifier.
//
// Returns true with lexer->result_symbol set to COMMAND_NAME, IDENTIFIER or
// CATCH_IDENTIFIER, or false to leave the input to the generated lexer
// (keywords, `get.`/`set.` prefixes, non-identifiers).
// scanner->is_inside_command is set when command arguments are expected next.
static bool scan_command(Scanner* scanner, TSLexer* lexer, const bool* valid_symbols)
{
    // Special case: shell escape
    if (lexer->lookahead == '!') {
        advance(lexer);
        while (iswspace_matlab(lexer->lookahead)) {
            advance(lexer);
        }
        // The command name runs up to the next space, newline or end of input.
        while (!(lexer->lookahead == ' ' || lexer->lookahead == '\n' || lexer->eof(lexer))) {
            advance(lexer);
        }
        lexer->result_symbol = COMMAND_NAME;
        lexer->mark_end(lexer);
        while (iswspace_matlab(lexer->lookahead)) {
            advance(lexer);
        }
        const bool has_arguments = lexer->lookahead != '\n';
        scanner->is_inside_command = has_arguments;
        scanner->is_shell_scape = has_arguments;
        return true;
    }

    if (!is_identifier(lexer->lookahead, true)) {
        return false;
    }

    char name[256] = {0};
    consume_identifier(lexer, name);
    lexer->mark_end(lexer);

    if (name[0] != 0) {
        bool is_keyword = false;
        for (size_t i = 0; i < keywords_size; i++) {
            if (strcmp(keywords[i], name) == 0) {
                is_keyword = true;
                break;
            }
        }

        if (lexer->lookahead == '.') {
            // Since it is not followed by a space, it cannot be a command.
            if (strcmp("get", name) == 0 || strcmp("set", name) == 0) {
                return false;
            }
            // so it is ok to consume to identify a line continuation
            // NOLINTNEXTLINE(*misc-redundant-expression)
            if (consume_char('.', lexer) && consume_char('.', lexer) && consume_char('.', lexer)) {
                // If it is a keyword, yield to the internal scanner
                if (is_keyword) {
                    return false;
                }
            }
            lexer->result_symbol = IDENTIFIER;
            return true;
        }

        // The following keywords are allowed as commands if they get 1 argument
        static const char* const keyword_commands[] = {
            "methods",
            "arguments",
            "enumeration",
            "events",
        };
        bool is_keyword_command = false;
        for (size_t i = 0; i < sizeof(keyword_commands) / sizeof(keyword_commands[0]); i++) {
            if (strcmp(keyword_commands[i], name) == 0) {
                is_keyword_command = true;
                break;
            }
        }

        if (is_keyword_command) {
            // If this is a keyword-command, check if it has an argument.
            // If it has no arguments, this is a keyword, not a command.
            lexer->result_symbol = COMMAND_NAME;
            while (!lexer->eof(lexer) && iswspace_matlab(lexer->lookahead)) {
                advance(lexer);
            }
            if (!is_identifier(lexer->lookahead, true)) {
                return false;
            }
            scanner->is_inside_command = true;
            return true;
        }

        if (is_keyword) {
            return false;
        }
    }

    // First case: found an end-of-line already, so this is a command for sure.
    // example:
    // pwd
    // pwd;
    // pwd,
    if (is_eol(lexer->lookahead)) {
        lexer->result_symbol = valid_symbols[CATCH_IDENTIFIER] ? CATCH_IDENTIFIER : COMMAND_NAME;
        return true;
    }

    // If it's not followed by a space, it may be something else, like A' for
    // example. Or A+2.
    if (lexer->lookahead != ' ') {
        lexer->result_symbol = IDENTIFIER;
        return true;
    }

    // If followed by a line continuation, look after it
    const int whitespace_flags = consume_whitespaces(lexer);
    if (whitespace_flags & 4) { // Command followed by spaces then newline
        scanner->is_inside_command = false;
        lexer->result_symbol = COMMAND_NAME;
        return true;
    }
    if (lexer->lookahead == '.' && consume_char('.', lexer) && consume_char('.', lexer)
        && consume_char('.', lexer)) {
        lexer->result_symbol = IDENTIFIER;
        return true;
    }

    // If it is followed by a space, it doesn't mean it's a command yet.
    // It could be A + 2 or A = 2. Let's check what is the first char after
    // all whitespaces. The token end was already marked right after the
    // name, so from here on we only need to decide between command and
    // something else.
    lexer->result_symbol = COMMAND_NAME;
    while (!lexer->eof(lexer) && iswspace_matlab(lexer->lookahead)) {
        advance(lexer);
    }

    // Check for end-of-line again, since it may be that the user just put a
    // space at the end, like `pwd ;`
    if (is_eol(lexer->lookahead)) {
        scanner->is_inside_command = true;
        return true;
    }

    // The first char of the first argument cannot be /=()/
    if (lexer->lookahead == '=' || lexer->lookahead == '(' || lexer->lookahead == ')') {
        lexer->result_symbol = IDENTIFIER;
        return true;
    }

    // A single quote, an identifier character, or a char greater than 0xC0
    // (assumed to be a valid UTF-8 char) all mean this is a command.
    if (lexer->lookahead == '\'') {
        scanner->is_inside_command = true;
        return true;
    }
    if (is_identifier(lexer->lookahead, false)) {
        scanner->is_inside_command = true;
        return true;
    }
    if (lexer->lookahead >= 0xC0) {
        scanner->is_inside_command = true;
        return true;
    }

    // Let's now consider punctuation marks.
    if (!is_punct_char(lexer->lookahead)) {
        return false;
    }

    // In this case, we advance and look at what comes next too.
    const uint32_t first = lexer->lookahead;
    advance(lexer);
    const uint32_t second = lexer->lookahead;

    // If it's the end-of-line, then it's a command.
    if (is_eol(second)) {
        scanner->is_inside_command = true;
        return true;
    }

    if (iswspace_matlab(second)) {
        // If it is a space, then it depends on what we have, since
        // `disp + ;` is a valid command but `disp + 2;` isn't.
        bool is_operator;
        switch (first) {
            case '!':
            case '&':
            case '*':
            case '+':
            case '-':
            case '/':
            case '<':
            case '>':
            case '@':
            case '\\':
            case '^':
            case '|':
                is_operator = true;
                break;
            default:
                is_operator = false;
                break;
        }

        // If it's not an operator, then this is a command.
        if (!is_operator) {
            scanner->is_inside_command = true;
            return true;
        }

        // If it is an operator, this can only be a command if there
        // are no further arguments.
        advance(lexer);
        while (iswspace_matlab(lexer->lookahead)) {
            advance(lexer);
        }
        scanner->is_inside_command = is_eol(lexer->lookahead);
        lexer->result_symbol = scanner->is_inside_command ? COMMAND_NAME : IDENTIFIER;
        return true;
    }

    // Now we check for the rest of the operators.
    // Since they have 2 characters, it matters if the next is a space.
    advance(lexer);

    if (lexer->lookahead != ' ') {
        scanner->is_inside_command = true;
        return true;
    }

    static const char two_char_operators[][2] = {
        {'&', '&'},
        {'|', '|'},
        {'=', '='},
        {'~', '='},
        {'<', '='},
        {'>', '='},
        {'.', '+'},
        {'.', '-'},
        {'.', '*'},
        {'.', '/'},
        {'.', '\\'},
        {'.', '^'},
    };
    const size_t operator_count = sizeof(two_char_operators) / sizeof(two_char_operators[0]);

    for (size_t i = 0; i < operator_count; i++) {
        if ((uint32_t) two_char_operators[i][0] == first
            && (uint32_t) two_char_operators[i][1] == second) {
            lexer->result_symbol = IDENTIFIER;
            return true;
        }
    }

    scanner->is_inside_command = true;
    return true;
}
|
|
559
|
+
|
|
560
|
+
// Scans one argument of a command-syntax call.
//
// Returns true with lexer->result_symbol set to COMMAND_ARGUMENT (or to
// LINE_CONTINUATION / COMMENT when the argument position holds one of those
// instead), false when nothing could be scanned. Clears
// scanner->is_inside_command when the end of the command is reached.
static bool scan_command_argument(Scanner* scanner, TSLexer* lexer)
{
    // If this is a shell escape command, we just break arguments in spaces
    // since we don't know what shell it is.
    if (scanner->is_shell_scape) {
        if (lexer->eof(lexer)) {
            return false;
        }

        while (!(lexer->lookahead == ' ' || lexer->lookahead == '\n' || lexer->eof(lexer))) {
            advance(lexer);
        }
        lexer->result_symbol = COMMAND_ARGUMENT;
        lexer->mark_end(lexer);
        while (iswspace_matlab(lexer->lookahead)) {
            advance(lexer);
        }
        if (lexer->lookahead == '\n') {
            scanner->is_inside_command = false;
            scanner->is_shell_scape = false;
        }
        return true;
    }

    // Avoids infinite loop when the argument is right before the eof.
    if (lexer->eof(lexer)) {
        return false;
    }

    bool in_quote = false;
    int32_t depth = 0;     // nesting level of (), [] and {}
    bool consumed = false; // whether this argument already has characters

    while (!lexer->eof(lexer)) {
        // No matter what, found new line
        const bool at_newline = lexer->lookahead == '\n' || lexer->lookahead == '\r';
        // No quotes, no parens, found $._end_of_line or space
        const bool at_separator = !in_quote && depth == 0
            && (is_eol(lexer->lookahead) || iswspace_matlab(lexer->lookahead));
        // Inside parens, no quotes, found ;
        const bool at_inner_semicolon = !in_quote && depth != 0 && lexer->lookahead == ';';

        if (at_newline || at_separator || at_inner_semicolon) {
            lexer->result_symbol = COMMAND_ARGUMENT;
            lexer->mark_end(lexer);

            while (iswspace_matlab(lexer->lookahead)) {
                advance(lexer);
            }

            if (is_eol(lexer->lookahead) || at_newline) {
                scanner->line_continuation = false;
                scanner->is_inside_command = false;
            }

            return true;
        }

        // Comments, continuations and brackets are only recognized outside
        // quotes, or anywhere once inside brackets.
        const bool is_active = !in_quote || depth != 0;

        // Line comment, finish.
        if (is_active && lexer->lookahead == '%') {
            scanner->is_inside_command = false;
            if (consumed) {
                lexer->result_symbol = COMMAND_ARGUMENT;
                lexer->mark_end(lexer);
                return true;
            }
            return scan_comment(lexer, false, false);
        }

        // Line continuation
        if (is_active && lexer->lookahead == '.') {
            lexer->result_symbol = COMMAND_ARGUMENT;
            lexer->mark_end(lexer);
            advance(lexer);
            if (lexer->lookahead == '.') {
                advance(lexer);
                if (lexer->lookahead == '.') {
                    if (consumed) {
                        // End the argument before the dots; the continuation
                        // is left for the next call.
                        scanner->line_continuation = true;
                    } else {
                        consume_comment_line(lexer);
                        lexer->result_symbol = LINE_CONTINUATION;
                        lexer->mark_end(lexer);
                    }
                    return true;
                }
            }
            // One or two dots only: they belong to the argument.
            consumed = true;
            continue;
        }

        const int32_t current = lexer->lookahead;
        if (is_active && (current == '(' || current == '[' || current == '{')) {
            depth++;
        }
        if (is_active && (current == ')' || current == ']' || current == '}')) {
            depth--;
        }
        if (current == '\'') {
            in_quote = !in_quote;
        }

        advance(lexer);
        consumed = true;
    }

    // Mark as argument so the scanner doesnt get called again in an infinite
    // loop.
    if (lexer->eof(lexer)) {
        lexer->result_symbol = COMMAND_ARGUMENT;
        lexer->mark_end(lexer);
        return true;
    }

    return false;
}
|
|
680
|
+
|
|
681
|
+
// Recognizes an opening string quote at the lexer position and records it in
// scanner->string_delimiter.
//
// A double quote is always accepted. A single quote is accepted only when
// another single quote follows before the next '\n' or end of input. Note
// that string_delimiter has already been set to '\'' when that check fails
// and false is returned.
static bool scan_string_open(Scanner* scanner, TSLexer* lexer)
{
    if (lexer->lookahead == '"') {
        scanner->string_delimiter = '"';
        advance(lexer);
        lexer->result_symbol = DOUBLE_QUOTE_STRING_START;
        lexer->mark_end(lexer);
        return true;
    }

    if (lexer->lookahead != '\'') {
        return false;
    }

    scanner->string_delimiter = '\'';
    advance(lexer);
    lexer->result_symbol = SINGLE_QUOTE_STRING_START;
    lexer->mark_end(lexer);

    // A single quote string has to be ended in the same line. The token end
    // is already marked, so this look-ahead does not extend the token.
    for (;;) {
        if (lexer->eof(lexer) || lexer->lookahead == '\n') {
            return false;
        }
        if (lexer->lookahead == '\'') {
            return true;
        }
        advance(lexer);
    }
}
|
|
707
|
+
|
|
708
|
+
// Scans the next piece of an open string: the closing quote, a formatting
// sequence (`%d`, `%%`), an escape sequence (`\n`, `\x41`, octal) or a run
// of plain content.
//
// Returns true with lexer->result_symbol set accordingly. Returns false and
// resets scanner->string_delimiter when the string is not terminated on the
// current line.
static bool scan_string_close(Scanner* scanner, TSLexer* lexer)
{
    if (lexer->lookahead == scanner->string_delimiter) {
        advance(lexer);
        // A doubled delimiter is an escaped quote and counts as content.
        if (lexer->lookahead == scanner->string_delimiter) {
            advance(lexer);
            lexer->result_symbol = STRING_CONTENT;
            goto content;
        }
        if (scanner->string_delimiter == '"') {
            lexer->result_symbol = DOUBLE_QUOTE_STRING_END;
        } else {
            lexer->result_symbol = SINGLE_QUOTE_STRING_END;
        }
        lexer->mark_end(lexer);
        scanner->string_delimiter = 0;
        return true;
    }

    // This means this string is not properly terminated.
    if (lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->eof(lexer)) {
        scanner->string_delimiter = 0;
        return false;
    }

    if (lexer->lookahead == '%') {
        advance(lexer);

        if (lexer->lookahead == '%') {
            advance(lexer);
            lexer->result_symbol = FORMATTING_SEQUENCE;
            lexer->mark_end(lexer);
            return true;
        }

        // Characters allowed inside a format specifier, and the subset that
        // terminates one.
        static const char spec_chars[] = "1234567890.-+ #btcdeEfgGosuxX";
        static const char conversion_chars[] = "cdeEfgGosuxX";

        while (!lexer->eof(lexer) && lexer->lookahead != '\n' && lexer->lookahead != '\r') {
            bool in_spec = false;
            for (size_t i = 0; i < sizeof(spec_chars) - 1; i++) {
                if ((int32_t) spec_chars[i] == lexer->lookahead) {
                    in_spec = true;
                    break;
                }
            }

            // Not a format specifier after all: the `%` is plain content.
            if (!in_spec) {
                lexer->result_symbol = STRING_CONTENT;
                goto content;
            }

            bool is_conversion = false;
            for (size_t i = 0; i < sizeof(conversion_chars) - 1; i++) {
                if (conversion_chars[i] == lexer->lookahead) {
                    is_conversion = true;
                    break;
                }
            }

            if (is_conversion) {
                advance(lexer);
                lexer->result_symbol = FORMATTING_SEQUENCE;
                lexer->mark_end(lexer);
                return true;
            }

            advance(lexer);
        }

        scanner->string_delimiter = 0;
        return false;
    }

    if (lexer->lookahead == '\\') {
        advance(lexer);

        if (lexer->lookahead == 'x') {
            advance(lexer);
            static const char hex_digits[] = "1234567890abcdefABCDEF";
            while (!lexer->eof(lexer)) {
                bool is_hex = false;
                for (size_t i = 0; i < sizeof(hex_digits) - 1; i++) {
                    if (hex_digits[i] == lexer->lookahead) {
                        is_hex = true;
                        break;
                    }
                }

                // The escape ends at the first non-hex character.
                if (!is_hex) {
                    lexer->result_symbol = ESCAPE_SEQUENCE;
                    lexer->mark_end(lexer);
                    return true;
                }

                advance(lexer);
            }
        }

        if (lexer->lookahead >= '0' && lexer->lookahead <= '7') {
            while (lexer->lookahead >= '0' && lexer->lookahead <= '7' && !lexer->eof(lexer)) {
                advance(lexer);
            }

            lexer->result_symbol = ESCAPE_SEQUENCE;
            lexer->mark_end(lexer);
            return true;
        }

        static const char simple_escapes[] = "abfnrtv\\";
        bool is_escape = false;
        for (size_t i = 0; i < sizeof(simple_escapes) - 1; i++) {
            if (simple_escapes[i] == lexer->lookahead) {
                is_escape = true;
                break;
            }
        }

        if (is_escape) {
            advance(lexer);
            lexer->result_symbol = ESCAPE_SEQUENCE;
            lexer->mark_end(lexer);
            return true;
        }
    }

content:
    while (lexer->lookahead != '\n' && lexer->lookahead != '\r' && !lexer->eof(lexer)) {
        // In MATLAB '' and "" are valid inside their own kind: 'It''s ok' "He said ""it's ok"""
        if (lexer->lookahead == scanner->string_delimiter) {
            lexer->result_symbol = STRING_CONTENT;
            lexer->mark_end(lexer);
            advance(lexer);
            if (lexer->lookahead != scanner->string_delimiter) {
                return true;
            }
            advance(lexer);
            continue;
        }

        // Stop before a possible formatting/escape sequence: the scanner will
        // be called again and will match it in the branches above. When the
        // `%` or `\` is followed by the delimiter or by blank space it is
        // kept as content instead.
        if (lexer->lookahead == '%' || lexer->lookahead == '\\') {
            lexer->result_symbol = STRING_CONTENT;
            lexer->mark_end(lexer);
            advance(lexer);
            if (lexer->lookahead == scanner->string_delimiter || iswspace_matlab(lexer->lookahead)) {
                continue;
            }
            return true;
        }

        advance(lexer);
    }

    // Mark end of content here and end of string on next call. This is an
    // unterminated string and it's better to wrongly finish it here, otherwise
    // the error will appear god knows how many lines after this and it will be
    // hard for the user to understand what went wrong.
    if (lexer->lookahead == '\n' || lexer->lookahead == '\r' || lexer->eof(lexer)) {
        lexer->result_symbol = STRING_CONTENT;
        lexer->mark_end(lexer);
        return true;
    }

    scanner->string_delimiter = 0;
    return false;
}
|
|
866
|
+
|
|
867
|
+
// Decides whether the bracket under the cursor starts a multi-output
// assignment target, i.e. something shaped like `[a, b] = f()`.
//
// The first character is consumed unconditionally and the token end is marked
// right after it (the caller in this file only gets here when the lookahead is
// '['). mark_end is not called again, so everything read afterwards is
// lookahead only and does not become part of the token.
//
// Returns true when the matching ']' is followed -- ignoring whitespace and
// `...` sequences -- by a '=' that is not the first half of '=='. Returns
// false when the input ends before the matching ']' or when anything else
// follows the bracket.
static inline bool scan_multioutput_var_start(TSLexer* lexer)
{
    advance(lexer);
    lexer->result_symbol = MULTIOUTPUT_VARIABLE_START;
    lexer->mark_end(lexer);

    // We can have arrays inside function calls inside the multi-output
    // variable, so nested square brackets have to be tracked.
    unsigned sb_count = 0;

    while (!lexer->eof(lexer)) {
        // Three dots in a row: hand the rest of the line to
        // consume_comment_line and step over one more character
        // (presumably the line break -- confirm against consume_comment_line).
        // NOLINTNEXTLINE(*misc-redundant-expression)
        if (consume_char('.', lexer) && consume_char('.', lexer) && consume_char('.', lexer)) {
            consume_comment_line(lexer);
            advance(lexer);
        }

        // Count every opener of a run such as "[[". Counting only the first
        // one would let the generic advance below swallow the second '['
        // uncounted, and the depth would hit zero one ']' too early.
        while (lexer->lookahead == '[' && !lexer->eof(lexer)) {
            sb_count++;
            advance(lexer);
        }

        if (lexer->lookahead != ']') {
            advance(lexer);
        } else if (sb_count > 0) {
            // Closes a nested bracket, keep going.
            sb_count--;
            advance(lexer);
        } else {
            // This is the ']' matching the bracket consumed on entry.
            break;
        }
    }

    // Ran out of input before finding the closing bracket.
    if (lexer->lookahead != ']') {
        return false;
    }

    advance(lexer);

    // Skip whitespace and `...` sequences between ']' and whatever follows.
    while (!lexer->eof(lexer)) {
        // NOLINTNEXTLINE(*misc-redundant-expression)
        if (consume_char('.', lexer) && consume_char('.', lexer) && consume_char('.', lexer)) {
            consume_comment_line(lexer);
            advance(lexer);
        } else if (iswspace_matlab(lexer->lookahead)) {
            advance(lexer);
        } else {
            break;
        }
    }

    // A lone '=' makes this an assignment target; '==' is a comparison.
    if (lexer->lookahead == '=') {
        advance(lexer);
        if (lexer->lookahead != '=') {
            return true;
        }
    }

    return false;
}
|
|
926
|
+
|
|
927
|
+
static bool scan_identifier(TSLexer* lexer);
|
|
928
|
+
// Decides whether an ENTRY_DELIMITER token should be produced at the current
// position, based on the next character and on how much was skipped before it.
//
// lexer:   positioned after any skipped whitespace.
// skipped: value produced by skip_whitespaces in the caller. Zero means
//          nothing was skipped; bit 1 (value 2) vetoes the delimiter
//          outright (presumably a line break was crossed -- confirm against
//          skip_whitespaces).
//
// The token end is marked before anything is consumed, so apart from the
// explicit ',' case a delimiter reported here has zero width and every
// advance below is lookahead only. The final fallback delegates to
// scan_identifier, which sets its own result symbol and token end.
static bool scan_entry_delimiter(TSLexer* lexer, int skipped)
{
    const bool had_gap = skipped != 0;

    lexer->mark_end(lexer);
    lexer->result_symbol = ENTRY_DELIMITER;

    if (skipped & 2) {
        return false;
    }

    switch (lexer->lookahead) {
        case ',':
            // An explicit comma is the delimiter itself: include it in the token.
            advance(lexer);
            lexer->mark_end(lexer);
            lexer->result_symbol = ENTRY_DELIMITER;
            return true;

        case '.':
            // NOTE(review): this steps over two characters before testing for
            // a digit; confirm that is the intended lookahead distance.
            advance(lexer);
            advance(lexer);
            return iswdigit(lexer->lookahead);

        case '{':
        case '(':
        case '\'':
            // Only a delimiter when something separated it from the previous entry.
            return had_gap;

        case '[':
            return true;

        // These chars mean we cannot end the cell here, as the expression will
        // surely continue OR the char must stay put for the internal parser
        // to deal with.
        case ']':
        case '}':
        case '&':
        case '|':
        case '=':
        case '<':
        case '>':
        case '*':
        case '/':
        case '\\':
        case '^':
        case ';':
        case ':':
            return false;

        case '~':
            // "~=" continues the expression; a lone '~' starts a new entry.
            advance(lexer);
            return lexer->lookahead != '=';

        case '+':
        case '-':
            // A sign followed by a space never delimits; otherwise it only
            // does when something was skipped in front of it.
            advance(lexer);
            return lexer->lookahead != ' ' && had_gap;

        default:
            break;
    }

    if (had_gap) {
        return true;
    }

    if (is_identifier(lexer->lookahead, true)) {
        return scan_identifier(lexer);
    }

    return false;
}
|
|
994
|
+
|
|
995
|
+
// Tries to produce an IDENTIFIER token from the text at the cursor.
//
// consume_identifier fills a zero-initialised 256-byte buffer; an empty
// buffer means nothing identifier-like was read and the scan fails.
//
// When the name is directly followed by '.', it is rejected only if it is
// exactly "get" or "set", and the keyword table is not consulted. Otherwise
// the name is rejected if it equals any entry of `keywords`.
//
// On success the result symbol is set to IDENTIFIER and the token end is
// marked at the current position.
static bool scan_identifier(TSLexer* lexer)
{
    char name[256] = {0};
    consume_identifier(lexer, name);

    if (name[0] == 0) {
        return false;
    }

    bool accepted = true;

    if (lexer->lookahead == '.') {
        accepted = strcmp("get", name) != 0 && strcmp("set", name) != 0;
    } else {
        for (size_t k = 0; accepted && k < keywords_size; k++) {
            accepted = strcmp(keywords[k], name) != 0;
        }
    }

    if (!accepted) {
        return false;
    }

    lexer->result_symbol = IDENTIFIER;
    lexer->mark_end(lexer);
    return true;
}
|
|
1019
|
+
|
|
1020
|
+
// Tries to produce a TRANSPOSE or CTRANSPOSE token at the cursor.
//
// A lookahead of '\'' yields TRANSPOSE. Otherwise, if the lookahead is '.'
// and consume_char('\'', lexer) succeeds, the result is CTRANSPOSE. In both
// cases one more character is consumed and the token end is marked after it.
// Anything else fails.
//
// NOTE(review): the CTRANSPOSE path needs consume_char('\'', ...) to succeed
// while the lookahead is '.'; consume_char is defined elsewhere in this
// file -- confirm that path is actually reachable.
static bool scan_transpose(TSLexer* lexer)
{
    const bool plain_quote = lexer->lookahead == '\'';

    if (!plain_quote && !(lexer->lookahead == '.' && consume_char('\'', lexer))) {
        return false;
    }

    advance(lexer);
    lexer->mark_end(lexer);
    lexer->result_symbol = plain_quote ? TRANSPOSE : CTRANSPOSE;
    return true;
}
|
|
1036
|
+
|
|
1037
|
+
// External scanner entry point: picks the sub-scanner to run from the scanner
// state and from the set of tokens the parser currently accepts.
//
// payload:       the Scanner state object.
// lexer:         tree-sitter lexer positioned at the next unread character.
// valid_symbols: indexed by external token id; true where that token is
//                acceptable at this point.
//
// Dispatch order (first match wins):
//   1. Inside a string (string_delimiter != 0): scan_string_close, if a
//      string end or a formatting sequence is acceptable.
//   2. Otherwise whitespace is skipped first, then:
//      a. comment, when the lookahead is '%' or (with bit 1 of the skip
//         result clear) '.',
//      b. inside a command: command argument only,
//      c. transpose (only when nothing was skipped), string start,
//         multi-output variable start, entry delimiter, command name,
//         identifier.
// Returns false when no sub-scanner applies.
bool tree_sitter_matlab_external_scanner_scan(void* payload, TSLexer* lexer, const bool* valid_symbols)
{
    Scanner* scanner = (Scanner*) payload;

    // An open string takes over completely; no whitespace skipping here.
    if (scanner->string_delimiter != 0) {
        if (valid_symbols[DOUBLE_QUOTE_STRING_END] || valid_symbols[SINGLE_QUOTE_STRING_END]
            || valid_symbols[FORMATTING_SEQUENCE]) {
            return scan_string_close(scanner, lexer);
        }
        return false;
    }

    const int skipped = skip_whitespaces(lexer);
    // Bit 1 of the skip result gates several branches below (presumably it
    // signals a skipped line break -- confirm against skip_whitespaces).
    const bool skip_bit2_unset = (skipped & 2) == 0;

    const bool comment_lead = lexer->lookahead == '%' || (skip_bit2_unset && lexer->lookahead == '.');
    if ((scanner->line_continuation || !scanner->is_inside_command) && valid_symbols[COMMENT]
        && comment_lead) {
        return scan_comment(lexer, valid_symbols[ENTRY_DELIMITER], valid_symbols[CTRANSPOSE]);
    }

    // While inside a command only its arguments can be scanned.
    if (scanner->is_inside_command) {
        if (valid_symbols[COMMAND_ARGUMENT]) {
            return scan_command_argument(scanner, lexer);
        }
        return false;
    }

    // A transpose must touch the preceding token: nothing may have been skipped.
    if (skipped == 0 && valid_symbols[TRANSPOSE] && scan_transpose(lexer)) {
        return true;
    }

    const bool opens_single = valid_symbols[SINGLE_QUOTE_STRING_START] && lexer->lookahead == '\'';
    const bool opens_double = valid_symbols[DOUBLE_QUOTE_STRING_START] && lexer->lookahead == '"';
    if (opens_single || opens_double) {
        return scan_string_open(scanner, lexer);
    }

    if (!scanner->line_continuation) {
        if (valid_symbols[MULTIOUTPUT_VARIABLE_START] && lexer->lookahead == '[') {
            return scan_multioutput_var_start(lexer);
        }

        if (valid_symbols[ENTRY_DELIMITER]) {
            return scan_entry_delimiter(lexer, skipped);
        }
    }

    if (valid_symbols[COMMAND_NAME]) {
        scanner->is_inside_command = false;
        scanner->is_shell_scape = false;
        return scan_command(scanner, lexer, valid_symbols);
    }

    if (valid_symbols[IDENTIFIER] && skip_bit2_unset) {
        scanner->is_inside_command = false;
        scanner->is_shell_scape = false;
        return scan_identifier(lexer);
    }

    return false;
}
|