@acristoffers/tree-sitter-matlab 1.2.4 → 1.2.13

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/src/scanner.c CHANGED
@@ -48,6 +48,7 @@ typedef struct
48
48
  bool line_continuation;
49
49
  bool is_shell_scape;
50
50
  char string_delimiter;
51
+ bool generate_entry_delimiter;
51
52
  } Scanner;
52
53
 
53
54
  static const char* const keywords[] = {
@@ -155,7 +156,7 @@ static inline int consume_whitespaces(TSLexer* lexer)
155
156
 
156
157
  static inline void consume_whitespaces_once(TSLexer* lexer)
157
158
  {
158
- while (iswspace(lexer->lookahead)) {
159
+ while (!lexer->eof(lexer) && iswspace(lexer->lookahead)) {
159
160
  if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
160
161
  advance(lexer);
161
162
  break;
@@ -184,7 +185,8 @@ unsigned tree_sitter_matlab_external_scanner_serialize(void* payload, char* buff
184
185
  buffer[1] = (char) scanner->line_continuation;
185
186
  buffer[2] = (char) scanner->is_shell_scape;
186
187
  buffer[3] = scanner->string_delimiter;
187
- return 4;
188
+ buffer[4] = (char) scanner->generate_entry_delimiter;
189
+ return 5;
188
190
  }
189
191
 
190
192
  void tree_sitter_matlab_external_scanner_deserialize(
@@ -193,11 +195,12 @@ void tree_sitter_matlab_external_scanner_deserialize(
193
195
  unsigned length)
194
196
  {
195
197
  Scanner* scanner = (Scanner*) payload;
196
- if (length == 4) {
198
+ if (length == 5) {
197
199
  scanner->is_inside_command = buffer[0];
198
200
  scanner->line_continuation = buffer[1];
199
201
  scanner->is_shell_scape = buffer[2];
200
202
  scanner->string_delimiter = buffer[3];
203
+ scanner->generate_entry_delimiter = buffer[4];
201
204
  }
202
205
  }
203
206
 
@@ -209,7 +212,12 @@ static inline void consume_comment_line(TSLexer* lexer)
209
212
  }
210
213
 
211
214
  // NOLINTNEXTLINE(*misc-no-recursion)
212
- static bool scan_comment(TSLexer* lexer, bool entry_delimiter, bool ctranspose)
215
+ static bool scan_comment(
216
+ Scanner* scanner,
217
+ TSLexer* lexer,
218
+ bool entry_delimiter,
219
+ bool ctranspose,
220
+ int skipped)
213
221
  {
214
222
  lexer->mark_end(lexer);
215
223
 
@@ -222,34 +230,61 @@ static bool scan_comment(TSLexer* lexer, bool entry_delimiter, bool ctranspose)
222
230
  // ended up being handled here. It allows the correct detection of numbers
223
231
  // like .5 inside matrices/cells: [0 .5].
224
232
  if (entry_delimiter && !percent && !line_continuation) {
225
- lexer->result_symbol = ENTRY_DELIMITER;
226
- return iswdigit(lexer->lookahead);
233
+ if (iswdigit(lexer->lookahead)) {
234
+ lexer->result_symbol = ENTRY_DELIMITER;
235
+ return true;
236
+ }
237
+ if (lexer->lookahead == '\'') {
238
+ advance(lexer);
239
+ lexer->result_symbol = CTRANSPOSE;
240
+ lexer->mark_end(lexer);
241
+ return skipped == 0;
242
+ }
243
+ return false;
227
244
  }
245
+
228
246
  // We are inside a matrix/cell row and there is a line continuation, like this:
229
247
  // a = { 1 ...
230
248
  // 2 ...
231
249
  // }
232
-
233
250
  if (entry_delimiter && line_continuation) {
251
+ consume_comment_line(lexer);
234
252
  consume_whitespaces(lexer);
235
- if (lexer->lookahead == '.') {
236
- lexer->mark_end(lexer);
253
+
254
+ lexer->mark_end(lexer);
255
+ lexer->result_symbol = LINE_CONTINUATION;
256
+
257
+ const bool is_alpha = iswalpha(lexer->lookahead);
258
+ const bool is_digit = iswdigit(lexer->lookahead);
259
+ const bool is_meta = lexer->lookahead == '?' || lexer->lookahead == '@';
260
+ const bool is_quote = lexer->lookahead == '\'' || lexer->lookahead == '"';
261
+ const bool is_container = lexer->lookahead == '{' || lexer->lookahead == '['
262
+ || lexer->lookahead == '(';
263
+
264
+ if (lexer->lookahead == '~') {
237
265
  advance(lexer);
238
- lexer->result_symbol = iswdigit(lexer->lookahead) ? ENTRY_DELIMITER : LINE_CONTINUATION;
239
- } else if (iswdigit(lexer->lookahead) || lexer->lookahead == '\'' || lexer->lookahead == '"') {
240
- lexer->result_symbol = ENTRY_DELIMITER;
241
- } else {
242
- lexer->result_symbol = LINE_CONTINUATION;
243
- lexer->mark_end(lexer);
266
+ scanner->generate_entry_delimiter = lexer->lookahead != '=';
267
+ } else if (lexer->lookahead == '+' || lexer->lookahead == '-') {
268
+ advance(lexer);
269
+ scanner->generate_entry_delimiter = lexer->lookahead != ' ';
270
+ } else if (lexer->lookahead == '.') {
271
+ advance(lexer);
272
+ scanner->generate_entry_delimiter = is_digit;
273
+ } else if (is_alpha || is_digit || is_quote || is_container || is_meta) {
274
+ scanner->generate_entry_delimiter = true;
244
275
  }
245
276
  return true;
246
277
  }
247
278
 
248
279
  if (block) {
280
+ if (skipped & 2) {
281
+ return false;
282
+ }
283
+
284
+ // If it has things on the same line, it's not a block, just a comment
249
285
  while (!lexer->eof(lexer) && iswspace_matlab(lexer->lookahead)) {
250
286
  advance(lexer);
251
287
  }
252
-
253
288
  if (!consume_char('\n', lexer) && !consume_char('\r', lexer)) {
254
289
  consume_comment_line(lexer);
255
290
  lexer->result_symbol = COMMENT;
@@ -257,26 +292,30 @@ static bool scan_comment(TSLexer* lexer, bool entry_delimiter, bool ctranspose)
257
292
  return true;
258
293
  }
259
294
 
260
- // Empty block comment
261
- if (lexer->lookahead == '%' && consume_char('%', lexer) && consume_char('}', lexer)) {
262
- lexer->result_symbol = COMMENT;
263
- lexer->mark_end(lexer);
264
- return true;
265
- }
266
-
295
+ // Otherwise, find the matching closing block
296
+ int level = 1;
267
297
  while (!lexer->eof(lexer)) {
268
- consume_comment_line(lexer);
269
- advance(lexer);
270
298
  consume_whitespaces(lexer);
271
-
272
- if (consume_char('%', lexer) && consume_char('}', lexer)) {
273
- lexer->result_symbol = COMMENT;
274
- lexer->mark_end(lexer);
275
- return true;
299
+ if (consume_char('%', lexer)) {
300
+ if (consume_char('{', lexer) && (consume_whitespaces(lexer) & 2)) {
301
+ level++;
302
+ } else if (consume_char('}', lexer)) {
303
+ lexer->mark_end(lexer);
304
+ if (consume_whitespaces(lexer) & 2) {
305
+ level--;
306
+ }
307
+ }
308
+ if (level == 0) {
309
+ break;
310
+ }
311
+ continue;
276
312
  }
313
+ consume_comment_line(lexer);
314
+ lexer->mark_end(lexer);
277
315
  }
278
316
 
279
- return false;
317
+ lexer->result_symbol = COMMENT;
318
+ return true;
280
319
  }
281
320
 
282
321
  if (percent || line_continuation) {
@@ -300,7 +339,7 @@ static bool scan_comment(TSLexer* lexer, bool entry_delimiter, bool ctranspose)
300
339
  }
301
340
 
302
341
  if (lexer->lookahead == '%') {
303
- return scan_comment(lexer, false, false);
342
+ return scan_comment(scanner, lexer, false, false, 0);
304
343
  }
305
344
 
306
345
  return true;
@@ -357,6 +396,9 @@ static bool scan_command(Scanner* scanner, TSLexer* lexer, const bool* valid_sym
357
396
  // If it is a keyword, yield to the internal scanner
358
397
  for (size_t i = 0; i < keywords_size; i++) {
359
398
  if (strcmp(keywords[i], buffer) == 0) {
399
+ if (strcmp("enumeration", buffer) == 0) {
400
+ goto check_enumeration;
401
+ }
360
402
  return false;
361
403
  }
362
404
  }
@@ -378,6 +420,18 @@ static bool scan_command(Scanner* scanner, TSLexer* lexer, const bool* valid_sym
378
420
  }
379
421
  goto skip_command_check;
380
422
 
423
+ check_enumeration: {
424
+ const int skipped = consume_whitespaces(lexer);
425
+ if (skipped & 2) {
426
+ // enumeration can be a function
427
+ if (lexer->lookahead == '(') {
428
+ lexer->result_symbol = IDENTIFIER;
429
+ return true;
430
+ }
431
+ }
432
+ return false;
433
+ }
434
+
381
435
  check_command_for_argument:
382
436
  // If this is a keyword-command, check if it has an argument.
383
437
  // If it has no arguments, this is a keyword, not a command.
@@ -392,7 +446,6 @@ check_command_for_argument:
392
446
  return false;
393
447
 
394
448
  skip_command_check:
395
-
396
449
  // First case: found an end-of-line already, so this is a command for sure.
397
450
  // example:
398
451
  // pwd
@@ -412,7 +465,13 @@ skip_command_check:
412
465
 
413
466
  // If followed by a line continuation, look after it
414
467
  const int skipped = consume_whitespaces(lexer);
415
- if (skipped & 4) { // Command followed by spaces then newline
468
+ if (skipped & 2) {
469
+ // `catch e `
470
+ if (valid_symbols[CATCH_IDENTIFIER]) {
471
+ lexer->result_symbol = CATCH_IDENTIFIER;
472
+ return true;
473
+ }
474
+ // Command followed by spaces then newline
416
475
  scanner->is_inside_command = false;
417
476
  lexer->result_symbol = COMMAND_NAME;
418
477
  return true;
@@ -435,7 +494,11 @@ skip_command_check:
435
494
 
436
495
  // Check for end-of-line again, since it may be that the user just put a
437
496
  // space at the end, like `pwd ;`
438
- if (is_eol(lexer->lookahead)) {
497
+ if (is_eol(lexer->lookahead) || lexer->lookahead == '%') {
498
+ if (valid_symbols[CATCH_IDENTIFIER] && (skipped & 4) == 0) {
499
+ lexer->result_symbol = CATCH_IDENTIFIER;
500
+ return true;
501
+ }
439
502
  scanner->is_inside_command = true;
440
503
  return true;
441
504
  }
@@ -622,7 +685,7 @@ static bool scan_command_argument(Scanner* scanner, TSLexer* lexer)
622
685
  lexer->mark_end(lexer);
623
686
  return true;
624
687
  }
625
- return scan_comment(lexer, false, false);
688
+ return scan_comment(scanner, lexer, false, false, 0);
626
689
  }
627
690
 
628
691
  // Line continuation
@@ -1037,12 +1100,21 @@ static bool scan_transpose(TSLexer* lexer)
1037
1100
  bool tree_sitter_matlab_external_scanner_scan(void* payload, TSLexer* lexer, const bool* valid_symbols)
1038
1101
  {
1039
1102
  Scanner* scanner = (Scanner*) payload;
1103
+
1104
+ if (scanner->generate_entry_delimiter) {
1105
+ scanner->generate_entry_delimiter = false;
1106
+ lexer->mark_end(lexer);
1107
+ lexer->result_symbol = ENTRY_DELIMITER;
1108
+ return true;
1109
+ }
1110
+
1040
1111
  if (scanner->string_delimiter == 0) {
1041
1112
  int skipped = skip_whitespaces(lexer);
1042
1113
 
1043
1114
  if ((scanner->line_continuation || !scanner->is_inside_command) && valid_symbols[COMMENT]
1044
1115
  && (lexer->lookahead == '%' || ((skipped & 2) == 0 && lexer->lookahead == '.'))) {
1045
- return scan_comment(lexer, valid_symbols[ENTRY_DELIMITER], valid_symbols[CTRANSPOSE]);
1116
+ return scan_comment(
1117
+ scanner, lexer, valid_symbols[ENTRY_DELIMITER], valid_symbols[CTRANSPOSE], skipped);
1046
1118
  }
1047
1119
 
1048
1120
  if (!scanner->is_inside_command) {
package/tree-sitter.json CHANGED
@@ -12,7 +12,7 @@
12
12
  }
13
13
  ],
14
14
  "metadata": {
15
- "version": "1.2.4",
15
+ "version": "1.2.13",
16
16
  "license": "MIT",
17
17
  "description": "MATLAB tree-sitter parser",
18
18
  "authors": [