@ast-grep/lang-scala 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +24 -0
- package/index.d.ts +10 -0
- package/index.js +9 -0
- package/package.json +46 -0
- package/postinstall.js +4 -0
- package/prebuilds/prebuild-Linux-X64/parser.so +0 -0
- package/prebuilds/prebuild-Windows-X64/parser.so +0 -0
- package/prebuilds/prebuild-macOS-ARM64/parser.so +0 -0
- package/src/grammar.json +8533 -0
- package/src/node-types.json +8285 -0
- package/src/parser.c +887799 -0
- package/src/scanner.c +420 -0
- package/src/tree_sitter/alloc.h +54 -0
- package/src/tree_sitter/array.h +291 -0
- package/src/tree_sitter/parser.h +266 -0
- package/type.d.ts +7679 -0
package/src/scanner.c
ADDED
|
@@ -0,0 +1,420 @@
|
|
|
1
|
+
#include "tree_sitter/alloc.h"
|
|
2
|
+
#include "tree_sitter/array.h"
|
|
3
|
+
#include "tree_sitter/parser.h"
|
|
4
|
+
|
|
5
|
+
#include <wctype.h>
|
|
6
|
+
|
|
7
|
+
// #define DEBUG
|
|
8
|
+
|
|
9
|
+
#ifdef DEBUG
|
|
10
|
+
#define LOG(...) fprintf(stderr, __VA_ARGS__)
|
|
11
|
+
#else
|
|
12
|
+
#define LOG(...)
|
|
13
|
+
#endif
|
|
14
|
+
|
|
15
|
+
// Token kinds handled by this external scanner. Each enumerator is used both
// as an index into the `valid_symbols` array passed to the scan function and
// as the value stored in `lexer->result_symbol`.
// NOTE(review): the order presumably has to mirror the grammar's `externals`
// list — confirm against grammar.js before reordering.
enum TokenType {
  AUTOMATIC_SEMICOLON = 0,               // statement separator inferred from a newline
  INDENT,                                // indentation grew after a newline
  INTERPOLATED_STRING_MIDDLE,            // single-line interpolated text up to a '$'
  INTERPOLATED_STRING_END,               // single-line interpolated text up to the closing quote
  INTERPOLATED_MULTILINE_STRING_MIDDLE,  // triple-quoted interpolated text up to a '$'
  INTERPOLATED_MULTILINE_STRING_END,     // triple-quoted interpolated text up to the closing quotes
  OUTDENT,                               // one indentation level was closed
  SIMPLE_MULTILINE_STRING,               // complete triple-quoted string
  SIMPLE_STRING,                         // complete single-line string
  // The remaining entries are only ever consulted through `valid_symbols`:
  // when one of them is valid and the matching keyword follows a newline,
  // no AUTOMATIC_SEMICOLON is produced.
  ELSE,
  CATCH,
  FINALLY,
  EXTENDS,
  DERIVES,
  WITH,
};
|
|
32
|
+
|
|
33
|
+
typedef struct {
|
|
34
|
+
Array(int16_t) indents;
|
|
35
|
+
int16_t last_indentation_size;
|
|
36
|
+
int16_t last_newline_count;
|
|
37
|
+
int16_t last_column;
|
|
38
|
+
} Scanner;
|
|
39
|
+
|
|
40
|
+
void *tree_sitter_scala_external_scanner_create() {
|
|
41
|
+
Scanner *scanner = ts_calloc(1, sizeof(Scanner));
|
|
42
|
+
array_init(&scanner->indents);
|
|
43
|
+
scanner->last_indentation_size = -1;
|
|
44
|
+
scanner->last_column = -1;
|
|
45
|
+
return scanner;
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
void tree_sitter_scala_external_scanner_destroy(void *payload) {
|
|
49
|
+
Scanner *scanner = payload;
|
|
50
|
+
array_delete(&scanner->indents);
|
|
51
|
+
ts_free(scanner);
|
|
52
|
+
}
|
|
53
|
+
|
|
54
|
+
unsigned tree_sitter_scala_external_scanner_serialize(void *payload, char *buffer) {
|
|
55
|
+
Scanner *scanner = (Scanner*)payload;
|
|
56
|
+
|
|
57
|
+
if ((scanner->indents.size + 3) * sizeof(int16_t) > TREE_SITTER_SERIALIZATION_BUFFER_SIZE) {
|
|
58
|
+
return 0;
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
size_t size = 0;
|
|
62
|
+
memcpy(buffer + size, &scanner->last_indentation_size, sizeof(int16_t));
|
|
63
|
+
size += sizeof(int16_t);
|
|
64
|
+
memcpy(buffer + size, &scanner->last_newline_count, sizeof(int16_t));
|
|
65
|
+
size += sizeof(int16_t);
|
|
66
|
+
memcpy(buffer + size, &scanner->last_column, sizeof(int16_t));
|
|
67
|
+
size += sizeof(int16_t);
|
|
68
|
+
|
|
69
|
+
for (unsigned i = 0; i < scanner->indents.size; i++) {
|
|
70
|
+
memcpy(buffer + size, &scanner->indents.contents[i], sizeof(int16_t));
|
|
71
|
+
size += sizeof(int16_t);
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
return size;
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
void tree_sitter_scala_external_scanner_deserialize(void *payload, const char *buffer,
|
|
78
|
+
unsigned length) {
|
|
79
|
+
Scanner *scanner = (Scanner*)payload;
|
|
80
|
+
array_clear(&scanner->indents);
|
|
81
|
+
scanner->last_indentation_size = -1;
|
|
82
|
+
scanner->last_column = -1;
|
|
83
|
+
scanner->last_newline_count = 0;
|
|
84
|
+
|
|
85
|
+
if (length == 0) {
|
|
86
|
+
return;
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
size_t size = 0;
|
|
90
|
+
|
|
91
|
+
scanner->last_indentation_size = *(int16_t *)&buffer[size];
|
|
92
|
+
size += sizeof(int16_t);
|
|
93
|
+
scanner->last_newline_count = *(int16_t *)&buffer[size];
|
|
94
|
+
size += sizeof(int16_t);
|
|
95
|
+
scanner->last_column = *(int16_t *)&buffer[size];
|
|
96
|
+
size += sizeof(int16_t);
|
|
97
|
+
|
|
98
|
+
while (size < length) {
|
|
99
|
+
array_push(&scanner->indents, *(int16_t *)&buffer[size]);
|
|
100
|
+
size += sizeof(int16_t);
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
assert(size == length);
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
// Move past the lookahead character by calling the lexer's `advance` callback
// with its second argument set to false (in the tree-sitter lexer API that
// flag means "skip", so the character stays part of the current token).
static inline void advance(TSLexer *lexer) {
  lexer->advance(lexer, false);
}
|
|
107
|
+
|
|
108
|
+
// Move past the lookahead character by calling the lexer's `advance` callback
// with its second argument set to true (in the tree-sitter lexer API that
// flag means "skip", so the character is excluded from the current token).
static inline void skip(TSLexer *lexer) {
  lexer->advance(lexer, true);
}
|
|
109
|
+
|
|
110
|
+
// Scan the remainder of a string literal whose opening quote(s) have already
// been consumed.
//
// Parameters:
//   is_multiline       true for a triple-quoted string, which only ends after
//                      a run of at least three '"' that is followed by a
//                      different character.
//   has_interpolation  true for an interpolated string; scanning then stops
//                      in front of every '$' and reports a *_MIDDLE token.
//
// On success `lexer->result_symbol` is set to the matching token kind and
// true is returned. false is returned at end of input, and at a newline
// inside a single-line string.
static bool scan_string_content(TSLexer *lexer, bool is_multiline, bool has_interpolation) {
  unsigned closing_quote_count = 0;
  for (;;) {
    if (lexer->lookahead == '"') {
      advance(lexer);
      closing_quote_count++;
      if (!is_multiline) {
        lexer->result_symbol = has_interpolation ? INTERPOLATED_STRING_END : SIMPLE_STRING;
        return true;
      }
      // Keep consuming while more quotes follow so that a run of four or
      // more quotes is taken whole.
      if (closing_quote_count >= 3 && lexer->lookahead != '"') {
        lexer->result_symbol = has_interpolation ? INTERPOLATED_MULTILINE_STRING_END : SIMPLE_MULTILINE_STRING;
        return true;
      }
    } else if (lexer->lookahead == '$') {
      if (is_multiline && has_interpolation) {
        lexer->result_symbol = INTERPOLATED_MULTILINE_STRING_MIDDLE;
        return true;
      }
      if (has_interpolation) {
        lexer->result_symbol = INTERPOLATED_STRING_MIDDLE;
        return true;
      }
      // A plain '$' is ordinary content and therefore breaks a run of quotes.
      // Without this reset, `""$"` inside a triple-quoted string was counted
      // as three consecutive quotes and ended the literal too early.
      closing_quote_count = 0;
      advance(lexer);
    } else {
      closing_quote_count = 0;
      if (lexer->lookahead == '\\') {
        // Consume the backslash together with the character it precedes.
        advance(lexer);
        if (!lexer->eof(lexer)) {
          advance(lexer);
        }
      } else if (lexer->lookahead == '\n') {
        if (is_multiline) {
          advance(lexer);
        } else {
          return false;
        }
      } else if (lexer->eof(lexer)) {
        return false;
      } else {
        advance(lexer);
      }
    }
  }
}
|
|
155
|
+
|
|
156
|
+
// Report whether the input at the current position begins a `//` or `/*`
// comment. Comments should not affect indentation, so callers use this to
// back out of INDENT/OUTDENT decisions.
//
// The token end is marked before looking ahead; when the lookahead is '/',
// that slash is consumed in order to inspect the character after it.
static bool detect_comment_start(TSLexer *lexer) {
  lexer->mark_end(lexer);

  if (lexer->lookahead != '/') {
    return false;
  }

  advance(lexer);
  return lexer->lookahead == '/' || lexer->lookahead == '*';
}
|
|
167
|
+
|
|
168
|
+
// Check whether the upcoming input is exactly the keyword `word`.
//
// Characters are consumed while they match; on the first mismatch false is
// returned with the already-matched prefix consumed. After a full match the
// following character must not continue an identifier: letters and digits
// (iswalnum) as before, and additionally '_' and '$', which are legal Scala
// identifier characters. Without that, an identifier such as `catch_all` or
// `else$1` was mistaken for the keyword.
//
// `word` is walked by pointer, so its length is not limited by an 8-bit index.
static bool scan_word(TSLexer *lexer, const char *const word) {
  for (const char *expected = word; *expected != '\0'; expected++) {
    if (lexer->lookahead != *expected) {
      return false;
    }
    advance(lexer);
  }

  if (lexer->lookahead == '_' || lexer->lookahead == '$') {
    return false;
  }
  return !iswalnum(lexer->lookahead);
}
|
|
177
|
+
|
|
178
|
+
// Print the indent stack (its size, then every entry from bottom to top)
// through LOG. LOG expands to nothing unless DEBUG is defined, in which case
// it is fprintf(stderr, ...).
//
// The stack size is unsigned, so it is printed with %u through an explicit
// cast; the previous %d did not match the argument type. The entries are
// int16_t, which promote to int and match %d.
static inline void debug_indents(Scanner *scanner) {
  LOG(" indents(%u): ", (unsigned)scanner->indents.size);
  for (unsigned i = 0; i < scanner->indents.size; i++) {
    LOG("%d ", scanner->indents.contents[i]);
  }
  LOG("\n");
}
|
|
185
|
+
|
|
186
|
+
// Entry point of the external scanner.
//
// `payload` is the Scanner state, `valid_symbols` is indexed by TokenType and
// says which tokens the caller will accept. On success the token kind is
// stored in `lexer->result_symbol` and true is returned; false means no
// external token was recognised here.
//
// Order of attempts:
//   1. OUTDENT that needs no newline: at end of input, in front of a closing
//      bracket, or as a follow-up to an earlier OUTDENT.
//   2. INDENT after a newline with greater indentation.
//   3. OUTDENT after a newline with smaller indentation (records the position
//      so step 1 can emit further OUTDENTs on later calls).
//   4. AUTOMATIC_SEMICOLON after a newline, unless the next token shows the
//      expression continues.
//   5. String tokens.
//
// Changes against the previous revision are limited to dead code: the
// redundant `if (newline_count > 1) return true;` in front of an unconditional
// `return true`, and a newline counter that was incremented after its last
// use. The five identical keyword checks are expressed as one ordered table.
bool tree_sitter_scala_external_scanner_scan(void *payload, TSLexer *lexer,
                                             const bool *valid_symbols) {
  Scanner *scanner = (Scanner *)payload;
  int16_t prev = scanner->indents.size > 0 ? *array_back(&scanner->indents) : -1;
  int16_t newline_count = 0;
  int16_t indentation_size = 0;

  // Skip leading whitespace, counting newlines and the width of the last line's
  // indentation (every non-newline whitespace character counts as one).
  while (iswspace(lexer->lookahead)) {
    if (lexer->lookahead == '\n') {
      newline_count++;
      indentation_size = 0;
    } else {
      indentation_size++;
    }
    skip(lexer);
  }

  // Before advancing the lexer, check if we can double outdent
  if (
    valid_symbols[OUTDENT] &&
    (
      lexer->lookahead == 0 ||
      (
        prev != -1 &&
        (
          lexer->lookahead == ')' ||
          lexer->lookahead == ']' ||
          lexer->lookahead == '}'
        )
      ) ||
      (
        scanner->last_indentation_size != -1 &&
        prev != -1 &&
        scanner->last_indentation_size < prev
      )
    )
  ) {
    if (scanner->indents.size > 0) {
      array_pop(&scanner->indents);
    }
    LOG(" pop\n");
    LOG(" OUTDENT\n");
    lexer->result_symbol = OUTDENT;
    return true;
  }
  scanner->last_indentation_size = -1;

  if (
    valid_symbols[INDENT] &&
    newline_count > 0 &&
    (
      scanner->indents.size == 0 ||
      indentation_size > *array_back(&scanner->indents)
    )
  ) {
    if (detect_comment_start(lexer)) {
      return false;
    }
    array_push(&scanner->indents, indentation_size);
    lexer->result_symbol = INDENT;
    LOG(" INDENT\n");
    return true;
  }

  // This saves the indentation_size and newline_count so it can be used
  // in subsequent calls for multiple outdent or autosemicolon.
  if (
    valid_symbols[OUTDENT] &&
    (
      lexer->lookahead == 0 ||
      (
        newline_count > 0 &&
        prev != -1 &&
        indentation_size < prev
      )
    )
  ) {
    if (scanner->indents.size > 0) {
      array_pop(&scanner->indents);
    }
    LOG(" pop\n");
    LOG(" OUTDENT\n");
    lexer->result_symbol = OUTDENT;
    lexer->mark_end(lexer);
    if (detect_comment_start(lexer)) {
      return false;
    }
    scanner->last_indentation_size = indentation_size;
    scanner->last_newline_count = newline_count;
    if (lexer->eof(lexer)) {
      scanner->last_column = -1;
    } else {
      scanner->last_column = (int16_t)lexer->get_column(lexer);
    }
    return true;
  }

  // Recover newline_count from the outdent reset.
  // get_column is only queried when the first alternative is false and the
  // input is not at its end, exactly as before.
  bool is_eof = lexer->eof(lexer);
  if (
    (
      scanner->last_newline_count > 0 &&
      (is_eof && scanner->last_column == -1)
    ) ||
    (!is_eof && lexer->get_column(lexer) == (uint32_t)scanner->last_column)
  ) {
    newline_count += scanner->last_newline_count;
  }
  scanner->last_newline_count = 0;

  if (valid_symbols[AUTOMATIC_SEMICOLON] && newline_count > 0) {
    // AUTOMATIC_SEMICOLON should not be issued in the middle of expressions
    // Thus, we exit this branch when encountering comments, else/catch clauses, etc.

    lexer->mark_end(lexer);
    lexer->result_symbol = AUTOMATIC_SEMICOLON;

    // Probably, a multi-line field expression, e.g.
    // a
    //   .b
    //   .c
    if (lexer->lookahead == '.') {
      return false;
    }

    // Single-line and multi-line comments
    if (lexer->lookahead == '/') {
      advance(lexer);
      if (lexer->lookahead == '/') {
        return false;
      }
      if (lexer->lookahead == '*') {
        advance(lexer);
        // Run to the closing "*/" (or to end of input).
        while (!lexer->eof(lexer)) {
          if (lexer->lookahead == '*') {
            advance(lexer);
            if (lexer->lookahead == '/') {
              advance(lexer);
              break;
            }
          } else {
            advance(lexer);
          }
        }
        while (iswspace(lexer->lookahead)) {
          if (lexer->lookahead == '\n' || lexer->lookahead == '\r') {
            return false;
          }
          skip(lexer);
        }
        // If some code is present at the same line after comment end,
        // we should still produce AUTOMATIC_SEMICOLON, e.g. in
        // val a = 1
        // /* comment */ val b = 2
        return true;
      }
    }

    // When `else` is acceptable the decision rests on that keyword alone.
    if (valid_symbols[ELSE]) {
      return !scan_word(lexer, "else");
    }

    // Keywords that continue the previous construct; checked in this order.
    static const struct {
      enum TokenType symbol;
      const char *word;
    } continuation_keywords[] = {
      {CATCH, "catch"},
      {FINALLY, "finally"},
      {EXTENDS, "extends"},
      {WITH, "with"},
      {DERIVES, "derives"},
    };
    const size_t keyword_count =
      sizeof continuation_keywords / sizeof continuation_keywords[0];
    for (size_t i = 0; i < keyword_count; i++) {
      if (valid_symbols[continuation_keywords[i].symbol] &&
          scan_word(lexer, continuation_keywords[i].word)) {
        return false;
      }
    }

    return true;
  }

  while (iswspace(lexer->lookahead)) {
    skip(lexer);
  }

  if (valid_symbols[SIMPLE_STRING] && lexer->lookahead == '"') {
    advance(lexer);

    bool is_multiline = false;
    if (lexer->lookahead == '"') {
      advance(lexer);
      if (lexer->lookahead == '"') {
        advance(lexer);
        is_multiline = true;
      } else {
        // Exactly two quotes: the empty string.
        lexer->result_symbol = SIMPLE_STRING;
        return true;
      }
    }

    return scan_string_content(lexer, is_multiline, false);
  }

  if (valid_symbols[INTERPOLATED_STRING_MIDDLE]) {
    return scan_string_content(lexer, false, true);
  }

  if (valid_symbols[INTERPOLATED_MULTILINE_STRING_MIDDLE]) {
    return scan_string_content(lexer, true, true);
  }

  return false;
}
|
|
419
|
+
|
|
420
|
+
//
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#if !defined(TREE_SITTER_ALLOC_H_)
#define TREE_SITTER_ALLOC_H_

#if defined(__cplusplus)
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// Allocation entry points used throughout the scanner: ts_malloc, ts_calloc,
// ts_realloc and ts_free. Each one may be pre-defined by the client before
// this header is included; any that is not gets a default below.
#if defined(TREE_SITTER_REUSE_ALLOCATOR)

// Defaults forward to function pointers that are defined elsewhere.
extern void *(*ts_current_malloc)(size_t size);
extern void *(*ts_current_calloc)(size_t count, size_t size);
extern void *(*ts_current_realloc)(void *ptr, size_t size);
extern void (*ts_current_free)(void *ptr);

#if !defined(ts_malloc)
#define ts_malloc ts_current_malloc
#endif
#if !defined(ts_calloc)
#define ts_calloc ts_current_calloc
#endif
#if !defined(ts_realloc)
#define ts_realloc ts_current_realloc
#endif
#if !defined(ts_free)
#define ts_free ts_current_free
#endif

#else

// Defaults forward to the C standard library allocator.
#if !defined(ts_malloc)
#define ts_malloc malloc
#endif
#if !defined(ts_calloc)
#define ts_calloc calloc
#endif
#if !defined(ts_realloc)
#define ts_realloc realloc
#endif
#if !defined(ts_free)
#define ts_free free
#endif

#endif

#if defined(__cplusplus)
}
#endif

#endif // TREE_SITTER_ALLOC_H_
|