@prosdevlab/dev-agent 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. package/LICENSE +21 -0
  2. package/README.md +138 -0
  3. package/dist/cli.js +74721 -0
  4. package/dist/cli.js.map +1 -0
  5. package/dist/mcp.js +61445 -0
  6. package/dist/mcp.js.map +1 -0
  7. package/dist/vendor/web-tree-sitter/lib/alloc.c +48 -0
  8. package/dist/vendor/web-tree-sitter/lib/alloc.h +41 -0
  9. package/dist/vendor/web-tree-sitter/lib/array.h +291 -0
  10. package/dist/vendor/web-tree-sitter/lib/atomic.h +68 -0
  11. package/dist/vendor/web-tree-sitter/lib/clock.h +146 -0
  12. package/dist/vendor/web-tree-sitter/lib/error_costs.h +11 -0
  13. package/dist/vendor/web-tree-sitter/lib/get_changed_ranges.c +523 -0
  14. package/dist/vendor/web-tree-sitter/lib/get_changed_ranges.h +36 -0
  15. package/dist/vendor/web-tree-sitter/lib/host.h +21 -0
  16. package/dist/vendor/web-tree-sitter/lib/language.c +293 -0
  17. package/dist/vendor/web-tree-sitter/lib/language.h +293 -0
  18. package/dist/vendor/web-tree-sitter/lib/length.h +52 -0
  19. package/dist/vendor/web-tree-sitter/lib/lexer.c +483 -0
  20. package/dist/vendor/web-tree-sitter/lib/lexer.h +54 -0
  21. package/dist/vendor/web-tree-sitter/lib/lib.c +12 -0
  22. package/dist/vendor/web-tree-sitter/lib/node.c +875 -0
  23. package/dist/vendor/web-tree-sitter/lib/parser.c +2297 -0
  24. package/dist/vendor/web-tree-sitter/lib/parser.h +286 -0
  25. package/dist/vendor/web-tree-sitter/lib/point.h +48 -0
  26. package/dist/vendor/web-tree-sitter/lib/query.c +4347 -0
  27. package/dist/vendor/web-tree-sitter/lib/reduce_action.h +34 -0
  28. package/dist/vendor/web-tree-sitter/lib/reusable_node.h +95 -0
  29. package/dist/vendor/web-tree-sitter/lib/stack.c +912 -0
  30. package/dist/vendor/web-tree-sitter/lib/stack.h +133 -0
  31. package/dist/vendor/web-tree-sitter/lib/subtree.c +1034 -0
  32. package/dist/vendor/web-tree-sitter/lib/subtree.h +399 -0
  33. package/dist/vendor/web-tree-sitter/lib/tree-sitter.c +987 -0
  34. package/dist/vendor/web-tree-sitter/lib/tree-sitter.cjs +2988 -0
  35. package/dist/vendor/web-tree-sitter/lib/tree-sitter.wasm +0 -0
  36. package/dist/vendor/web-tree-sitter/lib/tree-sitter.wasm.map +1 -0
  37. package/dist/vendor/web-tree-sitter/lib/tree.c +170 -0
  38. package/dist/vendor/web-tree-sitter/lib/tree.h +31 -0
  39. package/dist/vendor/web-tree-sitter/lib/tree_cursor.c +716 -0
  40. package/dist/vendor/web-tree-sitter/lib/tree_cursor.h +48 -0
  41. package/dist/vendor/web-tree-sitter/lib/ts_assert.h +11 -0
  42. package/dist/vendor/web-tree-sitter/lib/unicode.h +75 -0
  43. package/dist/vendor/web-tree-sitter/lib/wasm_store.c +1937 -0
  44. package/dist/vendor/web-tree-sitter/lib/wasm_store.h +31 -0
  45. package/dist/vendor/web-tree-sitter/package.json +98 -0
  46. package/dist/vendor/web-tree-sitter/tree-sitter.cjs +4031 -0
  47. package/dist/vendor/web-tree-sitter/tree-sitter.wasm +0 -0
  48. package/dist/wasm/tree-sitter-go.wasm +0 -0
  49. package/dist/wasm/tree-sitter.wasm +0 -0
  50. package/package.json +65 -0
@@ -0,0 +1,2297 @@
1
+ #include <time.h>
2
+ #include <stdio.h>
3
+ #include <limits.h>
4
+ #include <stdbool.h>
5
+ #include <inttypes.h>
6
+ #include "tree_sitter/api.h"
7
+ #include "./alloc.h"
8
+ #include "./array.h"
9
+ #include "./atomic.h"
10
+ #include "./clock.h"
11
+ #include "./error_costs.h"
12
+ #include "./get_changed_ranges.h"
13
+ #include "./language.h"
14
+ #include "./length.h"
15
+ #include "./lexer.h"
16
+ #include "./reduce_action.h"
17
+ #include "./reusable_node.h"
18
+ #include "./stack.h"
19
+ #include "./subtree.h"
20
+ #include "./tree.h"
21
+ #include "./ts_assert.h"
22
+ #include "./wasm_store.h"
23
+
24
// Format a parse-log message into the lexer's debug buffer and emit it through
// ts_parser__log(). Nothing happens unless a logger callback or a dot-graph
// file is configured. A parser pointer named `self` must be in scope at the
// call site.
//
// The body is wrapped in do { ... } while (0) so that `LOG(...);` expands to
// exactly one statement and remains correct as the unbraced body of an
// if/else.
#define LOG(...)                                                                              \
  do {                                                                                        \
    if (self->lexer.logger.log || self->dot_graph_file) {                                     \
      snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
      ts_parser__log(self);                                                                   \
    }                                                                                         \
  } while (0)
29
+
30
// Log the lookahead token that was just lexed: writes
// "lexed_lookahead sym:<name>, size:<size>" into the lexer's debug buffer,
// escaping tab, newline, vertical tab, form feed, carriage return and
// backslash in the symbol name, then emits it through ts_parser__log().
// Nothing happens unless a logger callback or a dot-graph file is configured.
// A parser pointer named `self` must be in scope at the call site.
//
// The copy loop stops while at least two bytes plus the terminator still fit,
// because an escaped character occupies two bytes. This keeps `off` strictly
// below the buffer size, so the final snprintf always receives a positive
// bound and the buffer is always NUL-terminated (an over-long name is
// truncated instead of overrunning the buffer).
//
// Wrapped in do { ... } while (0) so the macro is a single statement.
#define LOG_LOOKAHEAD(symbol_name, size)                                   \
  do {                                                                     \
    if (self->lexer.logger.log || self->dot_graph_file) {                  \
      char *buf = self->lexer.debug_buffer;                                \
      const char *symbol = (symbol_name);                                  \
      int off = snprintf(                                                  \
        buf,                                                               \
        TREE_SITTER_SERIALIZATION_BUFFER_SIZE,                             \
        "lexed_lookahead sym:"                                             \
      );                                                                   \
      for (                                                                \
        int i = 0;                                                         \
        symbol[i] != '\0'                                                  \
          && off < TREE_SITTER_SERIALIZATION_BUFFER_SIZE - 2;              \
        i++                                                                \
      ) {                                                                  \
        switch (symbol[i]) {                                               \
          case '\t': buf[off++] = '\\'; buf[off++] = 't'; break;           \
          case '\n': buf[off++] = '\\'; buf[off++] = 'n'; break;           \
          case '\v': buf[off++] = '\\'; buf[off++] = 'v'; break;           \
          case '\f': buf[off++] = '\\'; buf[off++] = 'f'; break;           \
          case '\r': buf[off++] = '\\'; buf[off++] = 'r'; break;           \
          case '\\': buf[off++] = '\\'; buf[off++] = '\\'; break;          \
          default:   buf[off++] = symbol[i]; break;                        \
        }                                                                  \
      }                                                                    \
      snprintf(                                                            \
        buf + off,                                                         \
        TREE_SITTER_SERIALIZATION_BUFFER_SIZE - off,                       \
        ", size:%u",                                                       \
        (size)                                                             \
      );                                                                   \
      ts_parser__log(self);                                                \
    }                                                                      \
  } while (0)
63
+
64
// When a dot-graph file is configured, print the current parse stack to it
// followed by a blank line. A parser pointer named `self` must be in scope.
//
// Wrapped in do { ... } while (0) so that `LOG_STACK();` is one statement and
// stays correct as the unbraced body of an if/else.
#define LOG_STACK()                                                                \
  do {                                                                             \
    if (self->dot_graph_file) {                                                    \
      ts_stack_print_dot_graph(self->stack, self->language, self->dot_graph_file); \
      fputs("\n\n", self->dot_graph_file);                                         \
    }                                                                              \
  } while (0)
69
+
70
// When a dot-graph file is configured, print `tree` to it followed by a
// newline. A parser pointer named `self` must be in scope.
//
// Wrapped in do { ... } while (0) so that `LOG_TREE(t);` is one statement and
// stays correct as the unbraced body of an if/else.
#define LOG_TREE(tree)                                                          \
  do {                                                                          \
    if (self->dot_graph_file) {                                                 \
      ts_subtree_print_dot_graph(tree, self->language, self->dot_graph_file);   \
      fputs("\n", self->dot_graph_file);                                        \
    }                                                                           \
  } while (0)
75
+
76
// Name of `symbol` in the parser's current language, as used in log messages. Requires a parser pointer `self` in scope.
+ #define SYM_NAME(symbol) ts_language_symbol_name(self->language, symbol)
77
+
78
// Name of the symbol at the root of `tree`; shorthand for SYM_NAME on the subtree's symbol.
+ #define TREE_NAME(tree) SYM_NAME(ts_subtree_symbol(tree))
79
+
80
// Parser tuning constants. Only MAX_COST_DIFFERENCE is referenced in the part
// of the file visible here; the others are presumably used further down.
+ static const unsigned MAX_VERSION_COUNT = 6;
81
+ static const unsigned MAX_VERSION_COUNT_OVERFLOW = 4;
82
+ static const unsigned MAX_SUMMARY_DEPTH = 16;
83
+ static const unsigned MAX_COST_DIFFERENCE = 18 * ERROR_COST_PER_SKIPPED_TREE; // threshold ts_parser__compare_versions uses to return "Take" instead of "Prefer"
84
+ static const unsigned OP_COUNT_PER_PARSER_TIMEOUT_CHECK = 100;
85
+
86
// Single-entry cache of a lexed token, read by ts_parser__get_cached_token and
// written by ts_parser__set_cached_token.
+ typedef struct {
87
+ Subtree token; // the cached token; a null ptr means the cache is empty
88
+ Subtree last_external_token; // external token whose scanner state must match for a cache hit
89
+ uint32_t byte_index; // byte position the cached token was stored for
90
+ } TokenCache;
91
+
92
// Parser state. Comments below describe only what the code visible in this
// part of the file does with each field; uncommented fields are not used here.
+ struct TSParser {
93
+ Lexer lexer; // lexing state; also owns the logger and the debug_buffer used for log text and scanner serialization
94
+ Stack *stack; // parse stack, addressed per StackVersion through ts_stack_* calls
95
+ SubtreePool tree_pool; // passed to subtree creation and release calls
96
+ const TSLanguage *language; // language tables, lex functions and external scanner callbacks
97
+ TSWasmStore *wasm_store; // used instead of direct callbacks when ts_language_is_wasm(language)
98
+ ReduceActionSet reduce_actions;
99
+ Subtree finished_tree; // its error cost is compared against candidates in ts_parser__better_version_exists
100
+ SubtreeArray trailing_extras;
101
+ SubtreeArray trailing_extras2;
102
+ SubtreeArray scratch_trees; // temporary child array used by ts_parser__select_children
103
+ TokenCache token_cache; // see TokenCache
104
+ ReusableNode reusable_node; // cursor consulted by ts_parser__reuse_node
105
+ void *external_scanner_payload; // value returned by the external scanner's create callback; reset to NULL on destroy
106
+ FILE *dot_graph_file; // when non-NULL, log messages and graphs are also written here
107
+ TSClock end_clock;
108
+ TSDuration timeout_duration;
109
+ unsigned accept_count;
110
+ unsigned operation_count;
111
+ const volatile size_t *cancellation_flag;
112
+ Subtree old_tree;
113
+ TSRangeArray included_range_differences; // queried by ts_parser__has_included_range_difference
114
+ TSParseOptions parse_options;
115
+ TSParseState parse_state;
116
+ unsigned included_range_difference_index; // start index passed to ts_range_array_intersects
117
+ bool has_scanner_error; // set when the wasm store reports an error after a scanner call
118
+ bool canceled_balancing;
119
+ bool has_error;
120
+ };
121
+
122
// Summary of one stack version used when ranking versions against each other
// (built by ts_parser__version_status, consumed by ts_parser__compare_versions).
+ typedef struct {
123
+ unsigned cost; // error cost of the version (plus a skipped-tree penalty when the version is paused)
124
+ unsigned node_count; // value of ts_stack_node_count_since_error for the version
125
+ int dynamic_precedence; // value of ts_stack_dynamic_precedence; used as the final tie-breaker
126
+ bool is_in_error; // true when the version is paused or sits in ERROR_STATE
127
+ } ErrorStatus;
128
+
129
// Result of ts_parser__compare_versions(a, b), where "left" is `a` and "right"
// is `b`. "Take" is the stronger verdict: ts_parser__better_version_exists
// accepts TakeRight unconditionally but PreferRight only when the two
// versions can be merged.
+ typedef enum {
130
+ ErrorComparisonTakeLeft,
131
+ ErrorComparisonPreferLeft,
132
+ ErrorComparisonNone, // equal cost and equal dynamic precedence
133
+ ErrorComparisonPreferRight,
134
+ ErrorComparisonTakeRight,
135
+ } ErrorComparison;
136
+
137
// In-memory input source consumed by ts_string_input_read.
+ typedef struct {
138
+ const char *string; // start of the text
139
+ uint32_t length; // number of bytes readable from `string`
140
+ } TSStringInput;
141
+
142
+ // StringInput
143
+
144
+ static const char *ts_string_input_read(
145
+ void *_self,
146
+ uint32_t byte,
147
+ TSPoint point,
148
+ uint32_t *length
149
+ ) {
150
+ (void)point;
151
+ TSStringInput *self = (TSStringInput *)_self;
152
+ if (byte >= self->length) {
153
+ *length = 0;
154
+ return "";
155
+ } else {
156
+ *length = self->length - byte;
157
+ return self->string + byte;
158
+ }
159
+ }
160
+
161
+ // Parser - Private
162
+
163
+ static void ts_parser__log(TSParser *self) {
164
+ if (self->lexer.logger.log) {
165
+ self->lexer.logger.log(
166
+ self->lexer.logger.payload,
167
+ TSLogTypeParse,
168
+ self->lexer.debug_buffer
169
+ );
170
+ }
171
+
172
+ if (self->dot_graph_file) {
173
+ fprintf(self->dot_graph_file, "graph {\nlabel=\"");
174
+ for (char *chr = &self->lexer.debug_buffer[0]; *chr != 0; chr++) {
175
+ if (*chr == '"' || *chr == '\\') fputc('\\', self->dot_graph_file);
176
+ fputc(*chr, self->dot_graph_file);
177
+ }
178
+ fprintf(self->dot_graph_file, "\"\n}\n\n");
179
+ }
180
+ }
181
+
182
+ static bool ts_parser__breakdown_top_of_stack(
183
+ TSParser *self,
184
+ StackVersion version
185
+ ) {
186
+ bool did_break_down = false;
187
+ bool pending = false;
188
+
189
+ do {
190
+ StackSliceArray pop = ts_stack_pop_pending(self->stack, version);
191
+ if (!pop.size) break;
192
+
193
+ did_break_down = true;
194
+ pending = false;
195
+ for (uint32_t i = 0; i < pop.size; i++) {
196
+ StackSlice slice = *array_get(&pop, i);
197
+ TSStateId state = ts_stack_state(self->stack, slice.version);
198
+ Subtree parent = *array_front(&slice.subtrees);
199
+
200
+ for (uint32_t j = 0, n = ts_subtree_child_count(parent); j < n; j++) {
201
+ Subtree child = ts_subtree_children(parent)[j];
202
+ pending = ts_subtree_child_count(child) > 0;
203
+
204
+ if (ts_subtree_is_error(child)) {
205
+ state = ERROR_STATE;
206
+ } else if (!ts_subtree_extra(child)) {
207
+ state = ts_language_next_state(self->language, state, ts_subtree_symbol(child));
208
+ }
209
+
210
+ ts_subtree_retain(child);
211
+ ts_stack_push(self->stack, slice.version, child, pending, state);
212
+ }
213
+
214
+ for (uint32_t j = 1; j < slice.subtrees.size; j++) {
215
+ Subtree tree = *array_get(&slice.subtrees, j);
216
+ ts_stack_push(self->stack, slice.version, tree, false, state);
217
+ }
218
+
219
+ ts_subtree_release(&self->tree_pool, parent);
220
+ array_delete(&slice.subtrees);
221
+
222
+ LOG("breakdown_top_of_stack tree:%s", TREE_NAME(parent));
223
+ LOG_STACK();
224
+ }
225
+ } while (pending);
226
+
227
+ return did_break_down;
228
+ }
229
+
230
+ static void ts_parser__breakdown_lookahead(
231
+ TSParser *self,
232
+ Subtree *lookahead,
233
+ TSStateId state,
234
+ ReusableNode *reusable_node
235
+ ) {
236
+ bool did_descend = false;
237
+ Subtree tree = reusable_node_tree(reusable_node);
238
+ while (ts_subtree_child_count(tree) > 0 && ts_subtree_parse_state(tree) != state) {
239
+ LOG("state_mismatch sym:%s", TREE_NAME(tree));
240
+ reusable_node_descend(reusable_node);
241
+ tree = reusable_node_tree(reusable_node);
242
+ did_descend = true;
243
+ }
244
+
245
+ if (did_descend) {
246
+ ts_subtree_release(&self->tree_pool, *lookahead);
247
+ *lookahead = tree;
248
+ ts_subtree_retain(*lookahead);
249
+ }
250
+ }
251
+
252
+ static ErrorComparison ts_parser__compare_versions(
253
+ TSParser *self,
254
+ ErrorStatus a,
255
+ ErrorStatus b
256
+ ) {
257
+ (void)self;
258
+ if (!a.is_in_error && b.is_in_error) {
259
+ if (a.cost < b.cost) {
260
+ return ErrorComparisonTakeLeft;
261
+ } else {
262
+ return ErrorComparisonPreferLeft;
263
+ }
264
+ }
265
+
266
+ if (a.is_in_error && !b.is_in_error) {
267
+ if (b.cost < a.cost) {
268
+ return ErrorComparisonTakeRight;
269
+ } else {
270
+ return ErrorComparisonPreferRight;
271
+ }
272
+ }
273
+
274
+ if (a.cost < b.cost) {
275
+ if ((b.cost - a.cost) * (1 + a.node_count) > MAX_COST_DIFFERENCE) {
276
+ return ErrorComparisonTakeLeft;
277
+ } else {
278
+ return ErrorComparisonPreferLeft;
279
+ }
280
+ }
281
+
282
+ if (b.cost < a.cost) {
283
+ if ((a.cost - b.cost) * (1 + b.node_count) > MAX_COST_DIFFERENCE) {
284
+ return ErrorComparisonTakeRight;
285
+ } else {
286
+ return ErrorComparisonPreferRight;
287
+ }
288
+ }
289
+
290
+ if (a.dynamic_precedence > b.dynamic_precedence) return ErrorComparisonPreferLeft;
291
+ if (b.dynamic_precedence > a.dynamic_precedence) return ErrorComparisonPreferRight;
292
+ return ErrorComparisonNone;
293
+ }
294
+
295
+ static ErrorStatus ts_parser__version_status(
296
+ TSParser *self,
297
+ StackVersion version
298
+ ) {
299
+ unsigned cost = ts_stack_error_cost(self->stack, version);
300
+ bool is_paused = ts_stack_is_paused(self->stack, version);
301
+ if (is_paused) cost += ERROR_COST_PER_SKIPPED_TREE;
302
+ return (ErrorStatus) {
303
+ .cost = cost,
304
+ .node_count = ts_stack_node_count_since_error(self->stack, version),
305
+ .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
306
+ .is_in_error = is_paused || ts_stack_state(self->stack, version) == ERROR_STATE
307
+ };
308
+ }
309
+
310
+ static bool ts_parser__better_version_exists(
311
+ TSParser *self,
312
+ StackVersion version,
313
+ bool is_in_error,
314
+ unsigned cost
315
+ ) {
316
+ if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) <= cost) {
317
+ return true;
318
+ }
319
+
320
+ Length position = ts_stack_position(self->stack, version);
321
+ ErrorStatus status = {
322
+ .cost = cost,
323
+ .is_in_error = is_in_error,
324
+ .dynamic_precedence = ts_stack_dynamic_precedence(self->stack, version),
325
+ .node_count = ts_stack_node_count_since_error(self->stack, version),
326
+ };
327
+
328
+ for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
329
+ if (i == version ||
330
+ !ts_stack_is_active(self->stack, i) ||
331
+ ts_stack_position(self->stack, i).bytes < position.bytes) continue;
332
+ ErrorStatus status_i = ts_parser__version_status(self, i);
333
+ switch (ts_parser__compare_versions(self, status, status_i)) {
334
+ case ErrorComparisonTakeRight:
335
+ return true;
336
+ case ErrorComparisonPreferRight:
337
+ if (ts_stack_can_merge(self->stack, i, version)) return true;
338
+ break;
339
+ default:
340
+ break;
341
+ }
342
+ }
343
+
344
+ return false;
345
+ }
346
+
347
+ static bool ts_parser__call_main_lex_fn(TSParser *self, TSLexerMode lex_mode) {
348
+ if (ts_language_is_wasm(self->language)) {
349
+ return ts_wasm_store_call_lex_main(self->wasm_store, lex_mode.lex_state);
350
+ } else {
351
+ return self->language->lex_fn(&self->lexer.data, lex_mode.lex_state);
352
+ }
353
+ }
354
+
355
+ static bool ts_parser__call_keyword_lex_fn(TSParser *self) {
356
+ if (ts_language_is_wasm(self->language)) {
357
+ return ts_wasm_store_call_lex_keyword(self->wasm_store, 0);
358
+ } else {
359
+ return self->language->keyword_lex_fn(&self->lexer.data, 0);
360
+ }
361
+ }
362
+
363
+ static void ts_parser__external_scanner_create(
364
+ TSParser *self
365
+ ) {
366
+ if (self->language && self->language->external_scanner.states) {
367
+ if (ts_language_is_wasm(self->language)) {
368
+ self->external_scanner_payload = (void *)(uintptr_t)ts_wasm_store_call_scanner_create(
369
+ self->wasm_store
370
+ );
371
+ if (ts_wasm_store_has_error(self->wasm_store)) {
372
+ self->has_scanner_error = true;
373
+ }
374
+ } else if (self->language->external_scanner.create) {
375
+ self->external_scanner_payload = self->language->external_scanner.create();
376
+ }
377
+ }
378
+ }
379
+
380
+ static void ts_parser__external_scanner_destroy(
381
+ TSParser *self
382
+ ) {
383
+ if (
384
+ self->language &&
385
+ self->external_scanner_payload &&
386
+ self->language->external_scanner.destroy &&
387
+ !ts_language_is_wasm(self->language)
388
+ ) {
389
+ self->language->external_scanner.destroy(
390
+ self->external_scanner_payload
391
+ );
392
+ }
393
+ self->external_scanner_payload = NULL;
394
+ }
395
+
396
+ static unsigned ts_parser__external_scanner_serialize(
397
+ TSParser *self
398
+ ) {
399
+ uint32_t length;
400
+ if (ts_language_is_wasm(self->language)) {
401
+ length = ts_wasm_store_call_scanner_serialize(
402
+ self->wasm_store,
403
+ (uintptr_t)self->external_scanner_payload,
404
+ self->lexer.debug_buffer
405
+ );
406
+ if (ts_wasm_store_has_error(self->wasm_store)) {
407
+ self->has_scanner_error = true;
408
+ }
409
+ } else {
410
+ length = self->language->external_scanner.serialize(
411
+ self->external_scanner_payload,
412
+ self->lexer.debug_buffer
413
+ );
414
+ }
415
+ ts_assert(length <= TREE_SITTER_SERIALIZATION_BUFFER_SIZE);
416
+ return length;
417
+ }
418
+
419
+ static void ts_parser__external_scanner_deserialize(
420
+ TSParser *self,
421
+ Subtree external_token
422
+ ) {
423
+ const char *data = NULL;
424
+ uint32_t length = 0;
425
+ if (external_token.ptr) {
426
+ data = ts_external_scanner_state_data(&external_token.ptr->external_scanner_state);
427
+ length = external_token.ptr->external_scanner_state.length;
428
+ }
429
+
430
+ if (ts_language_is_wasm(self->language)) {
431
+ ts_wasm_store_call_scanner_deserialize(
432
+ self->wasm_store,
433
+ (uintptr_t)self->external_scanner_payload,
434
+ data,
435
+ length
436
+ );
437
+ if (ts_wasm_store_has_error(self->wasm_store)) {
438
+ self->has_scanner_error = true;
439
+ }
440
+ } else {
441
+ self->language->external_scanner.deserialize(
442
+ self->external_scanner_payload,
443
+ data,
444
+ length
445
+ );
446
+ }
447
+ }
448
+
449
+ static bool ts_parser__external_scanner_scan(
450
+ TSParser *self,
451
+ TSStateId external_lex_state
452
+ ) {
453
+ if (ts_language_is_wasm(self->language)) {
454
+ bool result = ts_wasm_store_call_scanner_scan(
455
+ self->wasm_store,
456
+ (uintptr_t)self->external_scanner_payload,
457
+ external_lex_state * self->language->external_token_count
458
+ );
459
+ if (ts_wasm_store_has_error(self->wasm_store)) {
460
+ self->has_scanner_error = true;
461
+ }
462
+ return result;
463
+ } else {
464
+ const bool *valid_external_tokens = ts_language_enabled_external_tokens(
465
+ self->language,
466
+ external_lex_state
467
+ );
468
+ return self->language->external_scanner.scan(
469
+ self->external_scanner_payload,
470
+ &self->lexer.data,
471
+ valid_external_tokens
472
+ );
473
+ }
474
+ }
475
+
476
+ static bool ts_parser__can_reuse_first_leaf(
477
+ TSParser *self,
478
+ TSStateId state,
479
+ Subtree tree,
480
+ TableEntry *table_entry
481
+ ) {
482
+ TSSymbol leaf_symbol = ts_subtree_leaf_symbol(tree);
483
+ TSStateId leaf_state = ts_subtree_leaf_parse_state(tree);
484
+ TSLexerMode current_lex_mode = ts_language_lex_mode_for_state(self->language, state);
485
+ TSLexerMode leaf_lex_mode = ts_language_lex_mode_for_state(self->language, leaf_state);
486
+
487
+ // At the end of a non-terminal extra node, the lexer normally returns
488
+ // NULL, which indicates that the parser should look for a reduce action
489
+ // at symbol `0`. Avoid reusing tokens in this situation to ensure that
490
+ // the same thing happens when incrementally reparsing.
491
+ if (current_lex_mode.lex_state == (uint16_t)(-1)) return false;
492
+
493
+ // If the token was created in a state with the same set of lookaheads, it is reusable.
494
+ if (
495
+ table_entry->action_count > 0 &&
496
+ memcmp(&leaf_lex_mode, &current_lex_mode, sizeof(TSLexerMode)) == 0 &&
497
+ (
498
+ leaf_symbol != self->language->keyword_capture_token ||
499
+ (!ts_subtree_is_keyword(tree) && ts_subtree_parse_state(tree) == state)
500
+ )
501
+ ) return true;
502
+
503
+ // Empty tokens are not reusable in states with different lookaheads.
504
+ if (ts_subtree_size(tree).bytes == 0 && leaf_symbol != ts_builtin_sym_end) return false;
505
+
506
+ // If the current state allows external tokens or other tokens that conflict with this
507
+ // token, this token is not reusable.
508
+ return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
509
+ }
510
+
511
// Lex the next lookahead token for stack version `version` in parse state
// `parse_state`.
//
// Returns a new leaf subtree for the token, or an error subtree covering the
// characters that had to be skipped before a token (or EOF) was found.
// Returns NULL_SUBTREE when the state's lex mode has lex_state (uint16_t)-1,
// or when the external scanner reported an error.
+ static Subtree ts_parser__lex(
512
+ TSParser *self,
513
+ StackVersion version,
514
+ TSStateId parse_state
515
+ ) {
516
+ TSLexerMode lex_mode = ts_language_lex_mode_for_state(self->language, parse_state);
517
+ if (lex_mode.lex_state == (uint16_t)-1) {
518
+ LOG("no_lookahead_after_non_terminal_extra");
519
+ return NULL_SUBTREE;
520
+ }
521
+
522
+ const Length start_position = ts_stack_position(self->stack, version);
523
+ const Subtree external_token = ts_stack_last_external_token(self->stack, version);
524
+
525
+ bool found_external_token = false;
526
+ bool error_mode = parse_state == ERROR_STATE;
527
+ bool skipped_error = false;
528
+ bool called_get_column = false;
529
+ int32_t first_error_character = 0;
530
+ Length error_start_position = length_zero();
531
+ Length error_end_position = length_zero();
532
+ uint32_t lookahead_end_byte = 0;
533
+ uint32_t external_scanner_state_len = 0;
534
+ bool external_scanner_state_changed = false;
535
+ ts_lexer_reset(&self->lexer, start_position);
536
+
// Each iteration tries the external scanner first (when the lex mode enables
// it) and then the internal lexer. If neither finds a token, the loop first
// retries from the start in error mode and afterwards skips input one
// character at a time.
537
+ for (;;) {
538
+ bool found_token = false;
539
+ Length current_position = self->lexer.current_position;
540
+ ColumnData column_data = self->lexer.column_data;
541
+
542
+ if (lex_mode.external_lex_state != 0) {
543
+ LOG(
544
+ "lex_external state:%d, row:%u, column:%u",
545
+ lex_mode.external_lex_state,
546
+ current_position.extent.row,
547
+ current_position.extent.column
548
+ );
549
+ ts_lexer_start(&self->lexer);
550
+ ts_parser__external_scanner_deserialize(self, external_token);
551
+ found_token = ts_parser__external_scanner_scan(self, lex_mode.external_lex_state);
552
+ if (self->has_scanner_error) return NULL_SUBTREE;
553
+ ts_lexer_finish(&self->lexer, &lookahead_end_byte);
554
+
555
+ if (found_token) {
556
+ external_scanner_state_len = ts_parser__external_scanner_serialize(self); // serialized state now lives in lexer.debug_buffer
557
+ external_scanner_state_changed = !ts_external_scanner_state_eq(
558
+ ts_subtree_external_scanner_state(external_token),
559
+ self->lexer.debug_buffer,
560
+ external_scanner_state_len
561
+ );
562
+
563
+ // Avoid infinite loops caused by the external scanner returning empty tokens.
564
+ // Empty tokens are needed in some circumstances, e.g. indent/dedent tokens
565
+ // in Python. Ignore the following classes of empty tokens:
566
+ //
567
+ // * Tokens produced during error recovery. When recovering from an error,
568
+ // all tokens are allowed, so it's easy to accidentally return unwanted
569
+ // empty tokens.
570
+ // * Tokens that are marked as 'extra' in the grammar. These don't change
571
+ // the parse state, so they would definitely cause an infinite loop.
572
+ if (
573
+ self->lexer.token_end_position.bytes <= current_position.bytes &&
574
+ !external_scanner_state_changed
575
+ ) {
576
+ TSSymbol symbol = self->language->external_scanner.symbol_map[self->lexer.data.result_symbol];
577
+ TSStateId next_parse_state = ts_language_next_state(self->language, parse_state, symbol);
578
+ bool token_is_extra = (next_parse_state == parse_state);
579
+ if (error_mode || !ts_stack_has_advanced_since_error(self->stack, version) || token_is_extra) {
580
+ LOG(
581
+ "ignore_empty_external_token symbol:%s",
582
+ SYM_NAME(self->language->external_scanner.symbol_map[self->lexer.data.result_symbol])
583
+ );
584
+ found_token = false;
585
+ }
586
+ }
587
+ }
588
+
589
+ if (found_token) {
590
+ found_external_token = true;
591
+ called_get_column = self->lexer.did_get_column;
592
+ break;
593
+ }
594
+
595
+ ts_lexer_reset(&self->lexer, current_position); // rewind so the internal lexer starts where the external scanner did
596
+ self->lexer.column_data = column_data;
597
+ }
598
+
599
+ LOG(
600
+ "lex_internal state:%d, row:%u, column:%u",
601
+ lex_mode.lex_state,
602
+ current_position.extent.row,
603
+ current_position.extent.column
604
+ );
605
+ ts_lexer_start(&self->lexer);
606
+ found_token = ts_parser__call_main_lex_fn(self, lex_mode);
607
+ ts_lexer_finish(&self->lexer, &lookahead_end_byte);
608
+ if (found_token) break;
609
+
// First failure outside error mode: retry from the original start position
// using the lex mode of ERROR_STATE.
610
+ if (!error_mode) {
611
+ error_mode = true;
612
+ lex_mode = ts_language_lex_mode_for_state(self->language, ERROR_STATE);
613
+ ts_lexer_reset(&self->lexer, start_position);
614
+ continue;
615
+ }
616
+
// First failure in error mode: remember where the skipped region begins and
// which character started it.
617
+ if (!skipped_error) {
618
+ LOG("skip_unrecognized_character");
619
+ skipped_error = true;
620
+ error_start_position = self->lexer.token_start_position;
621
+ error_end_position = self->lexer.token_start_position;
622
+ first_error_character = self->lexer.data.lookahead;
623
+ }
624
+
// If the lexer made no progress past the skipped region, consume one
// character, or stop with the error symbol at end of input.
625
+ if (self->lexer.current_position.bytes == error_end_position.bytes) {
626
+ if (self->lexer.data.eof(&self->lexer.data)) {
627
+ self->lexer.data.result_symbol = ts_builtin_sym_error;
628
+ break;
629
+ }
630
+ self->lexer.data.advance(&self->lexer.data, false);
631
+ }
632
+
633
+ error_end_position = self->lexer.current_position;
634
+ }
635
+
636
+ Subtree result;
637
+ if (skipped_error) { // characters were skipped: produce an error subtree spanning them
638
+ Length padding = length_sub(error_start_position, start_position);
639
+ Length size = length_sub(error_end_position, error_start_position);
640
+ uint32_t lookahead_bytes = lookahead_end_byte - error_end_position.bytes;
641
+ result = ts_subtree_new_error(
642
+ &self->tree_pool,
643
+ first_error_character,
644
+ padding,
645
+ size,
646
+ lookahead_bytes,
647
+ parse_state,
648
+ self->language
649
+ );
650
+ } else {
651
+ bool is_keyword = false;
652
+ TSSymbol symbol = self->lexer.data.result_symbol;
653
+ Length padding = length_sub(self->lexer.token_start_position, start_position);
654
+ Length size = length_sub(self->lexer.token_end_position, self->lexer.token_start_position);
655
+ uint32_t lookahead_bytes = lookahead_end_byte - self->lexer.token_end_position.bytes;
656
+
657
+ if (found_external_token) {
658
+ symbol = self->language->external_scanner.symbol_map[symbol]; // translate the scanner's token index to a grammar symbol
659
+ } else if (symbol == self->language->keyword_capture_token && symbol != 0) {
// The token is the keyword-capture token: re-lex it from its start with the
// keyword lexer. The keyword symbol replaces it only if the keyword spans
// exactly the same bytes and is either actionable or reserved in this state.
660
+ uint32_t end_byte = self->lexer.token_end_position.bytes;
661
+ ts_lexer_reset(&self->lexer, self->lexer.token_start_position);
662
+ ts_lexer_start(&self->lexer);
663
+
664
+ is_keyword = ts_parser__call_keyword_lex_fn(self);
665
+
666
+ if (
667
+ is_keyword &&
668
+ self->lexer.token_end_position.bytes == end_byte &&
669
+ (
670
+ ts_language_has_actions(self->language, parse_state, self->lexer.data.result_symbol) ||
671
+ ts_language_is_reserved_word(self->language, parse_state, self->lexer.data.result_symbol)
672
+ )
673
+ ) {
674
+ symbol = self->lexer.data.result_symbol;
675
+ }
676
+ }
677
+
678
+ result = ts_subtree_new_leaf(
679
+ &self->tree_pool,
680
+ symbol,
681
+ padding,
682
+ size,
683
+ lookahead_bytes,
684
+ parse_state,
685
+ found_external_token,
686
+ called_get_column,
687
+ is_keyword,
688
+ self->language
689
+ );
690
+
691
+ if (found_external_token) { // attach the serialized scanner state (still in debug_buffer) to the new leaf
692
+ MutableSubtree mut_result = ts_subtree_to_mut_unsafe(result);
693
+ ts_external_scanner_state_init(
694
+ &mut_result.ptr->external_scanner_state,
695
+ self->lexer.debug_buffer,
696
+ external_scanner_state_len
697
+ );
698
+ mut_result.ptr->has_external_scanner_state_change = external_scanner_state_changed;
699
+ }
700
+ }
701
+
702
+ LOG_LOOKAHEAD(
703
+ SYM_NAME(ts_subtree_symbol(result)),
704
+ ts_subtree_total_size(result).bytes
705
+ );
706
+ return result;
707
+ }
708
+
709
+ static Subtree ts_parser__get_cached_token(
710
+ TSParser *self,
711
+ TSStateId state,
712
+ size_t position,
713
+ Subtree last_external_token,
714
+ TableEntry *table_entry
715
+ ) {
716
+ TokenCache *cache = &self->token_cache;
717
+ if (
718
+ cache->token.ptr && cache->byte_index == position &&
719
+ ts_subtree_external_scanner_state_eq(cache->last_external_token, last_external_token)
720
+ ) {
721
+ ts_language_table_entry(self->language, state, ts_subtree_symbol(cache->token), table_entry);
722
+ if (ts_parser__can_reuse_first_leaf(self, state, cache->token, table_entry)) {
723
+ ts_subtree_retain(cache->token);
724
+ return cache->token;
725
+ }
726
+ }
727
+ return NULL_SUBTREE;
728
+ }
729
+
730
+ static void ts_parser__set_cached_token(
731
+ TSParser *self,
732
+ uint32_t byte_index,
733
+ Subtree last_external_token,
734
+ Subtree token
735
+ ) {
736
+ TokenCache *cache = &self->token_cache;
737
+ if (token.ptr) ts_subtree_retain(token);
738
+ if (last_external_token.ptr) ts_subtree_retain(last_external_token);
739
+ if (cache->token.ptr) ts_subtree_release(&self->tree_pool, cache->token);
740
+ if (cache->last_external_token.ptr) ts_subtree_release(&self->tree_pool, cache->last_external_token);
741
+ cache->token = token;
742
+ cache->byte_index = byte_index;
743
+ cache->last_external_token = last_external_token;
744
+ }
745
+
746
+ static bool ts_parser__has_included_range_difference(
747
+ const TSParser *self,
748
+ uint32_t start_position,
749
+ uint32_t end_position
750
+ ) {
751
+ return ts_range_array_intersects(
752
+ &self->included_range_differences,
753
+ self->included_range_difference_index,
754
+ start_position,
755
+ end_position
756
+ );
757
+ }
758
+
759
+ static Subtree ts_parser__reuse_node(
760
+ TSParser *self,
761
+ StackVersion version,
762
+ TSStateId *state,
763
+ uint32_t position,
764
+ Subtree last_external_token,
765
+ TableEntry *table_entry
766
+ ) {
767
+ Subtree result;
768
+ while ((result = reusable_node_tree(&self->reusable_node)).ptr) {
769
+ uint32_t byte_offset = reusable_node_byte_offset(&self->reusable_node);
770
+ uint32_t end_byte_offset = byte_offset + ts_subtree_total_bytes(result);
771
+
772
+ // Do not reuse an EOF node if the included ranges array has changes
773
+ // later on in the file.
774
+ if (ts_subtree_is_eof(result)) end_byte_offset = UINT32_MAX;
775
+
776
+ if (byte_offset > position) {
777
+ LOG("before_reusable_node symbol:%s", TREE_NAME(result));
778
+ break;
779
+ }
780
+
781
+ if (byte_offset < position) {
782
+ LOG("past_reusable_node symbol:%s", TREE_NAME(result));
783
+ if (end_byte_offset <= position || !reusable_node_descend(&self->reusable_node)) {
784
+ reusable_node_advance(&self->reusable_node);
785
+ }
786
+ continue;
787
+ }
788
+
789
+ if (!ts_subtree_external_scanner_state_eq(self->reusable_node.last_external_token, last_external_token)) {
790
+ LOG("reusable_node_has_different_external_scanner_state symbol:%s", TREE_NAME(result));
791
+ reusable_node_advance(&self->reusable_node);
792
+ continue;
793
+ }
794
+
795
+ const char *reason = NULL;
796
+ if (ts_subtree_has_changes(result)) {
797
+ reason = "has_changes";
798
+ } else if (ts_subtree_is_error(result)) {
799
+ reason = "is_error";
800
+ } else if (ts_subtree_missing(result)) {
801
+ reason = "is_missing";
802
+ } else if (ts_subtree_is_fragile(result)) {
803
+ reason = "is_fragile";
804
+ } else if (ts_parser__has_included_range_difference(self, byte_offset, end_byte_offset)) {
805
+ reason = "contains_different_included_range";
806
+ }
807
+
808
+ if (reason) {
809
+ LOG("cant_reuse_node_%s tree:%s", reason, TREE_NAME(result));
810
+ if (!reusable_node_descend(&self->reusable_node)) {
811
+ reusable_node_advance(&self->reusable_node);
812
+ ts_parser__breakdown_top_of_stack(self, version);
813
+ *state = ts_stack_state(self->stack, version);
814
+ }
815
+ continue;
816
+ }
817
+
818
+ TSSymbol leaf_symbol = ts_subtree_leaf_symbol(result);
819
+ ts_language_table_entry(self->language, *state, leaf_symbol, table_entry);
820
+ if (!ts_parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
821
+ LOG(
822
+ "cant_reuse_node symbol:%s, first_leaf_symbol:%s",
823
+ TREE_NAME(result),
824
+ SYM_NAME(leaf_symbol)
825
+ );
826
+ reusable_node_advance_past_leaf(&self->reusable_node);
827
+ break;
828
+ }
829
+
830
+ LOG("reuse_node symbol:%s", TREE_NAME(result));
831
+ ts_subtree_retain(result);
832
+ return result;
833
+ }
834
+
835
+ return NULL_SUBTREE;
836
+ }
837
+
838
+ // Determine if a given tree should be replaced by an alternative tree.
839
+ //
840
+ // The decision is based on the trees' error costs (if any), their dynamic precedence,
841
+ // and finally, as a default, by a recursive comparison of the trees' symbols.
842
+ static bool ts_parser__select_tree(TSParser *self, Subtree left, Subtree right) {
843
+ if (!left.ptr) return true;
844
+ if (!right.ptr) return false;
845
+
846
+ if (ts_subtree_error_cost(right) < ts_subtree_error_cost(left)) {
847
+ LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
848
+ return true;
849
+ }
850
+
851
+ if (ts_subtree_error_cost(left) < ts_subtree_error_cost(right)) {
852
+ LOG("select_smaller_error symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
853
+ return false;
854
+ }
855
+
856
+ if (ts_subtree_dynamic_precedence(right) > ts_subtree_dynamic_precedence(left)) {
857
+ LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32,
858
+ TREE_NAME(right), ts_subtree_dynamic_precedence(right), TREE_NAME(left),
859
+ ts_subtree_dynamic_precedence(left));
860
+ return true;
861
+ }
862
+
863
+ if (ts_subtree_dynamic_precedence(left) > ts_subtree_dynamic_precedence(right)) {
864
+ LOG("select_higher_precedence symbol:%s, prec:%" PRId32 ", over_symbol:%s, other_prec:%" PRId32,
865
+ TREE_NAME(left), ts_subtree_dynamic_precedence(left), TREE_NAME(right),
866
+ ts_subtree_dynamic_precedence(right));
867
+ return false;
868
+ }
869
+
870
+ if (ts_subtree_error_cost(left) > 0) return true;
871
+
872
+ int comparison = ts_subtree_compare(left, right, &self->tree_pool);
873
+ switch (comparison) {
874
+ case -1:
875
+ LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
876
+ return false;
877
+ break;
878
+ case 1:
879
+ LOG("select_earlier symbol:%s, over_symbol:%s", TREE_NAME(right), TREE_NAME(left));
880
+ return true;
881
+ default:
882
+ LOG("select_existing symbol:%s, over_symbol:%s", TREE_NAME(left), TREE_NAME(right));
883
+ return false;
884
+ }
885
+ }
886
+
887
+ // Determine if a given tree's children should be replaced by an alternative
888
+ // array of children.
889
+ static bool ts_parser__select_children(
890
+ TSParser *self,
891
+ Subtree left,
892
+ const SubtreeArray *children
893
+ ) {
894
+ array_assign(&self->scratch_trees, children);
895
+
896
+ // Create a temporary subtree using the scratch trees array. This node does
897
+ // not perform any allocation except for possibly growing the array to make
898
+ // room for its own heap data. The scratch tree is never explicitly released,
899
+ // so the same 'scratch trees' array can be reused again later.
900
+ MutableSubtree scratch_tree = ts_subtree_new_node(
901
+ ts_subtree_symbol(left),
902
+ &self->scratch_trees,
903
+ 0,
904
+ self->language
905
+ );
906
+
907
+ return ts_parser__select_tree(
908
+ self,
909
+ left,
910
+ ts_subtree_from_mut(scratch_tree)
911
+ );
912
+ }
913
+
914
+ static void ts_parser__shift(
915
+ TSParser *self,
916
+ StackVersion version,
917
+ TSStateId state,
918
+ Subtree lookahead,
919
+ bool extra
920
+ ) {
921
+ bool is_leaf = ts_subtree_child_count(lookahead) == 0;
922
+ Subtree subtree_to_push = lookahead;
923
+ if (extra != ts_subtree_extra(lookahead) && is_leaf) {
924
+ MutableSubtree result = ts_subtree_make_mut(&self->tree_pool, lookahead);
925
+ ts_subtree_set_extra(&result, extra);
926
+ subtree_to_push = ts_subtree_from_mut(result);
927
+ }
928
+
929
+ ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state);
930
+ if (ts_subtree_has_external_tokens(subtree_to_push)) {
931
+ ts_stack_set_last_external_token(
932
+ self->stack, version, ts_subtree_last_external_token(subtree_to_push)
933
+ );
934
+ }
935
+ }
936
+
937
+ static StackVersion ts_parser__reduce(
938
+ TSParser *self,
939
+ StackVersion version,
940
+ TSSymbol symbol,
941
+ uint32_t count,
942
+ int dynamic_precedence,
943
+ uint16_t production_id,
944
+ bool is_fragile,
945
+ bool end_of_non_terminal_extra
946
+ ) {
947
+ uint32_t initial_version_count = ts_stack_version_count(self->stack);
948
+
949
+ // Pop the given number of nodes from the given version of the parse stack.
950
+ // If stack versions have previously merged, then there may be more than one
951
+ // path back through the stack. For each path, create a new parent node to
952
+ // contain the popped children, and push it onto the stack in place of the
953
+ // children.
954
+ StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
955
+ uint32_t removed_version_count = 0;
956
+ uint32_t halted_version_count = ts_stack_halted_version_count(self->stack);
957
+ for (uint32_t i = 0; i < pop.size; i++) {
958
+ StackSlice slice = *array_get(&pop, i);
959
+ StackVersion slice_version = slice.version - removed_version_count;
960
+
961
+ // This is where new versions are added to the parse stack. The versions
962
+ // will all be sorted and truncated at the end of the outer parsing loop.
963
+ // Allow the maximum version count to be temporarily exceeded, but only
964
+ // by a limited threshold.
965
+ if (slice_version > MAX_VERSION_COUNT + MAX_VERSION_COUNT_OVERFLOW + halted_version_count) {
966
+ ts_stack_remove_version(self->stack, slice_version);
967
+ ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
968
+ removed_version_count++;
969
+ while (i + 1 < pop.size) {
970
+ LOG("aborting reduce with too many versions")
971
+ StackSlice next_slice = *array_get(&pop, i + 1);
972
+ if (next_slice.version != slice.version) break;
973
+ ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
974
+ i++;
975
+ }
976
+ continue;
977
+ }
978
+
979
+ // Extra tokens on top of the stack should not be included in this new parent
980
+ // node. They will be re-pushed onto the stack after the parent node is
981
+ // created and pushed.
982
+ SubtreeArray children = slice.subtrees;
983
+ ts_subtree_array_remove_trailing_extras(&children, &self->trailing_extras);
984
+
985
+ MutableSubtree parent = ts_subtree_new_node(
986
+ symbol, &children, production_id, self->language
987
+ );
988
+
989
+ // This pop operation may have caused multiple stack versions to collapse
990
+ // into one, because they all diverged from a common state. In that case,
991
+ // choose one of the arrays of trees to be the parent node's children, and
992
+ // delete the rest of the tree arrays.
993
+ while (i + 1 < pop.size) {
994
+ StackSlice next_slice = *array_get(&pop, i + 1);
995
+ if (next_slice.version != slice.version) break;
996
+ i++;
997
+
998
+ SubtreeArray next_slice_children = next_slice.subtrees;
999
+ ts_subtree_array_remove_trailing_extras(&next_slice_children, &self->trailing_extras2);
1000
+
1001
+ if (ts_parser__select_children(
1002
+ self,
1003
+ ts_subtree_from_mut(parent),
1004
+ &next_slice_children
1005
+ )) {
1006
+ ts_subtree_array_clear(&self->tree_pool, &self->trailing_extras);
1007
+ ts_subtree_release(&self->tree_pool, ts_subtree_from_mut(parent));
1008
+ array_swap(&self->trailing_extras, &self->trailing_extras2);
1009
+ parent = ts_subtree_new_node(
1010
+ symbol, &next_slice_children, production_id, self->language
1011
+ );
1012
+ } else {
1013
+ array_clear(&self->trailing_extras2);
1014
+ ts_subtree_array_delete(&self->tree_pool, &next_slice.subtrees);
1015
+ }
1016
+ }
1017
+
1018
+ TSStateId state = ts_stack_state(self->stack, slice_version);
1019
+ TSStateId next_state = ts_language_next_state(self->language, state, symbol);
1020
+ if (end_of_non_terminal_extra && next_state == state) {
1021
+ parent.ptr->extra = true;
1022
+ }
1023
+ if (is_fragile || pop.size > 1 || initial_version_count > 1) {
1024
+ parent.ptr->fragile_left = true;
1025
+ parent.ptr->fragile_right = true;
1026
+ parent.ptr->parse_state = TS_TREE_STATE_NONE;
1027
+ } else {
1028
+ parent.ptr->parse_state = state;
1029
+ }
1030
+ parent.ptr->dynamic_precedence += dynamic_precedence;
1031
+
1032
+ // Push the parent node onto the stack, along with any extra tokens that
1033
+ // were previously on top of the stack.
1034
+ ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
1035
+ for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
1036
+ ts_stack_push(self->stack, slice_version, *array_get(&self->trailing_extras, j), false, next_state);
1037
+ }
1038
+
1039
+ for (StackVersion j = 0; j < slice_version; j++) {
1040
+ if (j == version) continue;
1041
+ if (ts_stack_merge(self->stack, j, slice_version)) {
1042
+ removed_version_count++;
1043
+ break;
1044
+ }
1045
+ }
1046
+ }
1047
+
1048
+ // Return the first new stack version that was created.
1049
+ return ts_stack_version_count(self->stack) > initial_version_count
1050
+ ? initial_version_count
1051
+ : STACK_VERSION_NONE;
1052
+ }
1053
+
1054
+ static void ts_parser__accept(
1055
+ TSParser *self,
1056
+ StackVersion version,
1057
+ Subtree lookahead
1058
+ ) {
1059
+ ts_assert(ts_subtree_is_eof(lookahead));
1060
+ ts_stack_push(self->stack, version, lookahead, false, 1);
1061
+
1062
+ StackSliceArray pop = ts_stack_pop_all(self->stack, version);
1063
+ for (uint32_t i = 0; i < pop.size; i++) {
1064
+ SubtreeArray trees = array_get(&pop, i)->subtrees;
1065
+
1066
+ Subtree root = NULL_SUBTREE;
1067
+ for (uint32_t j = trees.size - 1; j + 1 > 0; j--) {
1068
+ Subtree tree = *array_get(&trees, j);
1069
+ if (!ts_subtree_extra(tree)) {
1070
+ ts_assert(!tree.data.is_inline);
1071
+ uint32_t child_count = ts_subtree_child_count(tree);
1072
+ const Subtree *children = ts_subtree_children(tree);
1073
+ for (uint32_t k = 0; k < child_count; k++) {
1074
+ ts_subtree_retain(children[k]);
1075
+ }
1076
+ array_splice(&trees, j, 1, child_count, children);
1077
+ root = ts_subtree_from_mut(ts_subtree_new_node(
1078
+ ts_subtree_symbol(tree),
1079
+ &trees,
1080
+ tree.ptr->production_id,
1081
+ self->language
1082
+ ));
1083
+ ts_subtree_release(&self->tree_pool, tree);
1084
+ break;
1085
+ }
1086
+ }
1087
+
1088
+ ts_assert(root.ptr);
1089
+ self->accept_count++;
1090
+
1091
+ if (self->finished_tree.ptr) {
1092
+ if (ts_parser__select_tree(self, self->finished_tree, root)) {
1093
+ ts_subtree_release(&self->tree_pool, self->finished_tree);
1094
+ self->finished_tree = root;
1095
+ } else {
1096
+ ts_subtree_release(&self->tree_pool, root);
1097
+ }
1098
+ } else {
1099
+ self->finished_tree = root;
1100
+ }
1101
+ }
1102
+
1103
+ ts_stack_remove_version(self->stack, array_get(&pop, 0)->version);
1104
+ ts_stack_halt(self->stack, version);
1105
+ }
1106
+
1107
+ static bool ts_parser__do_all_potential_reductions(
1108
+ TSParser *self,
1109
+ StackVersion starting_version,
1110
+ TSSymbol lookahead_symbol
1111
+ ) {
1112
+ uint32_t initial_version_count = ts_stack_version_count(self->stack);
1113
+
1114
+ bool can_shift_lookahead_symbol = false;
1115
+ StackVersion version = starting_version;
1116
+ for (unsigned i = 0; true; i++) {
1117
+ uint32_t version_count = ts_stack_version_count(self->stack);
1118
+ if (version >= version_count) break;
1119
+
1120
+ bool merged = false;
1121
+ for (StackVersion j = initial_version_count; j < version; j++) {
1122
+ if (ts_stack_merge(self->stack, j, version)) {
1123
+ merged = true;
1124
+ break;
1125
+ }
1126
+ }
1127
+ if (merged) continue;
1128
+
1129
+ TSStateId state = ts_stack_state(self->stack, version);
1130
+ bool has_shift_action = false;
1131
+ array_clear(&self->reduce_actions);
1132
+
1133
+ TSSymbol first_symbol, end_symbol;
1134
+ if (lookahead_symbol != 0) {
1135
+ first_symbol = lookahead_symbol;
1136
+ end_symbol = lookahead_symbol + 1;
1137
+ } else {
1138
+ first_symbol = 1;
1139
+ end_symbol = self->language->token_count;
1140
+ }
1141
+
1142
+ for (TSSymbol symbol = first_symbol; symbol < end_symbol; symbol++) {
1143
+ TableEntry entry;
1144
+ ts_language_table_entry(self->language, state, symbol, &entry);
1145
+ for (uint32_t j = 0; j < entry.action_count; j++) {
1146
+ TSParseAction action = entry.actions[j];
1147
+ switch (action.type) {
1148
+ case TSParseActionTypeShift:
1149
+ case TSParseActionTypeRecover:
1150
+ if (!action.shift.extra && !action.shift.repetition) has_shift_action = true;
1151
+ break;
1152
+ case TSParseActionTypeReduce:
1153
+ if (action.reduce.child_count > 0)
1154
+ ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction) {
1155
+ .symbol = action.reduce.symbol,
1156
+ .count = action.reduce.child_count,
1157
+ .dynamic_precedence = action.reduce.dynamic_precedence,
1158
+ .production_id = action.reduce.production_id,
1159
+ });
1160
+ break;
1161
+ default:
1162
+ break;
1163
+ }
1164
+ }
1165
+ }
1166
+
1167
+ StackVersion reduction_version = STACK_VERSION_NONE;
1168
+ for (uint32_t j = 0; j < self->reduce_actions.size; j++) {
1169
+ ReduceAction action = *array_get(&self->reduce_actions, j);
1170
+
1171
+ reduction_version = ts_parser__reduce(
1172
+ self, version, action.symbol, action.count,
1173
+ action.dynamic_precedence, action.production_id,
1174
+ true, false
1175
+ );
1176
+ }
1177
+
1178
+ if (has_shift_action) {
1179
+ can_shift_lookahead_symbol = true;
1180
+ } else if (reduction_version != STACK_VERSION_NONE && i < MAX_VERSION_COUNT) {
1181
+ ts_stack_renumber_version(self->stack, reduction_version, version);
1182
+ continue;
1183
+ } else if (lookahead_symbol != 0) {
1184
+ ts_stack_remove_version(self->stack, version);
1185
+ }
1186
+
1187
+ if (version == starting_version) {
1188
+ version = version_count;
1189
+ } else {
1190
+ version++;
1191
+ }
1192
+ }
1193
+
1194
+ return can_shift_lookahead_symbol;
1195
+ }
1196
+
1197
+ static bool ts_parser__recover_to_state(
1198
+ TSParser *self,
1199
+ StackVersion version,
1200
+ unsigned depth,
1201
+ TSStateId goal_state
1202
+ ) {
1203
+ StackSliceArray pop = ts_stack_pop_count(self->stack, version, depth);
1204
+ StackVersion previous_version = STACK_VERSION_NONE;
1205
+
1206
+ for (unsigned i = 0; i < pop.size; i++) {
1207
+ StackSlice slice = *array_get(&pop, i);
1208
+
1209
+ if (slice.version == previous_version) {
1210
+ ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1211
+ array_erase(&pop, i--);
1212
+ continue;
1213
+ }
1214
+
1215
+ if (ts_stack_state(self->stack, slice.version) != goal_state) {
1216
+ ts_stack_halt(self->stack, slice.version);
1217
+ ts_subtree_array_delete(&self->tree_pool, &slice.subtrees);
1218
+ array_erase(&pop, i--);
1219
+ continue;
1220
+ }
1221
+
1222
+ SubtreeArray error_trees = ts_stack_pop_error(self->stack, slice.version);
1223
+ if (error_trees.size > 0) {
1224
+ ts_assert(error_trees.size == 1);
1225
+ Subtree error_tree = *array_get(&error_trees, 0);
1226
+ uint32_t error_child_count = ts_subtree_child_count(error_tree);
1227
+ if (error_child_count > 0) {
1228
+ array_splice(&slice.subtrees, 0, 0, error_child_count, ts_subtree_children(error_tree));
1229
+ for (unsigned j = 0; j < error_child_count; j++) {
1230
+ ts_subtree_retain(*array_get(&slice.subtrees, j));
1231
+ }
1232
+ }
1233
+ ts_subtree_array_delete(&self->tree_pool, &error_trees);
1234
+ }
1235
+
1236
+ ts_subtree_array_remove_trailing_extras(&slice.subtrees, &self->trailing_extras);
1237
+
1238
+ if (slice.subtrees.size > 0) {
1239
+ Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
1240
+ ts_stack_push(self->stack, slice.version, error, false, goal_state);
1241
+ } else {
1242
+ array_delete(&slice.subtrees);
1243
+ }
1244
+
1245
+ for (unsigned j = 0; j < self->trailing_extras.size; j++) {
1246
+ Subtree tree = *array_get(&self->trailing_extras, j);
1247
+ ts_stack_push(self->stack, slice.version, tree, false, goal_state);
1248
+ }
1249
+
1250
+ previous_version = slice.version;
1251
+ }
1252
+
1253
+ return previous_version != STACK_VERSION_NONE;
1254
+ }
1255
+
1256
+ static void ts_parser__recover(
1257
+ TSParser *self,
1258
+ StackVersion version,
1259
+ Subtree lookahead
1260
+ ) {
1261
+ bool did_recover = false;
1262
+ unsigned previous_version_count = ts_stack_version_count(self->stack);
1263
+ Length position = ts_stack_position(self->stack, version);
1264
+ StackSummary *summary = ts_stack_get_summary(self->stack, version);
1265
+ unsigned node_count_since_error = ts_stack_node_count_since_error(self->stack, version);
1266
+ unsigned current_error_cost = ts_stack_error_cost(self->stack, version);
1267
+
1268
+ // When the parser is in the error state, there are two strategies for recovering with a
1269
+ // given lookahead token:
1270
+ // 1. Find a previous state on the stack in which that lookahead token would be valid. Then,
1271
+ // create a new stack version that is in that state again. This entails popping all of the
1272
+ // subtrees that have been pushed onto the stack since that previous state, and wrapping
1273
+ // them in an ERROR node.
1274
+ // 2. Wrap the lookahead token in an ERROR node, push that ERROR node onto the stack, and
1275
+ // move on to the next lookahead token, remaining in the error state.
1276
+ //
1277
+ // First, try the strategy 1. Upon entering the error state, the parser recorded a summary
1278
+ // of the previous parse states and their depths. Look at each state in the summary, to see
1279
+ // if the current lookahead token would be valid in that state.
1280
+ if (summary && !ts_subtree_is_error(lookahead)) {
1281
+ for (unsigned i = 0; i < summary->size; i++) {
1282
+ StackSummaryEntry entry = *array_get(summary, i);
1283
+
1284
+ if (entry.state == ERROR_STATE) continue;
1285
+ if (entry.position.bytes == position.bytes) continue;
1286
+ unsigned depth = entry.depth;
1287
+ if (node_count_since_error > 0) depth++;
1288
+
1289
+ // Do not recover in ways that create redundant stack versions.
1290
+ bool would_merge = false;
1291
+ for (unsigned j = 0; j < previous_version_count; j++) {
1292
+ if (
1293
+ ts_stack_state(self->stack, j) == entry.state &&
1294
+ ts_stack_position(self->stack, j).bytes == position.bytes
1295
+ ) {
1296
+ would_merge = true;
1297
+ break;
1298
+ }
1299
+ }
1300
+ if (would_merge) continue;
1301
+
1302
+ // Do not recover if the result would clearly be worse than some existing stack version.
1303
+ unsigned new_cost =
1304
+ current_error_cost +
1305
+ entry.depth * ERROR_COST_PER_SKIPPED_TREE +
1306
+ (position.bytes - entry.position.bytes) * ERROR_COST_PER_SKIPPED_CHAR +
1307
+ (position.extent.row - entry.position.extent.row) * ERROR_COST_PER_SKIPPED_LINE;
1308
+ if (ts_parser__better_version_exists(self, version, false, new_cost)) break;
1309
+
1310
+ // If the current lookahead token is valid in some previous state, recover to that state.
1311
+ // Then stop looking for further recoveries.
1312
+ if (ts_language_has_actions(self->language, entry.state, ts_subtree_symbol(lookahead))) {
1313
+ if (ts_parser__recover_to_state(self, version, depth, entry.state)) {
1314
+ did_recover = true;
1315
+ LOG("recover_to_previous state:%u, depth:%u", entry.state, depth);
1316
+ LOG_STACK();
1317
+ break;
1318
+ }
1319
+ }
1320
+ }
1321
+ }
1322
+
1323
+ // In the process of attempting to recover, some stack versions may have been created
1324
+ // and subsequently halted. Remove those versions.
1325
+ for (unsigned i = previous_version_count; i < ts_stack_version_count(self->stack); i++) {
1326
+ if (!ts_stack_is_active(self->stack, i)) {
1327
+ LOG("removed paused version:%u", i);
1328
+ ts_stack_remove_version(self->stack, i--);
1329
+ LOG_STACK();
1330
+ }
1331
+ }
1332
+
1333
+ // If the parser is still in the error state at the end of the file, just wrap everything
1334
+ // in an ERROR node and terminate.
1335
+ if (ts_subtree_is_eof(lookahead)) {
1336
+ LOG("recover_eof");
1337
+ SubtreeArray children = array_new();
1338
+ Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
1339
+ ts_stack_push(self->stack, version, parent, false, 1);
1340
+ ts_parser__accept(self, version, lookahead);
1341
+ return;
1342
+ }
1343
+
1344
+ // If strategy 1 succeeded, a new stack version will have been created which is able to handle
1345
+ // the current lookahead token. Now, in addition, try strategy 2 described above: skip the
1346
+ // current lookahead token by wrapping it in an ERROR node.
1347
+
1348
+ // Don't pursue this additional strategy if there are already too many stack versions.
1349
+ if (did_recover && ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1350
+ ts_stack_halt(self->stack, version);
1351
+ ts_subtree_release(&self->tree_pool, lookahead);
1352
+ return;
1353
+ }
1354
+
1355
+ if (
1356
+ did_recover &&
1357
+ ts_subtree_has_external_scanner_state_change(lookahead)
1358
+ ) {
1359
+ ts_stack_halt(self->stack, version);
1360
+ ts_subtree_release(&self->tree_pool, lookahead);
1361
+ return;
1362
+ }
1363
+
1364
+ // Do not recover if the result would clearly be worse than some existing stack version.
1365
+ unsigned new_cost =
1366
+ current_error_cost + ERROR_COST_PER_SKIPPED_TREE +
1367
+ ts_subtree_total_bytes(lookahead) * ERROR_COST_PER_SKIPPED_CHAR +
1368
+ ts_subtree_total_size(lookahead).extent.row * ERROR_COST_PER_SKIPPED_LINE;
1369
+ if (ts_parser__better_version_exists(self, version, false, new_cost)) {
1370
+ ts_stack_halt(self->stack, version);
1371
+ ts_subtree_release(&self->tree_pool, lookahead);
1372
+ return;
1373
+ }
1374
+
1375
+ // If the current lookahead token is an extra token, mark it as extra. This means it won't
1376
+ // be counted in error cost calculations.
1377
+ unsigned n;
1378
+ const TSParseAction *actions = ts_language_actions(self->language, 1, ts_subtree_symbol(lookahead), &n);
1379
+ if (n > 0 && actions[n - 1].type == TSParseActionTypeShift && actions[n - 1].shift.extra) {
1380
+ MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1381
+ ts_subtree_set_extra(&mutable_lookahead, true);
1382
+ lookahead = ts_subtree_from_mut(mutable_lookahead);
1383
+ }
1384
+
1385
+ // Wrap the lookahead token in an ERROR.
1386
+ LOG("skip_token symbol:%s", TREE_NAME(lookahead));
1387
+ SubtreeArray children = array_new();
1388
+ array_reserve(&children, 1);
1389
+ array_push(&children, lookahead);
1390
+ MutableSubtree error_repeat = ts_subtree_new_node(
1391
+ ts_builtin_sym_error_repeat,
1392
+ &children,
1393
+ 0,
1394
+ self->language
1395
+ );
1396
+
1397
+ // If other tokens have already been skipped, so there is already an ERROR at the top of the
1398
+ // stack, then pop that ERROR off the stack and wrap the two ERRORs together into one larger
1399
+ // ERROR.
1400
+ if (node_count_since_error > 0) {
1401
+ StackSliceArray pop = ts_stack_pop_count(self->stack, version, 1);
1402
+
1403
+ // TODO: Figure out how to make this condition occur.
1404
+ // See https://github.com/atom/atom/issues/18450#issuecomment-439579778
1405
+ // If multiple stack versions have merged at this point, just pick one of the errors
1406
+ // arbitrarily and discard the rest.
1407
+ if (pop.size > 1) {
1408
+ for (unsigned i = 1; i < pop.size; i++) {
1409
+ ts_subtree_array_delete(&self->tree_pool, &array_get(&pop, i)->subtrees);
1410
+ }
1411
+ while (ts_stack_version_count(self->stack) > array_get(&pop, 0)->version + 1) {
1412
+ ts_stack_remove_version(self->stack, array_get(&pop, 0)->version + 1);
1413
+ }
1414
+ }
1415
+
1416
+ ts_stack_renumber_version(self->stack, array_get(&pop, 0)->version, version);
1417
+ array_push(&array_get(&pop, 0)->subtrees, ts_subtree_from_mut(error_repeat));
1418
+ error_repeat = ts_subtree_new_node(
1419
+ ts_builtin_sym_error_repeat,
1420
+ &array_get(&pop, 0)->subtrees,
1421
+ 0,
1422
+ self->language
1423
+ );
1424
+ }
1425
+
1426
+ // Push the new ERROR onto the stack.
1427
+ ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
1428
+ if (ts_subtree_has_external_tokens(lookahead)) {
1429
+ ts_stack_set_last_external_token(
1430
+ self->stack, version, ts_subtree_last_external_token(lookahead)
1431
+ );
1432
+ }
1433
+
1434
+ bool has_error = true;
1435
+ for (unsigned i = 0; i < ts_stack_version_count(self->stack); i++) {
1436
+ ErrorStatus status = ts_parser__version_status(self, i);
1437
+ if (!status.is_in_error) {
1438
+ has_error = false;
1439
+ break;
1440
+ }
1441
+ }
1442
+ self->has_error = has_error;
1443
+ }
1444
+
1445
+ static void ts_parser__handle_error(
1446
+ TSParser *self,
1447
+ StackVersion version,
1448
+ Subtree lookahead
1449
+ ) {
1450
+ uint32_t previous_version_count = ts_stack_version_count(self->stack);
1451
+
1452
+ // Perform any reductions that can happen in this state, regardless of the lookahead. After
1453
+ // skipping one or more invalid tokens, the parser might find a token that would have allowed
1454
+ // a reduction to take place.
1455
+ ts_parser__do_all_potential_reductions(self, version, 0);
1456
+ uint32_t version_count = ts_stack_version_count(self->stack);
1457
+ Length position = ts_stack_position(self->stack, version);
1458
+
1459
+ // Push a discontinuity onto the stack. Merge all of the stack versions that
1460
+ // were created in the previous step.
1461
+ bool did_insert_missing_token = false;
1462
+ for (StackVersion v = version; v < version_count;) {
1463
+ if (!did_insert_missing_token) {
1464
+ TSStateId state = ts_stack_state(self->stack, v);
1465
+ for (
1466
+ TSSymbol missing_symbol = 1;
1467
+ missing_symbol < (uint16_t)self->language->token_count;
1468
+ missing_symbol++
1469
+ ) {
1470
+ TSStateId state_after_missing_symbol = ts_language_next_state(
1471
+ self->language, state, missing_symbol
1472
+ );
1473
+ if (state_after_missing_symbol == 0 || state_after_missing_symbol == state) {
1474
+ continue;
1475
+ }
1476
+
1477
+ if (ts_language_has_reduce_action(
1478
+ self->language,
1479
+ state_after_missing_symbol,
1480
+ ts_subtree_leaf_symbol(lookahead)
1481
+ )) {
1482
+ // In case the parser is currently outside of any included range, the lexer will
1483
+ // snap to the beginning of the next included range. The missing token's padding
1484
+ // must be assigned to position it within the next included range.
1485
+ ts_lexer_reset(&self->lexer, position);
1486
+ ts_lexer_mark_end(&self->lexer);
1487
+ Length padding = length_sub(self->lexer.token_end_position, position);
1488
+ uint32_t lookahead_bytes = ts_subtree_total_bytes(lookahead) + ts_subtree_lookahead_bytes(lookahead);
1489
+
1490
+ StackVersion version_with_missing_tree = ts_stack_copy_version(self->stack, v);
1491
+ Subtree missing_tree = ts_subtree_new_missing_leaf(
1492
+ &self->tree_pool, missing_symbol,
1493
+ padding, lookahead_bytes,
1494
+ self->language
1495
+ );
1496
+ ts_stack_push(
1497
+ self->stack, version_with_missing_tree,
1498
+ missing_tree, false,
1499
+ state_after_missing_symbol
1500
+ );
1501
+
1502
+ if (ts_parser__do_all_potential_reductions(
1503
+ self, version_with_missing_tree,
1504
+ ts_subtree_leaf_symbol(lookahead)
1505
+ )) {
1506
+ LOG(
1507
+ "recover_with_missing symbol:%s, state:%u",
1508
+ SYM_NAME(missing_symbol),
1509
+ ts_stack_state(self->stack, version_with_missing_tree)
1510
+ );
1511
+ did_insert_missing_token = true;
1512
+ break;
1513
+ }
1514
+ }
1515
+ }
1516
+ }
1517
+
1518
+ ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
1519
+ v = (v == version) ? previous_version_count : v + 1;
1520
+ }
1521
+
1522
+ for (unsigned i = previous_version_count; i < version_count; i++) {
1523
+ bool did_merge = ts_stack_merge(self->stack, version, previous_version_count);
1524
+ ts_assert(did_merge);
1525
+ }
1526
+
1527
+ ts_stack_record_summary(self->stack, version, MAX_SUMMARY_DEPTH);
1528
+
1529
+ // Begin recovery with the current lookahead node, rather than waiting for the
1530
+ // next turn of the parse loop. This ensures that the tree accounts for the
1531
+ // current lookahead token's "lookahead bytes" value, which describes how far
1532
+ // the lexer needed to look ahead beyond the content of the token in order to
1533
+ // recognize it.
1534
+ if (ts_subtree_child_count(lookahead) > 0) {
1535
+ ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1536
+ }
1537
+ ts_parser__recover(self, version, lookahead);
1538
+
1539
+ LOG_STACK();
1540
+ }
1541
+
1542
+ static bool ts_parser__check_progress(TSParser *self, Subtree *lookahead, const uint32_t *position, unsigned operations) {
1543
+ self->operation_count += operations;
1544
+ if (self->operation_count >= OP_COUNT_PER_PARSER_TIMEOUT_CHECK) {
1545
+ self->operation_count = 0;
1546
+ }
1547
+ if (position != NULL) {
1548
+ self->parse_state.current_byte_offset = *position;
1549
+ self->parse_state.has_error = self->has_error;
1550
+ }
1551
+ if (
1552
+ self->operation_count == 0 &&
1553
+ (
1554
+ // TODO(amaanq): remove cancellation flag & clock checks before 0.26
1555
+ (self->cancellation_flag && atomic_load(self->cancellation_flag)) ||
1556
+ (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)) ||
1557
+ (self->parse_options.progress_callback && self->parse_options.progress_callback(&self->parse_state))
1558
+ )
1559
+ ) {
1560
+ if (lookahead && lookahead->ptr) {
1561
+ ts_subtree_release(&self->tree_pool, *lookahead);
1562
+ }
1563
+ return false;
1564
+ }
1565
+ return true;
1566
+ }
1567
+
1568
+ static bool ts_parser__advance(
1569
+ TSParser *self,
1570
+ StackVersion version,
1571
+ bool allow_node_reuse
1572
+ ) {
1573
+ TSStateId state = ts_stack_state(self->stack, version);
1574
+ uint32_t position = ts_stack_position(self->stack, version).bytes;
1575
+ Subtree last_external_token = ts_stack_last_external_token(self->stack, version);
1576
+
1577
+ bool did_reuse = true;
1578
+ Subtree lookahead = NULL_SUBTREE;
1579
+ TableEntry table_entry = {.action_count = 0};
1580
+
1581
+ // If possible, reuse a node from the previous syntax tree.
1582
+ if (allow_node_reuse) {
1583
+ lookahead = ts_parser__reuse_node(
1584
+ self, version, &state, position, last_external_token, &table_entry
1585
+ );
1586
+ }
1587
+
1588
+ // If no node from the previous syntax tree could be reused, then try to
1589
+ // reuse the token previously returned by the lexer.
1590
+ if (!lookahead.ptr) {
1591
+ did_reuse = false;
1592
+ lookahead = ts_parser__get_cached_token(
1593
+ self, state, position, last_external_token, &table_entry
1594
+ );
1595
+ }
1596
+
1597
+ bool needs_lex = !lookahead.ptr;
1598
+ for (;;) {
1599
+ // Otherwise, re-run the lexer.
1600
+ if (needs_lex) {
1601
+ needs_lex = false;
1602
+ lookahead = ts_parser__lex(self, version, state);
1603
+ if (self->has_scanner_error) return false;
1604
+
1605
+ if (lookahead.ptr) {
1606
+ ts_parser__set_cached_token(self, position, last_external_token, lookahead);
1607
+ ts_language_table_entry(self->language, state, ts_subtree_symbol(lookahead), &table_entry);
1608
+ }
1609
+
1610
+ // When parsing a non-terminal extra, a null lookahead indicates the
1611
+ // end of the rule. The reduction is stored in the EOF table entry.
1612
+ // After the reduction, the lexer needs to be run again.
1613
+ else {
1614
+ ts_language_table_entry(self->language, state, ts_builtin_sym_end, &table_entry);
1615
+ }
1616
+ }
1617
+
1618
+ // If a cancellation flag, timeout, or progress callback was provided, then check every
1619
+ // time a fixed number of parse actions has been processed.
1620
+ if (!ts_parser__check_progress(self, &lookahead, &position, 1)) {
1621
+ return false;
1622
+ }
1623
+
1624
+ // Process each parse action for the current lookahead token in
1625
+ // the current state. If there are multiple actions, then this is
1626
+ // an ambiguous state. REDUCE actions always create a new stack
1627
+ // version, whereas SHIFT actions update the existing stack version
1628
+ // and terminate this loop.
1629
+ bool did_reduce = false;
1630
+ StackVersion last_reduction_version = STACK_VERSION_NONE;
1631
+ for (uint32_t i = 0; i < table_entry.action_count; i++) {
1632
+ TSParseAction action = table_entry.actions[i];
1633
+
1634
+ switch (action.type) {
1635
+ case TSParseActionTypeShift: {
1636
+ if (action.shift.repetition) break;
1637
+ TSStateId next_state;
1638
+ if (action.shift.extra) {
1639
+ next_state = state;
1640
+ LOG("shift_extra");
1641
+ } else {
1642
+ next_state = action.shift.state;
1643
+ LOG("shift state:%u", next_state);
1644
+ }
1645
+
1646
+ if (ts_subtree_child_count(lookahead) > 0) {
1647
+ ts_parser__breakdown_lookahead(self, &lookahead, state, &self->reusable_node);
1648
+ next_state = ts_language_next_state(self->language, state, ts_subtree_symbol(lookahead));
1649
+ }
1650
+
1651
+ ts_parser__shift(self, version, next_state, lookahead, action.shift.extra);
1652
+ if (did_reuse) reusable_node_advance(&self->reusable_node);
1653
+ return true;
1654
+ }
1655
+
1656
+ case TSParseActionTypeReduce: {
1657
+ bool is_fragile = table_entry.action_count > 1;
1658
+ bool end_of_non_terminal_extra = lookahead.ptr == NULL;
1659
+ LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.reduce.symbol), action.reduce.child_count);
1660
+ StackVersion reduction_version = ts_parser__reduce(
1661
+ self, version, action.reduce.symbol, action.reduce.child_count,
1662
+ action.reduce.dynamic_precedence, action.reduce.production_id,
1663
+ is_fragile, end_of_non_terminal_extra
1664
+ );
1665
+ did_reduce = true;
1666
+ if (reduction_version != STACK_VERSION_NONE) {
1667
+ last_reduction_version = reduction_version;
1668
+ }
1669
+ break;
1670
+ }
1671
+
1672
+ case TSParseActionTypeAccept: {
1673
+ LOG("accept");
1674
+ ts_parser__accept(self, version, lookahead);
1675
+ return true;
1676
+ }
1677
+
1678
+ case TSParseActionTypeRecover: {
1679
+ if (ts_subtree_child_count(lookahead) > 0) {
1680
+ ts_parser__breakdown_lookahead(self, &lookahead, ERROR_STATE, &self->reusable_node);
1681
+ }
1682
+
1683
+ ts_parser__recover(self, version, lookahead);
1684
+ if (did_reuse) reusable_node_advance(&self->reusable_node);
1685
+ return true;
1686
+ }
1687
+ }
1688
+ }
1689
+
1690
+ // If a reduction was performed, then replace the current stack version
1691
+ // with one of the stack versions created by a reduction, and continue
1692
+ // processing this version of the stack with the same lookahead symbol.
1693
+ if (last_reduction_version != STACK_VERSION_NONE) {
1694
+ ts_stack_renumber_version(self->stack, last_reduction_version, version);
1695
+ LOG_STACK();
1696
+ state = ts_stack_state(self->stack, version);
1697
+
1698
+ // At the end of a non-terminal extra rule, the lexer will return a
1699
+ // null subtree, because the parser needs to perform a fixed reduction
1700
+ // regardless of the lookahead node. After performing that reduction,
1701
+ // (and completing the non-terminal extra rule) run the lexer again based
1702
+ // on the current parse state.
1703
+ if (!lookahead.ptr) {
1704
+ needs_lex = true;
1705
+ } else {
1706
+ ts_language_table_entry(
1707
+ self->language,
1708
+ state,
1709
+ ts_subtree_leaf_symbol(lookahead),
1710
+ &table_entry
1711
+ );
1712
+ }
1713
+
1714
+ continue;
1715
+ }
1716
+
1717
+ // A reduction was performed, but was merged into an existing stack version.
1718
+ // This version can be discarded.
1719
+ if (did_reduce) {
1720
+ if (lookahead.ptr) {
1721
+ ts_subtree_release(&self->tree_pool, lookahead);
1722
+ }
1723
+ ts_stack_halt(self->stack, version);
1724
+ return true;
1725
+ }
1726
+
1727
+ // If the current lookahead token is a keyword that is not valid, but the
1728
+ // default word token *is* valid, then treat the lookahead token as the word
1729
+ // token instead.
1730
+ if (
1731
+ ts_subtree_is_keyword(lookahead) &&
1732
+ ts_subtree_symbol(lookahead) != self->language->keyword_capture_token &&
1733
+ !ts_language_is_reserved_word(self->language, state, ts_subtree_symbol(lookahead))
1734
+ ) {
1735
+ ts_language_table_entry(
1736
+ self->language,
1737
+ state,
1738
+ self->language->keyword_capture_token,
1739
+ &table_entry
1740
+ );
1741
+ if (table_entry.action_count > 0) {
1742
+ LOG(
1743
+ "switch from_keyword:%s, to_word_token:%s",
1744
+ TREE_NAME(lookahead),
1745
+ SYM_NAME(self->language->keyword_capture_token)
1746
+ );
1747
+
1748
+ MutableSubtree mutable_lookahead = ts_subtree_make_mut(&self->tree_pool, lookahead);
1749
+ ts_subtree_set_symbol(&mutable_lookahead, self->language->keyword_capture_token, self->language);
1750
+ lookahead = ts_subtree_from_mut(mutable_lookahead);
1751
+ continue;
1752
+ }
1753
+ }
1754
+
1755
+ // If the current lookahead token is not valid and the previous subtree on
1756
+ // the stack was reused from an old tree, then it wasn't actually valid to
1757
+ // reuse that previous subtree. Remove it from the stack, and in its place,
1758
+ // push each of its children. Then try again to process the current lookahead.
1759
+ if (ts_parser__breakdown_top_of_stack(self, version)) {
1760
+ state = ts_stack_state(self->stack, version);
1761
+ ts_subtree_release(&self->tree_pool, lookahead);
1762
+ needs_lex = true;
1763
+ continue;
1764
+ }
1765
+
1766
+ // Otherwise, there is definitely an error in this version of the parse stack.
1767
+ // Mark this version as paused and continue processing any other stack
1768
+ // versions that exist. If some other version advances successfully, then
1769
+ // this version can simply be removed. But if all versions end up paused,
1770
+ // then error recovery is needed.
1771
+ LOG("detect_error lookahead:%s", TREE_NAME(lookahead));
1772
+ ts_stack_pause(self->stack, version, lookahead);
1773
+ return true;
1774
+ }
1775
+ }
1776
+
1777
+ static unsigned ts_parser__condense_stack(TSParser *self) {
1778
+ bool made_changes = false;
1779
+ unsigned min_error_cost = UINT_MAX;
1780
+ for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
1781
+ // Prune any versions that have been marked for removal.
1782
+ if (ts_stack_is_halted(self->stack, i)) {
1783
+ ts_stack_remove_version(self->stack, i);
1784
+ i--;
1785
+ continue;
1786
+ }
1787
+
1788
+ // Keep track of the minimum error cost of any stack version so
1789
+ // that it can be returned.
1790
+ ErrorStatus status_i = ts_parser__version_status(self, i);
1791
+ if (!status_i.is_in_error && status_i.cost < min_error_cost) {
1792
+ min_error_cost = status_i.cost;
1793
+ }
1794
+
1795
+ // Examine each pair of stack versions, removing any versions that
1796
+ // are clearly worse than another version. Ensure that the versions
1797
+ // are ordered from most promising to least promising.
1798
+ for (StackVersion j = 0; j < i; j++) {
1799
+ ErrorStatus status_j = ts_parser__version_status(self, j);
1800
+
1801
+ switch (ts_parser__compare_versions(self, status_j, status_i)) {
1802
+ case ErrorComparisonTakeLeft:
1803
+ made_changes = true;
1804
+ ts_stack_remove_version(self->stack, i);
1805
+ i--;
1806
+ j = i;
1807
+ break;
1808
+
1809
+ case ErrorComparisonPreferLeft:
1810
+ case ErrorComparisonNone:
1811
+ if (ts_stack_merge(self->stack, j, i)) {
1812
+ made_changes = true;
1813
+ i--;
1814
+ j = i;
1815
+ }
1816
+ break;
1817
+
1818
+ case ErrorComparisonPreferRight:
1819
+ made_changes = true;
1820
+ if (ts_stack_merge(self->stack, j, i)) {
1821
+ i--;
1822
+ j = i;
1823
+ } else {
1824
+ ts_stack_swap_versions(self->stack, i, j);
1825
+ }
1826
+ break;
1827
+
1828
+ case ErrorComparisonTakeRight:
1829
+ made_changes = true;
1830
+ ts_stack_remove_version(self->stack, j);
1831
+ i--;
1832
+ j--;
1833
+ break;
1834
+ }
1835
+ }
1836
+ }
1837
+
1838
+ // Enforce a hard upper bound on the number of stack versions by
1839
+ // discarding the least promising versions.
1840
+ while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
1841
+ ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
1842
+ made_changes = true;
1843
+ }
1844
+
1845
+ // If the best-performing stack version is currently paused, or all
1846
+ // versions are paused, then resume the best paused version and begin
1847
+ // the error recovery process. Otherwise, remove the paused versions.
1848
+ if (ts_stack_version_count(self->stack) > 0) {
1849
+ bool has_unpaused_version = false;
1850
+ for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
1851
+ if (ts_stack_is_paused(self->stack, i)) {
1852
+ if (!has_unpaused_version && self->accept_count < MAX_VERSION_COUNT) {
1853
+ LOG("resume version:%u", i);
1854
+ min_error_cost = ts_stack_error_cost(self->stack, i);
1855
+ Subtree lookahead = ts_stack_resume(self->stack, i);
1856
+ ts_parser__handle_error(self, i, lookahead);
1857
+ has_unpaused_version = true;
1858
+ } else {
1859
+ ts_stack_remove_version(self->stack, i);
1860
+ made_changes = true;
1861
+ i--;
1862
+ n--;
1863
+ }
1864
+ } else {
1865
+ has_unpaused_version = true;
1866
+ }
1867
+ }
1868
+ }
1869
+
1870
+ if (made_changes) {
1871
+ LOG("condense");
1872
+ LOG_STACK();
1873
+ }
1874
+
1875
+ return min_error_cost;
1876
+ }
1877
+
1878
+ static bool ts_parser__balance_subtree(TSParser *self) {
1879
+ Subtree finished_tree = self->finished_tree;
1880
+
1881
+ // If we haven't canceled balancing in progress before, then we want to clear the tree stack and
1882
+ // push the initial finished tree onto it. Otherwise, if we're resuming balancing after a
1883
+ // cancellation, we don't want to clear the tree stack.
1884
+ if (!self->canceled_balancing) {
1885
+ array_clear(&self->tree_pool.tree_stack);
1886
+ if (ts_subtree_child_count(finished_tree) > 0 && finished_tree.ptr->ref_count == 1) {
1887
+ array_push(&self->tree_pool.tree_stack, ts_subtree_to_mut_unsafe(finished_tree));
1888
+ }
1889
+ }
1890
+
1891
+ while (self->tree_pool.tree_stack.size > 0) {
1892
+ if (!ts_parser__check_progress(self, NULL, NULL, 1)) {
1893
+ return false;
1894
+ }
1895
+
1896
+ MutableSubtree tree = *array_get(&self->tree_pool.tree_stack,
1897
+ self->tree_pool.tree_stack.size - 1
1898
+ );
1899
+
1900
+ if (tree.ptr->repeat_depth > 0) {
1901
+ Subtree child1 = ts_subtree_children(tree)[0];
1902
+ Subtree child2 = ts_subtree_children(tree)[tree.ptr->child_count - 1];
1903
+ long repeat_delta = (long)ts_subtree_repeat_depth(child1) - (long)ts_subtree_repeat_depth(child2);
1904
+ if (repeat_delta > 0) {
1905
+ unsigned n = (unsigned)repeat_delta;
1906
+
1907
+ for (unsigned i = n / 2; i > 0; i /= 2) {
1908
+ ts_subtree_compress(tree, i, self->language, &self->tree_pool.tree_stack);
1909
+ n -= i;
1910
+
1911
+ // We scale the operation count increment in `ts_parser__check_progress` proportionately to the compression
1912
+ // size since larger values of i take longer to process. Shifting by 4 empirically provides good check
1913
+ // intervals (e.g. 193 operations when i=3100) to prevent blocking during large compressions.
1914
+ uint8_t operations = i >> 4 > 0 ? i >> 4 : 1;
1915
+ if (!ts_parser__check_progress(self, NULL, NULL, operations)) {
1916
+ return false;
1917
+ }
1918
+ }
1919
+ }
1920
+ }
1921
+
1922
+ (void)array_pop(&self->tree_pool.tree_stack);
1923
+
1924
+ for (uint32_t i = 0; i < tree.ptr->child_count; i++) {
1925
+ Subtree child = ts_subtree_children(tree)[i];
1926
+ if (ts_subtree_child_count(child) > 0 && child.ptr->ref_count == 1) {
1927
+ array_push(&self->tree_pool.tree_stack, ts_subtree_to_mut_unsafe(child));
1928
+ }
1929
+ }
1930
+ }
1931
+
1932
+ return true;
1933
+ }
1934
+
1935
+ static bool ts_parser_has_outstanding_parse(TSParser *self) {
1936
+ return (
1937
+ self->canceled_balancing ||
1938
+ self->external_scanner_payload ||
1939
+ ts_stack_state(self->stack, 0) != 1 ||
1940
+ ts_stack_node_count_since_error(self->stack, 0) != 0
1941
+ );
1942
+ }
1943
+
1944
+ // Parser - Public
1945
+
1946
+ TSParser *ts_parser_new(void) {
1947
+ TSParser *self = ts_calloc(1, sizeof(TSParser));
1948
+ ts_lexer_init(&self->lexer);
1949
+ array_init(&self->reduce_actions);
1950
+ array_reserve(&self->reduce_actions, 4);
1951
+ self->tree_pool = ts_subtree_pool_new(32);
1952
+ self->stack = ts_stack_new(&self->tree_pool);
1953
+ self->finished_tree = NULL_SUBTREE;
1954
+ self->reusable_node = reusable_node_new();
1955
+ self->dot_graph_file = NULL;
1956
+ self->cancellation_flag = NULL;
1957
+ self->timeout_duration = 0;
1958
+ self->language = NULL;
1959
+ self->has_scanner_error = false;
1960
+ self->has_error = false;
1961
+ self->canceled_balancing = false;
1962
+ self->external_scanner_payload = NULL;
1963
+ self->end_clock = clock_null();
1964
+ self->operation_count = 0;
1965
+ self->old_tree = NULL_SUBTREE;
1966
+ self->included_range_differences = (TSRangeArray) array_new();
1967
+ self->included_range_difference_index = 0;
1968
+ ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1969
+ return self;
1970
+ }
1971
+
1972
+ void ts_parser_delete(TSParser *self) {
1973
+ if (!self) return;
1974
+
1975
+ ts_parser_set_language(self, NULL);
1976
+ ts_stack_delete(self->stack);
1977
+ if (self->reduce_actions.contents) {
1978
+ array_delete(&self->reduce_actions);
1979
+ }
1980
+ if (self->included_range_differences.contents) {
1981
+ array_delete(&self->included_range_differences);
1982
+ }
1983
+ if (self->old_tree.ptr) {
1984
+ ts_subtree_release(&self->tree_pool, self->old_tree);
1985
+ self->old_tree = NULL_SUBTREE;
1986
+ }
1987
+ ts_wasm_store_delete(self->wasm_store);
1988
+ ts_lexer_delete(&self->lexer);
1989
+ ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
1990
+ ts_subtree_pool_delete(&self->tree_pool);
1991
+ reusable_node_delete(&self->reusable_node);
1992
+ array_delete(&self->trailing_extras);
1993
+ array_delete(&self->trailing_extras2);
1994
+ array_delete(&self->scratch_trees);
1995
+ ts_free(self);
1996
+ }
1997
+
1998
+ const TSLanguage *ts_parser_language(const TSParser *self) {
1999
+ return self->language;
2000
+ }
2001
+
2002
+ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
2003
+ ts_parser_reset(self);
2004
+ ts_language_delete(self->language);
2005
+ self->language = NULL;
2006
+
2007
+ if (language) {
2008
+ if (
2009
+ language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
2010
+ language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
2011
+ ) return false;
2012
+
2013
+ if (ts_language_is_wasm(language)) {
2014
+ if (
2015
+ !self->wasm_store ||
2016
+ !ts_wasm_store_start(self->wasm_store, &self->lexer.data, language)
2017
+ ) return false;
2018
+ }
2019
+ }
2020
+
2021
+ self->language = ts_language_copy(language);
2022
+ return true;
2023
+ }
2024
+
2025
+ TSLogger ts_parser_logger(const TSParser *self) {
2026
+ return self->lexer.logger;
2027
+ }
2028
+
2029
+ void ts_parser_set_logger(TSParser *self, TSLogger logger) {
2030
+ self->lexer.logger = logger;
2031
+ }
2032
+
2033
+ void ts_parser_print_dot_graphs(TSParser *self, int fd) {
2034
+ if (self->dot_graph_file) {
2035
+ fclose(self->dot_graph_file);
2036
+ }
2037
+
2038
+ if (fd >= 0) {
2039
+ #ifdef _WIN32
2040
+ self->dot_graph_file = _fdopen(fd, "a");
2041
+ #else
2042
+ self->dot_graph_file = fdopen(fd, "a");
2043
+ #endif
2044
+ } else {
2045
+ self->dot_graph_file = NULL;
2046
+ }
2047
+ }
2048
+
2049
+ const size_t *ts_parser_cancellation_flag(const TSParser *self) {
2050
+ return (const size_t *)self->cancellation_flag;
2051
+ }
2052
+
2053
+ void ts_parser_set_cancellation_flag(TSParser *self, const size_t *flag) {
2054
+ self->cancellation_flag = (const volatile size_t *)flag;
2055
+ }
2056
+
2057
+ uint64_t ts_parser_timeout_micros(const TSParser *self) {
2058
+ return duration_to_micros(self->timeout_duration);
2059
+ }
2060
+
2061
+ void ts_parser_set_timeout_micros(TSParser *self, uint64_t timeout_micros) {
2062
+ self->timeout_duration = duration_from_micros(timeout_micros);
2063
+ }
2064
+
2065
+ bool ts_parser_set_included_ranges(
2066
+ TSParser *self,
2067
+ const TSRange *ranges,
2068
+ uint32_t count
2069
+ ) {
2070
+ return ts_lexer_set_included_ranges(&self->lexer, ranges, count);
2071
+ }
2072
+
2073
+ const TSRange *ts_parser_included_ranges(const TSParser *self, uint32_t *count) {
2074
+ return ts_lexer_included_ranges(&self->lexer, count);
2075
+ }
2076
+
2077
+ void ts_parser_reset(TSParser *self) {
2078
+ ts_parser__external_scanner_destroy(self);
2079
+ if (self->wasm_store) {
2080
+ ts_wasm_store_reset(self->wasm_store);
2081
+ }
2082
+
2083
+ if (self->old_tree.ptr) {
2084
+ ts_subtree_release(&self->tree_pool, self->old_tree);
2085
+ self->old_tree = NULL_SUBTREE;
2086
+ }
2087
+
2088
+ reusable_node_clear(&self->reusable_node);
2089
+ ts_lexer_reset(&self->lexer, length_zero());
2090
+ ts_stack_clear(self->stack);
2091
+ ts_parser__set_cached_token(self, 0, NULL_SUBTREE, NULL_SUBTREE);
2092
+ if (self->finished_tree.ptr) {
2093
+ ts_subtree_release(&self->tree_pool, self->finished_tree);
2094
+ self->finished_tree = NULL_SUBTREE;
2095
+ }
2096
+ self->accept_count = 0;
2097
+ self->has_scanner_error = false;
2098
+ self->has_error = false;
2099
+ self->canceled_balancing = false;
2100
+ self->parse_options = (TSParseOptions) {0};
2101
+ self->parse_state = (TSParseState) {0};
2102
+ }
2103
+
2104
+ TSTree *ts_parser_parse(
2105
+ TSParser *self,
2106
+ const TSTree *old_tree,
2107
+ TSInput input
2108
+ ) {
2109
+ TSTree *result = NULL;
2110
+ if (!self->language || !input.read) return NULL;
2111
+
2112
+ if (ts_language_is_wasm(self->language)) {
2113
+ if (!self->wasm_store) return NULL;
2114
+ ts_wasm_store_start(self->wasm_store, &self->lexer.data, self->language);
2115
+ }
2116
+
2117
+ ts_lexer_set_input(&self->lexer, input);
2118
+ array_clear(&self->included_range_differences);
2119
+ self->included_range_difference_index = 0;
2120
+
2121
+ self->operation_count = 0;
2122
+ if (self->timeout_duration) {
2123
+ self->end_clock = clock_after(clock_now(), self->timeout_duration);
2124
+ } else {
2125
+ self->end_clock = clock_null();
2126
+ }
2127
+
2128
+ if (ts_parser_has_outstanding_parse(self)) {
2129
+ LOG("resume_parsing");
2130
+ if (self->canceled_balancing) goto balance;
2131
+ } else {
2132
+ ts_parser__external_scanner_create(self);
2133
+ if (self->has_scanner_error) goto exit;
2134
+
2135
+ if (old_tree) {
2136
+ ts_subtree_retain(old_tree->root);
2137
+ self->old_tree = old_tree->root;
2138
+ ts_range_array_get_changed_ranges(
2139
+ old_tree->included_ranges, old_tree->included_range_count,
2140
+ self->lexer.included_ranges, self->lexer.included_range_count,
2141
+ &self->included_range_differences
2142
+ );
2143
+ reusable_node_reset(&self->reusable_node, old_tree->root);
2144
+ LOG("parse_after_edit");
2145
+ LOG_TREE(self->old_tree);
2146
+ for (unsigned i = 0; i < self->included_range_differences.size; i++) {
2147
+ TSRange *range = array_get(&self->included_range_differences, i);
2148
+ LOG("different_included_range %u - %u", range->start_byte, range->end_byte);
2149
+ }
2150
+ } else {
2151
+ reusable_node_clear(&self->reusable_node);
2152
+ LOG("new_parse");
2153
+ }
2154
+ }
2155
+
2156
+ uint32_t position = 0, last_position = 0, version_count = 0;
2157
+ do {
2158
+ for (
2159
+ StackVersion version = 0;
2160
+ version_count = ts_stack_version_count(self->stack),
2161
+ version < version_count;
2162
+ version++
2163
+ ) {
2164
+ bool allow_node_reuse = version_count == 1;
2165
+ while (ts_stack_is_active(self->stack, version)) {
2166
+ LOG(
2167
+ "process version:%u, version_count:%u, state:%d, row:%u, col:%u",
2168
+ version,
2169
+ ts_stack_version_count(self->stack),
2170
+ ts_stack_state(self->stack, version),
2171
+ ts_stack_position(self->stack, version).extent.row,
2172
+ ts_stack_position(self->stack, version).extent.column
2173
+ );
2174
+
2175
+ if (!ts_parser__advance(self, version, allow_node_reuse)) {
2176
+ if (self->has_scanner_error) goto exit;
2177
+ return NULL;
2178
+ }
2179
+
2180
+ LOG_STACK();
2181
+
2182
+ position = ts_stack_position(self->stack, version).bytes;
2183
+ if (position > last_position || (version > 0 && position == last_position)) {
2184
+ last_position = position;
2185
+ break;
2186
+ }
2187
+ }
2188
+ }
2189
+
2190
+ // After advancing each version of the stack, re-sort the versions by their cost,
2191
+ // removing any versions that are no longer worth pursuing.
2192
+ unsigned min_error_cost = ts_parser__condense_stack(self);
2193
+
2194
+ // If there's already a finished parse tree that's better than any in-progress version,
2195
+ // then terminate parsing. Clear the parse stack to remove any extra references to subtrees
2196
+ // within the finished tree, ensuring that these subtrees can be safely mutated in-place
2197
+ // for rebalancing.
2198
+ if (self->finished_tree.ptr && ts_subtree_error_cost(self->finished_tree) < min_error_cost) {
2199
+ ts_stack_clear(self->stack);
2200
+ break;
2201
+ }
2202
+
2203
+ while (self->included_range_difference_index < self->included_range_differences.size) {
2204
+ TSRange *range = array_get(&self->included_range_differences, self->included_range_difference_index);
2205
+ if (range->end_byte <= position) {
2206
+ self->included_range_difference_index++;
2207
+ } else {
2208
+ break;
2209
+ }
2210
+ }
2211
+ } while (version_count != 0);
2212
+
2213
+ balance:
2214
+ ts_assert(self->finished_tree.ptr);
2215
+ if (!ts_parser__balance_subtree(self)) {
2216
+ self->canceled_balancing = true;
2217
+ return false;
2218
+ }
2219
+ self->canceled_balancing = false;
2220
+ LOG("done");
2221
+ LOG_TREE(self->finished_tree);
2222
+
2223
+ result = ts_tree_new(
2224
+ self->finished_tree,
2225
+ self->language,
2226
+ self->lexer.included_ranges,
2227
+ self->lexer.included_range_count
2228
+ );
2229
+ self->finished_tree = NULL_SUBTREE;
2230
+
2231
+ exit:
2232
+ ts_parser_reset(self);
2233
+ return result;
2234
+ }
2235
+
2236
+ TSTree *ts_parser_parse_with_options(
2237
+ TSParser *self,
2238
+ const TSTree *old_tree,
2239
+ TSInput input,
2240
+ TSParseOptions parse_options
2241
+ ) {
2242
+ self->parse_options = parse_options;
2243
+ self->parse_state.payload = parse_options.payload;
2244
+ TSTree *result = ts_parser_parse(self, old_tree, input);
2245
+ // Reset parser options before further parse calls.
2246
+ self->parse_options = (TSParseOptions) {0};
2247
+ return result;
2248
+ }
2249
+
2250
+ TSTree *ts_parser_parse_string(
2251
+ TSParser *self,
2252
+ const TSTree *old_tree,
2253
+ const char *string,
2254
+ uint32_t length
2255
+ ) {
2256
+ return ts_parser_parse_string_encoding(self, old_tree, string, length, TSInputEncodingUTF8);
2257
+ }
2258
+
2259
+ TSTree *ts_parser_parse_string_encoding(
2260
+ TSParser *self,
2261
+ const TSTree *old_tree,
2262
+ const char *string,
2263
+ uint32_t length,
2264
+ TSInputEncoding encoding
2265
+ ) {
2266
+ TSStringInput input = {string, length};
2267
+ return ts_parser_parse(self, old_tree, (TSInput) {
2268
+ &input,
2269
+ ts_string_input_read,
2270
+ encoding,
2271
+ NULL,
2272
+ });
2273
+ }
2274
+
2275
+ void ts_parser_set_wasm_store(TSParser *self, TSWasmStore *store) {
2276
+ if (self->language && ts_language_is_wasm(self->language)) {
2277
+ // Copy the assigned language into the new store.
2278
+ const TSLanguage *copy = ts_language_copy(self->language);
2279
+ ts_parser_set_language(self, copy);
2280
+ ts_language_delete(copy);
2281
+ }
2282
+
2283
+ ts_wasm_store_delete(self->wasm_store);
2284
+ self->wasm_store = store;
2285
+ }
2286
+
2287
+ TSWasmStore *ts_parser_take_wasm_store(TSParser *self) {
2288
+ if (self->language && ts_language_is_wasm(self->language)) {
2289
+ ts_parser_set_language(self, NULL);
2290
+ }
2291
+
2292
+ TSWasmStore *result = self->wasm_store;
2293
+ self->wasm_store = NULL;
2294
+ return result;
2295
+ }
2296
+
2297
+ #undef LOG