@prosdevlab/dev-agent 0.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +138 -0
  3. package/dist/cli.js +74721 -0
  4. package/dist/cli.js.map +1 -0
  5. package/dist/mcp.js +61445 -0
  6. package/dist/mcp.js.map +1 -0
  7. package/dist/vendor/web-tree-sitter/lib/alloc.c +48 -0
  8. package/dist/vendor/web-tree-sitter/lib/alloc.h +41 -0
  9. package/dist/vendor/web-tree-sitter/lib/array.h +291 -0
  10. package/dist/vendor/web-tree-sitter/lib/atomic.h +68 -0
  11. package/dist/vendor/web-tree-sitter/lib/clock.h +146 -0
  12. package/dist/vendor/web-tree-sitter/lib/error_costs.h +11 -0
  13. package/dist/vendor/web-tree-sitter/lib/get_changed_ranges.c +523 -0
  14. package/dist/vendor/web-tree-sitter/lib/get_changed_ranges.h +36 -0
  15. package/dist/vendor/web-tree-sitter/lib/host.h +21 -0
  16. package/dist/vendor/web-tree-sitter/lib/language.c +293 -0
  17. package/dist/vendor/web-tree-sitter/lib/language.h +293 -0
  18. package/dist/vendor/web-tree-sitter/lib/length.h +52 -0
  19. package/dist/vendor/web-tree-sitter/lib/lexer.c +483 -0
  20. package/dist/vendor/web-tree-sitter/lib/lexer.h +54 -0
  21. package/dist/vendor/web-tree-sitter/lib/lib.c +12 -0
  22. package/dist/vendor/web-tree-sitter/lib/node.c +875 -0
  23. package/dist/vendor/web-tree-sitter/lib/parser.c +2297 -0
  24. package/dist/vendor/web-tree-sitter/lib/parser.h +286 -0
  25. package/dist/vendor/web-tree-sitter/lib/point.h +48 -0
  26. package/dist/vendor/web-tree-sitter/lib/query.c +4347 -0
  27. package/dist/vendor/web-tree-sitter/lib/reduce_action.h +34 -0
  28. package/dist/vendor/web-tree-sitter/lib/reusable_node.h +95 -0
  29. package/dist/vendor/web-tree-sitter/lib/stack.c +912 -0
  30. package/dist/vendor/web-tree-sitter/lib/stack.h +133 -0
  31. package/dist/vendor/web-tree-sitter/lib/subtree.c +1034 -0
  32. package/dist/vendor/web-tree-sitter/lib/subtree.h +399 -0
  33. package/dist/vendor/web-tree-sitter/lib/tree-sitter.c +987 -0
  34. package/dist/vendor/web-tree-sitter/lib/tree-sitter.cjs +2988 -0
  35. package/dist/vendor/web-tree-sitter/lib/tree-sitter.wasm +0 -0
  36. package/dist/vendor/web-tree-sitter/lib/tree-sitter.wasm.map +1 -0
  37. package/dist/vendor/web-tree-sitter/lib/tree.c +170 -0
  38. package/dist/vendor/web-tree-sitter/lib/tree.h +31 -0
  39. package/dist/vendor/web-tree-sitter/lib/tree_cursor.c +716 -0
  40. package/dist/vendor/web-tree-sitter/lib/tree_cursor.h +48 -0
  41. package/dist/vendor/web-tree-sitter/lib/ts_assert.h +11 -0
  42. package/dist/vendor/web-tree-sitter/lib/unicode.h +75 -0
  43. package/dist/vendor/web-tree-sitter/lib/wasm_store.c +1937 -0
  44. package/dist/vendor/web-tree-sitter/lib/wasm_store.h +31 -0
  45. package/dist/vendor/web-tree-sitter/package.json +98 -0
  46. package/dist/vendor/web-tree-sitter/tree-sitter.cjs +4031 -0
  47. package/dist/vendor/web-tree-sitter/tree-sitter.wasm +0 -0
  48. package/dist/wasm/tree-sitter-go.wasm +0 -0
  49. package/dist/wasm/tree-sitter.wasm +0 -0
  50. package/package.json +65 -0
@@ -0,0 +1,4347 @@
1
+ /*
2
+ * On NetBSD, defining standard requirements like this removes symbols
3
+ * from the namespace; however, we need non-standard symbols for
4
+ * endian.h.
5
+ */
6
+ #if defined(__NetBSD__) && defined(_POSIX_C_SOURCE)
7
+ #undef _POSIX_C_SOURCE
8
+ #endif
9
+
10
+ #include "tree_sitter/api.h"
11
+ #include "./alloc.h"
12
+ #include "./array.h"
13
+ #include "./clock.h"
14
+ #include "./language.h"
15
+ #include "./point.h"
16
+ #include "./tree_cursor.h"
17
+ #include "./unicode.h"
18
+ #include <wctype.h>
19
+
20
+ // #define DEBUG_ANALYZE_QUERY
21
+ // #define DEBUG_EXECUTE_QUERY
22
+
23
+ #define MAX_STEP_CAPTURE_COUNT 3
24
+ #define MAX_NEGATED_FIELD_COUNT 8
25
+ #define MAX_STATE_PREDECESSOR_COUNT 256
26
+ #define MAX_ANALYSIS_STATE_DEPTH 8
27
+ #define MAX_ANALYSIS_ITERATION_COUNT 256
28
+
29
+ /*
30
+ * Stream - A sequence of unicode characters derived from a UTF8 string.
31
+ * This struct is used in parsing queries from S-expressions.
32
+ */
33
+ typedef struct {
34
+ const char *input; // Current read position; stream_advance moves it forward by `next_size` bytes.
35
+ const char *start; // First byte of the string; stream_offset reports `input - start`.
36
+ const char *end; // One past the last byte (`string + length` in stream_new).
37
+ int32_t next; // Code point decoded at `input`; set to '\0' once `input` reaches `end`.
38
+ uint8_t next_size; // Byte length reported by ts_decode_utf8 for `next`; 0 once `input` reaches `end`.
39
+ } Stream;
40
+
41
+ /*
42
+ * QueryStep - A step in the process of matching a query. Each node within
43
+ * a query S-expression corresponds to one of these steps. An entire pattern
44
+ * is represented as a sequence of these steps. The basic properties of a
45
+ * node are represented by these fields:
46
+ * - `symbol` - The grammar symbol to match. A zero value represents the
47
+ * wildcard symbol, '_'.
48
+ * - `field` - The field name to match. A zero value means that a field name
49
+ * was not specified.
50
+ * - `capture_ids` - An array of integers representing the names of captures
51
+ * associated with this node in the pattern, terminated by a `NONE` value.
52
+ * - `depth` - The depth where this node occurs in the pattern. The root node
53
+ * of the pattern has depth zero.
54
+ * - `negated_field_list_id` - An id representing a set of fields that must
55
+ * not be present on a node matching this step.
56
+ *
57
+ * Steps have some additional fields in order to handle the `.` (or "anchor") operator,
58
+ * which forbids additional child nodes:
59
+ * - `is_immediate` - Indicates that the node matching this step cannot be preceded
60
+ * by other sibling nodes that weren't specified in the pattern.
61
+ * - `is_last_child` - Indicates that the node matching this step cannot have any
62
+ * subsequent named siblings.
63
+ *
64
+ * For simple patterns, steps are matched in sequential order. But in order to
65
+ * handle alternative/repeated/optional sub-patterns, query steps are not always
66
+ * structured as a linear sequence; they sometimes need to split and merge. This
67
+ * is done using the following fields:
68
+ * - `alternative_index` - The index of a different query step that serves as
69
+ * an alternative to this step. A `NONE` value represents no alternative.
70
+ * When a query state reaches a step with an alternative index, the state
71
+ * is duplicated, with one copy remaining at the original step, and one copy
72
+ * moving to the alternative step. The alternative may have its own alternative
73
+ * step, so this splitting is an iterative process.
74
+ * - `is_dead_end` - Indicates that this state cannot be passed directly, and
75
+ * exists only in order to redirect to an alternative index, with no splitting.
76
+ * - `is_pass_through` - Indicates that this step has no matching logic of its own,
77
+ * and exists only to split a state. One copy of the state advances immediately
78
+ * to the next step, and one moves to the alternative step.
79
+ * - `alternative_is_immediate` - Indicates that this step's alternative step
80
+ * should be treated as if `is_immediate` is true.
81
+ *
82
+ * Steps also store some derived state that summarizes how they relate to other
83
+ * steps within the same pattern. This is used to optimize the matching process:
84
+ * - `contains_captures` - Indicates that this step or one of its child steps
85
+ * has a non-empty `capture_ids` list.
86
+ * - `parent_pattern_guaranteed` - Indicates that if this step is reached, then
87
+ * it and all of its subsequent sibling steps within the same parent pattern
88
+ * are guaranteed to match.
89
+ * - `root_pattern_guaranteed` - Similar to `parent_pattern_guaranteed`, but
90
+ * for the entire top-level pattern. When iterating through a query's
91
+ * captures using `ts_query_cursor_next_capture`, this field is used to
92
+ * detect that a capture can safely be returned from a match that has not
93
+ * even completed yet.
94
+ */
95
+ typedef struct {
96
+ TSSymbol symbol;
97
+ TSSymbol supertype_symbol; // NOTE(review): not covered by the comment above; presumably the supertype a match is restricted to — confirm.
98
+ TSFieldId field;
99
+ uint16_t capture_ids[MAX_STEP_CAPTURE_COUNT]; // Unused slots hold NONE (see query_step__new).
100
+ uint16_t depth;
101
+ uint16_t alternative_index; // NONE when the step has no alternative.
102
+ uint16_t negated_field_list_id;
103
+ bool is_named: 1; // NOTE(review): not covered by the comment above; presumably requires a named node — confirm.
104
+ bool is_immediate: 1;
105
+ bool is_last_child: 1;
106
+ bool is_pass_through: 1;
107
+ bool is_dead_end: 1;
108
+ bool alternative_is_immediate: 1;
109
+ bool contains_captures: 1;
110
+ bool root_pattern_guaranteed: 1;
111
+ bool parent_pattern_guaranteed: 1;
112
+ bool is_missing: 1; // NOTE(review): not covered by the comment above; presumably matches MISSING nodes — confirm.
113
+ } QueryStep;
114
+
115
+ /*
116
+ * Slice - A slice of an external array. Within a query, capture names,
117
+ * literal string values, and predicate step information are stored in three
118
+ * contiguous arrays. Individual captures, string values, and predicates are
119
+ * represented as slices of these three arrays.
120
+ */
121
+ typedef struct {
122
+ uint32_t offset; // Index of the slice's first element in the backing array.
123
+ uint32_t length; // Number of elements in the slice (for symbol names: excludes the trailing NUL).
124
+ } Slice;
125
+
126
+ /*
127
+ * SymbolTable - a two-way mapping of strings to ids.
128
+ */
129
+ typedef struct {
130
+ Array(char) characters; // All names stored back to back, each followed by a NUL byte (see symbol_table_insert_name).
131
+ Array(Slice) slices; // One slice per name; a name's id is its index in this array.
132
+ } SymbolTable;
133
+
134
+ /**
135
+ * CaptureQuantifiers - a data structure holding the quantifiers of pattern captures.
136
+ */
137
+ typedef Array(uint8_t) CaptureQuantifiers; // Indexed by capture id; each byte holds a TSQuantifier value.
138
+
139
+ /*
140
+ * PatternEntry - Information about the starting point for matching a particular
141
+ * pattern. These entries are stored in a 'pattern map' - a sorted array that
142
+ * makes it possible to efficiently lookup patterns based on the symbol for their
143
+ * first step. The entry consists of the following fields:
144
+ * - `pattern_index` - the index of the pattern within the query
145
+ * - `step_index` - the index of the pattern's first step in the shared `steps` array
146
+ * - `is_rooted` - whether or not the pattern has a single root node. This property
147
+ * affects decisions about whether or not to start the pattern for nodes outside
148
+ * of a QueryCursor's range restriction.
149
+ */
150
+ typedef struct {
151
+ uint16_t step_index; // Index of the pattern's first step in the shared `steps` array.
152
+ uint16_t pattern_index; // Index of the pattern within the query.
153
+ bool is_rooted; // True when the pattern has a single root node.
154
+ } PatternEntry;
155
+
156
+ typedef struct {
157
+ Slice steps; // NOTE(review): presumably the pattern's range within the query's `steps` array — confirm.
158
+ Slice predicate_steps; // NOTE(review): presumably the pattern's range within the query's `predicate_steps` array — confirm.
159
+ uint32_t start_byte; // NOTE(review): presumably where the pattern starts in the query source — confirm.
160
+ uint32_t end_byte; // NOTE(review): presumably where the pattern ends in the query source — confirm.
161
+ bool is_non_local; // NOTE(review): meaning is not visible in this part of the file — confirm against its users.
162
+ } QueryPattern;
163
+
164
+ typedef struct {
165
+ uint32_t byte_offset; // NOTE(review): presumably a byte position in the query source text — confirm.
166
+ uint16_t step_index; // NOTE(review): presumably the index of the step that `byte_offset` belongs to — confirm.
167
+ } StepOffset;
168
+
169
+ /*
170
+ * QueryState - The state of an in-progress match of a particular pattern
171
+ * in a query. While executing, a `TSQueryCursor` must keep track of a number
172
+ * of possible in-progress matches. Each of those possible matches is
173
+ * represented as one of these states. Fields:
174
+ * - `id` - A numeric id that is exposed to the public API. This allows the
175
+ * caller to remove a given match, preventing any more of its captures
176
+ * from being returned.
177
+ * - `start_depth` - The depth in the tree where the first step of the state's
178
+ * pattern was matched.
179
+ * - `pattern_index` - The pattern that the state is matching.
180
+ * - `consumed_capture_count` - The number of captures from this match that
181
+ * have already been returned.
182
+ * - `capture_list_id` - A numeric id that can be used to retrieve the state's
183
+ * list of captures from the `CaptureListPool`.
184
+ * - `seeking_immediate_match` - A flag that indicates that the state's next
185
+ * step must be matched by the very next sibling. This is used when
186
+ * processing repetitions, or when processing a wildcard node followed by
187
+ * an anchor.
188
+ * - `has_in_progress_alternatives` - A flag that indicates that there are
189
+ * other states that have the same captures as this state, but are at
190
+ * different steps in their pattern. This means that in order to obey the
191
+ * 'longest-match' rule, this state should not be returned as a match until
192
+ * it is clear that there can be no other alternative match with more captures.
193
+ */
194
+ typedef struct {
195
+ uint32_t id;
196
+ uint32_t capture_list_id;
197
+ uint16_t start_depth;
198
+ uint16_t step_index; // NOTE(review): not covered by the comment above; presumably the step this state is currently at — confirm.
199
+ uint16_t pattern_index;
200
+ uint16_t consumed_capture_count: 12; // 12-bit field, so the count cannot exceed 4095.
201
+ bool seeking_immediate_match: 1;
202
+ bool has_in_progress_alternatives: 1;
203
+ bool dead: 1; // NOTE(review): not covered by the comment above — confirm meaning against the cursor code.
204
+ bool needs_parent: 1; // NOTE(review): not covered by the comment above — confirm meaning against the cursor code.
205
+ } QueryState;
206
+
207
+ typedef Array(TSQueryCapture) CaptureList; // A growable list of captures; instances are owned by a CaptureListPool.
208
+
209
+ /*
210
+ * CaptureListPool - A collection of *lists* of captures. Each query state needs
211
+ * to maintain its own list of captures. To avoid repeated allocations, this struct
212
+ * maintains a fixed set of capture lists, and keeps track of which ones are
213
+ * currently in use by a query state.
214
+ */
215
+ typedef struct {
216
+ Array(CaptureList) list; // The pooled lists; an entry's index is the id returned by capture_list_pool_acquire.
217
+ CaptureList empty_list; // Returned by capture_list_pool_get when the requested id is out of range.
218
+ // The maximum number of capture lists that we are allowed to allocate. We
219
+ // never allow `list` to allocate more entries than this, dropping pending
220
+ // matches if needed to stay under the limit.
221
+ uint32_t max_capture_list_count;
222
+ // The number of capture lists allocated in `list` that are not currently in
223
+ // use. We reuse those existing-but-unused capture lists before trying to
224
+ // allocate any new ones. We use an invalid value (UINT32_MAX) for a capture
225
+ // list's length to indicate that it's not in use.
226
+ uint32_t free_capture_list_count;
227
+ } CaptureListPool;
228
+
229
+ /*
230
+ * AnalysisState - The state needed for walking the parse table when analyzing
231
+ * a query pattern, to determine at which steps the pattern might fail to match.
232
+ */
233
+ typedef struct {
234
+ TSStateId parse_state; // Parse-table state for this stack level.
235
+ TSSymbol parent_symbol; // NOTE(review): presumably the symbol of the node being built at this level — confirm.
236
+ uint16_t child_index; // NOTE(review): presumably the position reached among that node's children — confirm.
237
+ TSFieldId field_id: 15; // Limited to 15 bits; the remaining bit is used by `done` below.
238
+ bool done: 1; // NOTE(review): presumably set once this level needs no further children — confirm.
239
+ } AnalysisStateEntry;
240
+
241
+ typedef struct {
242
+ AnalysisStateEntry stack[MAX_ANALYSIS_STATE_DEPTH]; // Fixed-capacity stack of entries.
243
+ uint16_t depth; // NOTE(review): presumably the number of `stack` entries in use — confirm.
244
+ uint16_t step_index; // NOTE(review): presumably the query step being analyzed — confirm.
245
+ TSSymbol root_symbol; // NOTE(review): meaning is not visible in this part of the file — confirm.
246
+ } AnalysisState;
247
+
248
+ typedef Array(AnalysisState *) AnalysisStateSet; // Array of pointers to AnalysisState; ownership is not visible in this part of the file.
249
+
250
+ typedef struct {
251
+ AnalysisStateSet states; // NOTE(review): presumably the states processed in the current iteration — confirm.
252
+ AnalysisStateSet next_states; // NOTE(review): presumably the states produced for the next iteration — confirm.
253
+ AnalysisStateSet deeper_states; // NOTE(review): presumably states deferred because they sit deeper in the stack — confirm.
254
+ AnalysisStateSet state_pool; // NOTE(review): presumably recycled AnalysisState allocations — confirm.
255
+ Array(uint16_t) final_step_indices;
256
+ Array(TSSymbol) finished_parent_symbols;
257
+ bool did_abort; // NOTE(review): presumably set when analysis stops early, e.g. on hitting an iteration limit — confirm.
258
+ } QueryAnalysis;
259
+
260
+ /*
261
+ * AnalysisSubgraph - A subset of the states in the parse table that are used
262
+ * in constructing nodes with a certain symbol. Each state is accompanied by
263
+ * some information about the possible node that could be produced in
264
+ * downstream states.
265
+ */
266
+ typedef struct {
267
+ TSStateId state; // Parse-table state this node describes.
268
+ uint16_t production_id;
269
+ uint8_t child_index: 7; // 7-bit field, so values above 127 cannot be represented.
270
+ bool done: 1; // Shares a byte with `child_index`.
271
+ } AnalysisSubgraphNode;
272
+
273
+ typedef struct {
274
+ TSSymbol symbol; // The symbol whose construction this subgraph describes.
275
+ Array(TSStateId) start_states; // NOTE(review): presumably the states in which a node with `symbol` can begin — confirm.
276
+ Array(AnalysisSubgraphNode) nodes; // The states of the subgraph with their per-state information.
277
+ } AnalysisSubgraph;
278
+
279
+ typedef Array(AnalysisSubgraph) AnalysisSubgraphArray; // NOTE(review): ordering and lookup strategy are not visible in this part of the file.
280
+
281
+ /*
282
+ * StatePredecessorMap - A map that stores the predecessors of each parse state.
283
+ * This is used during query analysis to determine which parse states can lead
284
+ * to which reduce actions.
285
+ */
286
+ typedef struct {
287
+ TSStateId *contents; // Flat table with MAX_STATE_PREDECESSOR_COUNT + 1 slots per state: slot 0 is the predecessor count, the predecessors follow.
288
+ } StatePredecessorMap;
289
+
290
+ /*
291
+ * TSQuery - A tree query, compiled from a string of S-expressions. The query
292
+ * itself is immutable. The mutable state used in the process of executing the
293
+ * query is stored in a `TSQueryCursor`.
294
+ */
295
+ struct TSQuery {
296
+ SymbolTable captures; // NOTE(review): presumably the capture names, with the capture id as table id — confirm.
297
+ SymbolTable predicate_values; // NOTE(review): presumably the literal strings used in predicates — confirm.
298
+ Array(CaptureQuantifiers) capture_quantifiers; // NOTE(review): presumably one CaptureQuantifiers per pattern — confirm.
299
+ Array(QueryStep) steps; // The shared step array that PatternEntry.step_index refers to.
300
+ Array(PatternEntry) pattern_map;
301
+ Array(TSQueryPredicateStep) predicate_steps;
302
+ Array(QueryPattern) patterns;
303
+ Array(StepOffset) step_offsets;
304
+ Array(TSFieldId) negated_fields;
305
+ Array(char) string_buffer; // NOTE(review): presumably scratch space used while parsing string literals — confirm.
306
+ Array(TSSymbol) repeat_symbols_with_rootless_patterns;
307
+ const TSLanguage *language;
308
+ uint16_t wildcard_root_pattern_count;
309
+ };
310
+
311
+ /*
312
+ * TSQueryCursor - A stateful struct used to execute a query on a tree.
313
+ */
314
+ struct TSQueryCursor {
315
+ const TSQuery *query; // The query being executed; const, so the cursor does not modify it through this pointer.
316
+ TSTreeCursor cursor;
317
+ Array(QueryState) states; // NOTE(review): presumably the in-progress matches — confirm.
318
+ Array(QueryState) finished_states; // NOTE(review): presumably completed matches not yet returned — confirm.
319
+ CaptureListPool capture_list_pool;
320
+ uint32_t depth;
321
+ uint32_t max_start_depth;
322
+ uint32_t start_byte;
323
+ uint32_t end_byte;
324
+ TSPoint start_point;
325
+ TSPoint end_point;
326
+ uint32_t next_state_id; // NOTE(review): presumably the source of QueryState.id values — confirm.
327
+ TSClock end_clock;
328
+ TSDuration timeout_duration;
329
+ const TSQueryCursorOptions *query_options;
330
+ TSQueryCursorState query_state;
331
+ unsigned operation_count; // NOTE(review): presumably compared against OP_COUNT_PER_QUERY_TIMEOUT_CHECK — confirm.
332
+ bool on_visible_node;
333
+ bool ascending;
334
+ bool halted;
335
+ bool did_exceed_match_limit;
336
+ };
337
+
338
+ static const TSQueryError PARENT_DONE = -1; // NOTE(review): presumably an internal signal outside the public TSQueryError values — confirm.
339
+ static const uint16_t PATTERN_DONE_MARKER = UINT16_MAX; // Same numeric value as NONE.
340
+ static const uint16_t NONE = UINT16_MAX; // "No value" marker for 16-bit ids and indices, e.g. empty capture slots and absent alternatives.
341
+ static const TSSymbol WILDCARD_SYMBOL = 0; // Symbol value that stands for the wildcard `_` in a QueryStep.
342
+ static const unsigned OP_COUNT_PER_QUERY_TIMEOUT_CHECK = 100; // NOTE(review): presumably the number of operations between timeout checks — confirm.
343
+
344
+ /**********
345
+ * Stream
346
+ **********/
347
+
348
+ // Advance to the next unicode code point in the stream.
349
+ static bool stream_advance(Stream *self) {
350
+ self->input += self->next_size;
351
+ if (self->input < self->end) {
352
+ uint32_t size = ts_decode_utf8(
353
+ (const uint8_t *)self->input,
354
+ (uint32_t)(self->end - self->input),
355
+ &self->next
356
+ );
357
+ if (size > 0) {
358
+ self->next_size = size;
359
+ return true;
360
+ }
361
+ } else {
362
+ self->next_size = 0;
363
+ self->next = '\0';
364
+ }
365
+ return false;
366
+ }
367
+
368
+ // Reset the stream to the given input position, represented as a pointer
369
+ // into the input string.
370
+ static void stream_reset(Stream *self, const char *input) {
371
+ self->input = input;
372
+ self->next_size = 0;
373
+ stream_advance(self);
374
+ }
375
+
376
+ static Stream stream_new(const char *string, uint32_t length) {
377
+ Stream self = {
378
+ .next = 0,
379
+ .input = string,
380
+ .start = string,
381
+ .end = string + length,
382
+ };
383
+ stream_advance(&self);
384
+ return self;
385
+ }
386
+
387
+ static void stream_skip_whitespace(Stream *self) {
388
+ for (;;) {
389
+ if (iswspace(self->next)) {
390
+ stream_advance(self);
391
+ } else if (self->next == ';') {
392
+ // skip over comments
393
+ stream_advance(self);
394
+ while (self->next && self->next != '\n') {
395
+ if (!stream_advance(self)) break;
396
+ }
397
+ } else {
398
+ break;
399
+ }
400
+ }
401
+ }
402
+
403
+ static bool stream_is_ident_start(Stream *self) {
404
+ return iswalnum(self->next) || self->next == '_' || self->next == '-';
405
+ }
406
+
407
+ static void stream_scan_identifier(Stream *stream) {
408
+ do {
409
+ stream_advance(stream);
410
+ } while (
411
+ iswalnum(stream->next) ||
412
+ stream->next == '_' ||
413
+ stream->next == '-' ||
414
+ stream->next == '.' ||
415
+ stream->next == '?' ||
416
+ stream->next == '!'
417
+ );
418
+ }
419
+
420
+ static uint32_t stream_offset(Stream *self) {
421
+ return (uint32_t)(self->input - self->start);
422
+ }
423
+
424
+ /******************
425
+ * CaptureListPool
426
+ ******************/
427
+
428
+ static CaptureListPool capture_list_pool_new(void) {
429
+ return (CaptureListPool) {
430
+ .list = array_new(),
431
+ .empty_list = array_new(),
432
+ .max_capture_list_count = UINT32_MAX,
433
+ .free_capture_list_count = 0,
434
+ };
435
+ }
436
+
437
+ static void capture_list_pool_reset(CaptureListPool *self) {
438
+ for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
439
+ // This invalid size means that the list is not in use.
440
+ array_get(&self->list, i)->size = UINT32_MAX;
441
+ }
442
+ self->free_capture_list_count = self->list.size;
443
+ }
444
+
445
+ static void capture_list_pool_delete(CaptureListPool *self) {
446
+ for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
447
+ array_delete(array_get(&self->list, i));
448
+ }
449
+ array_delete(&self->list);
450
+ }
451
+
452
+ static const CaptureList *capture_list_pool_get(const CaptureListPool *self, uint16_t id) {
453
+ if (id >= self->list.size) return &self->empty_list;
454
+ return array_get(&self->list, id);
455
+ }
456
+
457
+ static CaptureList *capture_list_pool_get_mut(CaptureListPool *self, uint16_t id) {
458
+ ts_assert(id < self->list.size);
459
+ return array_get(&self->list, id);
460
+ }
461
+
462
+ static bool capture_list_pool_is_empty(const CaptureListPool *self) {
463
+ // The capture list pool is empty if all allocated lists are in use, and we
464
+ // have reached the maximum allowed number of allocated lists.
465
+ return self->free_capture_list_count == 0 && self->list.size >= self->max_capture_list_count;
466
+ }
467
+
468
+ static uint16_t capture_list_pool_acquire(CaptureListPool *self) {
469
+ // First see if any already allocated capture list is currently unused.
470
+ if (self->free_capture_list_count > 0) {
471
+ for (uint16_t i = 0; i < (uint16_t)self->list.size; i++) {
472
+ if (array_get(&self->list, i)->size == UINT32_MAX) {
473
+ array_clear(array_get(&self->list, i));
474
+ self->free_capture_list_count--;
475
+ return i;
476
+ }
477
+ }
478
+ }
479
+
480
+ // Otherwise allocate and initialize a new capture list, as long as that
481
+ // doesn't put us over the requested maximum.
482
+ uint32_t i = self->list.size;
483
+ if (i >= self->max_capture_list_count) {
484
+ return NONE;
485
+ }
486
+ CaptureList list;
487
+ array_init(&list);
488
+ array_push(&self->list, list);
489
+ return i;
490
+ }
491
+
492
+ static void capture_list_pool_release(CaptureListPool *self, uint16_t id) {
493
+ if (id >= self->list.size) return;
494
+ array_get(&self->list, id)->size = UINT32_MAX;
495
+ self->free_capture_list_count++;
496
+ }
497
+
498
+ /**************
499
+ * Quantifiers
500
+ **************/
501
+
502
+ static TSQuantifier quantifier_mul(
503
+ TSQuantifier left,
504
+ TSQuantifier right
505
+ ) {
506
+ switch (left)
507
+ {
508
+ case TSQuantifierZero:
509
+ return TSQuantifierZero;
510
+ case TSQuantifierZeroOrOne:
511
+ switch (right) {
512
+ case TSQuantifierZero:
513
+ return TSQuantifierZero;
514
+ case TSQuantifierZeroOrOne:
515
+ case TSQuantifierOne:
516
+ return TSQuantifierZeroOrOne;
517
+ case TSQuantifierZeroOrMore:
518
+ case TSQuantifierOneOrMore:
519
+ return TSQuantifierZeroOrMore;
520
+ };
521
+ break;
522
+ case TSQuantifierZeroOrMore:
523
+ switch (right) {
524
+ case TSQuantifierZero:
525
+ return TSQuantifierZero;
526
+ case TSQuantifierZeroOrOne:
527
+ case TSQuantifierZeroOrMore:
528
+ case TSQuantifierOne:
529
+ case TSQuantifierOneOrMore:
530
+ return TSQuantifierZeroOrMore;
531
+ };
532
+ break;
533
+ case TSQuantifierOne:
534
+ return right;
535
+ case TSQuantifierOneOrMore:
536
+ switch (right) {
537
+ case TSQuantifierZero:
538
+ return TSQuantifierZero;
539
+ case TSQuantifierZeroOrOne:
540
+ case TSQuantifierZeroOrMore:
541
+ return TSQuantifierZeroOrMore;
542
+ case TSQuantifierOne:
543
+ case TSQuantifierOneOrMore:
544
+ return TSQuantifierOneOrMore;
545
+ };
546
+ break;
547
+ }
548
+ return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
549
+ }
550
+
551
+ static TSQuantifier quantifier_join(
552
+ TSQuantifier left,
553
+ TSQuantifier right
554
+ ) {
555
+ switch (left)
556
+ {
557
+ case TSQuantifierZero:
558
+ switch (right) {
559
+ case TSQuantifierZero:
560
+ return TSQuantifierZero;
561
+ case TSQuantifierZeroOrOne:
562
+ case TSQuantifierOne:
563
+ return TSQuantifierZeroOrOne;
564
+ case TSQuantifierZeroOrMore:
565
+ case TSQuantifierOneOrMore:
566
+ return TSQuantifierZeroOrMore;
567
+ };
568
+ break;
569
+ case TSQuantifierZeroOrOne:
570
+ switch (right) {
571
+ case TSQuantifierZero:
572
+ case TSQuantifierZeroOrOne:
573
+ case TSQuantifierOne:
574
+ return TSQuantifierZeroOrOne;
575
+ break;
576
+ case TSQuantifierZeroOrMore:
577
+ case TSQuantifierOneOrMore:
578
+ return TSQuantifierZeroOrMore;
579
+ break;
580
+ };
581
+ break;
582
+ case TSQuantifierZeroOrMore:
583
+ return TSQuantifierZeroOrMore;
584
+ case TSQuantifierOne:
585
+ switch (right) {
586
+ case TSQuantifierZero:
587
+ case TSQuantifierZeroOrOne:
588
+ return TSQuantifierZeroOrOne;
589
+ case TSQuantifierZeroOrMore:
590
+ return TSQuantifierZeroOrMore;
591
+ case TSQuantifierOne:
592
+ return TSQuantifierOne;
593
+ case TSQuantifierOneOrMore:
594
+ return TSQuantifierOneOrMore;
595
+ };
596
+ break;
597
+ case TSQuantifierOneOrMore:
598
+ switch (right) {
599
+ case TSQuantifierZero:
600
+ case TSQuantifierZeroOrOne:
601
+ case TSQuantifierZeroOrMore:
602
+ return TSQuantifierZeroOrMore;
603
+ case TSQuantifierOne:
604
+ case TSQuantifierOneOrMore:
605
+ return TSQuantifierOneOrMore;
606
+ };
607
+ break;
608
+ }
609
+ return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
610
+ }
611
+
612
+ static TSQuantifier quantifier_add(
613
+ TSQuantifier left,
614
+ TSQuantifier right
615
+ ) {
616
+ switch (left)
617
+ {
618
+ case TSQuantifierZero:
619
+ return right;
620
+ case TSQuantifierZeroOrOne:
621
+ switch (right) {
622
+ case TSQuantifierZero:
623
+ return TSQuantifierZeroOrOne;
624
+ case TSQuantifierZeroOrOne:
625
+ case TSQuantifierZeroOrMore:
626
+ return TSQuantifierZeroOrMore;
627
+ case TSQuantifierOne:
628
+ case TSQuantifierOneOrMore:
629
+ return TSQuantifierOneOrMore;
630
+ };
631
+ break;
632
+ case TSQuantifierZeroOrMore:
633
+ switch (right) {
634
+ case TSQuantifierZero:
635
+ return TSQuantifierZeroOrMore;
636
+ case TSQuantifierZeroOrOne:
637
+ case TSQuantifierZeroOrMore:
638
+ return TSQuantifierZeroOrMore;
639
+ case TSQuantifierOne:
640
+ case TSQuantifierOneOrMore:
641
+ return TSQuantifierOneOrMore;
642
+ };
643
+ break;
644
+ case TSQuantifierOne:
645
+ switch (right) {
646
+ case TSQuantifierZero:
647
+ return TSQuantifierOne;
648
+ case TSQuantifierZeroOrOne:
649
+ case TSQuantifierZeroOrMore:
650
+ case TSQuantifierOne:
651
+ case TSQuantifierOneOrMore:
652
+ return TSQuantifierOneOrMore;
653
+ };
654
+ break;
655
+ case TSQuantifierOneOrMore:
656
+ return TSQuantifierOneOrMore;
657
+ }
658
+ return TSQuantifierZero; // to make compiler happy, but all cases should be covered above!
659
+ }
660
+
661
+ // Create new capture quantifiers structure
662
+ static CaptureQuantifiers capture_quantifiers_new(void) {
663
+ return (CaptureQuantifiers) array_new();
664
+ }
665
+
666
+ // Delete capture quantifiers structure
667
+ static void capture_quantifiers_delete(
668
+ CaptureQuantifiers *self
669
+ ) {
670
+ array_delete(self);
671
+ }
672
+
673
+ // Clear capture quantifiers structure
674
+ static void capture_quantifiers_clear(
675
+ CaptureQuantifiers *self
676
+ ) {
677
+ array_clear(self);
678
+ }
679
+
680
+ // Replace capture quantifiers with the given quantifiers
681
+ static void capture_quantifiers_replace(
682
+ CaptureQuantifiers *self,
683
+ CaptureQuantifiers *quantifiers
684
+ ) {
685
+ array_clear(self);
686
+ array_push_all(self, quantifiers);
687
+ }
688
+
689
+ // Return capture quantifier for the given capture id
690
+ static TSQuantifier capture_quantifier_for_id(
691
+ const CaptureQuantifiers *self,
692
+ uint16_t id
693
+ ) {
694
+ return (self->size <= id) ? TSQuantifierZero : (TSQuantifier) *array_get(self, id);
695
+ }
696
+
697
+ // Add the given quantifier to the current value for id
698
+ static void capture_quantifiers_add_for_id(
699
+ CaptureQuantifiers *self,
700
+ uint16_t id,
701
+ TSQuantifier quantifier
702
+ ) {
703
+ if (self->size <= id) {
704
+ array_grow_by(self, id + 1 - self->size);
705
+ }
706
+ uint8_t *own_quantifier = array_get(self, id);
707
+ *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, quantifier);
708
+ }
709
+
710
+ // Point-wise add the given quantifiers to the current values
711
+ static void capture_quantifiers_add_all(
712
+ CaptureQuantifiers *self,
713
+ CaptureQuantifiers *quantifiers
714
+ ) {
715
+ if (self->size < quantifiers->size) {
716
+ array_grow_by(self, quantifiers->size - self->size);
717
+ }
718
+ for (uint16_t id = 0; id < (uint16_t)quantifiers->size; id++) {
719
+ uint8_t *quantifier = array_get(quantifiers, id);
720
+ uint8_t *own_quantifier = array_get(self, id);
721
+ *own_quantifier = (uint8_t) quantifier_add((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
722
+ }
723
+ }
724
+
725
+ // Join the given quantifier with the current values
726
+ static void capture_quantifiers_mul(
727
+ CaptureQuantifiers *self,
728
+ TSQuantifier quantifier
729
+ ) {
730
+ for (uint16_t id = 0; id < (uint16_t)self->size; id++) {
731
+ uint8_t *own_quantifier = array_get(self, id);
732
+ *own_quantifier = (uint8_t) quantifier_mul((TSQuantifier) *own_quantifier, quantifier);
733
+ }
734
+ }
735
+
736
+ // Point-wise join the quantifiers from a list of alternatives with the current values
737
+ static void capture_quantifiers_join_all(
738
+ CaptureQuantifiers *self,
739
+ CaptureQuantifiers *quantifiers
740
+ ) {
741
+ if (self->size < quantifiers->size) {
742
+ array_grow_by(self, quantifiers->size - self->size);
743
+ }
744
+ for (uint32_t id = 0; id < quantifiers->size; id++) {
745
+ uint8_t *quantifier = array_get(quantifiers, id);
746
+ uint8_t *own_quantifier = array_get(self, id);
747
+ *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, (TSQuantifier) *quantifier);
748
+ }
749
+ for (uint32_t id = quantifiers->size; id < self->size; id++) {
750
+ uint8_t *own_quantifier = array_get(self, id);
751
+ *own_quantifier = (uint8_t) quantifier_join((TSQuantifier) *own_quantifier, TSQuantifierZero);
752
+ }
753
+ }
754
+
755
+ /**************
756
+ * SymbolTable
757
+ **************/
758
+
759
+ static SymbolTable symbol_table_new(void) {
760
+ return (SymbolTable) {
761
+ .characters = array_new(),
762
+ .slices = array_new(),
763
+ };
764
+ }
765
+
766
+ static void symbol_table_delete(SymbolTable *self) {
767
+ array_delete(&self->characters);
768
+ array_delete(&self->slices);
769
+ }
770
+
771
+ static int symbol_table_id_for_name(
772
+ const SymbolTable *self,
773
+ const char *name,
774
+ uint32_t length
775
+ ) {
776
+ for (unsigned i = 0; i < self->slices.size; i++) {
777
+ Slice slice = *array_get(&self->slices, i);
778
+ if (
779
+ slice.length == length &&
780
+ !strncmp(array_get(&self->characters, slice.offset), name, length)
781
+ ) return i;
782
+ }
783
+ return -1;
784
+ }
785
+
786
+ static const char *symbol_table_name_for_id(
787
+ const SymbolTable *self,
788
+ uint16_t id,
789
+ uint32_t *length
790
+ ) {
791
+ Slice slice = *(array_get(&self->slices,id));
792
+ *length = slice.length;
793
+ return array_get(&self->characters, slice.offset);
794
+ }
795
+
796
+ static uint16_t symbol_table_insert_name(
797
+ SymbolTable *self,
798
+ const char *name,
799
+ uint32_t length
800
+ ) {
801
+ int id = symbol_table_id_for_name(self, name, length);
802
+ if (id >= 0) return (uint16_t)id;
803
+ Slice slice = {
804
+ .offset = self->characters.size,
805
+ .length = length,
806
+ };
807
+ array_grow_by(&self->characters, length + 1);
808
+ memcpy(array_get(&self->characters, slice.offset), name, length);
809
+ *array_get(&self->characters, self->characters.size - 1) = 0;
810
+ array_push(&self->slices, slice);
811
+ return self->slices.size - 1;
812
+ }
813
+
814
+ /************
815
+ * QueryStep
816
+ ************/
817
+
818
+ static QueryStep query_step__new(
819
+ TSSymbol symbol,
820
+ uint16_t depth,
821
+ bool is_immediate
822
+ ) {
823
+ QueryStep step = {
824
+ .symbol = symbol,
825
+ .depth = depth,
826
+ .field = 0,
827
+ .alternative_index = NONE,
828
+ .negated_field_list_id = 0,
829
+ .contains_captures = false,
830
+ .is_last_child = false,
831
+ .is_named = false,
832
+ .is_pass_through = false,
833
+ .is_dead_end = false,
834
+ .root_pattern_guaranteed = false,
835
+ .is_immediate = is_immediate,
836
+ .alternative_is_immediate = false,
837
+ };
838
+ for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
839
+ step.capture_ids[i] = NONE;
840
+ }
841
+ return step;
842
+ }
843
+
844
+ static void query_step__add_capture(QueryStep *self, uint16_t capture_id) {
845
+ for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
846
+ if (self->capture_ids[i] == NONE) {
847
+ self->capture_ids[i] = capture_id;
848
+ break;
849
+ }
850
+ }
851
+ }
852
+
853
+ static void query_step__remove_capture(QueryStep *self, uint16_t capture_id) {
854
+ for (unsigned i = 0; i < MAX_STEP_CAPTURE_COUNT; i++) {
855
+ if (self->capture_ids[i] == capture_id) {
856
+ self->capture_ids[i] = NONE;
857
+ while (i + 1 < MAX_STEP_CAPTURE_COUNT) {
858
+ if (self->capture_ids[i + 1] == NONE) break;
859
+ self->capture_ids[i] = self->capture_ids[i + 1];
860
+ self->capture_ids[i + 1] = NONE;
861
+ i++;
862
+ }
863
+ break;
864
+ }
865
+ }
866
+ }
867
+
868
+ /**********************
869
+ * StatePredecessorMap
870
+ **********************/
871
+
872
+ static inline StatePredecessorMap state_predecessor_map_new(
873
+ const TSLanguage *language
874
+ ) {
875
+ return (StatePredecessorMap) {
876
+ .contents = ts_calloc(
877
+ (size_t)language->state_count * (MAX_STATE_PREDECESSOR_COUNT + 1),
878
+ sizeof(TSStateId)
879
+ ),
880
+ };
881
+ }
882
+
883
+ static inline void state_predecessor_map_delete(StatePredecessorMap *self) {
884
+ ts_free(self->contents);
885
+ }
886
+
887
+ static inline void state_predecessor_map_add(
888
+ StatePredecessorMap *self,
889
+ TSStateId state,
890
+ TSStateId predecessor
891
+ ) {
892
+ size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
893
+ TSStateId *count = &self->contents[index];
894
+ if (
895
+ *count == 0 ||
896
+ (*count < MAX_STATE_PREDECESSOR_COUNT && self->contents[index + *count] != predecessor)
897
+ ) {
898
+ (*count)++;
899
+ self->contents[index + *count] = predecessor;
900
+ }
901
+ }
902
+
903
+ static inline const TSStateId *state_predecessor_map_get(
904
+ const StatePredecessorMap *self,
905
+ TSStateId state,
906
+ unsigned *count
907
+ ) {
908
+ size_t index = (size_t)state * (MAX_STATE_PREDECESSOR_COUNT + 1);
909
+ *count = self->contents[index];
910
+ return &self->contents[index + 1];
911
+ }
912
+
913
+ /****************
914
+ * AnalysisState
915
+ ****************/
916
+
917
+ static unsigned analysis_state__recursion_depth(const AnalysisState *self) {
918
+ unsigned result = 0;
919
+ for (unsigned i = 0; i < self->depth; i++) {
920
+ TSSymbol symbol = self->stack[i].parent_symbol;
921
+ for (unsigned j = 0; j < i; j++) {
922
+ if (self->stack[j].parent_symbol == symbol) {
923
+ result++;
924
+ break;
925
+ }
926
+ }
927
+ }
928
+ return result;
929
+ }
930
+
931
+ static inline int analysis_state__compare(
932
+ AnalysisState *const *self,
933
+ AnalysisState *const *other
934
+ ) {
935
+ if ((*self)->depth < (*other)->depth) return 1;
936
+ for (unsigned i = 0; i < (*self)->depth; i++) {
937
+ if (i >= (*other)->depth) return -1;
938
+ AnalysisStateEntry s1 = (*self)->stack[i];
939
+ AnalysisStateEntry s2 = (*other)->stack[i];
940
+ if (s1.child_index < s2.child_index) return -1;
941
+ if (s1.child_index > s2.child_index) return 1;
942
+ if (s1.parent_symbol < s2.parent_symbol) return -1;
943
+ if (s1.parent_symbol > s2.parent_symbol) return 1;
944
+ if (s1.parse_state < s2.parse_state) return -1;
945
+ if (s1.parse_state > s2.parse_state) return 1;
946
+ if (s1.field_id < s2.field_id) return -1;
947
+ if (s1.field_id > s2.field_id) return 1;
948
+ }
949
+ if ((*self)->step_index < (*other)->step_index) return -1;
950
+ if ((*self)->step_index > (*other)->step_index) return 1;
951
+ return 0;
952
+ }
953
+
954
+ static inline AnalysisStateEntry *analysis_state__top(AnalysisState *self) {
955
+ if (self->depth == 0) {
956
+ return &self->stack[0];
957
+ }
958
+ return &self->stack[self->depth - 1];
959
+ }
960
+
961
+ static inline bool analysis_state__has_supertype(AnalysisState *self, TSSymbol symbol) {
962
+ for (unsigned i = 0; i < self->depth; i++) {
963
+ if (self->stack[i].parent_symbol == symbol) return true;
964
+ }
965
+ return false;
966
+ }
967
+
968
+ /******************
969
+ * AnalysisStateSet
970
+ ******************/
971
+
972
+ // Obtains an `AnalysisState` instance, either by consuming one from this set's object pool, or by
973
+ // cloning one from scratch.
974
+ static inline AnalysisState *analysis_state_pool__clone_or_reuse(
975
+ AnalysisStateSet *self,
976
+ AnalysisState *borrowed_item
977
+ ) {
978
+ AnalysisState *new_item;
979
+ if (self->size) {
980
+ new_item = array_pop(self);
981
+ } else {
982
+ new_item = ts_malloc(sizeof(AnalysisState));
983
+ }
984
+ *new_item = *borrowed_item;
985
+ return new_item;
986
+ }
987
+
988
+ // Inserts a clone of the passed-in item at the appropriate position to maintain ordering in this
989
+ // set. The set does not contain duplicates, so if the item is already present, it will not be
990
+ // inserted, and no clone will be made.
991
+ //
992
+ // The caller retains ownership of the passed-in memory. However, the clone that is created by this
993
+ // function will be managed by the state set.
994
+ static inline void analysis_state_set__insert_sorted(
995
+ AnalysisStateSet *self,
996
+ AnalysisStateSet *pool,
997
+ AnalysisState *borrowed_item
998
+ ) {
999
+ unsigned index, exists;
1000
+ array_search_sorted_with(self, analysis_state__compare, &borrowed_item, &index, &exists);
1001
+ if (!exists) {
1002
+ AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
1003
+ array_insert(self, index, new_item);
1004
+ }
1005
+ }
1006
+
1007
+ // Inserts a clone of the passed-in item at the end position of this list.
1008
+ //
1009
+ // IMPORTANT: The caller MUST ENSURE that this item is larger (by the comparison function
1010
+ // `analysis_state__compare`) than largest item already in this set. If items are inserted in the
1011
+ // wrong order, the set will not function properly for future use.
1012
+ //
1013
+ // The caller retains ownership of the passed-in memory. However, the clone that is created by this
1014
+ // function will be managed by the state set.
1015
+ static inline void analysis_state_set__push(
1016
+ AnalysisStateSet *self,
1017
+ AnalysisStateSet *pool,
1018
+ AnalysisState *borrowed_item
1019
+ ) {
1020
+ AnalysisState *new_item = analysis_state_pool__clone_or_reuse(pool, borrowed_item);
1021
+ array_push(self, new_item);
1022
+ }
1023
+
1024
+ // Removes all items from this set, returning it to an empty state.
1025
+ static inline void analysis_state_set__clear(AnalysisStateSet *self, AnalysisStateSet *pool) {
1026
+ array_push_all(pool, self);
1027
+ array_clear(self);
1028
+ }
1029
+
1030
+ // Releases all memory that is managed with this state set, including any items currently present.
1031
+ // After calling this function, the set is no longer suitable for use.
1032
+ static inline void analysis_state_set__delete(AnalysisStateSet *self) {
1033
+ for (unsigned i = 0; i < self->size; i++) {
1034
+ ts_free(self->contents[i]);
1035
+ }
1036
+ array_delete(self);
1037
+ }
1038
+
1039
+ /****************
1040
+ * QueryAnalyzer
1041
+ ****************/
1042
+
1043
+ static inline QueryAnalysis query_analysis__new(void) {
1044
+ return (QueryAnalysis) {
1045
+ .states = array_new(),
1046
+ .next_states = array_new(),
1047
+ .deeper_states = array_new(),
1048
+ .state_pool = array_new(),
1049
+ .final_step_indices = array_new(),
1050
+ .finished_parent_symbols = array_new(),
1051
+ .did_abort = false,
1052
+ };
1053
+ }
1054
+
1055
+ static inline void query_analysis__delete(QueryAnalysis *self) {
1056
+ analysis_state_set__delete(&self->states);
1057
+ analysis_state_set__delete(&self->next_states);
1058
+ analysis_state_set__delete(&self->deeper_states);
1059
+ analysis_state_set__delete(&self->state_pool);
1060
+ array_delete(&self->final_step_indices);
1061
+ array_delete(&self->finished_parent_symbols);
1062
+ }
1063
+
1064
+ /***********************
1065
+ * AnalysisSubgraphNode
1066
+ ***********************/
1067
+
1068
+ static inline int analysis_subgraph_node__compare(const AnalysisSubgraphNode *self, const AnalysisSubgraphNode *other) {
1069
+ if (self->state < other->state) return -1;
1070
+ if (self->state > other->state) return 1;
1071
+ if (self->child_index < other->child_index) return -1;
1072
+ if (self->child_index > other->child_index) return 1;
1073
+ if (self->done < other->done) return -1;
1074
+ if (self->done > other->done) return 1;
1075
+ if (self->production_id < other->production_id) return -1;
1076
+ if (self->production_id > other->production_id) return 1;
1077
+ return 0;
1078
+ }
1079
+
1080
+ /*********
1081
+ * Query
1082
+ *********/
1083
+
1084
+ // The `pattern_map` contains a mapping from TSSymbol values to indices in the
1085
+ // `steps` array. For a given syntax node, the `pattern_map` makes it possible
1086
+ // to quickly find the starting steps of all of the patterns whose root matches
1087
+ // that node. Each entry has two fields: a `pattern_index`, which identifies one
1088
+ // of the patterns in the query, and a `step_index`, which indicates the start
1089
+ // offset of that pattern's steps within the `steps` array.
1090
+ //
1091
+ // The entries are sorted by the patterns' root symbols, and lookups use a
1092
+ // binary search. This ensures that the cost of this initial lookup step
1093
+ // scales logarithmically with the number of patterns in the query.
1094
+ //
1095
+ // This returns `true` if the symbol is present and `false` otherwise.
1096
+ // If the symbol is not present `*result` is set to the index where the
1097
+ // symbol should be inserted.
1098
+ static inline bool ts_query__pattern_map_search(
1099
+ const TSQuery *self,
1100
+ TSSymbol needle,
1101
+ uint32_t *result
1102
+ ) {
1103
+ uint32_t base_index = self->wildcard_root_pattern_count;
1104
+ uint32_t size = self->pattern_map.size - base_index;
1105
+ if (size == 0) {
1106
+ *result = base_index;
1107
+ return false;
1108
+ }
1109
+ while (size > 1) {
1110
+ uint32_t half_size = size / 2;
1111
+ uint32_t mid_index = base_index + half_size;
1112
+ TSSymbol mid_symbol = array_get(&self->steps,
1113
+ array_get(&self->pattern_map, mid_index)->step_index
1114
+ )->symbol;
1115
+ if (needle > mid_symbol) base_index = mid_index;
1116
+ size -= half_size;
1117
+ }
1118
+
1119
+ TSSymbol symbol = array_get(&self->steps,
1120
+ array_get(&self->pattern_map, base_index)->step_index
1121
+ )->symbol;
1122
+
1123
+ if (needle > symbol) {
1124
+ base_index++;
1125
+ if (base_index < self->pattern_map.size) {
1126
+ symbol = array_get(&self->steps,
1127
+ array_get(&self->pattern_map, base_index)->step_index
1128
+ )->symbol;
1129
+ }
1130
+ }
1131
+
1132
+ *result = base_index;
1133
+ return needle == symbol;
1134
+ }
1135
+
1136
+ // Insert a new pattern's start index into the pattern map, maintaining
1137
+ // the pattern map's ordering invariant.
1138
+ static inline void ts_query__pattern_map_insert(
1139
+ TSQuery *self,
1140
+ TSSymbol symbol,
1141
+ PatternEntry new_entry
1142
+ ) {
1143
+ uint32_t index;
1144
+ ts_query__pattern_map_search(self, symbol, &index);
1145
+
1146
+ // Ensure that the entries are sorted not only by symbol, but also
1147
+ // by pattern_index. This way, states for earlier patterns will be
1148
+ // initiated first, which allows the ordering of the states array
1149
+ // to be maintained more efficiently.
1150
+ while (index < self->pattern_map.size) {
1151
+ PatternEntry *entry = array_get(&self->pattern_map, index);
1152
+ if (
1153
+ array_get(&self->steps, entry->step_index)->symbol == symbol &&
1154
+ entry->pattern_index < new_entry.pattern_index
1155
+ ) {
1156
+ index++;
1157
+ } else {
1158
+ break;
1159
+ }
1160
+ }
1161
+
1162
+ array_insert(&self->pattern_map, index, new_entry);
1163
+ }
1164
+
1165
+ // Walk the subgraph for this non-terminal, tracking all of the possible sequences of progress within the pattern.
1165
+ // Fills `analysis->final_step_indices` and `analysis->finished_parent_symbols`; sets `analysis->did_abort` if an iteration or stack-depth limit is hit.
1166
+ static void ts_query__perform_analysis(
1167
+ TSQuery *self,
1168
+ const AnalysisSubgraphArray *subgraphs,
1169
+ QueryAnalysis *analysis
1170
+ ) {
1171
+ unsigned recursion_depth_limit = 0;
1172
+ unsigned prev_final_step_count = 0;
1173
+ array_clear(&analysis->final_step_indices);
1174
+ array_clear(&analysis->finished_parent_symbols);
1175
+
1176
+ for (unsigned iteration = 0;; iteration++) { // NOTE(review): `analysis->states` is consumed but never seeded in this function; presumably the caller populates it.
1177
+ if (iteration == MAX_ANALYSIS_ITERATION_COUNT) { // Iteration budget exhausted: flag the abort and stop.
1178
+ analysis->did_abort = true;
1179
+ break;
1180
+ }
1181
+
1182
+ #ifdef DEBUG_ANALYZE_QUERY
1183
+ printf("Iteration: %u. Final step indices:", iteration);
1184
+ for (unsigned j = 0; j < analysis->final_step_indices.size; j++) {
1185
+ printf(" %4u", *array_get(&analysis->final_step_indices, j));
1186
+ }
1187
+ printf("\n");
1188
+ for (unsigned j = 0; j < analysis->states.size; j++) {
1189
+ AnalysisState *state = *array_get(&analysis->states, j);
1190
+ printf(" %3u: step: %u, stack: [", j, state->step_index);
1191
+ for (unsigned k = 0; k < state->depth; k++) {
1192
+ printf(
1193
+ " {%s, child: %u, state: %4u",
1194
+ self->language->symbol_names[state->stack[k].parent_symbol],
1195
+ state->stack[k].child_index,
1196
+ state->stack[k].parse_state
1197
+ );
1198
+ if (state->stack[k].field_id) printf(", field: %s", self->language->field_names[state->stack[k].field_id]);
1199
+ if (state->stack[k].done) printf(", DONE");
1200
+ printf("}");
1201
+ }
1202
+ printf(" ]\n");
1203
+ }
1204
+ #endif
1205
+
1206
+ // If no further progress can be made within the current recursion depth limit, then
1207
+ // bump the depth limit by one, and continue to process the states that exceeded the
1208
+ // limit. But only allow this if progress has been made since the last time the depth
1209
+ // limit was increased.
1210
+ if (analysis->states.size == 0) {
1211
+ if (
1212
+ analysis->deeper_states.size > 0 &&
1213
+ analysis->final_step_indices.size > prev_final_step_count
1214
+ ) {
1215
+ #ifdef DEBUG_ANALYZE_QUERY
1216
+ printf("Increase recursion depth limit to %u\n", recursion_depth_limit + 1);
1217
+ #endif
1218
+
1219
+ prev_final_step_count = analysis->final_step_indices.size;
1220
+ recursion_depth_limit++;
1221
+ AnalysisStateSet _states = analysis->states; // Swap the sets: `states` is empty here, so its storage becomes the new `deeper_states`.
1222
+ analysis->states = analysis->deeper_states;
1223
+ analysis->deeper_states = _states;
1224
+ continue;
1225
+ }
1226
+
1227
+ break; // No states left and no depth increase is allowed: the analysis is finished.
1228
+ }
1229
+
1230
+ analysis_state_set__clear(&analysis->next_states, &analysis->state_pool); // Return the previous contents of `next_states` to the pool before refilling it.
1231
+ for (unsigned j = 0; j < analysis->states.size; j++) {
1232
+ AnalysisState * const state = *array_get(&analysis->states, j);
1233
+
1234
+ // For efficiency, it's important to avoid processing the same analysis state more
1235
+ // than once. To achieve this, keep the states in order of ascending position within
1236
+ // their hypothetical syntax trees. In each iteration of this loop, start by advancing
1237
+ // the states that have made the least progress. Avoid advancing states that have already
1238
+ // made more progress.
1239
+ if (analysis->next_states.size > 0) {
1240
+ int comparison = analysis_state__compare(
1241
+ &state,
1242
+ array_back(&analysis->next_states)
1243
+ );
1244
+ if (comparison == 0) { // Equal to the last queued state: insert_sorted de-duplicates, and this state is not advanced.
1245
+ analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, state);
1246
+ continue;
1247
+ } else if (comparison > 0) {
1248
+ #ifdef DEBUG_ANALYZE_QUERY
1249
+ printf("Terminate iteration at state %u\n", j);
1250
+ #endif
1251
+ while (j < analysis->states.size) { // Carry this and every remaining state over to the next iteration unprocessed.
1252
+ analysis_state_set__push(
1253
+ &analysis->next_states,
1254
+ &analysis->state_pool,
1255
+ *array_get(&analysis->states, j)
1256
+ );
1257
+ j++;
1258
+ }
1259
+ break;
1260
+ }
1261
+ }
1262
+
1263
+ const TSStateId parse_state = analysis_state__top(state)->parse_state;
1264
+ const TSSymbol parent_symbol = analysis_state__top(state)->parent_symbol;
1265
+ const TSFieldId parent_field_id = analysis_state__top(state)->field_id;
1266
+ const unsigned child_index = analysis_state__top(state)->child_index;
1267
+ const QueryStep * const step = array_get(&self->steps, state->step_index);
1268
+
1269
+ unsigned subgraph_index, exists;
1270
+ array_search_sorted_by(subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1271
+ if (!exists) continue; // No subgraph recorded for this parent symbol.
1272
+ const AnalysisSubgraph *subgraph = array_get(subgraphs, subgraph_index);
1273
+
1274
+ // Follow every possible path in the parse table, but only visit states that
1275
+ // are part of the subgraph for the current symbol.
1276
+ LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, parse_state);
1277
+ while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1278
+ TSSymbol sym = lookahead_iterator.symbol;
1279
+
1280
+ AnalysisSubgraphNode successor = {
1281
+ .state = parse_state,
1282
+ .child_index = child_index,
1283
+ };
1284
+ if (lookahead_iterator.action_count) {
1285
+ const TSParseAction *action = &lookahead_iterator.actions[lookahead_iterator.action_count - 1];
1286
+ if (action->type == TSParseActionTypeShift) {
1287
+ if (!action->shift.extra) {
1288
+ successor.state = action->shift.state;
1289
+ successor.child_index++;
1290
+ }
1291
+ } else {
1292
+ continue; // The last action is not a shift, so this lookahead is not followed.
1293
+ }
1294
+ } else if (lookahead_iterator.next_state != 0) {
1295
+ successor.state = lookahead_iterator.next_state;
1296
+ successor.child_index++;
1297
+ } else {
1298
+ continue; // Neither an action nor a successor state for this lookahead.
1299
+ }
1300
+
1301
+ unsigned node_index;
1302
+ array_search_sorted_with(
1303
+ &subgraph->nodes,
1304
+ analysis_subgraph_node__compare, &successor,
1305
+ &node_index, &exists
1306
+ );
1307
+ while (node_index < subgraph->nodes.size) {
1308
+ AnalysisSubgraphNode *node = array_get(&subgraph->nodes, node_index);
1309
+ node_index++;
1310
+ if (node->state != successor.state || node->child_index != successor.child_index) break; // Left the run of nodes for this (state, child_index).
1311
+
1312
+ // Use the subgraph to determine what alias and field will eventually be applied
1313
+ // to this child node.
1314
+ TSSymbol alias = ts_language_alias_at(self->language, node->production_id, child_index);
1315
+ TSSymbol visible_symbol = alias
1316
+ ? alias
1317
+ : self->language->symbol_metadata[sym].visible
1318
+ ? self->language->public_symbol_map[sym]
1319
+ : 0;
1320
+ TSFieldId field_id = parent_field_id;
1321
+ if (!field_id) {
1322
+ const TSFieldMapEntry *field_map, *field_map_end;
1323
+ ts_language_field_map(self->language, node->production_id, &field_map, &field_map_end);
1324
+ for (; field_map != field_map_end; field_map++) {
1325
+ if (!field_map->inherited && field_map->child_index == child_index) {
1326
+ field_id = field_map->field_id;
1327
+ break;
1328
+ }
1329
+ }
1330
+ }
1331
+
1332
+ // Create a new state that has advanced past this hypothetical subtree.
1333
+ AnalysisState next_state = *state;
1334
+ AnalysisStateEntry *next_state_top = analysis_state__top(&next_state);
1335
+ next_state_top->child_index = successor.child_index;
1336
+ next_state_top->parse_state = successor.state;
1337
+ if (node->done) next_state_top->done = true;
1338
+
1339
+ // Determine if this hypothetical child node would match the current step
1340
+ // of the query pattern.
1341
+ bool does_match = false;
1342
+
1343
+ // ERROR nodes can appear anywhere, so if the step is
1344
+ // looking for an ERROR node, consider it potentially matchable.
1345
+ if (step->symbol == ts_builtin_sym_error) {
1346
+ does_match = true;
1347
+ } else if (visible_symbol) {
1348
+ does_match = true;
1349
+ if (step->symbol == WILDCARD_SYMBOL) {
1350
+ if (
1351
+ step->is_named &&
1352
+ !self->language->symbol_metadata[visible_symbol].named
1353
+ ) does_match = false;
1354
+ } else if (step->symbol != visible_symbol) {
1355
+ does_match = false;
1356
+ }
1357
+ if (step->field && step->field != field_id) {
1358
+ does_match = false;
1359
+ }
1360
+ if (
1361
+ step->supertype_symbol &&
1362
+ !analysis_state__has_supertype(state, step->supertype_symbol)
1363
+ ) does_match = false;
1364
+ }
1365
+
1366
+ // If this child is hidden, then descend into it and walk through its children.
1367
+ // If the top entry of the stack is at the end of its rule, then that entry can
1368
+ // be replaced. Otherwise, push a new entry onto the stack.
1369
+ else if (sym >= self->language->token_count) {
1370
+ if (!next_state_top->done) {
1371
+ if (next_state.depth + 1 >= MAX_ANALYSIS_STATE_DEPTH) {
1372
+ #ifdef DEBUG_ANALYZE_QUERY
1373
+ printf("Exceeded depth limit for state %u\n", j);
1374
+ #endif
1375
+
1376
+ analysis->did_abort = true;
1377
+ continue; // Abandon this node: descending would exceed the state stack's depth limit.
1378
+ }
1379
+
1380
+ next_state.depth++;
1381
+ next_state_top = analysis_state__top(&next_state);
1382
+ }
1383
+
1384
+ *next_state_top = (AnalysisStateEntry) {
1385
+ .parse_state = parse_state,
1386
+ .parent_symbol = sym,
1387
+ .child_index = 0,
1388
+ .field_id = field_id,
1389
+ .done = false,
1390
+ };
1391
+
1392
+ if (analysis_state__recursion_depth(&next_state) > recursion_depth_limit) {
1393
+ analysis_state_set__insert_sorted(
1394
+ &analysis->deeper_states,
1395
+ &analysis->state_pool,
1396
+ &next_state
1397
+ );
1398
+ continue; // Deferred until the recursion depth limit is raised.
1399
+ }
1400
+ }
1401
+
1402
+ // Pop from the stack when this state reached the end of its current syntax node.
1403
+ while (next_state.depth > 0 && next_state_top->done) {
1404
+ next_state.depth--;
1405
+ next_state_top = analysis_state__top(&next_state);
1406
+ }
1407
+
1408
+ // If this hypothetical child did match the current step of the query pattern,
1409
+ // then advance to the next step at the current depth. This involves skipping
1410
+ // over any descendant steps of the current child.
1411
+ const QueryStep *next_step = step;
1412
+ if (does_match) {
1413
+ for (;;) {
1414
+ next_state.step_index++;
1415
+ next_step = array_get(&self->steps, next_state.step_index);
1416
+ if (
1417
+ next_step->depth == PATTERN_DONE_MARKER ||
1418
+ next_step->depth <= step->depth
1419
+ ) break;
1420
+ }
1421
+ } else if (successor.state == parse_state) {
1422
+ continue; // No match and the parse state did not change (presumably an extra token); nothing to record.
1423
+ }
1424
+
1425
+ for (;;) {
1426
+ // Skip pass-through states. Although these states have alternatives, they are only
1427
+ // used to implement repetitions, and query analysis does not need to process
1428
+ // repetitions in order to determine whether steps are possible and definite.
1429
+ if (next_step->is_pass_through) {
1430
+ next_state.step_index++;
1431
+ next_step++;
1432
+ continue;
1433
+ }
1434
+
1435
+ // If the pattern is finished or hypothetical parent node is complete, then
1436
+ // record that matching can terminate at this step of the pattern. Otherwise,
1437
+ // add this state to the list of states to process on the next iteration.
1438
+ if (!next_step->is_dead_end) {
1439
+ bool did_finish_pattern = array_get(&self->steps, next_state.step_index)->depth != step->depth;
1440
+ if (did_finish_pattern) {
1441
+ array_insert_sorted_by(&analysis->finished_parent_symbols, , state->root_symbol);
1442
+ } else if (next_state.depth == 0) {
1443
+ array_insert_sorted_by(&analysis->final_step_indices, , next_state.step_index);
1444
+ } else {
1445
+ analysis_state_set__insert_sorted(&analysis->next_states, &analysis->state_pool, &next_state);
1446
+ }
1447
+ }
1448
+
1449
+ // If the state has advanced to a step with an alternative step, then add another state
1450
+ // at that alternative step. This process is simpler than the process of actually matching a
1451
+ // pattern during query execution, because for the purposes of query analysis, there is no
1452
+ // need to process repetitions.
1453
+ if (
1454
+ does_match &&
1455
+ next_step->alternative_index != NONE &&
1456
+ next_step->alternative_index > next_state.step_index
1457
+ ) {
1458
+ next_state.step_index = next_step->alternative_index;
1459
+ next_step = array_get(&self->steps, next_state.step_index);
1460
+ } else {
1461
+ break;
1462
+ }
1463
+ }
1464
+ }
1465
+ }
1466
+ }
1467
+
1468
+ AnalysisStateSet _states = analysis->states; // Swap: the states produced by this iteration become the input of the next.
1469
+ analysis->states = analysis->next_states;
1470
+ analysis->next_states = _states;
1471
+ }
1472
+ }
1474
+
1475
+ static bool ts_query__analyze_patterns(TSQuery *self, unsigned *error_offset) {
1476
+ Array(uint16_t) non_rooted_pattern_start_steps = array_new();
1477
+ for (unsigned i = 0; i < self->pattern_map.size; i++) {
1478
+ PatternEntry *pattern = array_get(&self->pattern_map, i);
1479
+ if (!pattern->is_rooted) {
1480
+ QueryStep *step = array_get(&self->steps, pattern->step_index);
1481
+ if (step->symbol != WILDCARD_SYMBOL) {
1482
+ array_push(&non_rooted_pattern_start_steps, i);
1483
+ }
1484
+ }
1485
+ }
1486
+
1487
+ // Walk forward through all of the steps in the query, computing some
1488
+ // basic information about each step. Mark all of the steps that contain
1489
+ // captures, and record the indices of all of the steps that have child steps.
1490
+ Array(uint32_t) parent_step_indices = array_new();
1491
+ for (unsigned i = 0; i < self->steps.size; i++) {
1492
+ QueryStep *step = array_get(&self->steps, i);
1493
+ if (step->depth == PATTERN_DONE_MARKER) {
1494
+ step->parent_pattern_guaranteed = true;
1495
+ step->root_pattern_guaranteed = true;
1496
+ continue;
1497
+ }
1498
+
1499
+ bool has_children = false;
1500
+ bool is_wildcard = step->symbol == WILDCARD_SYMBOL;
1501
+ step->contains_captures = step->capture_ids[0] != NONE;
1502
+ for (unsigned j = i + 1; j < self->steps.size; j++) {
1503
+ QueryStep *next_step = array_get(&self->steps, j);
1504
+ if (
1505
+ next_step->depth == PATTERN_DONE_MARKER ||
1506
+ next_step->depth <= step->depth
1507
+ ) break;
1508
+ if (next_step->capture_ids[0] != NONE) {
1509
+ step->contains_captures = true;
1510
+ }
1511
+ if (!is_wildcard) {
1512
+ next_step->root_pattern_guaranteed = true;
1513
+ next_step->parent_pattern_guaranteed = true;
1514
+ }
1515
+ has_children = true;
1516
+ }
1517
+
1518
+ if (has_children && !is_wildcard) {
1519
+ array_push(&parent_step_indices, i);
1520
+ }
1521
+ }
1522
+
1523
+ // For every parent symbol in the query, initialize an 'analysis subgraph'.
1524
+ // This subgraph lists all of the states in the parse table that are directly
1525
+ // involved in building subtrees for this symbol.
1526
+ //
1527
+ // In addition to the parent symbols in the query, construct subgraphs for all
1528
+ // of the hidden symbols in the grammar, because these might occur within
1529
+ // one of the parent nodes, such that their children appear to belong to the
1530
+ // parent.
1531
+ AnalysisSubgraphArray subgraphs = array_new();
1532
+ for (unsigned i = 0; i < parent_step_indices.size; i++) {
1533
+ uint32_t parent_step_index = *array_get(&parent_step_indices, i);
1534
+ TSSymbol parent_symbol = array_get(&self->steps, parent_step_index)->symbol;
1535
+ AnalysisSubgraph subgraph = { .symbol = parent_symbol };
1536
+ array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1537
+ }
1538
+ for (TSSymbol sym = (uint16_t)self->language->token_count; sym < (uint16_t)self->language->symbol_count; sym++) {
1539
+ if (!ts_language_symbol_metadata(self->language, sym).visible) {
1540
+ AnalysisSubgraph subgraph = { .symbol = sym };
1541
+ array_insert_sorted_by(&subgraphs, .symbol, subgraph);
1542
+ }
1543
+ }
1544
+
1545
+ // Scan the parse table to find the data needed to populate these subgraphs.
1546
+ // Collect three things during this scan:
1547
+ // 1) All of the parse states where one of these symbols can start.
1548
+ // 2) All of the parse states where one of these symbols can end, along
1549
+ // with information about the node that would be created.
1550
+ // 3) A list of predecessor states for each state.
1551
+ StatePredecessorMap predecessor_map = state_predecessor_map_new(self->language);
1552
+ for (TSStateId state = 1; state < (uint16_t)self->language->state_count; state++) {
1553
+ unsigned subgraph_index, exists;
1554
+ LookaheadIterator lookahead_iterator = ts_language_lookaheads(self->language, state);
1555
+ while (ts_lookahead_iterator__next(&lookahead_iterator)) {
1556
+ if (lookahead_iterator.action_count) {
1557
+ for (unsigned i = 0; i < lookahead_iterator.action_count; i++) {
1558
+ const TSParseAction *action = &lookahead_iterator.actions[i];
1559
+ if (action->type == TSParseActionTypeReduce) {
1560
+ const TSSymbol *aliases, *aliases_end;
1561
+ ts_language_aliases_for_symbol(
1562
+ self->language,
1563
+ action->reduce.symbol,
1564
+ &aliases,
1565
+ &aliases_end
1566
+ );
1567
+ for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1568
+ array_search_sorted_by(
1569
+ &subgraphs,
1570
+ .symbol,
1571
+ *symbol,
1572
+ &subgraph_index,
1573
+ &exists
1574
+ );
1575
+ if (exists) {
1576
+ AnalysisSubgraph *subgraph = array_get(&subgraphs, subgraph_index);
1577
+ if (subgraph->nodes.size == 0 || array_back(&subgraph->nodes)->state != state) {
1578
+ array_push(&subgraph->nodes, ((AnalysisSubgraphNode) {
1579
+ .state = state,
1580
+ .production_id = action->reduce.production_id,
1581
+ .child_index = action->reduce.child_count,
1582
+ .done = true,
1583
+ }));
1584
+ }
1585
+ }
1586
+ }
1587
+ } else if (action->type == TSParseActionTypeShift && !action->shift.extra) {
1588
+ TSStateId next_state = action->shift.state;
1589
+ state_predecessor_map_add(&predecessor_map, next_state, state);
1590
+ }
1591
+ }
1592
+ } else if (lookahead_iterator.next_state != 0) {
1593
+ if (lookahead_iterator.next_state != state) {
1594
+ state_predecessor_map_add(&predecessor_map, lookahead_iterator.next_state, state);
1595
+ }
1596
+ if (ts_language_state_is_primary(self->language, state)) {
1597
+ const TSSymbol *aliases, *aliases_end;
1598
+ ts_language_aliases_for_symbol(
1599
+ self->language,
1600
+ lookahead_iterator.symbol,
1601
+ &aliases,
1602
+ &aliases_end
1603
+ );
1604
+ for (const TSSymbol *symbol = aliases; symbol < aliases_end; symbol++) {
1605
+ array_search_sorted_by(
1606
+ &subgraphs,
1607
+ .symbol,
1608
+ *symbol,
1609
+ &subgraph_index,
1610
+ &exists
1611
+ );
1612
+ if (exists) {
1613
+ AnalysisSubgraph *subgraph = array_get(&subgraphs, subgraph_index);
1614
+ if (
1615
+ subgraph->start_states.size == 0 ||
1616
+ *array_back(&subgraph->start_states) != state
1617
+ )
1618
+ array_push(&subgraph->start_states, state);
1619
+ }
1620
+ }
1621
+ }
1622
+ }
1623
+ }
1624
+ }
1625
+
1626
+ // For each subgraph, compute the preceding states by walking backward
1627
+ // from the end states using the predecessor map.
1628
+ Array(AnalysisSubgraphNode) next_nodes = array_new();
1629
+ for (unsigned i = 0; i < subgraphs.size; i++) {
1630
+ AnalysisSubgraph *subgraph = array_get(&subgraphs, i);
1631
+ if (subgraph->nodes.size == 0) {
1632
+ array_delete(&subgraph->start_states);
1633
+ array_erase(&subgraphs, i);
1634
+ i--;
1635
+ continue;
1636
+ }
1637
+ array_assign(&next_nodes, &subgraph->nodes);
1638
+ while (next_nodes.size > 0) {
1639
+ AnalysisSubgraphNode node = array_pop(&next_nodes);
1640
+ if (node.child_index > 1) {
1641
+ unsigned predecessor_count;
1642
+ const TSStateId *predecessors = state_predecessor_map_get(
1643
+ &predecessor_map,
1644
+ node.state,
1645
+ &predecessor_count
1646
+ );
1647
+ for (unsigned j = 0; j < predecessor_count; j++) {
1648
+ AnalysisSubgraphNode predecessor_node = {
1649
+ .state = predecessors[j],
1650
+ .child_index = node.child_index - 1,
1651
+ .production_id = node.production_id,
1652
+ .done = false,
1653
+ };
1654
+ unsigned index, exists;
1655
+ array_search_sorted_with(
1656
+ &subgraph->nodes, analysis_subgraph_node__compare, &predecessor_node,
1657
+ &index, &exists
1658
+ );
1659
+ if (!exists) {
1660
+ array_insert(&subgraph->nodes, index, predecessor_node);
1661
+ array_push(&next_nodes, predecessor_node);
1662
+ }
1663
+ }
1664
+ }
1665
+ }
1666
+ }
1667
+
1668
+ #ifdef DEBUG_ANALYZE_QUERY
1669
+ printf("\nSubgraphs:\n");
1670
+ for (unsigned i = 0; i < subgraphs.size; i++) {
1671
+ AnalysisSubgraph *subgraph = array_get(&subgraphs, i);
1672
+ printf(" %u, %s:\n", subgraph->symbol, ts_language_symbol_name(self->language, subgraph->symbol));
1673
+ for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1674
+ printf(
1675
+ " {state: %u}\n",
1676
+ *array_get(&subgraph->start_states, j)
1677
+ );
1678
+ }
1679
+ for (unsigned j = 0; j < subgraph->nodes.size; j++) {
1680
+ AnalysisSubgraphNode *node = array_get(&subgraph->nodes, j);
1681
+ printf(
1682
+ " {state: %u, child_index: %u, production_id: %u, done: %d}\n",
1683
+ node->state, node->child_index, node->production_id, node->done
1684
+ );
1685
+ }
1686
+ printf("\n");
1687
+ }
1688
+ #endif
1689
+
1690
+ // For each non-terminal pattern, determine if the pattern can successfully match,
1691
+ // and identify all of the possible children within the pattern where matching could fail.
1692
+ bool all_patterns_are_valid = true;
1693
+ QueryAnalysis analysis = query_analysis__new();
1694
+ for (unsigned i = 0; i < parent_step_indices.size; i++) {
1695
+ uint16_t parent_step_index = *array_get(&parent_step_indices, i);
1696
+ uint16_t parent_depth = array_get(&self->steps, parent_step_index)->depth;
1697
+ TSSymbol parent_symbol = array_get(&self->steps, parent_step_index)->symbol;
1698
+ if (parent_symbol == ts_builtin_sym_error) continue;
1699
+
1700
+ // Find the subgraph that corresponds to this pattern's root symbol. If the pattern's
1701
+ // root symbol is a terminal, then return an error.
1702
+ unsigned subgraph_index, exists;
1703
+ array_search_sorted_by(&subgraphs, .symbol, parent_symbol, &subgraph_index, &exists);
1704
+ if (!exists) {
1705
+ unsigned first_child_step_index = parent_step_index + 1;
1706
+ uint32_t j, child_exists;
1707
+ array_search_sorted_by(&self->step_offsets, .step_index, first_child_step_index, &j, &child_exists);
1708
+ ts_assert(child_exists);
1709
+ *error_offset = array_get(&self->step_offsets, j)->byte_offset;
1710
+ all_patterns_are_valid = false;
1711
+ break;
1712
+ }
1713
+
1714
+ // Initialize an analysis state at every parse state in the table where
1715
+ // this parent symbol can occur.
1716
+ AnalysisSubgraph *subgraph = array_get(&subgraphs, subgraph_index);
1717
+ analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1718
+ analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1719
+ for (unsigned j = 0; j < subgraph->start_states.size; j++) {
1720
+ TSStateId parse_state = *array_get(&subgraph->start_states, j);
1721
+ analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1722
+ .step_index = parent_step_index + 1,
1723
+ .stack = {
1724
+ [0] = {
1725
+ .parse_state = parse_state,
1726
+ .parent_symbol = parent_symbol,
1727
+ .child_index = 0,
1728
+ .field_id = 0,
1729
+ .done = false,
1730
+ },
1731
+ },
1732
+ .depth = 1,
1733
+ .root_symbol = parent_symbol,
1734
+ }));
1735
+ }
1736
+
1737
+ #ifdef DEBUG_ANALYZE_QUERY
1738
+ printf(
1739
+ "\nWalk states for %s:\n",
1740
+ ts_language_symbol_name(self->language, (*array_get(&analysis.states, 0))->stack[0].parent_symbol)
1741
+ );
1742
+ #endif
1743
+
1744
+ analysis.did_abort = false;
1745
+ ts_query__perform_analysis(self, &subgraphs, &analysis);
1746
+
1747
+ // If this pattern could not be fully analyzed, then every step should
1748
+ // be considered fallible.
1749
+ if (analysis.did_abort) {
1750
+ for (unsigned j = parent_step_index + 1; j < self->steps.size; j++) {
1751
+ QueryStep *step = array_get(&self->steps, j);
1752
+ if (
1753
+ step->depth <= parent_depth ||
1754
+ step->depth == PATTERN_DONE_MARKER
1755
+ ) break;
1756
+ if (!step->is_dead_end) {
1757
+ step->parent_pattern_guaranteed = false;
1758
+ step->root_pattern_guaranteed = false;
1759
+ }
1760
+ }
1761
+ continue;
1762
+ }
1763
+
1764
+ // If this pattern cannot match, store the pattern index so that it can be
1765
+ // returned to the caller.
1766
+ if (analysis.finished_parent_symbols.size == 0) {
1767
+ ts_assert(analysis.final_step_indices.size > 0);
1768
+ uint16_t impossible_step_index = *array_back(&analysis.final_step_indices);
1769
+ uint32_t j, impossible_exists;
1770
+ array_search_sorted_by(&self->step_offsets, .step_index, impossible_step_index, &j, &impossible_exists);
1771
+ if (j >= self->step_offsets.size) j = self->step_offsets.size - 1;
1772
+ *error_offset = array_get(&self->step_offsets, j)->byte_offset;
1773
+ all_patterns_are_valid = false;
1774
+ break;
1775
+ }
1776
+
1777
+ // Mark as fallible any step where a match terminated.
1778
+ // Later, this property will be propagated to all of the step's predecessors.
1779
+ for (unsigned j = 0; j < analysis.final_step_indices.size; j++) {
1780
+ uint32_t final_step_index = *array_get(&analysis.final_step_indices, j);
1781
+ QueryStep *step = array_get(&self->steps, final_step_index);
1782
+ if (
1783
+ step->depth != PATTERN_DONE_MARKER &&
1784
+ step->depth > parent_depth &&
1785
+ !step->is_dead_end
1786
+ ) {
1787
+ step->parent_pattern_guaranteed = false;
1788
+ step->root_pattern_guaranteed = false;
1789
+ }
1790
+ }
1791
+ }
1792
+
1793
+ // Mark as indefinite any step with captures that are used in predicates.
1794
+ Array(uint16_t) predicate_capture_ids = array_new();
1795
+ for (unsigned i = 0; i < self->patterns.size; i++) {
1796
+ QueryPattern *pattern = array_get(&self->patterns, i);
1797
+
1798
+ // Gather all of the captures that are used in predicates for this pattern.
1799
+ array_clear(&predicate_capture_ids);
1800
+ for (
1801
+ unsigned start = pattern->predicate_steps.offset,
1802
+ end = start + pattern->predicate_steps.length,
1803
+ j = start; j < end; j++
1804
+ ) {
1805
+ TSQueryPredicateStep *step = array_get(&self->predicate_steps, j);
1806
+ if (step->type == TSQueryPredicateStepTypeCapture) {
1807
+ uint16_t value_id = step->value_id;
1808
+ array_insert_sorted_by(&predicate_capture_ids, , value_id);
1809
+ }
1810
+ }
1811
+
1812
+ // Find all of the steps that have these captures.
1813
+ for (
1814
+ unsigned start = pattern->steps.offset,
1815
+ end = start + pattern->steps.length,
1816
+ j = start; j < end; j++
1817
+ ) {
1818
+ QueryStep *step = array_get(&self->steps, j);
1819
+ for (unsigned k = 0; k < MAX_STEP_CAPTURE_COUNT; k++) {
1820
+ uint16_t capture_id = step->capture_ids[k];
1821
+ if (capture_id == NONE) break;
1822
+ unsigned index, exists;
1823
+ array_search_sorted_by(&predicate_capture_ids, , capture_id, &index, &exists);
1824
+ if (exists) {
1825
+ step->root_pattern_guaranteed = false;
1826
+ break;
1827
+ }
1828
+ }
1829
+ }
1830
+ }
1831
+
1832
+ // Propagate fallibility. If a pattern is fallible at a given step, then it is
1833
+ // fallible at all of its preceding steps.
1834
+ bool done = self->steps.size == 0;
1835
+ while (!done) {
1836
+ done = true;
1837
+ for (unsigned i = self->steps.size - 1; i > 0; i--) {
1838
+ QueryStep *step = array_get(&self->steps, i);
1839
+ if (step->depth == PATTERN_DONE_MARKER) continue;
1840
+
1841
+ // Determine if this step is definite or has definite alternatives.
1842
+ bool parent_pattern_guaranteed = false;
1843
+ for (;;) {
1844
+ if (step->root_pattern_guaranteed) {
1845
+ parent_pattern_guaranteed = true;
1846
+ break;
1847
+ }
1848
+ if (step->alternative_index == NONE || step->alternative_index < i) {
1849
+ break;
1850
+ }
1851
+ step = array_get(&self->steps, step->alternative_index);
1852
+ }
1853
+
1854
+ // If not, mark its predecessor as indefinite.
1855
+ if (!parent_pattern_guaranteed) {
1856
+ QueryStep *prev_step = array_get(&self->steps, i - 1);
1857
+ if (
1858
+ !prev_step->is_dead_end &&
1859
+ prev_step->depth != PATTERN_DONE_MARKER &&
1860
+ prev_step->root_pattern_guaranteed
1861
+ ) {
1862
+ prev_step->root_pattern_guaranteed = false;
1863
+ done = false;
1864
+ }
1865
+ }
1866
+ }
1867
+ }
1868
+
1869
+ #ifdef DEBUG_ANALYZE_QUERY
1870
+ printf("Steps:\n");
1871
+ for (unsigned i = 0; i < self->steps.size; i++) {
1872
+ QueryStep *step = array_get(&self->steps, i);
1873
+ if (step->depth == PATTERN_DONE_MARKER) {
1874
+ printf(" %u: DONE\n", i);
1875
+ } else {
1876
+ printf(
1877
+ " %u: {symbol: %s, field: %s, depth: %u, parent_pattern_guaranteed: %d, root_pattern_guaranteed: %d}\n",
1878
+ i,
1879
+ (step->symbol == WILDCARD_SYMBOL)
1880
+ ? "ANY"
1881
+ : ts_language_symbol_name(self->language, step->symbol),
1882
+ (step->field ? ts_language_field_name_for_id(self->language, step->field) : "-"),
1883
+ step->depth,
1884
+ step->parent_pattern_guaranteed,
1885
+ step->root_pattern_guaranteed
1886
+ );
1887
+ }
1888
+ }
1889
+ #endif
1890
+
1891
+ // Determine which repetition symbols in this language have the possibility
1892
+ // of matching non-rooted patterns in this query. These repetition symbols
1893
+ // prevent certain optimizations with range restrictions.
1894
+ analysis.did_abort = false;
1895
+ for (uint32_t i = 0; i < non_rooted_pattern_start_steps.size; i++) {
1896
+ uint16_t pattern_entry_index = *array_get(&non_rooted_pattern_start_steps, i);
1897
+ PatternEntry *pattern_entry = array_get(&self->pattern_map, pattern_entry_index);
1898
+
1899
+ analysis_state_set__clear(&analysis.states, &analysis.state_pool);
1900
+ analysis_state_set__clear(&analysis.deeper_states, &analysis.state_pool);
1901
+ for (unsigned j = 0; j < subgraphs.size; j++) {
1902
+ AnalysisSubgraph *subgraph = array_get(&subgraphs, j);
1903
+ TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, subgraph->symbol);
1904
+ if (metadata.visible || metadata.named) continue;
1905
+
1906
+ for (uint32_t k = 0; k < subgraph->start_states.size; k++) {
1907
+ TSStateId parse_state = *array_get(&subgraph->start_states, k);
1908
+ analysis_state_set__push(&analysis.states, &analysis.state_pool, &((AnalysisState) {
1909
+ .step_index = pattern_entry->step_index,
1910
+ .stack = {
1911
+ [0] = {
1912
+ .parse_state = parse_state,
1913
+ .parent_symbol = subgraph->symbol,
1914
+ .child_index = 0,
1915
+ .field_id = 0,
1916
+ .done = false,
1917
+ },
1918
+ },
1919
+ .root_symbol = subgraph->symbol,
1920
+ .depth = 1,
1921
+ }));
1922
+ }
1923
+ }
1924
+
1925
+ #ifdef DEBUG_ANALYZE_QUERY
1926
+ printf("\nWalk states for rootless pattern step %u:\n", pattern_entry->step_index);
1927
+ #endif
1928
+
1929
+ ts_query__perform_analysis(
1930
+ self,
1931
+ &subgraphs,
1932
+ &analysis
1933
+ );
1934
+
1935
+ if (analysis.finished_parent_symbols.size > 0) {
1936
+ array_get(&self->patterns, pattern_entry->pattern_index)->is_non_local = true;
1937
+ }
1938
+
1939
+ for (unsigned k = 0; k < analysis.finished_parent_symbols.size; k++) {
1940
+ TSSymbol symbol = *array_get(&analysis.finished_parent_symbols, k);
1941
+ array_insert_sorted_by(&self->repeat_symbols_with_rootless_patterns, , symbol);
1942
+ }
1943
+ }
1944
+
1945
+ #ifdef DEBUG_ANALYZE_QUERY
1946
+ if (self->repeat_symbols_with_rootless_patterns.size > 0) {
1947
+ printf("\nRepetition symbols with rootless patterns:\n");
1948
+ printf("aborted analysis: %d\n", analysis.did_abort);
1949
+ for (unsigned i = 0; i < self->repeat_symbols_with_rootless_patterns.size; i++) {
1950
+ TSSymbol symbol = *array_get(&self->repeat_symbols_with_rootless_patterns, i);
1951
+ printf(" %u, %s\n", symbol, ts_language_symbol_name(self->language, symbol));
1952
+ }
1953
+ printf("\n");
1954
+ }
1955
+ #endif
1956
+
1957
+ // Cleanup
1958
+ for (unsigned i = 0; i < subgraphs.size; i++) {
1959
+ array_delete(&array_get(&subgraphs, i)->start_states);
1960
+ array_delete(&array_get(&subgraphs, i)->nodes);
1961
+ }
1962
+ array_delete(&subgraphs);
1963
+ query_analysis__delete(&analysis);
1964
+ array_delete(&next_nodes);
1965
+ array_delete(&non_rooted_pattern_start_steps);
1966
+ array_delete(&parent_step_indices);
1967
+ array_delete(&predicate_capture_ids);
1968
+ state_predecessor_map_delete(&predecessor_map);
1969
+
1970
+ return all_patterns_are_valid;
1971
+ }
1972
+
1973
+ static void ts_query__add_negated_fields(
1974
+ TSQuery *self,
1975
+ uint16_t step_index,
1976
+ TSFieldId *field_ids,
1977
+ uint16_t field_count
1978
+ ) {
1979
+ QueryStep *step = array_get(&self->steps, step_index);
1980
+
1981
+ // The negated field array stores a list of field lists, separated by zeros.
1982
+ // Try to find the start index of an existing list that matches this new list.
1983
+ bool failed_match = false;
1984
+ unsigned match_count = 0;
1985
+ unsigned start_i = 0;
1986
+ for (unsigned i = 0; i < self->negated_fields.size; i++) {
1987
+ TSFieldId existing_field_id = *array_get(&self->negated_fields, i);
1988
+
1989
+ // At each zero value, terminate the match attempt. If we've exactly
1990
+ // matched the new field list, then reuse this index. Otherwise,
1991
+ // start over the matching process.
1992
+ if (existing_field_id == 0) {
1993
+ if (match_count == field_count) {
1994
+ step->negated_field_list_id = start_i;
1995
+ return;
1996
+ } else {
1997
+ start_i = i + 1;
1998
+ match_count = 0;
1999
+ failed_match = false;
2000
+ }
2001
+ }
2002
+
2003
+ // If the existing list matches our new list so far, then advance
2004
+ // to the next element of the new list.
2005
+ else if (
2006
+ match_count < field_count &&
2007
+ existing_field_id == field_ids[match_count] &&
2008
+ !failed_match
2009
+ ) {
2010
+ match_count++;
2011
+ }
2012
+
2013
+ // Otherwise, this existing list has failed to match.
2014
+ else {
2015
+ match_count = 0;
2016
+ failed_match = true;
2017
+ }
2018
+ }
2019
+
2020
+ step->negated_field_list_id = self->negated_fields.size;
2021
+ array_extend(&self->negated_fields, field_count, field_ids);
2022
+ array_push(&self->negated_fields, 0);
2023
+ }
2024
+
2025
+ static TSQueryError ts_query__parse_string_literal(
2026
+ TSQuery *self,
2027
+ Stream *stream
2028
+ ) {
2029
+ const char *string_start = stream->input;
2030
+ if (stream->next != '"') return TSQueryErrorSyntax;
2031
+ stream_advance(stream);
2032
+ const char *prev_position = stream->input;
2033
+
2034
+ bool is_escaped = false;
2035
+ array_clear(&self->string_buffer);
2036
+ for (;;) {
2037
+ if (is_escaped) {
2038
+ is_escaped = false;
2039
+ switch (stream->next) {
2040
+ case 'n':
2041
+ array_push(&self->string_buffer, '\n');
2042
+ break;
2043
+ case 'r':
2044
+ array_push(&self->string_buffer, '\r');
2045
+ break;
2046
+ case 't':
2047
+ array_push(&self->string_buffer, '\t');
2048
+ break;
2049
+ case '0':
2050
+ array_push(&self->string_buffer, '\0');
2051
+ break;
2052
+ default:
2053
+ array_extend(&self->string_buffer, stream->next_size, stream->input);
2054
+ break;
2055
+ }
2056
+ prev_position = stream->input + stream->next_size;
2057
+ } else {
2058
+ if (stream->next == '\\') {
2059
+ array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2060
+ prev_position = stream->input + 1;
2061
+ is_escaped = true;
2062
+ } else if (stream->next == '"') {
2063
+ array_extend(&self->string_buffer, (uint32_t)(stream->input - prev_position), prev_position);
2064
+ stream_advance(stream);
2065
+ return TSQueryErrorNone;
2066
+ } else if (stream->next == '\n') {
2067
+ stream_reset(stream, string_start);
2068
+ return TSQueryErrorSyntax;
2069
+ }
2070
+ }
2071
+ if (!stream_advance(stream)) {
2072
+ stream_reset(stream, string_start);
2073
+ return TSQueryErrorSyntax;
2074
+ }
2075
+ }
2076
+ }
2077
+
2078
+ // Parse a single predicate associated with a pattern, adding it to the
2079
+ // query's internal `predicate_steps` array. Predicates are arbitrary
2080
+ // S-expressions associated with a pattern which are meant to be handled at
2081
+ // a higher level of abstraction, such as the Rust/JavaScript bindings. They
2082
+ // can contain '@'-prefixed capture names, double-quoted strings, and bare
2083
+ // symbols, which also represent strings.
2084
+ static TSQueryError ts_query__parse_predicate(
2085
+ TSQuery *self,
2086
+ Stream *stream
2087
+ ) {
2088
+ if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2089
+ const char *predicate_name = stream->input;
2090
+ stream_scan_identifier(stream);
2091
+ uint32_t length = (uint32_t)(stream->input - predicate_name);
2092
+ uint16_t id = symbol_table_insert_name(
2093
+ &self->predicate_values,
2094
+ predicate_name,
2095
+ length
2096
+ );
2097
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2098
+ .type = TSQueryPredicateStepTypeString,
2099
+ .value_id = id,
2100
+ }));
2101
+ stream_skip_whitespace(stream);
2102
+
2103
+ for (;;) {
2104
+ if (stream->next == ')') {
2105
+ stream_advance(stream);
2106
+ stream_skip_whitespace(stream);
2107
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2108
+ .type = TSQueryPredicateStepTypeDone,
2109
+ .value_id = 0,
2110
+ }));
2111
+ break;
2112
+ }
2113
+
2114
+ // Parse an '@'-prefixed capture name
2115
+ else if (stream->next == '@') {
2116
+ stream_advance(stream);
2117
+
2118
+ // Parse the capture name
2119
+ if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2120
+ const char *capture_name = stream->input;
2121
+ stream_scan_identifier(stream);
2122
+ uint32_t capture_length = (uint32_t)(stream->input - capture_name);
2123
+
2124
+ // Add the capture id to the first step of the pattern
2125
+ int capture_id = symbol_table_id_for_name(
2126
+ &self->captures,
2127
+ capture_name,
2128
+ capture_length
2129
+ );
2130
+ if (capture_id == -1) {
2131
+ stream_reset(stream, capture_name);
2132
+ return TSQueryErrorCapture;
2133
+ }
2134
+
2135
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2136
+ .type = TSQueryPredicateStepTypeCapture,
2137
+ .value_id = capture_id,
2138
+ }));
2139
+ }
2140
+
2141
+ // Parse a string literal
2142
+ else if (stream->next == '"') {
2143
+ TSQueryError e = ts_query__parse_string_literal(self, stream);
2144
+ if (e) return e;
2145
+ uint16_t query_id = symbol_table_insert_name(
2146
+ &self->predicate_values,
2147
+ self->string_buffer.contents,
2148
+ self->string_buffer.size
2149
+ );
2150
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2151
+ .type = TSQueryPredicateStepTypeString,
2152
+ .value_id = query_id,
2153
+ }));
2154
+ }
2155
+
2156
+ // Parse a bare symbol
2157
+ else if (stream_is_ident_start(stream)) {
2158
+ const char *symbol_start = stream->input;
2159
+ stream_scan_identifier(stream);
2160
+ uint32_t symbol_length = (uint32_t)(stream->input - symbol_start);
2161
+ uint16_t query_id = symbol_table_insert_name(
2162
+ &self->predicate_values,
2163
+ symbol_start,
2164
+ symbol_length
2165
+ );
2166
+ array_push(&self->predicate_steps, ((TSQueryPredicateStep) {
2167
+ .type = TSQueryPredicateStepTypeString,
2168
+ .value_id = query_id,
2169
+ }));
2170
+ }
2171
+
2172
+ else {
2173
+ return TSQueryErrorSyntax;
2174
+ }
2175
+
2176
+ stream_skip_whitespace(stream);
2177
+ }
2178
+
2179
+ return 0;
2180
+ }
2181
+
2182
+ // Read one S-expression pattern from the stream, and incorporate it into
2183
+ // the query's internal state machine representation. For nested patterns,
2184
+ // this function calls itself recursively.
2185
+ //
2186
+ // The caller is responsible for passing in a dedicated CaptureQuantifiers.
2187
+ // These should not be shared between different calls to ts_query__parse_pattern!
2188
+ static TSQueryError ts_query__parse_pattern(
2189
+ TSQuery *self,
2190
+ Stream *stream,
2191
+ uint32_t depth,
2192
+ bool is_immediate,
2193
+ CaptureQuantifiers *capture_quantifiers
2194
+ ) {
2195
+ if (stream->next == 0) return TSQueryErrorSyntax;
2196
+ if (stream->next == ')' || stream->next == ']') return PARENT_DONE;
2197
+
2198
+ const uint32_t starting_step_index = self->steps.size;
2199
+
2200
+ // Store the byte offset of each step in the query.
2201
+ if (
2202
+ self->step_offsets.size == 0 ||
2203
+ array_back(&self->step_offsets)->step_index != starting_step_index
2204
+ ) {
2205
+ array_push(&self->step_offsets, ((StepOffset) {
2206
+ .step_index = starting_step_index,
2207
+ .byte_offset = stream_offset(stream),
2208
+ }));
2209
+ }
2210
+
2211
+ // An open bracket is the start of an alternation.
2212
+ if (stream->next == '[') {
2213
+ stream_advance(stream);
2214
+ stream_skip_whitespace(stream);
2215
+
2216
+ // Parse each branch, and add a placeholder step in between the branches.
2217
+ Array(uint32_t) branch_step_indices = array_new();
2218
+ CaptureQuantifiers branch_capture_quantifiers = capture_quantifiers_new();
2219
+ for (;;) {
2220
+ uint32_t start_index = self->steps.size;
2221
+ TSQueryError e = ts_query__parse_pattern(
2222
+ self,
2223
+ stream,
2224
+ depth,
2225
+ is_immediate,
2226
+ &branch_capture_quantifiers
2227
+ );
2228
+
2229
+ if (e == PARENT_DONE) {
2230
+ if (stream->next == ']' && branch_step_indices.size > 0) {
2231
+ stream_advance(stream);
2232
+ break;
2233
+ }
2234
+ e = TSQueryErrorSyntax;
2235
+ }
2236
+ if (e) {
2237
+ capture_quantifiers_delete(&branch_capture_quantifiers);
2238
+ array_delete(&branch_step_indices);
2239
+ return e;
2240
+ }
2241
+
2242
+ if (start_index == starting_step_index) {
2243
+ capture_quantifiers_replace(capture_quantifiers, &branch_capture_quantifiers);
2244
+ } else {
2245
+ capture_quantifiers_join_all(capture_quantifiers, &branch_capture_quantifiers);
2246
+ }
2247
+
2248
+ array_push(&branch_step_indices, start_index);
2249
+ array_push(&self->steps, query_step__new(0, depth, false));
2250
+ capture_quantifiers_clear(&branch_capture_quantifiers);
2251
+ }
2252
+ (void)array_pop(&self->steps);
2253
+
2254
+ // For all of the branches except for the last one, add the subsequent branch as an
2255
+ // alternative, and link the end of the branch to the current end of the steps.
2256
+ for (unsigned i = 0; i < branch_step_indices.size - 1; i++) {
2257
+ uint32_t step_index = *array_get(&branch_step_indices, i);
2258
+ uint32_t next_step_index = *array_get(&branch_step_indices, i + 1);
2259
+ QueryStep *start_step = array_get(&self->steps, step_index);
2260
+ QueryStep *end_step = array_get(&self->steps, next_step_index - 1);
2261
+ start_step->alternative_index = next_step_index;
2262
+ end_step->alternative_index = self->steps.size;
2263
+ end_step->is_dead_end = true;
2264
+ }
2265
+
2266
+ capture_quantifiers_delete(&branch_capture_quantifiers);
2267
+ array_delete(&branch_step_indices);
2268
+ }
2269
+
2270
+ // An open parenthesis can be the start of three possible constructs:
2271
+ // * A grouped sequence
2272
+ // * A predicate
2273
+ // * A named node
2274
+ else if (stream->next == '(') {
2275
+ stream_advance(stream);
2276
+ stream_skip_whitespace(stream);
2277
+
2278
+ // If this parenthesis is followed by a node, then it represents a grouped sequence.
2279
+ if (stream->next == '(' || stream->next == '"' || stream->next == '[') {
2280
+ bool child_is_immediate = is_immediate;
2281
+ CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2282
+ for (;;) {
2283
+ if (stream->next == '.') {
2284
+ child_is_immediate = true;
2285
+ stream_advance(stream);
2286
+ stream_skip_whitespace(stream);
2287
+ }
2288
+ TSQueryError e = ts_query__parse_pattern(
2289
+ self,
2290
+ stream,
2291
+ depth,
2292
+ child_is_immediate,
2293
+ &child_capture_quantifiers
2294
+ );
2295
+ if (e == PARENT_DONE) {
2296
+ if (stream->next == ')') {
2297
+ stream_advance(stream);
2298
+ break;
2299
+ }
2300
+ e = TSQueryErrorSyntax;
2301
+ }
2302
+ if (e) {
2303
+ capture_quantifiers_delete(&child_capture_quantifiers);
2304
+ return e;
2305
+ }
2306
+
2307
+ capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2308
+ capture_quantifiers_clear(&child_capture_quantifiers);
2309
+ child_is_immediate = false;
2310
+ }
2311
+
2312
+ capture_quantifiers_delete(&child_capture_quantifiers);
2313
+ }
2314
+
2315
+ // A dot/pound character indicates the start of a predicate.
2316
+ else if (stream->next == '.' || stream->next == '#') {
2317
+ stream_advance(stream);
2318
+ return ts_query__parse_predicate(self, stream);
2319
+ }
2320
+
2321
+ // Otherwise, this parenthesis is the start of a named node.
2322
+ else {
2323
+ TSSymbol symbol;
2324
+ bool is_missing = false;
2325
+ const char *node_name = stream->input;
2326
+
2327
+ // Parse a normal node name
2328
+ if (stream_is_ident_start(stream)) {
2329
+ stream_scan_identifier(stream);
2330
+ uint32_t length = (uint32_t)(stream->input - node_name);
2331
+
2332
+ // Parse the wildcard symbol
2333
+ if (length == 1 && node_name[0] == '_') {
2334
+ symbol = WILDCARD_SYMBOL;
2335
+ } else if (!strncmp(node_name, "MISSING", length)) {
2336
+ is_missing = true;
2337
+ stream_skip_whitespace(stream);
2338
+
2339
+ if (stream_is_ident_start(stream)) {
2340
+ const char *missing_node_name = stream->input;
2341
+ stream_scan_identifier(stream);
2342
+ uint32_t missing_node_length = (uint32_t)(stream->input - missing_node_name);
2343
+ symbol = ts_language_symbol_for_name(
2344
+ self->language,
2345
+ missing_node_name,
2346
+ missing_node_length,
2347
+ true
2348
+ );
2349
+ if (!symbol) {
2350
+ stream_reset(stream, missing_node_name);
2351
+ return TSQueryErrorNodeType;
2352
+ }
2353
+ }
2354
+
2355
+ else if (stream->next == '"') {
2356
+ const char *string_start = stream->input;
2357
+ TSQueryError e = ts_query__parse_string_literal(self, stream);
2358
+ if (e) return e;
2359
+
2360
+ symbol = ts_language_symbol_for_name(
2361
+ self->language,
2362
+ self->string_buffer.contents,
2363
+ self->string_buffer.size,
2364
+ false
2365
+ );
2366
+ if (!symbol) {
2367
+ stream_reset(stream, string_start + 1);
2368
+ return TSQueryErrorNodeType;
2369
+ }
2370
+ }
2371
+
2372
+ else if (stream->next == ')') {
2373
+ symbol = WILDCARD_SYMBOL;
2374
+ }
2375
+
2376
+ else {
2377
+ stream_reset(stream, stream->input);
2378
+ return TSQueryErrorSyntax;
2379
+ }
2380
+ }
2381
+
2382
+ else {
2383
+ symbol = ts_language_symbol_for_name(
2384
+ self->language,
2385
+ node_name,
2386
+ length,
2387
+ true
2388
+ );
2389
+ if (!symbol) {
2390
+ stream_reset(stream, node_name);
2391
+ return TSQueryErrorNodeType;
2392
+ }
2393
+ }
2394
+ } else {
2395
+ return TSQueryErrorSyntax;
2396
+ }
2397
+
2398
+ // Add a step for the node.
2399
+ array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2400
+ QueryStep *step = array_back(&self->steps);
2401
+ if (ts_language_symbol_metadata(self->language, symbol).supertype) {
2402
+ step->supertype_symbol = step->symbol;
2403
+ step->symbol = WILDCARD_SYMBOL;
2404
+ }
2405
+ if (is_missing) {
2406
+ step->is_missing = true;
2407
+ }
2408
+ if (symbol == WILDCARD_SYMBOL) {
2409
+ step->is_named = true;
2410
+ }
2411
+
2412
+ stream_skip_whitespace(stream);
2413
+
2414
+ if (stream->next == '/') {
2415
+ if (!step->supertype_symbol) {
2416
+ stream_reset(stream, node_name - 1); // reset to the start of the node
2417
+ return TSQueryErrorStructure;
2418
+ }
2419
+
2420
+ stream_advance(stream);
2421
+ if (!stream_is_ident_start(stream)) {
2422
+ return TSQueryErrorSyntax;
2423
+ }
2424
+
2425
+ const char *subtype_node_name = stream->input;
2426
+ stream_scan_identifier(stream);
2427
+ uint32_t length = (uint32_t)(stream->input - subtype_node_name);
2428
+
2429
+ step->symbol = ts_language_symbol_for_name(
2430
+ self->language,
2431
+ subtype_node_name,
2432
+ length,
2433
+ true
2434
+ );
2435
+ if (!step->symbol) {
2436
+ stream_reset(stream, subtype_node_name);
2437
+ return TSQueryErrorNodeType;
2438
+ }
2439
+
2440
+ // Get all the possible subtypes for the given supertype,
2441
+ // and check if the given subtype is valid.
2442
+ if (self->language->abi_version >= LANGUAGE_VERSION_WITH_RESERVED_WORDS) {
2443
+ uint32_t subtype_length;
2444
+ const TSSymbol *subtypes = ts_language_subtypes(
2445
+ self->language,
2446
+ step->supertype_symbol,
2447
+ &subtype_length
2448
+ );
2449
+
2450
+ bool subtype_is_valid = false;
2451
+ for (uint32_t i = 0; i < subtype_length; i++) {
2452
+ if (subtypes[i] == step->symbol) {
2453
+ subtype_is_valid = true;
2454
+ break;
2455
+ }
2456
+ }
2457
+
2458
+ // This subtype is not valid for the given supertype.
2459
+ if (!subtype_is_valid) {
2460
+ stream_reset(stream, node_name - 1); // reset to the start of the node
2461
+ return TSQueryErrorStructure;
2462
+ }
2463
+ }
2464
+
2465
+ stream_skip_whitespace(stream);
2466
+ }
2467
+
2468
+ // Parse the child patterns
2469
+ bool child_is_immediate = false;
2470
+ uint16_t last_child_step_index = 0;
2471
+ uint16_t negated_field_count = 0;
2472
+ TSFieldId negated_field_ids[MAX_NEGATED_FIELD_COUNT];
2473
+ CaptureQuantifiers child_capture_quantifiers = capture_quantifiers_new();
2474
+ for (;;) {
2475
+ // Parse a negated field assertion
2476
+ if (stream->next == '!') {
2477
+ stream_advance(stream);
2478
+ stream_skip_whitespace(stream);
2479
+ if (!stream_is_ident_start(stream)) {
2480
+ capture_quantifiers_delete(&child_capture_quantifiers);
2481
+ return TSQueryErrorSyntax;
2482
+ }
2483
+ const char *field_name = stream->input;
2484
+ stream_scan_identifier(stream);
2485
+ uint32_t length = (uint32_t)(stream->input - field_name);
2486
+ stream_skip_whitespace(stream);
2487
+
2488
+ TSFieldId field_id = ts_language_field_id_for_name(
2489
+ self->language,
2490
+ field_name,
2491
+ length
2492
+ );
2493
+ if (!field_id) {
2494
+ stream->input = field_name;
2495
+ capture_quantifiers_delete(&child_capture_quantifiers);
2496
+ return TSQueryErrorField;
2497
+ }
2498
+
2499
+ // Record the field id in the order written; ids beyond MAX_NEGATED_FIELD_COUNT are silently dropped.
2500
+ if (negated_field_count < MAX_NEGATED_FIELD_COUNT) {
2501
+ negated_field_ids[negated_field_count] = field_id;
2502
+ negated_field_count++;
2503
+ }
2504
+
2505
+ continue;
2506
+ }
2507
+
2508
+ // Parse a sibling anchor
2509
+ if (stream->next == '.') {
2510
+ child_is_immediate = true;
2511
+ stream_advance(stream);
2512
+ stream_skip_whitespace(stream);
2513
+ }
2514
+
2515
+ uint16_t step_index = self->steps.size;
2516
+ TSQueryError e = ts_query__parse_pattern(
2517
+ self,
2518
+ stream,
2519
+ depth + 1,
2520
+ child_is_immediate,
2521
+ &child_capture_quantifiers
2522
+ );
2523
+ // In the event we only parsed a predicate, meaning no new steps were added,
2524
+ // then subtract one so we're not indexing past the end of the array
2525
+ if (step_index == self->steps.size) step_index--;
2526
+ if (e == PARENT_DONE) {
2527
+ if (stream->next == ')') {
2528
+ if (child_is_immediate) {
2529
+ if (last_child_step_index == 0) {
2530
+ capture_quantifiers_delete(&child_capture_quantifiers);
2531
+ return TSQueryErrorSyntax;
2532
+ }
2533
+ // Mark this step *and* its alternatives as the last child of the parent.
2534
+ QueryStep *last_child_step = array_get(&self->steps, last_child_step_index);
2535
+ last_child_step->is_last_child = true;
2536
+ if (
2537
+ last_child_step->alternative_index != NONE &&
2538
+ last_child_step->alternative_index < self->steps.size
2539
+ ) {
2540
+ QueryStep *alternative_step = array_get(&self->steps, last_child_step->alternative_index);
2541
+ alternative_step->is_last_child = true;
2542
+ while (
2543
+ alternative_step->alternative_index != NONE &&
2544
+ alternative_step->alternative_index < self->steps.size
2545
+ ) {
2546
+ alternative_step = array_get(&self->steps, alternative_step->alternative_index);
2547
+ alternative_step->is_last_child = true;
2548
+ }
2549
+ }
2550
+ }
2551
+
2552
+ if (negated_field_count) {
2553
+ ts_query__add_negated_fields(
2554
+ self,
2555
+ starting_step_index,
2556
+ negated_field_ids,
2557
+ negated_field_count
2558
+ );
2559
+ }
2560
+
2561
+ stream_advance(stream);
2562
+ break;
2563
+ }
2564
+ e = TSQueryErrorSyntax;
2565
+ }
2566
+ if (e) {
2567
+ capture_quantifiers_delete(&child_capture_quantifiers);
2568
+ return e;
2569
+ }
2570
+
2571
+ capture_quantifiers_add_all(capture_quantifiers, &child_capture_quantifiers);
2572
+
2573
+ last_child_step_index = step_index;
2574
+ child_is_immediate = false;
2575
+ capture_quantifiers_clear(&child_capture_quantifiers);
2576
+ }
2577
+ capture_quantifiers_delete(&child_capture_quantifiers);
2578
+ }
2579
+ }
2580
+
2581
+ // Parse a wildcard pattern
2582
+ else if (stream->next == '_') {
2583
+ stream_advance(stream);
2584
+ stream_skip_whitespace(stream);
2585
+
2586
+ // Add a step that matches any kind of node
2587
+ array_push(&self->steps, query_step__new(WILDCARD_SYMBOL, depth, is_immediate));
2588
+ }
2589
+
2590
+ // Parse a double-quoted anonymous leaf node expression
2591
+ else if (stream->next == '"') {
2592
+ const char *string_start = stream->input;
2593
+ TSQueryError e = ts_query__parse_string_literal(self, stream);
2594
+ if (e) return e;
2595
+
2596
+ // Add a step for the node
2597
+ TSSymbol symbol = ts_language_symbol_for_name(
2598
+ self->language,
2599
+ self->string_buffer.contents,
2600
+ self->string_buffer.size,
2601
+ false
2602
+ );
2603
+ if (!symbol) {
2604
+ stream_reset(stream, string_start + 1);
2605
+ return TSQueryErrorNodeType;
2606
+ }
2607
+ array_push(&self->steps, query_step__new(symbol, depth, is_immediate));
2608
+ }
2609
+
2610
+ // Parse a field-prefixed pattern
2611
+ else if (stream_is_ident_start(stream)) {
2612
+ // Parse the field name
2613
+ const char *field_name = stream->input;
2614
+ stream_scan_identifier(stream);
2615
+ uint32_t length = (uint32_t)(stream->input - field_name);
2616
+ stream_skip_whitespace(stream);
2617
+
2618
+ if (stream->next != ':') {
2619
+ stream_reset(stream, field_name);
2620
+ return TSQueryErrorSyntax;
2621
+ }
2622
+ stream_advance(stream);
2623
+ stream_skip_whitespace(stream);
2624
+
2625
+ // Parse the pattern
2626
+ CaptureQuantifiers field_capture_quantifiers = capture_quantifiers_new();
2627
+ TSQueryError e = ts_query__parse_pattern(
2628
+ self,
2629
+ stream,
2630
+ depth,
2631
+ is_immediate,
2632
+ &field_capture_quantifiers
2633
+ );
2634
+ if (e) {
2635
+ capture_quantifiers_delete(&field_capture_quantifiers);
2636
+ if (e == PARENT_DONE) e = TSQueryErrorSyntax;
2637
+ return e;
2638
+ }
2639
+
2640
+ // Add the field name to the first step of the pattern
2641
+ TSFieldId field_id = ts_language_field_id_for_name(
2642
+ self->language,
2643
+ field_name,
2644
+ length
2645
+ );
2646
+ if (!field_id) {
2647
+ stream->input = field_name;
2648
+ return TSQueryErrorField;
2649
+ }
2650
+
2651
+ uint32_t step_index = starting_step_index;
2652
+ QueryStep *step = array_get(&self->steps, step_index);
2653
+ for (;;) {
2654
+ step->field = field_id;
2655
+ if (
2656
+ step->alternative_index != NONE &&
2657
+ step->alternative_index > step_index &&
2658
+ step->alternative_index < self->steps.size
2659
+ ) {
2660
+ step_index = step->alternative_index;
2661
+ step = array_get(&self->steps, step_index);
2662
+ } else {
2663
+ break;
2664
+ }
2665
+ }
2666
+
2667
+ capture_quantifiers_add_all(capture_quantifiers, &field_capture_quantifiers);
2668
+ capture_quantifiers_delete(&field_capture_quantifiers);
2669
+ }
2670
+
2671
+ else {
2672
+ return TSQueryErrorSyntax;
2673
+ }
2674
+
2675
+ stream_skip_whitespace(stream);
2676
+
2677
+ // Parse suffixes modifiers for this pattern
2678
+ TSQuantifier quantifier = TSQuantifierOne;
2679
+ for (;;) {
2680
+ // Parse the one-or-more operator.
2681
+ if (stream->next == '+') {
2682
+ quantifier = quantifier_join(TSQuantifierOneOrMore, quantifier);
2683
+
2684
+ stream_advance(stream);
2685
+ stream_skip_whitespace(stream);
2686
+
2687
+ QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2688
+ repeat_step.alternative_index = starting_step_index;
2689
+ repeat_step.is_pass_through = true;
2690
+ repeat_step.alternative_is_immediate = true;
2691
+ array_push(&self->steps, repeat_step);
2692
+ }
2693
+
2694
+ // Parse the zero-or-more repetition operator.
2695
+ else if (stream->next == '*') {
2696
+ quantifier = quantifier_join(TSQuantifierZeroOrMore, quantifier);
2697
+
2698
+ stream_advance(stream);
2699
+ stream_skip_whitespace(stream);
2700
+
2701
+ QueryStep repeat_step = query_step__new(WILDCARD_SYMBOL, depth, false);
2702
+ repeat_step.alternative_index = starting_step_index;
2703
+ repeat_step.is_pass_through = true;
2704
+ repeat_step.alternative_is_immediate = true;
2705
+ array_push(&self->steps, repeat_step);
2706
+
2707
+ // Stop when `step->alternative_index` is `NONE` or it points to
2708
+ // `repeat_step` or beyond. Note that having just been pushed,
2709
+ // `repeat_step` occupies slot `self->steps.size - 1`.
2710
+ QueryStep *step = array_get(&self->steps, starting_step_index);
2711
+ while (step->alternative_index != NONE && step->alternative_index < self->steps.size - 1) {
2712
+ step = array_get(&self->steps, step->alternative_index);
2713
+ }
2714
+ step->alternative_index = self->steps.size;
2715
+ }
2716
+
2717
+ // Parse the optional operator.
2718
+ else if (stream->next == '?') {
2719
+ quantifier = quantifier_join(TSQuantifierZeroOrOne, quantifier);
2720
+
2721
+ stream_advance(stream);
2722
+ stream_skip_whitespace(stream);
2723
+
2724
+ QueryStep *step = array_get(&self->steps, starting_step_index);
2725
+ while (step->alternative_index != NONE && step->alternative_index < self->steps.size) {
2726
+ step = array_get(&self->steps, step->alternative_index);
2727
+ }
2728
+ step->alternative_index = self->steps.size;
2729
+ }
2730
+
2731
+ // Parse an '@'-prefixed capture pattern
2732
+ else if (stream->next == '@') {
2733
+ stream_advance(stream);
2734
+ if (!stream_is_ident_start(stream)) return TSQueryErrorSyntax;
2735
+ const char *capture_name = stream->input;
2736
+ stream_scan_identifier(stream);
2737
+ uint32_t length = (uint32_t)(stream->input - capture_name);
2738
+ stream_skip_whitespace(stream);
2739
+
2740
+ // Add the capture id to the first step of the pattern
2741
+ uint16_t capture_id = symbol_table_insert_name(
2742
+ &self->captures,
2743
+ capture_name,
2744
+ length
2745
+ );
2746
+
2747
+ // Add the capture quantifier
2748
+ capture_quantifiers_add_for_id(capture_quantifiers, capture_id, TSQuantifierOne);
2749
+
2750
+ uint32_t step_index = starting_step_index;
2751
+ for (;;) {
2752
+ QueryStep *step = array_get(&self->steps, step_index);
2753
+ query_step__add_capture(step, capture_id);
2754
+ if (
2755
+ step->alternative_index != NONE &&
2756
+ step->alternative_index > step_index &&
2757
+ step->alternative_index < self->steps.size
2758
+ ) {
2759
+ step_index = step->alternative_index;
2760
+ } else {
2761
+ break;
2762
+ }
2763
+ }
2764
+ }
2765
+
2766
+ // No more suffix modifiers
2767
+ else {
2768
+ break;
2769
+ }
2770
+ }
2771
+
2772
+ capture_quantifiers_mul(capture_quantifiers, quantifier);
2773
+
2774
+ return 0;
2775
+ }
2776
+
2777
+ TSQuery *ts_query_new(
2778
+ const TSLanguage *language,
2779
+ const char *source,
2780
+ uint32_t source_len,
2781
+ uint32_t *error_offset,
2782
+ TSQueryError *error_type
2783
+ ) {
2784
+ if (
2785
+ !language ||
2786
+ language->abi_version > TREE_SITTER_LANGUAGE_VERSION ||
2787
+ language->abi_version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
2788
+ ) {
2789
+ *error_type = TSQueryErrorLanguage;
2790
+ return NULL;
2791
+ }
2792
+
2793
+ TSQuery *self = ts_malloc(sizeof(TSQuery));
2794
+ *self = (TSQuery) {
2795
+ .steps = array_new(),
2796
+ .pattern_map = array_new(),
2797
+ .captures = symbol_table_new(),
2798
+ .capture_quantifiers = array_new(),
2799
+ .predicate_values = symbol_table_new(),
2800
+ .predicate_steps = array_new(),
2801
+ .patterns = array_new(),
2802
+ .step_offsets = array_new(),
2803
+ .string_buffer = array_new(),
2804
+ .negated_fields = array_new(),
2805
+ .repeat_symbols_with_rootless_patterns = array_new(),
2806
+ .wildcard_root_pattern_count = 0,
2807
+ .language = ts_language_copy(language),
2808
+ };
2809
+
2810
+ array_push(&self->negated_fields, 0);
2811
+
2812
+ // Parse all of the S-expressions in the given string.
2813
+ Stream stream = stream_new(source, source_len);
2814
+ stream_skip_whitespace(&stream);
2815
+ while (stream.input < stream.end) {
2816
+ uint32_t pattern_index = self->patterns.size;
2817
+ uint32_t start_step_index = self->steps.size;
2818
+ uint32_t start_predicate_step_index = self->predicate_steps.size;
2819
+ array_push(&self->patterns, ((QueryPattern) {
2820
+ .steps = (Slice) {.offset = start_step_index},
2821
+ .predicate_steps = (Slice) {.offset = start_predicate_step_index},
2822
+ .start_byte = stream_offset(&stream),
2823
+ .is_non_local = false,
2824
+ }));
2825
+ CaptureQuantifiers capture_quantifiers = capture_quantifiers_new();
2826
+ *error_type = ts_query__parse_pattern(self, &stream, 0, false, &capture_quantifiers);
2827
+ array_push(&self->steps, query_step__new(0, PATTERN_DONE_MARKER, false));
2828
+
2829
+ QueryPattern *pattern = array_back(&self->patterns);
2830
+ pattern->steps.length = self->steps.size - start_step_index;
2831
+ pattern->predicate_steps.length = self->predicate_steps.size - start_predicate_step_index;
2832
+ pattern->end_byte = stream_offset(&stream);
2833
+
2834
+ // If any pattern could not be parsed, then report the error information
2835
+ // and terminate.
2836
+ if (*error_type) {
2837
+ if (*error_type == PARENT_DONE) *error_type = TSQueryErrorSyntax;
2838
+ *error_offset = stream_offset(&stream);
2839
+ capture_quantifiers_delete(&capture_quantifiers);
2840
+ ts_query_delete(self);
2841
+ return NULL;
2842
+ }
2843
+
2844
+ // Maintain a list of capture quantifiers for each pattern
2845
+ array_push(&self->capture_quantifiers, capture_quantifiers);
2846
+
2847
+ // Maintain a map that can look up patterns for a given root symbol.
2848
+ uint16_t wildcard_root_alternative_index = NONE;
2849
+ for (;;) {
2850
+ QueryStep *step = array_get(&self->steps, start_step_index);
2851
+
2852
+ // If a pattern has a wildcard at its root, but it has a non-wildcard child,
2853
+ // then optimize the matching process by skipping matching the wildcard.
2854
+ // Later, during the matching process, the query cursor will check that
2855
+ // there is a parent node, and capture it if necessary.
2856
+ if (step->symbol == WILDCARD_SYMBOL && step->depth == 0 && !step->field) {
2857
+ QueryStep *second_step = array_get(&self->steps, start_step_index + 1);
2858
+ if (second_step->symbol != WILDCARD_SYMBOL && second_step->depth == 1 && !second_step->is_immediate) {
2859
+ wildcard_root_alternative_index = step->alternative_index;
2860
+ start_step_index += 1;
2861
+ step = second_step;
2862
+ }
2863
+ }
2864
+
2865
+ // Determine whether the pattern has a single root node. This affects
2866
+ // decisions about whether or not to start matching the pattern when
2867
+ // a query cursor has a range restriction or when immediately within an
2868
+ // error node.
2869
+ uint32_t start_depth = step->depth;
2870
+ bool is_rooted = start_depth == 0;
2871
+ for (uint32_t step_index = start_step_index + 1; step_index < self->steps.size; step_index++) {
2872
+ QueryStep *child_step = array_get(&self->steps, step_index);
2873
+ if (child_step->is_dead_end) break;
2874
+ if (child_step->depth == start_depth) {
2875
+ is_rooted = false;
2876
+ break;
2877
+ }
2878
+ }
2879
+
2880
+ ts_query__pattern_map_insert(self, step->symbol, (PatternEntry) {
2881
+ .step_index = start_step_index,
2882
+ .pattern_index = pattern_index,
2883
+ .is_rooted = is_rooted
2884
+ });
2885
+ if (step->symbol == WILDCARD_SYMBOL) {
2886
+ self->wildcard_root_pattern_count++;
2887
+ }
2888
+
2889
+ // If there are alternatives or options at the root of the pattern,
2890
+ // then add multiple entries to the pattern map.
2891
+ if (step->alternative_index != NONE) {
2892
+ start_step_index = step->alternative_index;
2893
+ } else if (wildcard_root_alternative_index != NONE) {
2894
+ start_step_index = wildcard_root_alternative_index;
2895
+ wildcard_root_alternative_index = NONE;
2896
+ } else {
2897
+ break;
2898
+ }
2899
+ }
2900
+ }
2901
+
2902
+ if (!ts_query__analyze_patterns(self, error_offset)) {
2903
+ *error_type = TSQueryErrorStructure;
2904
+ ts_query_delete(self);
2905
+ return NULL;
2906
+ }
2907
+
2908
+ array_delete(&self->string_buffer);
2909
+ return self;
2910
+ }
2911
+
2912
+ void ts_query_delete(TSQuery *self) {
2913
+ if (self) {
2914
+ array_delete(&self->steps);
2915
+ array_delete(&self->pattern_map);
2916
+ array_delete(&self->predicate_steps);
2917
+ array_delete(&self->patterns);
2918
+ array_delete(&self->step_offsets);
2919
+ array_delete(&self->string_buffer);
2920
+ array_delete(&self->negated_fields);
2921
+ array_delete(&self->repeat_symbols_with_rootless_patterns);
2922
+ ts_language_delete(self->language);
2923
+ symbol_table_delete(&self->captures);
2924
+ symbol_table_delete(&self->predicate_values);
2925
+ for (uint32_t index = 0; index < self->capture_quantifiers.size; index++) {
2926
+ CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, index);
2927
+ capture_quantifiers_delete(capture_quantifiers);
2928
+ }
2929
+ array_delete(&self->capture_quantifiers);
2930
+ ts_free(self);
2931
+ }
2932
+ }
2933
+
2934
+ uint32_t ts_query_pattern_count(const TSQuery *self) {
2935
+ return self->patterns.size;
2936
+ }
2937
+
2938
+ uint32_t ts_query_capture_count(const TSQuery *self) {
2939
+ return self->captures.slices.size;
2940
+ }
2941
+
2942
+ uint32_t ts_query_string_count(const TSQuery *self) {
2943
+ return self->predicate_values.slices.size;
2944
+ }
2945
+
2946
+ const char *ts_query_capture_name_for_id(
2947
+ const TSQuery *self,
2948
+ uint32_t index,
2949
+ uint32_t *length
2950
+ ) {
2951
+ return symbol_table_name_for_id(&self->captures, index, length);
2952
+ }
2953
+
2954
+ TSQuantifier ts_query_capture_quantifier_for_id(
2955
+ const TSQuery *self,
2956
+ uint32_t pattern_index,
2957
+ uint32_t capture_index
2958
+ ) {
2959
+ CaptureQuantifiers *capture_quantifiers = array_get(&self->capture_quantifiers, pattern_index);
2960
+ return capture_quantifier_for_id(capture_quantifiers, capture_index);
2961
+ }
2962
+
2963
+ const char *ts_query_string_value_for_id(
2964
+ const TSQuery *self,
2965
+ uint32_t index,
2966
+ uint32_t *length
2967
+ ) {
2968
+ return symbol_table_name_for_id(&self->predicate_values, index, length);
2969
+ }
2970
+
2971
+ const TSQueryPredicateStep *ts_query_predicates_for_pattern(
2972
+ const TSQuery *self,
2973
+ uint32_t pattern_index,
2974
+ uint32_t *step_count
2975
+ ) {
2976
+ Slice slice = array_get(&self->patterns, pattern_index)->predicate_steps;
2977
+ *step_count = slice.length;
2978
+ if (slice.length == 0) return NULL;
2979
+ return array_get(&self->predicate_steps, slice.offset);
2980
+ }
2981
+
2982
+ uint32_t ts_query_start_byte_for_pattern(
2983
+ const TSQuery *self,
2984
+ uint32_t pattern_index
2985
+ ) {
2986
+ return array_get(&self->patterns, pattern_index)->start_byte;
2987
+ }
2988
+
2989
+ uint32_t ts_query_end_byte_for_pattern(
2990
+ const TSQuery *self,
2991
+ uint32_t pattern_index
2992
+ ) {
2993
+ return array_get(&self->patterns, pattern_index)->end_byte;
2994
+ }
2995
+
2996
+ bool ts_query_is_pattern_rooted(
2997
+ const TSQuery *self,
2998
+ uint32_t pattern_index
2999
+ ) {
3000
+ for (unsigned i = 0; i < self->pattern_map.size; i++) {
3001
+ PatternEntry *entry = array_get(&self->pattern_map, i);
3002
+ if (entry->pattern_index == pattern_index) {
3003
+ if (!entry->is_rooted) return false;
3004
+ }
3005
+ }
3006
+ return true;
3007
+ }
3008
+
3009
+ bool ts_query_is_pattern_non_local(
3010
+ const TSQuery *self,
3011
+ uint32_t pattern_index
3012
+ ) {
3013
+ if (pattern_index < self->patterns.size) {
3014
+ return array_get(&self->patterns, pattern_index)->is_non_local;
3015
+ } else {
3016
+ return false;
3017
+ }
3018
+ }
3019
+
3020
+ bool ts_query_is_pattern_guaranteed_at_step(
3021
+ const TSQuery *self,
3022
+ uint32_t byte_offset
3023
+ ) {
3024
+ uint32_t step_index = UINT32_MAX;
3025
+ for (unsigned i = 0; i < self->step_offsets.size; i++) {
3026
+ StepOffset *step_offset = array_get(&self->step_offsets, i);
3027
+ if (step_offset->byte_offset > byte_offset) break;
3028
+ step_index = step_offset->step_index;
3029
+ }
3030
+ if (step_index < self->steps.size) {
3031
+ return array_get(&self->steps, step_index)->root_pattern_guaranteed;
3032
+ } else {
3033
+ return false;
3034
+ }
3035
+ }
3036
+
3037
+ bool ts_query__step_is_fallible(
3038
+ const TSQuery *self,
3039
+ uint16_t step_index
3040
+ ) {
3041
+ ts_assert((uint32_t)step_index + 1 < self->steps.size);
3042
+ QueryStep *step = array_get(&self->steps, step_index);
3043
+ QueryStep *next_step = array_get(&self->steps, step_index + 1);
3044
+ return (
3045
+ next_step->depth != PATTERN_DONE_MARKER &&
3046
+ next_step->depth > step->depth &&
3047
+ (!next_step->parent_pattern_guaranteed || step->symbol == WILDCARD_SYMBOL)
3048
+ );
3049
+ }
3050
+
3051
+ void ts_query_disable_capture(
3052
+ TSQuery *self,
3053
+ const char *name,
3054
+ uint32_t length
3055
+ ) {
3056
+ // Remove capture information for any pattern step that previously
3057
+ // captured with the given name.
3058
+ int id = symbol_table_id_for_name(&self->captures, name, length);
3059
+ if (id != -1) {
3060
+ for (unsigned i = 0; i < self->steps.size; i++) {
3061
+ QueryStep *step = array_get(&self->steps, i);
3062
+ query_step__remove_capture(step, id);
3063
+ }
3064
+ }
3065
+ }
3066
+
3067
+ void ts_query_disable_pattern(
3068
+ TSQuery *self,
3069
+ uint32_t pattern_index
3070
+ ) {
3071
+ // Remove the given pattern from the pattern map. Its steps will still
3072
+ // be in the `steps` array, but they will never be read.
3073
+ for (unsigned i = 0; i < self->pattern_map.size; i++) {
3074
+ PatternEntry *pattern = array_get(&self->pattern_map, i);
3075
+ if (pattern->pattern_index == pattern_index) {
3076
+ array_erase(&self->pattern_map, i);
3077
+ i--;
3078
+ }
3079
+ }
3080
+ }
3081
+
3082
+ /***************
3083
+ * QueryCursor
3084
+ ***************/
3085
+
3086
+ TSQueryCursor *ts_query_cursor_new(void) {
3087
+ TSQueryCursor *self = ts_malloc(sizeof(TSQueryCursor));
3088
+ *self = (TSQueryCursor) {
3089
+ .did_exceed_match_limit = false,
3090
+ .ascending = false,
3091
+ .halted = false,
3092
+ .states = array_new(),
3093
+ .finished_states = array_new(),
3094
+ .capture_list_pool = capture_list_pool_new(),
3095
+ .start_byte = 0,
3096
+ .end_byte = UINT32_MAX,
3097
+ .start_point = {0, 0},
3098
+ .end_point = POINT_MAX,
3099
+ .max_start_depth = UINT32_MAX,
3100
+ .timeout_duration = 0,
3101
+ .end_clock = clock_null(),
3102
+ .operation_count = 0,
3103
+ };
3104
+ array_reserve(&self->states, 8);
3105
+ array_reserve(&self->finished_states, 8);
3106
+ return self;
3107
+ }
3108
+
3109
+ void ts_query_cursor_delete(TSQueryCursor *self) {
3110
+ array_delete(&self->states);
3111
+ array_delete(&self->finished_states);
3112
+ ts_tree_cursor_delete(&self->cursor);
3113
+ capture_list_pool_delete(&self->capture_list_pool);
3114
+ ts_free(self);
3115
+ }
3116
+
3117
+ bool ts_query_cursor_did_exceed_match_limit(const TSQueryCursor *self) {
3118
+ return self->did_exceed_match_limit;
3119
+ }
3120
+
3121
+ uint32_t ts_query_cursor_match_limit(const TSQueryCursor *self) {
3122
+ return self->capture_list_pool.max_capture_list_count;
3123
+ }
3124
+
3125
+ void ts_query_cursor_set_match_limit(TSQueryCursor *self, uint32_t limit) {
3126
+ self->capture_list_pool.max_capture_list_count = limit;
3127
+ }
3128
+
3129
+ uint64_t ts_query_cursor_timeout_micros(const TSQueryCursor *self) {
3130
+ return duration_to_micros(self->timeout_duration);
3131
+ }
3132
+
3133
+ void ts_query_cursor_set_timeout_micros(TSQueryCursor *self, uint64_t timeout_micros) {
3134
+ self->timeout_duration = duration_from_micros(timeout_micros);
3135
+ }
3136
+
3137
// Query-execution tracing. LOG expands to nothing (so its arguments are never
// evaluated) unless DEBUG_EXECUTE_QUERY is defined, in which case it prints
// to stderr with fprintf.
#ifndef DEBUG_EXECUTE_QUERY
#define LOG(...)
#else
#define LOG(...) fprintf(stderr, __VA_ARGS__)
#endif
3142
+
3143
+ void ts_query_cursor_exec(
3144
+ TSQueryCursor *self,
3145
+ const TSQuery *query,
3146
+ TSNode node
3147
+ ) {
3148
+ if (query) {
3149
+ LOG("query steps:\n");
3150
+ for (unsigned i = 0; i < query->steps.size; i++) {
3151
+ QueryStep *step = array_get(&query->steps, i);
3152
+ LOG(" %u: {", i);
3153
+ if (step->depth == PATTERN_DONE_MARKER) {
3154
+ LOG("DONE");
3155
+ } else if (step->is_dead_end) {
3156
+ LOG("dead_end");
3157
+ } else if (step->is_pass_through) {
3158
+ LOG("pass_through");
3159
+ } else if (step->symbol != WILDCARD_SYMBOL) {
3160
+ LOG("symbol: %s", query->language->symbol_names[step->symbol]);
3161
+ } else {
3162
+ LOG("symbol: *");
3163
+ }
3164
+ if (step->field) {
3165
+ LOG(", field: %s", query->language->field_names[step->field]);
3166
+ }
3167
+ if (step->alternative_index != NONE) {
3168
+ LOG(", alternative: %u", step->alternative_index);
3169
+ }
3170
+ LOG("},\n");
3171
+ }
3172
+ }
3173
+
3174
+ array_clear(&self->states);
3175
+ array_clear(&self->finished_states);
3176
+ ts_tree_cursor_reset(&self->cursor, node);
3177
+ capture_list_pool_reset(&self->capture_list_pool);
3178
+ self->on_visible_node = true;
3179
+ self->next_state_id = 0;
3180
+ self->depth = 0;
3181
+ self->ascending = false;
3182
+ self->halted = false;
3183
+ self->query = query;
3184
+ self->did_exceed_match_limit = false;
3185
+ self->operation_count = 0;
3186
+ if (self->timeout_duration) {
3187
+ self->end_clock = clock_after(clock_now(), self->timeout_duration);
3188
+ } else {
3189
+ self->end_clock = clock_null();
3190
+ }
3191
+ self->query_options = NULL;
3192
+ self->query_state = (TSQueryCursorState) {0};
3193
+ }
3194
+
3195
+ void ts_query_cursor_exec_with_options(
3196
+ TSQueryCursor *self,
3197
+ const TSQuery *query,
3198
+ TSNode node,
3199
+ const TSQueryCursorOptions *query_options
3200
+ ) {
3201
+ ts_query_cursor_exec(self, query, node);
3202
+ if (query_options) {
3203
+ self->query_options = query_options;
3204
+ self->query_state = (TSQueryCursorState) {
3205
+ .payload = query_options->payload
3206
+ };
3207
+ }
3208
+ }
3209
+
3210
+ bool ts_query_cursor_set_byte_range(
3211
+ TSQueryCursor *self,
3212
+ uint32_t start_byte,
3213
+ uint32_t end_byte
3214
+ ) {
3215
+ if (end_byte == 0) {
3216
+ end_byte = UINT32_MAX;
3217
+ }
3218
+ if (start_byte > end_byte) {
3219
+ return false;
3220
+ }
3221
+ self->start_byte = start_byte;
3222
+ self->end_byte = end_byte;
3223
+ return true;
3224
+ }
3225
+
3226
+ bool ts_query_cursor_set_point_range(
3227
+ TSQueryCursor *self,
3228
+ TSPoint start_point,
3229
+ TSPoint end_point
3230
+ ) {
3231
+ if (end_point.row == 0 && end_point.column == 0) {
3232
+ end_point = POINT_MAX;
3233
+ }
3234
+ if (point_gt(start_point, end_point)) {
3235
+ return false;
3236
+ }
3237
+ self->start_point = start_point;
3238
+ self->end_point = end_point;
3239
+ return true;
3240
+ }
3241
+
3242
+ // Search through all of the in-progress states, and find the captured
3243
+ // node that occurs earliest in the document.
3244
+ static bool ts_query_cursor__first_in_progress_capture(
3245
+ TSQueryCursor *self,
3246
+ uint32_t *state_index,
3247
+ uint32_t *byte_offset,
3248
+ uint32_t *pattern_index,
3249
+ bool *is_definite
3250
+ ) {
3251
+ bool result = false;
3252
+ *state_index = UINT32_MAX;
3253
+ *byte_offset = UINT32_MAX;
3254
+ *pattern_index = UINT32_MAX;
3255
+ for (unsigned i = 0; i < self->states.size; i++) {
3256
+ QueryState *state = array_get(&self->states, i);
3257
+ if (state->dead) continue;
3258
+
3259
+ const CaptureList *captures = capture_list_pool_get(
3260
+ &self->capture_list_pool,
3261
+ state->capture_list_id
3262
+ );
3263
+ if (state->consumed_capture_count >= captures->size) {
3264
+ continue;
3265
+ }
3266
+
3267
+ TSNode node = array_get(captures, state->consumed_capture_count)->node;
3268
+ if (
3269
+ ts_node_end_byte(node) <= self->start_byte ||
3270
+ point_lte(ts_node_end_point(node), self->start_point)
3271
+ ) {
3272
+ state->consumed_capture_count++;
3273
+ i--;
3274
+ continue;
3275
+ }
3276
+
3277
+ uint32_t node_start_byte = ts_node_start_byte(node);
3278
+ if (
3279
+ !result ||
3280
+ node_start_byte < *byte_offset ||
3281
+ (node_start_byte == *byte_offset && state->pattern_index < *pattern_index)
3282
+ ) {
3283
+ QueryStep *step = array_get(&self->query->steps, state->step_index);
3284
+ if (is_definite) {
3285
+ // We're being a bit conservative here by asserting that the following step
3286
+ // is not immediate, because this capture might end up being discarded if the
3287
+ // following symbol in the tree isn't the required symbol for this step.
3288
+ *is_definite = step->root_pattern_guaranteed && !step->is_immediate;
3289
+ } else if (step->root_pattern_guaranteed) {
3290
+ continue;
3291
+ }
3292
+
3293
+ result = true;
3294
+ *state_index = i;
3295
+ *byte_offset = node_start_byte;
3296
+ *pattern_index = state->pattern_index;
3297
+ }
3298
+ }
3299
+ return result;
3300
+ }
3301
+
3302
+ // Determine which node is first in a depth-first traversal
3303
+ int ts_query_cursor__compare_nodes(TSNode left, TSNode right) {
3304
+ if (left.id != right.id) {
3305
+ uint32_t left_start = ts_node_start_byte(left);
3306
+ uint32_t right_start = ts_node_start_byte(right);
3307
+ if (left_start < right_start) return -1;
3308
+ if (left_start > right_start) return 1;
3309
+ uint32_t left_node_count = ts_node_end_byte(left);
3310
+ uint32_t right_node_count = ts_node_end_byte(right);
3311
+ if (left_node_count > right_node_count) return -1;
3312
+ if (left_node_count < right_node_count) return 1;
3313
+ }
3314
+ return 0;
3315
+ }
3316
+
3317
+ // Determine if either state contains a superset of the other state's captures.
3318
+ void ts_query_cursor__compare_captures(
3319
+ TSQueryCursor *self,
3320
+ QueryState *left_state,
3321
+ QueryState *right_state,
3322
+ bool *left_contains_right,
3323
+ bool *right_contains_left
3324
+ ) {
3325
+ const CaptureList *left_captures = capture_list_pool_get(
3326
+ &self->capture_list_pool,
3327
+ left_state->capture_list_id
3328
+ );
3329
+ const CaptureList *right_captures = capture_list_pool_get(
3330
+ &self->capture_list_pool,
3331
+ right_state->capture_list_id
3332
+ );
3333
+ *left_contains_right = true;
3334
+ *right_contains_left = true;
3335
+ unsigned i = 0, j = 0;
3336
+ for (;;) {
3337
+ if (i < left_captures->size) {
3338
+ if (j < right_captures->size) {
3339
+ TSQueryCapture *left = array_get(left_captures, i);
3340
+ TSQueryCapture *right = array_get(right_captures, j);
3341
+ if (left->node.id == right->node.id && left->index == right->index) {
3342
+ i++;
3343
+ j++;
3344
+ } else {
3345
+ switch (ts_query_cursor__compare_nodes(left->node, right->node)) {
3346
+ case -1:
3347
+ *right_contains_left = false;
3348
+ i++;
3349
+ break;
3350
+ case 1:
3351
+ *left_contains_right = false;
3352
+ j++;
3353
+ break;
3354
+ default:
3355
+ *right_contains_left = false;
3356
+ *left_contains_right = false;
3357
+ i++;
3358
+ j++;
3359
+ break;
3360
+ }
3361
+ }
3362
+ } else {
3363
+ *right_contains_left = false;
3364
+ break;
3365
+ }
3366
+ } else {
3367
+ if (j < right_captures->size) {
3368
+ *left_contains_right = false;
3369
+ }
3370
+ break;
3371
+ }
3372
+ }
3373
+ }
3374
+
3375
+ static void ts_query_cursor__add_state(
3376
+ TSQueryCursor *self,
3377
+ const PatternEntry *pattern
3378
+ ) {
3379
+ QueryStep *step = array_get(&self->query->steps, pattern->step_index);
3380
+ uint32_t start_depth = self->depth - step->depth;
3381
+
3382
+ // Keep the states array in ascending order of start_depth and pattern_index,
3383
+ // so that it can be processed more efficiently elsewhere. Usually, there is
3384
+ // no work to do here because of two facts:
3385
+ // * States with lower start_depth are naturally added first due to the
3386
+ // order in which nodes are visited.
3387
+ // * Earlier patterns are naturally added first because of the ordering of the
3388
+ // pattern_map data structure that's used to initiate matches.
3389
+ //
3390
+ // This loop is only needed in cases where two conditions hold:
3391
+ // * A pattern consists of more than one sibling node, so that its states
3392
+ // remain in progress after exiting the node that started the match.
3393
+ // * The first node in the pattern matches against multiple nodes at the
3394
+ // same depth.
3395
+ //
3396
+ // An example of this is the pattern '((comment)* (function))'. If multiple
3397
+ // `comment` nodes appear in a row, then we may initiate a new state for this
3398
+ // pattern while another state for the same pattern is already in progress.
3399
+ // If there are multiple patterns like this in a query, then this loop will
3400
+ // need to execute in order to keep the states ordered by pattern_index.
3401
+ uint32_t index = self->states.size;
3402
+ while (index > 0) {
3403
+ QueryState *prev_state = array_get(&self->states, index - 1);
3404
+ if (prev_state->start_depth < start_depth) break;
3405
+ if (prev_state->start_depth == start_depth) {
3406
+ // Avoid inserting an unnecessary duplicate state, which would be
3407
+ // immediately pruned by the longest-match criteria.
3408
+ if (
3409
+ prev_state->pattern_index == pattern->pattern_index &&
3410
+ prev_state->step_index == pattern->step_index
3411
+ ) return;
3412
+ if (prev_state->pattern_index <= pattern->pattern_index) break;
3413
+ }
3414
+ index--;
3415
+ }
3416
+
3417
+ LOG(
3418
+ " start state. pattern:%u, step:%u\n",
3419
+ pattern->pattern_index,
3420
+ pattern->step_index
3421
+ );
3422
+ array_insert(&self->states, index, ((QueryState) {
3423
+ .id = UINT32_MAX,
3424
+ .capture_list_id = NONE,
3425
+ .step_index = pattern->step_index,
3426
+ .pattern_index = pattern->pattern_index,
3427
+ .start_depth = start_depth,
3428
+ .consumed_capture_count = 0,
3429
+ .seeking_immediate_match = true,
3430
+ .has_in_progress_alternatives = false,
3431
+ .needs_parent = step->depth == 1,
3432
+ .dead = false,
3433
+ }));
3434
+ }
3435
+
3436
+ // Acquire a capture list for this state. If there are no capture lists left in the
3437
+ // pool, this will steal the capture list from another existing state, and mark that
3438
+ // other state as 'dead'.
3439
+ static CaptureList *ts_query_cursor__prepare_to_capture(
3440
+ TSQueryCursor *self,
3441
+ QueryState *state,
3442
+ unsigned state_index_to_preserve
3443
+ ) {
3444
+ if (state->capture_list_id == NONE) {
3445
+ state->capture_list_id = capture_list_pool_acquire(&self->capture_list_pool);
3446
+
3447
+ // If there are no capture lists left in the pool, then terminate whichever
3448
+ // state has captured the earliest node in the document, and steal its
3449
+ // capture list.
3450
+ if (state->capture_list_id == NONE) {
3451
+ self->did_exceed_match_limit = true;
3452
+ uint32_t state_index, byte_offset, pattern_index;
3453
+ if (
3454
+ ts_query_cursor__first_in_progress_capture(
3455
+ self,
3456
+ &state_index,
3457
+ &byte_offset,
3458
+ &pattern_index,
3459
+ NULL
3460
+ ) &&
3461
+ state_index != state_index_to_preserve
3462
+ ) {
3463
+ LOG(
3464
+ " abandon state. index:%u, pattern:%u, offset:%u.\n",
3465
+ state_index, pattern_index, byte_offset
3466
+ );
3467
+ QueryState *other_state = array_get(&self->states, state_index);
3468
+ state->capture_list_id = other_state->capture_list_id;
3469
+ other_state->capture_list_id = NONE;
3470
+ other_state->dead = true;
3471
+ CaptureList *list = capture_list_pool_get_mut(
3472
+ &self->capture_list_pool,
3473
+ state->capture_list_id
3474
+ );
3475
+ array_clear(list);
3476
+ return list;
3477
+ } else {
3478
+ LOG(" ran out of capture lists");
3479
+ return NULL;
3480
+ }
3481
+ }
3482
+ }
3483
+ return capture_list_pool_get_mut(&self->capture_list_pool, state->capture_list_id);
3484
+ }
3485
+
3486
+ static void ts_query_cursor__capture(
3487
+ TSQueryCursor *self,
3488
+ QueryState *state,
3489
+ QueryStep *step,
3490
+ TSNode node
3491
+ ) {
3492
+ if (state->dead) return;
3493
+ CaptureList *capture_list = ts_query_cursor__prepare_to_capture(self, state, UINT32_MAX);
3494
+ if (!capture_list) {
3495
+ state->dead = true;
3496
+ return;
3497
+ }
3498
+
3499
+ for (unsigned j = 0; j < MAX_STEP_CAPTURE_COUNT; j++) {
3500
+ uint16_t capture_id = step->capture_ids[j];
3501
+ if (step->capture_ids[j] == NONE) break;
3502
+ array_push(capture_list, ((TSQueryCapture) { node, capture_id }));
3503
+ LOG(
3504
+ " capture node. type:%s, pattern:%u, capture_id:%u, capture_count:%u\n",
3505
+ ts_node_type(node),
3506
+ state->pattern_index,
3507
+ capture_id,
3508
+ capture_list->size
3509
+ );
3510
+ }
3511
+ }
3512
+
3513
+ // Duplicate the given state and insert the newly-created state immediately after
3514
+ // the given state in the `states` array. Ensures that the given state reference is
3515
+ // still valid, even if the states array is reallocated.
3516
+ static QueryState *ts_query_cursor__copy_state(
3517
+ TSQueryCursor *self,
3518
+ QueryState **state_ref
3519
+ ) {
3520
+ const QueryState *state = *state_ref;
3521
+ uint32_t state_index = (uint32_t)(state - self->states.contents);
3522
+ QueryState copy = *state;
3523
+ copy.capture_list_id = NONE;
3524
+
3525
+ // If the state has captures, copy its capture list.
3526
+ if (state->capture_list_id != NONE) {
3527
+ CaptureList *new_captures = ts_query_cursor__prepare_to_capture(self, &copy, state_index);
3528
+ if (!new_captures) return NULL;
3529
+ const CaptureList *old_captures = capture_list_pool_get(
3530
+ &self->capture_list_pool,
3531
+ state->capture_list_id
3532
+ );
3533
+ array_push_all(new_captures, old_captures);
3534
+ }
3535
+
3536
+ array_insert(&self->states, state_index + 1, copy);
3537
+ *state_ref = array_get(&self->states, state_index);
3538
+ return array_get(&self->states, state_index + 1);
3539
+ }
3540
+
3541
+ static inline bool ts_query_cursor__should_descend(
3542
+ TSQueryCursor *self,
3543
+ bool node_intersects_range
3544
+ ) {
3545
+
3546
+ if (node_intersects_range && self->depth < self->max_start_depth) {
3547
+ return true;
3548
+ }
3549
+
3550
+ // If there are in-progress matches whose remaining steps occur
3551
+ // deeper in the tree, then descend.
3552
+ for (unsigned i = 0; i < self->states.size; i++) {
3553
+ QueryState *state = array_get(&self->states, i);
3554
+ QueryStep *next_step = array_get(&self->query->steps, state->step_index);
3555
+ if (
3556
+ next_step->depth != PATTERN_DONE_MARKER &&
3557
+ state->start_depth + next_step->depth > self->depth
3558
+ ) {
3559
+ return true;
3560
+ }
3561
+ }
3562
+
3563
+ if (self->depth >= self->max_start_depth) {
3564
+ return false;
3565
+ }
3566
+
3567
+ // If the current node is hidden, then a non-rooted pattern might match
3568
+ // one if its roots inside of this node, and match another of its roots
3569
+ // as part of a sibling node, so we may need to descend.
3570
+ if (!self->on_visible_node) {
3571
+ // Descending into a repetition node outside of the range can be
3572
+ // expensive, because these nodes can have many visible children.
3573
+ // Avoid descending into repetition nodes unless we have already
3574
+ // determined that this query can match rootless patterns inside
3575
+ // of this type of repetition node.
3576
+ Subtree subtree = ts_tree_cursor_current_subtree(&self->cursor);
3577
+ if (ts_subtree_is_repetition(subtree)) {
3578
+ bool exists;
3579
+ uint32_t index;
3580
+ array_search_sorted_by(
3581
+ &self->query->repeat_symbols_with_rootless_patterns,,
3582
+ ts_subtree_symbol(subtree),
3583
+ &index,
3584
+ &exists
3585
+ );
3586
+ return exists;
3587
+ }
3588
+
3589
+ return true;
3590
+ }
3591
+
3592
+ return false;
3593
+ }
3594
+
3595
+ // Walk the tree, processing patterns until at least one pattern finishes.
3596
+ // If one or more patterns finish, return `true` and store their states in the
3597
+ // `finished_states` array. Multiple patterns can finish on the same node. If
3598
+ // there are no more matches, return `false`. When `stop_on_definite_step` is set, also return `true` once a state advances to a step flagged `root_pattern_guaranteed`. Returns `false` if the cursor is halted, the end clock has passed, or the progress callback returns true before anything matched.
3599
+ static inline bool ts_query_cursor__advance(
3600
+ TSQueryCursor *self,
3601
+ bool stop_on_definite_step
3602
+ ) {
3603
+ bool did_match = false;
3604
+ for (;;) {
3605
+ if (self->halted) { // once halted, release the capture list of every remaining in-progress state
3606
+ while (self->states.size > 0) {
3607
+ QueryState state = array_pop(&self->states);
3608
+ capture_list_pool_release(
3609
+ &self->capture_list_pool,
3610
+ state.capture_list_id
3611
+ );
3612
+ }
3613
+ }
3614
+
3615
+ if (++self->operation_count == OP_COUNT_PER_QUERY_TIMEOUT_CHECK) { // wrap to 0 so the clock/callback checks below only run once per OP_COUNT_PER_QUERY_TIMEOUT_CHECK iterations
3616
+ self->operation_count = 0;
3617
+ }
3618
+
3619
+ if (self->query_options && self->query_options->progress_callback) { // keep query_state (the argument handed to the progress callback below) at the current node's start byte
3620
+ self->query_state.current_byte_offset = ts_node_start_byte(ts_tree_cursor_current_node(&self->cursor));
3621
+ }
3622
+ if (
3623
+ did_match ||
3624
+ self->halted ||
3625
+ (
3626
+ self->operation_count == 0 &&
3627
+ (
3628
+ (!clock_is_null(self->end_clock) && clock_is_gt(clock_now(), self->end_clock)) ||
3629
+ (self->query_options && self->query_options->progress_callback && self->query_options->progress_callback(&self->query_state))
3630
+ )
3631
+ )
3632
+ ) {
3633
+ return did_match;
3634
+ }
3635
+
3636
+ // Exit the current node.
3637
+ if (self->ascending) {
3638
+ if (self->on_visible_node) {
3639
+ LOG(
3640
+ "leave node. depth:%u, type:%s\n",
3641
+ self->depth,
3642
+ ts_node_type(ts_tree_cursor_current_node(&self->cursor))
3643
+ );
3644
+
3645
+ // After leaving a node, remove any states that cannot make further progress.
3646
+ uint32_t deleted_count = 0;
3647
+ for (unsigned i = 0, n = self->states.size; i < n; i++) { // single pass that compacts surviving states toward the front of the array
3648
+ QueryState *state = array_get(&self->states, i);
3649
+ QueryStep *step = array_get(&self->query->steps, state->step_index);
3650
+
3651
+ // If a state completed its pattern inside of this node, but was deferred from finishing
3652
+ // in order to search for longer matches, mark it as finished.
3653
+ if (
3654
+ step->depth == PATTERN_DONE_MARKER &&
3655
+ (state->start_depth > self->depth || self->depth == 0)
3656
+ ) {
3657
+ LOG(" finish pattern %u\n", state->pattern_index);
3658
+ array_push(&self->finished_states, *state);
3659
+ did_match = true;
3660
+ deleted_count++;
3661
+ }
3662
+
3663
+ // If a state needed to match something within this node, then remove that state
3664
+ // as it has failed to match.
3665
+ else if (
3666
+ step->depth != PATTERN_DONE_MARKER &&
3667
+ (uint32_t)state->start_depth + (uint32_t)step->depth > self->depth
3668
+ ) {
3669
+ LOG(
3670
+ " failed to match. pattern:%u, step:%u\n",
3671
+ state->pattern_index,
3672
+ state->step_index
3673
+ );
3674
+ capture_list_pool_release(
3675
+ &self->capture_list_pool,
3676
+ state->capture_list_id
3677
+ );
3678
+ deleted_count++;
3679
+ }
3680
+
3681
+ else if (deleted_count > 0) { // state survives: shift it down over the slots freed so far
3682
+ *array_get(&self->states, i - deleted_count) = *state;
3683
+ }
3684
+ }
3685
+ self->states.size -= deleted_count; // drop the now-unused tail left behind by the compaction above
3686
+ }
3687
+
3688
+ // Leave this node by stepping to its next sibling or to its parent.
3689
+ switch (ts_tree_cursor_goto_next_sibling_internal(&self->cursor)) {
3690
+ case TreeCursorStepVisible:
3691
+ if (!self->on_visible_node) {
3692
+ self->depth++;
3693
+ self->on_visible_node = true;
3694
+ }
3695
+ self->ascending = false;
3696
+ break;
3697
+ case TreeCursorStepHidden:
3698
+ if (self->on_visible_node) {
3699
+ self->depth--;
3700
+ self->on_visible_node = false;
3701
+ }
3702
+ self->ascending = false;
3703
+ break;
3704
+ default: // any other step result (presumably: no further sibling): move to the parent and keep ascending
3705
+ if (ts_tree_cursor_goto_parent(&self->cursor)) {
3706
+ self->depth--;
3707
+ } else {
3708
+ LOG("halt at root\n");
3709
+ self->halted = true;
3710
+ }
3711
+ }
3712
+ }
3713
+
3714
+ // Enter a new node.
3715
+ else {
3716
+ // Get the properties of the current node.
3717
+ TSNode node = ts_tree_cursor_current_node(&self->cursor);
3718
+ TSNode parent_node = ts_tree_cursor_parent_node(&self->cursor);
3719
+
3720
+ uint32_t start_byte = ts_node_start_byte(node);
3721
+ uint32_t end_byte = ts_node_end_byte(node);
3722
+ TSPoint start_point = ts_node_start_point(node);
3723
+ TSPoint end_point = ts_node_end_point(node);
3724
+ bool is_empty = start_byte == end_byte;
3725
+
3726
+ bool parent_precedes_range = !ts_node_is_null(parent_node) && (
3727
+ ts_node_end_byte(parent_node) <= self->start_byte ||
3728
+ point_lte(ts_node_end_point(parent_node), self->start_point)
3729
+ );
3730
+ bool parent_follows_range = !ts_node_is_null(parent_node) && (
3731
+ ts_node_start_byte(parent_node) >= self->end_byte ||
3732
+ point_gte(ts_node_start_point(parent_node), self->end_point)
3733
+ );
3734
+ bool node_precedes_range =
3735
+ parent_precedes_range ||
3736
+ end_byte < self->start_byte ||
3737
+ point_lt(end_point, self->start_point) ||
3738
+ (!is_empty && end_byte == self->start_byte) ||
3739
+ (!is_empty && point_eq(end_point, self->start_point));
3740
+
3741
+ bool node_follows_range = parent_follows_range || (
3742
+ start_byte >= self->end_byte ||
3743
+ point_gte(start_point, self->end_point)
3744
+ );
3745
+ bool parent_intersects_range = !parent_precedes_range && !parent_follows_range;
3746
+ bool node_intersects_range = !node_precedes_range && !node_follows_range;
3747
+
3748
+ if (self->on_visible_node) {
3749
+ TSSymbol symbol = ts_node_symbol(node);
3750
+ bool is_named = ts_node_is_named(node);
3751
+ bool is_missing = ts_node_is_missing(node);
3752
+ bool has_later_siblings;
3753
+ bool has_later_named_siblings;
3754
+ bool can_have_later_siblings_with_this_field;
3755
+ TSFieldId field_id = 0;
3756
+ TSSymbol supertypes[8] = {0};
3757
+ unsigned supertype_count = 8;
3758
+ ts_tree_cursor_current_status(
3759
+ &self->cursor,
3760
+ &field_id,
3761
+ &has_later_siblings,
3762
+ &has_later_named_siblings,
3763
+ &can_have_later_siblings_with_this_field,
3764
+ supertypes,
3765
+ &supertype_count
3766
+ );
3767
+ LOG(
3768
+ "enter node. depth:%u, type:%s, field:%s, row:%u state_count:%u, finished_state_count:%u\n",
3769
+ self->depth,
3770
+ ts_node_type(node),
3771
+ ts_language_field_name_for_id(self->query->language, field_id),
3772
+ ts_node_start_point(node).row,
3773
+ self->states.size,
3774
+ self->finished_states.size
3775
+ );
3776
+
3777
+ bool node_is_error = symbol == ts_builtin_sym_error;
3778
+ bool parent_is_error =
3779
+ !ts_node_is_null(parent_node) &&
3780
+ ts_node_symbol(parent_node) == ts_builtin_sym_error;
3781
+
3782
+ // Add new states for any patterns whose root node is a wildcard.
3783
+ if (!node_is_error) {
3784
+ for (unsigned i = 0; i < self->query->wildcard_root_pattern_count; i++) {
3785
+ PatternEntry *pattern = array_get(&self->query->pattern_map, i);
3786
+
3787
+ // If this node matches the first step of the pattern, then add a new
3788
+ // state at the start of this pattern.
3789
+ QueryStep *step = array_get(&self->query->steps, pattern->step_index);
3790
+ uint32_t start_depth = self->depth - step->depth;
3791
+ if (
3792
+ (pattern->is_rooted ?
3793
+ node_intersects_range :
3794
+ (parent_intersects_range && !parent_is_error)) &&
3795
+ (!step->field || field_id == step->field) &&
3796
+ (!step->supertype_symbol || supertype_count > 0) &&
3797
+ (start_depth <= self->max_start_depth)
3798
+ ) {
3799
+ ts_query_cursor__add_state(self, pattern);
3800
+ }
3801
+ }
3802
+ }
3803
+
3804
+ // Add new states for any patterns whose root node matches this node.
3805
+ unsigned i;
3806
+ if (ts_query__pattern_map_search(self->query, symbol, &i)) {
3807
+ PatternEntry *pattern = array_get(&self->query->pattern_map, i);
3808
+
3809
+ QueryStep *step = array_get(&self->query->steps, pattern->step_index);
3810
+ uint32_t start_depth = self->depth - step->depth;
3811
+ do {
3812
+ // If this node matches the first step of the pattern, then add a new
3813
+ // state at the start of this pattern.
3814
+ if (
3815
+ (pattern->is_rooted ?
3816
+ node_intersects_range :
3817
+ (parent_intersects_range && !parent_is_error)) &&
3818
+ (!step->field || field_id == step->field) &&
3819
+ (start_depth <= self->max_start_depth)
3820
+ ) {
3821
+ ts_query_cursor__add_state(self, pattern);
3822
+ }
3823
+
3824
+ // Advance to the next pattern whose root node matches this node.
3825
+ i++;
3826
+ if (i == self->query->pattern_map.size) break;
3827
+ pattern = array_get(&self->query->pattern_map, i);
3828
+ step = array_get(&self->query->steps, pattern->step_index);
3829
+ } while (step->symbol == symbol);
3830
+ }
3831
+
3832
+ // Update all of the in-progress states with current node.
3833
+ for (unsigned j = 0, copy_count = 0; j < self->states.size; j += 1 + copy_count) { // the stride also skips the copies inserted right after state j during this iteration
3834
+ QueryState *state = array_get(&self->states, j);
3835
+ QueryStep *step = array_get(&self->query->steps, state->step_index);
3836
+ state->has_in_progress_alternatives = false;
3837
+ copy_count = 0;
3838
+
3839
+ // Check that the node matches all of the criteria for the next
3840
+ // step of the pattern.
3841
+ if ((uint32_t)state->start_depth + (uint32_t)step->depth != self->depth) continue;
3842
+
3843
+ // Determine if this node matches this step of the pattern, and also
3844
+ // if this node can have later siblings that match this step of the
3845
+ // pattern.
3846
+ bool node_does_match = false;
3847
+ if (step->symbol == WILDCARD_SYMBOL) {
3848
+ if (step->is_missing) {
3849
+ node_does_match = is_missing;
3850
+ } else {
3851
+ node_does_match = !node_is_error && (is_named || !step->is_named);
3852
+ }
3853
+ } else {
3854
+ node_does_match = symbol == step->symbol && (!step->is_missing || is_missing);
3855
+ }
3856
+ bool later_sibling_can_match = has_later_siblings;
3857
+ if ((step->is_immediate && is_named) || state->seeking_immediate_match) {
3858
+ later_sibling_can_match = false;
3859
+ }
3860
+ if (step->is_last_child && has_later_named_siblings) {
3861
+ node_does_match = false;
3862
+ }
3863
+ if (step->supertype_symbol) {
3864
+ bool has_supertype = false;
3865
+ for (unsigned k = 0; k < supertype_count; k++) {
3866
+ if (supertypes[k] == step->supertype_symbol) {
3867
+ has_supertype = true;
3868
+ break;
3869
+ }
3870
+ }
3871
+ if (!has_supertype) node_does_match = false;
3872
+ }
3873
+ if (step->field) {
3874
+ if (step->field == field_id) {
3875
+ if (!can_have_later_siblings_with_this_field) {
3876
+ later_sibling_can_match = false;
3877
+ }
3878
+ } else {
3879
+ node_does_match = false;
3880
+ }
3881
+ }
3882
+
3883
+ if (step->negated_field_list_id) {
3884
+ TSFieldId *negated_field_ids = array_get(&self->query->negated_fields, step->negated_field_list_id);
3885
+ for (;;) { // walk the field-id list until its zero terminator
3886
+ TSFieldId negated_field_id = *negated_field_ids;
3887
+ if (negated_field_id) {
3888
+ negated_field_ids++;
3889
+ if (ts_node_child_by_field_id(node, negated_field_id).id) {
3890
+ node_does_match = false;
3891
+ break;
3892
+ }
3893
+ } else {
3894
+ break;
3895
+ }
3896
+ }
3897
+ }
3898
+
3899
+ // Remove states immediately if it is ever clear that they cannot match.
3900
+ if (!node_does_match) {
3901
+ if (!later_sibling_can_match) {
3902
+ LOG(
3903
+ " discard state. pattern:%u, step:%u\n",
3904
+ state->pattern_index,
3905
+ state->step_index
3906
+ );
3907
+ capture_list_pool_release(
3908
+ &self->capture_list_pool,
3909
+ state->capture_list_id
3910
+ );
3911
+ array_erase(&self->states, j);
3912
+ j--; // offset the loop increment so the state shifted into slot j is visited next
3913
+ }
3914
+ continue;
3915
+ }
3916
+
3917
+ // Some patterns can match their root node in multiple ways, capturing different
3918
+ // children. If this pattern step could match later children within the same
3919
+ // parent, then this query state cannot simply be updated in place. It must be
3920
+ // split into two states: one that matches this node, and one which skips over
3921
+ // this node, to preserve the possibility of matching later siblings.
3922
+ if (later_sibling_can_match && (
3923
+ step->contains_captures ||
3924
+ ts_query__step_is_fallible(self->query, state->step_index)
3925
+ )) {
3926
+ if (ts_query_cursor__copy_state(self, &state)) {
3927
+ LOG(
3928
+ " split state for capture. pattern:%u, step:%u\n",
3929
+ state->pattern_index,
3930
+ state->step_index
3931
+ );
3932
+ copy_count++;
3933
+ }
3934
+ }
3935
+
3936
+ // If this pattern started with a wildcard, such that the pattern map
3937
+ // actually points to the *second* step of the pattern, then check
3938
+ // that the node has a parent, and capture the parent node if necessary.
3939
+ if (state->needs_parent) {
3940
+ TSNode parent = ts_tree_cursor_parent_node(&self->cursor);
3941
+ if (ts_node_is_null(parent)) {
3942
+ LOG(" missing parent node\n");
3943
+ state->dead = true;
3944
+ } else {
3945
+ state->needs_parent = false;
3946
+ QueryStep *skipped_wildcard_step = step;
3947
+ do { // scan backwards for the nearest preceding depth-0 step that is neither a dead-end nor a pass-through
3948
+ skipped_wildcard_step--;
3949
+ } while (
3950
+ skipped_wildcard_step->is_dead_end ||
3951
+ skipped_wildcard_step->is_pass_through ||
3952
+ skipped_wildcard_step->depth > 0
3953
+ );
3954
+ if (skipped_wildcard_step->capture_ids[0] != NONE) {
3955
+ LOG(" capture wildcard parent\n");
3956
+ ts_query_cursor__capture(
3957
+ self,
3958
+ state,
3959
+ skipped_wildcard_step,
3960
+ parent
3961
+ );
3962
+ }
3963
+ }
3964
+ }
3965
+
3966
+ // If the current node is captured in this pattern, add it to the capture list.
3967
+ if (step->capture_ids[0] != NONE) {
3968
+ ts_query_cursor__capture(self, state, step, node);
3969
+ }
3970
+
3971
+ if (state->dead) {
3972
+ array_erase(&self->states, j);
3973
+ j--;
3974
+ continue;
3975
+ }
3976
+
3977
+ // Advance this state to the next step of its pattern.
3978
+ state->step_index++;
3979
+ LOG(
3980
+ " advance state. pattern:%u, step:%u\n",
3981
+ state->pattern_index,
3982
+ state->step_index
3983
+ );
3984
+
3985
+ QueryStep *next_step = array_get(&self->query->steps, state->step_index);
3986
+
3987
+ // For a given step, if the current symbol is the wildcard symbol, `_`, and it is **not**
3988
+ // named, meaning it should capture anonymous nodes, **and** the next step is immediate,
3989
+ // we reuse the `seeking_immediate_match` flag to indicate that we are looking for an
3990
+ // immediate match due to an unnamed wildcard symbol.
3991
+ //
3992
+ // The reason for this is that typically, anchors will not consider anonymous nodes,
3993
+ // but we're special casing the wildcard symbol to allow for any immediate matches,
3994
+ // regardless of whether they are named or not.
3995
+ if (step->symbol == WILDCARD_SYMBOL && !step->is_named && next_step->is_immediate) {
3996
+ state->seeking_immediate_match = true;
3997
+ } else {
3998
+ state->seeking_immediate_match = false;
3999
+ }
4000
+
4001
+ if (stop_on_definite_step && next_step->root_pattern_guaranteed) did_match = true;
4002
+
4003
+ // If this state's next step has an alternative step, then copy the state in order
4004
+ // to pursue both alternatives. The alternative step itself may have an alternative,
4005
+ // so this is an iterative process.
4006
+ unsigned end_index = j + 1; // grows whenever a copy is inserted, so the copies are processed by this loop as well
4007
+ for (unsigned k = j; k < end_index; k++) {
4008
+ QueryState *child_state = array_get(&self->states, k);
4009
+ QueryStep *child_step = array_get(&self->query->steps, child_state->step_index);
4010
+ if (child_step->alternative_index != NONE) {
4011
+ // A "dead-end" step exists only to add a non-sequential jump into the step sequence,
4012
+ // via its alternative index. When a state reaches a dead-end step, it jumps straight
4013
+ // to the step's alternative.
4014
+ if (child_step->is_dead_end) {
4015
+ child_state->step_index = child_step->alternative_index;
4016
+ k--;
4017
+ continue;
4018
+ }
4019
+
4020
+ // A "pass-through" step exists only to add a branch into the step sequence,
4021
+ // via its alternative_index. When a state reaches a pass-through step, it splits
4022
+ // in order to process the alternative step, and then it advances to the next step.
4023
+ if (child_step->is_pass_through) {
4024
+ child_state->step_index++;
4025
+ k--;
4026
+ }
4027
+
4028
+ QueryState *copy = ts_query_cursor__copy_state(self, &child_state);
4029
+ if (copy) {
4030
+ LOG(
4031
+ " split state for branch. pattern:%u, from_step:%u, to_step:%u, immediate:%d, capture_count: %u\n",
4032
+ copy->pattern_index,
4033
+ copy->step_index,
4034
+ next_step->alternative_index,
4035
+ next_step->alternative_is_immediate,
4036
+ capture_list_pool_get(&self->capture_list_pool, copy->capture_list_id)->size
4037
+ );
4038
+ end_index++;
4039
+ copy_count++;
4040
+ copy->step_index = child_step->alternative_index;
4041
+ if (child_step->alternative_is_immediate) {
4042
+ copy->seeking_immediate_match = true;
4043
+ }
4044
+ }
4045
+ }
4046
+ }
4047
+ }
4048
+
4049
+ for (unsigned j = 0; j < self->states.size; j++) {
4050
+ QueryState *state = array_get(&self->states, j);
4051
+ if (state->dead) {
4052
+ array_erase(&self->states, j);
4053
+ j--;
4054
+ continue;
4055
+ }
4056
+
4057
+ // Enforce the longest-match criteria. When a query pattern contains optional or
4058
+ // repeated nodes, this is necessary to avoid multiple redundant states, where
4059
+ // one state has a strict subset of another state's captures.
4060
+ bool did_remove = false;
4061
+ for (unsigned k = j + 1; k < self->states.size; k++) {
4062
+ QueryState *other_state = array_get(&self->states, k);
4063
+
4064
+ // Query states are kept in ascending order of start_depth and pattern_index.
4065
+ // Since the longest-match criteria is only used for deduping matches of the same
4066
+ // pattern and root node, we only need to perform pairwise comparisons within a
4067
+ // small slice of the states array.
4068
+ if (
4069
+ other_state->start_depth != state->start_depth ||
4070
+ other_state->pattern_index != state->pattern_index
4071
+ ) break;
4072
+
4073
+ bool left_contains_right, right_contains_left;
4074
+ ts_query_cursor__compare_captures(
4075
+ self,
4076
+ state,
4077
+ other_state,
4078
+ &left_contains_right,
4079
+ &right_contains_left
4080
+ );
4081
+ if (left_contains_right) {
4082
+ if (state->step_index == other_state->step_index) {
4083
+ LOG(
4084
+ " drop shorter state. pattern: %u, step_index: %u\n",
4085
+ state->pattern_index,
4086
+ state->step_index
4087
+ );
4088
+ capture_list_pool_release(&self->capture_list_pool, other_state->capture_list_id);
4089
+ array_erase(&self->states, k);
4090
+ k--;
4091
+ continue;
4092
+ }
4093
+ other_state->has_in_progress_alternatives = true;
4094
+ }
4095
+ if (right_contains_left) {
4096
+ if (state->step_index == other_state->step_index) {
4097
+ LOG(
4098
+ " drop shorter state. pattern: %u, step_index: %u\n",
4099
+ state->pattern_index,
4100
+ state->step_index
4101
+ );
4102
+ capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
4103
+ array_erase(&self->states, j);
4104
+ j--;
4105
+ did_remove = true;
4106
+ break;
4107
+ }
4108
+ state->has_in_progress_alternatives = true;
4109
+ }
4110
+ }
4111
+
4112
+ // If the state is at the end of its pattern, remove it from the list
4113
+ // of in-progress states and add it to the list of finished states.
4114
+ if (!did_remove) {
4115
+ LOG(
4116
+ " keep state. pattern: %u, start_depth: %u, step_index: %u, capture_count: %u\n",
4117
+ state->pattern_index,
4118
+ state->start_depth,
4119
+ state->step_index,
4120
+ capture_list_pool_get(&self->capture_list_pool, state->capture_list_id)->size
4121
+ );
4122
+ QueryStep *next_step = array_get(&self->query->steps, state->step_index);
4123
+ if (next_step->depth == PATTERN_DONE_MARKER) {
4124
+ if (state->has_in_progress_alternatives) {
4125
+ LOG(" defer finishing pattern %u\n", state->pattern_index);
4126
+ } else {
4127
+ LOG(" finish pattern %u\n", state->pattern_index);
4128
+ array_push(&self->finished_states, *state);
4129
+ array_erase(&self->states, (uint32_t)(state - self->states.contents));
4130
+ did_match = true;
4131
+ j--;
4132
+ }
4133
+ }
4134
+ }
4135
+ }
4136
+ }
4137
+
4138
+ if (ts_query_cursor__should_descend(self, node_intersects_range)) {
4139
+ switch (ts_tree_cursor_goto_first_child_internal(&self->cursor)) {
4140
+ case TreeCursorStepVisible:
4141
+ self->depth++;
4142
+ self->on_visible_node = true;
4143
+ continue;
4144
+ case TreeCursorStepHidden:
4145
+ self->on_visible_node = false;
4146
+ continue;
4147
+ default:
4148
+ break;
4149
+ }
4150
+ }
4151
+
4152
+ self->ascending = true; // reached when not descending, or when stepping to a first child yielded neither a visible nor a hidden node
4153
+ }
4154
+ }
4155
+ }
4156
+
4157
+ bool ts_query_cursor_next_match(
4158
+ TSQueryCursor *self,
4159
+ TSQueryMatch *match
4160
+ ) {
4161
+ if (self->finished_states.size == 0) {
4162
+ if (!ts_query_cursor__advance(self, false)) {
4163
+ return false;
4164
+ }
4165
+ }
4166
+
4167
+ QueryState *state = array_get(&self->finished_states, 0);
4168
+ if (state->id == UINT32_MAX) state->id = self->next_state_id++;
4169
+ match->id = state->id;
4170
+ match->pattern_index = state->pattern_index;
4171
+ const CaptureList *captures = capture_list_pool_get(
4172
+ &self->capture_list_pool,
4173
+ state->capture_list_id
4174
+ );
4175
+ match->captures = captures->contents;
4176
+ match->capture_count = captures->size;
4177
+ capture_list_pool_release(&self->capture_list_pool, state->capture_list_id);
4178
+ array_erase(&self->finished_states, 0);
4179
+ return true;
4180
+ }
4181
+
4182
+ void ts_query_cursor_remove_match(
4183
+ TSQueryCursor *self,
4184
+ uint32_t match_id
4185
+ ) {
4186
+ for (unsigned i = 0; i < self->finished_states.size; i++) {
4187
+ const QueryState *state = array_get(&self->finished_states, i);
4188
+ if (state->id == match_id) {
4189
+ capture_list_pool_release(
4190
+ &self->capture_list_pool,
4191
+ state->capture_list_id
4192
+ );
4193
+ array_erase(&self->finished_states, i);
4194
+ return;
4195
+ }
4196
+ }
4197
+
4198
+ // Remove unfinished query states as well to prevent future
4199
+ // captures for a match being removed.
4200
+ for (unsigned i = 0; i < self->states.size; i++) {
4201
+ const QueryState *state = array_get(&self->states, i);
4202
+ if (state->id == match_id) {
4203
+ capture_list_pool_release(
4204
+ &self->capture_list_pool,
4205
+ state->capture_list_id
4206
+ );
4207
+ array_erase(&self->states, i);
4208
+ return;
4209
+ }
4210
+ }
4211
+ }
4212
+
4213
+ bool ts_query_cursor_next_capture(
4214
+ TSQueryCursor *self,
4215
+ TSQueryMatch *match,
4216
+ uint32_t *capture_index
4217
+ ) {
4218
+ // The goal here is to return captures in order, even though they may not
4219
+ // be discovered in order, because patterns can overlap. Search for matches
4220
+ // until there is a finished capture that is before any unfinished capture.
4221
+ for (;;) {
4222
+ // First, find the earliest capture in an unfinished match.
4223
+ uint32_t first_unfinished_capture_byte;
4224
+ uint32_t first_unfinished_pattern_index;
4225
+ uint32_t first_unfinished_state_index;
4226
+ bool first_unfinished_state_is_definite = false;
4227
+ bool found_unfinished_state = ts_query_cursor__first_in_progress_capture(
4228
+ self,
4229
+ &first_unfinished_state_index,
4230
+ &first_unfinished_capture_byte,
4231
+ &first_unfinished_pattern_index,
4232
+ &first_unfinished_state_is_definite
4233
+ );
4234
+
4235
+ // Then find the earliest capture in a finished match. It must occur
4236
+ // before the first capture in an *unfinished* match.
4237
+ QueryState *first_finished_state = NULL;
4238
+ uint32_t first_finished_capture_byte = first_unfinished_capture_byte;
4239
+ uint32_t first_finished_pattern_index = first_unfinished_pattern_index;
4240
+ for (unsigned i = 0; i < self->finished_states.size;) {
4241
+ QueryState *state = array_get(&self->finished_states, i);
4242
+ const CaptureList *captures = capture_list_pool_get(
4243
+ &self->capture_list_pool,
4244
+ state->capture_list_id
4245
+ );
4246
+
4247
+ // Remove states whose captures are all consumed.
4248
+ if (state->consumed_capture_count >= captures->size) {
4249
+ capture_list_pool_release(
4250
+ &self->capture_list_pool,
4251
+ state->capture_list_id
4252
+ );
4253
+ array_erase(&self->finished_states, i);
4254
+ continue;
4255
+ }
4256
+
4257
+ TSNode node = array_get(captures, state->consumed_capture_count)->node;
4258
+
4259
+ bool node_precedes_range = (
4260
+ ts_node_end_byte(node) <= self->start_byte ||
4261
+ point_lte(ts_node_end_point(node), self->start_point)
4262
+ );
4263
+ bool node_follows_range = (
4264
+ ts_node_start_byte(node) >= self->end_byte ||
4265
+ point_gte(ts_node_start_point(node), self->end_point)
4266
+ );
4267
+ bool node_outside_of_range = node_precedes_range || node_follows_range;
4268
+
4269
+ // Skip captures that are outside of the cursor's range.
4270
+ if (node_outside_of_range) {
4271
+ state->consumed_capture_count++;
4272
+ continue;
4273
+ }
4274
+
4275
+ uint32_t node_start_byte = ts_node_start_byte(node);
4276
+ if (
4277
+ node_start_byte < first_finished_capture_byte ||
4278
+ (
4279
+ node_start_byte == first_finished_capture_byte &&
4280
+ state->pattern_index < first_finished_pattern_index
4281
+ )
4282
+ ) {
4283
+ first_finished_state = state;
4284
+ first_finished_capture_byte = node_start_byte;
4285
+ first_finished_pattern_index = state->pattern_index;
4286
+ }
4287
+ i++;
4288
+ }
4289
+
4290
+ // If there is finished capture that is clearly before any unfinished
4291
+ // capture, then return its match, and its capture index. Internally
4292
+ // record the fact that the capture has been 'consumed'.
4293
+ QueryState *state;
4294
+ if (first_finished_state) {
4295
+ state = first_finished_state;
4296
+ } else if (first_unfinished_state_is_definite) {
4297
+ state = array_get(&self->states, first_unfinished_state_index);
4298
+ } else {
4299
+ state = NULL;
4300
+ }
4301
+
4302
+ if (state) {
4303
+ if (state->id == UINT32_MAX) state->id = self->next_state_id++;
4304
+ match->id = state->id;
4305
+ match->pattern_index = state->pattern_index;
4306
+ const CaptureList *captures = capture_list_pool_get(
4307
+ &self->capture_list_pool,
4308
+ state->capture_list_id
4309
+ );
4310
+ match->captures = captures->contents;
4311
+ match->capture_count = captures->size;
4312
+ *capture_index = state->consumed_capture_count;
4313
+ state->consumed_capture_count++;
4314
+ return true;
4315
+ }
4316
+
4317
+ if (capture_list_pool_is_empty(&self->capture_list_pool) && found_unfinished_state) {
4318
+ LOG(
4319
+ " abandon state. index:%u, pattern:%u, offset:%u.\n",
4320
+ first_unfinished_state_index,
4321
+ first_unfinished_pattern_index,
4322
+ first_unfinished_capture_byte
4323
+ );
4324
+ capture_list_pool_release(
4325
+ &self->capture_list_pool,
4326
+ array_get(&self->states, first_unfinished_state_index)->capture_list_id
4327
+ );
4328
+ array_erase(&self->states, first_unfinished_state_index);
4329
+ }
4330
+
4331
+ // If there are no finished matches that are ready to be returned, then
4332
+ // continue finding more matches.
4333
+ if (
4334
+ !ts_query_cursor__advance(self, true) &&
4335
+ self->finished_states.size == 0
4336
+ ) return false;
4337
+ }
4338
+ }
4339
+
4340
// Store `max_start_depth` on the cursor. `ts_query_cursor__advance` only adds
// a new pattern state when its start depth is <= this value, and
// `ts_query_cursor__should_descend` compares the current depth against it.
+ void ts_query_cursor_set_max_start_depth(
4341
+ TSQueryCursor *self,
4342
+ uint32_t max_start_depth
4343
+ ) {
4344
+ self->max_start_depth = max_start_depth;
4345
+ }
4346
+
4347
+ #undef LOG