tree-sitter-beancount 2.4.2 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/scanner.c CHANGED
@@ -11,6 +11,10 @@
11
11
  #include <tree_sitter/parser.h>
12
12
  #include <wctype.h>
13
13
 
14
+ #if !defined (UINT8_MAX)
15
+ #define UINT8_MAX 255
16
+ #endif
17
+
14
18
  // Utility macros
15
19
  #define MAX(a, b) ((a) > (b) ? (a) : (b))
16
20
 
@@ -127,10 +131,12 @@ enum TokenType {
127
131
  * The scanner maintains two stacks and an EOF flag to track the current parsing state:
128
132
  * - indent_length_stack: Tracks indentation levels for proper nesting
129
133
  * - org_section_stack: Tracks org-mode section nesting levels
134
+ * - eof_returned: Flag to prevent returning EOF multiple times (prevents infinite loops)
130
135
  */
131
136
  typedef struct {
132
137
  vec indent_length_stack; // Stack of indentation levels
133
138
  vec org_section_stack; // Stack of org-mode section levels
139
+ bool eof_returned; // Flag to prevent returning EOF multiple times
134
140
  } Scanner;
135
141
 
136
142
  /**
@@ -142,11 +148,14 @@ typedef struct {
142
148
  * Serializes the scanner's EOF flag together with its indentation and section stacks for later restoration.
143
149
  * This is used by tree-sitter to maintain parsing state across incremental updates.
144
150
  *
145
- * Format: [indent_count][indent_data...][section_count][section_data...]
151
+ * Format: [eof_returned][indent_count][indent_data...][section_count][section_data...]
146
152
  */
147
- unsigned serialize(Scanner *scanner, char *buffer) {
153
+ static unsigned serialize(Scanner *scanner, char *buffer) {
148
154
  size_t i = 0;
149
155
 
156
+ // Serialize EOF flag
157
+ buffer[i++] = scanner->eof_returned ? 1 : 0;
158
+
150
159
  // Serialize indentation stack
151
160
  // Skip the first element (always 0) and limit to UINT8_MAX for safety
152
161
  size_t indent_count = scanner->indent_length_stack.length - 1;
@@ -155,7 +164,7 @@ unsigned serialize(Scanner *scanner, char *buffer) {
155
164
  buffer[i++] = (char)indent_count;
156
165
 
157
166
  // Write indentation stack data (starting from index 1)
158
- int iter = 1;
167
+ uint32_t iter = 1;
159
168
  for (; iter < scanner->indent_length_stack.length
160
169
  && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
161
170
  ++iter) {
@@ -190,12 +199,13 @@ unsigned serialize(Scanner *scanner, char *buffer) {
190
199
  *
191
200
  * The stacks are always initialized with a base element of 0.
192
201
  */
193
- void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
202
+ static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
194
203
  // Reset scanner to initial state
195
204
  VEC_CLEAR(scanner->org_section_stack);
196
205
  VEC_CLEAR(scanner->indent_length_stack);
197
206
  VEC_PUSH(scanner->org_section_stack, 0);
198
207
  VEC_PUSH(scanner->indent_length_stack, 0);
208
+ scanner->eof_returned = false;
199
209
 
200
210
  // Handle empty buffer case
201
211
  if (length == 0)
@@ -203,6 +213,12 @@ void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
203
213
 
204
214
  size_t i = 0;
205
215
 
216
+ // Deserialize EOF flag
217
+ scanner->eof_returned = (buffer[i++] != 0);
218
+
219
+ // Check if we have more data
220
+ if (i >= length) return;
221
+
206
222
  // Deserialize indentation stack
207
223
  size_t indent_count = (unsigned char)buffer[i++];
208
224
  size_t end_indent = i + indent_count;
@@ -280,19 +296,26 @@ static int16_t count_leading_whitespace(TSLexer *lexer) {
280
296
 
281
297
  /**
282
298
  * @brief Handle end-of-file detection
299
+ * @param scanner The scanner state (for tracking eof_returned flag)
283
300
  * @param lexer The tree-sitter lexer interface
284
301
  * @param valid_symbols Array indicating which tokens are valid
285
302
  * @return true if a SECTIONEND or END_OF_FILE token was produced, false otherwise
286
303
  */
287
- static bool handle_eof(TSLexer *lexer, const bool *valid_symbols) {
304
+ static bool handle_eof(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
288
305
  if (lexer->lookahead != '\0') {
289
306
  return false;
290
307
  }
291
308
 
309
+ // SECTIONEND can be returned multiple times at EOF to close nested sections
310
+ // The parser controls this via valid_symbols
292
311
  if (valid_symbols[SECTIONEND]) {
293
312
  lexer->result_symbol = SECTIONEND;
294
313
  return true;
295
- } else if (valid_symbols[END_OF_FILE]) {
314
+ }
315
+
316
+ // END_OF_FILE should only be returned once to prevent infinite loops
317
+ if (valid_symbols[END_OF_FILE] && !scanner->eof_returned) {
318
+ scanner->eof_returned = true;
296
319
  lexer->result_symbol = END_OF_FILE;
297
320
  return true;
298
321
  }
@@ -357,7 +380,7 @@ static bool parse_section_header(Scanner *scanner, TSLexer *lexer, const bool *v
357
380
  * - End of file detection
358
381
  * - Indentation tracking for proper nesting
359
382
  */
360
- bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
383
+ static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
361
384
 
362
385
  // Don't produce tokens during error recovery
363
386
  if (in_error_recovery(valid_symbols))
@@ -370,7 +393,7 @@ bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
370
393
  int16_t indent_length = count_leading_whitespace(lexer);
371
394
 
372
395
  // Handle end of file
373
- if (handle_eof(lexer, valid_symbols)) {
396
+ if (handle_eof(scanner, lexer, valid_symbols)) {
374
397
  return true;
375
398
  }
376
399
 
@@ -391,6 +414,7 @@ bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
391
414
  static void init_scanner(Scanner *scanner) {
392
415
  scanner->indent_length_stack = (vec)VEC_NEW;
393
416
  scanner->org_section_stack = (vec)VEC_NEW;
417
+ scanner->eof_returned = false;
394
418
 
395
419
  // Initialize stacks with base element 0
396
420
  VEC_PUSH(scanner->indent_length_stack, 0);