tree-sitter-beancount 2.4.2 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/grammar.js +44 -22
- package/package.json +3 -3
- package/prebuilds/darwin-arm64/tree-sitter-beancount.node +0 -0
- package/prebuilds/darwin-x64/tree-sitter-beancount.node +0 -0
- package/prebuilds/linux-arm64/tree-sitter-beancount.node +0 -0
- package/prebuilds/linux-x64/tree-sitter-beancount.node +0 -0
- package/prebuilds/win32-arm64/tree-sitter-beancount.node +0 -0
- package/prebuilds/win32-x64/tree-sitter-beancount.node +0 -0
- package/src/grammar.json +177 -50
- package/src/node-types.json +24 -1
- package/src/parser.c +8764 -7281
- package/src/scanner.c +32 -8
package/src/scanner.c
CHANGED
|
@@ -11,6 +11,10 @@
|
|
|
11
11
|
#include <tree_sitter/parser.h>
|
|
12
12
|
#include <wctype.h>
|
|
13
13
|
|
|
14
|
+
#if !defined (UINT8_MAX)
|
|
15
|
+
#define UINT8_MAX 255
|
|
16
|
+
#endif
|
|
17
|
+
|
|
14
18
|
// Utility macros
|
|
15
19
|
#define MAX(a, b) ((a) > (b) ? (a) : (b))
|
|
16
20
|
|
|
@@ -127,10 +131,12 @@ enum TokenType {
|
|
|
127
131
|
* The scanner maintains two stacks and one flag to track the current parsing state:
|
|
128
132
|
* - indent_length_stack: Tracks indentation levels for proper nesting
|
|
129
133
|
* - org_section_stack: Tracks org-mode section nesting levels
|
|
134
|
+
* - eof_returned: Flag to prevent returning EOF multiple times (prevents infinite loops)
|
|
130
135
|
*/
|
|
131
136
|
typedef struct {
|
|
132
137
|
vec indent_length_stack; // Stack of indentation levels
|
|
133
138
|
vec org_section_stack; // Stack of org-mode section levels
|
|
139
|
+
bool eof_returned; // Flag to prevent returning EOF multiple times
|
|
134
140
|
} Scanner;
|
|
135
141
|
|
|
136
142
|
/**
|
|
@@ -142,11 +148,14 @@ typedef struct {
|
|
|
142
148
|
* Serializes the scanner's EOF flag, indentation stack, and section stack for later restoration.
|
|
143
149
|
* This is used by tree-sitter to maintain parsing state across incremental updates.
|
|
144
150
|
*
|
|
145
|
-
* Format: [indent_count][indent_data...][section_count][section_data...]
|
|
151
|
+
* Format: [eof_returned][indent_count][indent_data...][section_count][section_data...]
|
|
146
152
|
*/
|
|
147
|
-
unsigned serialize(Scanner *scanner, char *buffer) {
|
|
153
|
+
static unsigned serialize(Scanner *scanner, char *buffer) {
|
|
148
154
|
size_t i = 0;
|
|
149
155
|
|
|
156
|
+
// Serialize EOF flag
|
|
157
|
+
buffer[i++] = scanner->eof_returned ? 1 : 0;
|
|
158
|
+
|
|
150
159
|
// Serialize indentation stack
|
|
151
160
|
// Skip the first element (always 0) and limit to UINT8_MAX for safety
|
|
152
161
|
size_t indent_count = scanner->indent_length_stack.length - 1;
|
|
@@ -155,7 +164,7 @@ unsigned serialize(Scanner *scanner, char *buffer) {
|
|
|
155
164
|
buffer[i++] = (char)indent_count;
|
|
156
165
|
|
|
157
166
|
// Write indentation stack data (starting from index 1)
|
|
158
|
-
|
|
167
|
+
uint32_t iter = 1;
|
|
159
168
|
for (; iter < scanner->indent_length_stack.length
|
|
160
169
|
&& i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
|
|
161
170
|
++iter) {
|
|
@@ -190,12 +199,13 @@ unsigned serialize(Scanner *scanner, char *buffer) {
|
|
|
190
199
|
*
|
|
191
200
|
* The stacks are always initialized with a base element of 0.
|
|
192
201
|
*/
|
|
193
|
-
void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
|
|
202
|
+
static void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
|
|
194
203
|
// Reset scanner to initial state
|
|
195
204
|
VEC_CLEAR(scanner->org_section_stack);
|
|
196
205
|
VEC_CLEAR(scanner->indent_length_stack);
|
|
197
206
|
VEC_PUSH(scanner->org_section_stack, 0);
|
|
198
207
|
VEC_PUSH(scanner->indent_length_stack, 0);
|
|
208
|
+
scanner->eof_returned = false;
|
|
199
209
|
|
|
200
210
|
// Handle empty buffer case
|
|
201
211
|
if (length == 0)
|
|
@@ -203,6 +213,12 @@ void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
|
|
|
203
213
|
|
|
204
214
|
size_t i = 0;
|
|
205
215
|
|
|
216
|
+
// Deserialize EOF flag
|
|
217
|
+
scanner->eof_returned = (buffer[i++] != 0);
|
|
218
|
+
|
|
219
|
+
// Check if we have more data
|
|
220
|
+
if (i >= length) return;
|
|
221
|
+
|
|
206
222
|
// Deserialize indentation stack
|
|
207
223
|
size_t indent_count = (unsigned char)buffer[i++];
|
|
208
224
|
size_t end_indent = i + indent_count;
|
|
@@ -280,19 +296,26 @@ static int16_t count_leading_whitespace(TSLexer *lexer) {
|
|
|
280
296
|
|
|
281
297
|
/**
|
|
282
298
|
* @brief Handle end-of-file detection
|
|
299
|
+
* @param scanner The scanner state (for tracking eof_returned flag)
|
|
283
300
|
* @param lexer The tree-sitter lexer interface
|
|
284
301
|
* @param valid_symbols Array indicating which tokens are valid
|
|
285
302
|
* @return true if a SECTIONEND or END_OF_FILE token was produced, false otherwise
|
|
286
303
|
*/
|
|
287
|
-
static bool handle_eof(TSLexer *lexer, const bool *valid_symbols) {
|
|
304
|
+
static bool handle_eof(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
|
|
288
305
|
if (lexer->lookahead != '\0') {
|
|
289
306
|
return false;
|
|
290
307
|
}
|
|
291
308
|
|
|
309
|
+
// SECTIONEND can be returned multiple times at EOF to close nested sections
|
|
310
|
+
// The parser controls this via valid_symbols
|
|
292
311
|
if (valid_symbols[SECTIONEND]) {
|
|
293
312
|
lexer->result_symbol = SECTIONEND;
|
|
294
313
|
return true;
|
|
295
|
-
}
|
|
314
|
+
}
|
|
315
|
+
|
|
316
|
+
// END_OF_FILE should only be returned once to prevent infinite loops
|
|
317
|
+
if (valid_symbols[END_OF_FILE] && !scanner->eof_returned) {
|
|
318
|
+
scanner->eof_returned = true;
|
|
296
319
|
lexer->result_symbol = END_OF_FILE;
|
|
297
320
|
return true;
|
|
298
321
|
}
|
|
@@ -357,7 +380,7 @@ static bool parse_section_header(Scanner *scanner, TSLexer *lexer, const bool *v
|
|
|
357
380
|
* - End of file detection
|
|
358
381
|
* - Indentation tracking for proper nesting
|
|
359
382
|
*/
|
|
360
|
-
bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
|
|
383
|
+
static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
|
|
361
384
|
|
|
362
385
|
// Don't produce tokens during error recovery
|
|
363
386
|
if (in_error_recovery(valid_symbols))
|
|
@@ -370,7 +393,7 @@ bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
|
|
|
370
393
|
int16_t indent_length = count_leading_whitespace(lexer);
|
|
371
394
|
|
|
372
395
|
// Handle end of file
|
|
373
|
-
if (handle_eof(lexer, valid_symbols)) {
|
|
396
|
+
if (handle_eof(scanner, lexer, valid_symbols)) {
|
|
374
397
|
return true;
|
|
375
398
|
}
|
|
376
399
|
|
|
@@ -391,6 +414,7 @@ bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
|
|
|
391
414
|
static void init_scanner(Scanner *scanner) {
|
|
392
415
|
scanner->indent_length_stack = (vec)VEC_NEW;
|
|
393
416
|
scanner->org_section_stack = (vec)VEC_NEW;
|
|
417
|
+
scanner->eof_returned = false;
|
|
394
418
|
|
|
395
419
|
// Initialize stacks with base element 0
|
|
396
420
|
VEC_PUSH(scanner->indent_length_stack, 0);
|