tree-sitter-beancount 2.3.3 → 2.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +334 -4
- package/binding.gyp +17 -7
- package/bindings/node/binding.cc +14 -22
- package/bindings/node/index.d.ts +28 -0
- package/bindings/node/index.js +3 -15
- package/grammar.js +38 -125
- package/package.json +35 -5
- package/prebuilds/darwin-arm64/tree-sitter-beancount.node +0 -0
- package/prebuilds/darwin-x64/tree-sitter-beancount.node +0 -0
- package/prebuilds/linux-arm64/tree-sitter-beancount.node +0 -0
- package/prebuilds/linux-x64/tree-sitter-beancount.node +0 -0
- package/prebuilds/win32-arm64/tree-sitter-beancount.node +0 -0
- package/prebuilds/win32-x64/tree-sitter-beancount.node +0 -0
- package/src/grammar.json +149 -560
- package/src/node-types.json +10 -11
- package/src/parser.c +7615 -9089
- package/src/scanner.c +345 -67
- package/src/tree_sitter/alloc.h +54 -0
- package/src/tree_sitter/array.h +291 -0
- package/src/tree_sitter/parser.h +68 -12
- package/.clang-format +0 -20
- package/.envrc +0 -1
- package/.gitattributes +0 -6
- package/.github/dependabot.yml +0 -26
- package/.github/workflows/cicd.yml +0 -30
- package/.github/workflows/release.yml +0 -72
- package/CHANGELOG.md +0 -80
- package/Cargo.lock +0 -71
- package/Cargo.toml +0 -26
- package/Package.swift +0 -20
- package/bindings/rust/build.rs +0 -39
- package/bindings/rust/lib.rs +0 -52
- package/flake.lock +0 -141
- package/flake.nix +0 -120
- package/test/corpus/arithmetic.txt +0 -373
- package/test/corpus/comment.txt +0 -992
- package/test/corpus/currencies.txt +0 -66
- package/test/corpus/entry_types.txt +0 -389
- package/test/corpus/markdown_orgmode.txt +0 -60
- package/test/corpus/metadata.txt +0 -414
- package/test/corpus/multi_line.txt +0 -27
- package/test/corpus/orgmode_sections.txt +0 -53
- package/test/corpus/parse_lots.txt +0 -417
- package/test/corpus/parser_include.txt +0 -23
- package/test/corpus/parser_links.txt +0 -32
- package/test/corpus/parser_options.txt +0 -39
- package/test/corpus/parser_plugin.txt +0 -35
- package/test/corpus/push_pop_meta.txt +0 -34
- package/test/corpus/push_pop_tag.txt +0 -23
- package/test/corpus/transaction.txt +0 -224
- package/test/corpus/ugly_bugs.txt +0 -91
package/src/scanner.c
CHANGED
|
@@ -1,67 +1,160 @@
|
|
|
1
|
-
|
|
1
|
+
/**
|
|
2
|
+
* @file scanner.c
|
|
3
|
+
* @brief External scanner for tree-sitter-beancount parser
|
|
4
|
+
*
|
|
5
|
+
* This file implements an external scanner for the Beancount language parser.
|
|
6
|
+
* It handles context-sensitive parsing of section headers and indentation tracking
|
|
7
|
+
* for Org-mode style sections within Beancount files.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
#include <stdio.h>
|
|
2
11
|
#include <tree_sitter/parser.h>
|
|
3
12
|
#include <wctype.h>
|
|
4
13
|
|
|
14
|
+
// Helper macros
// Yields the larger of two values. An argument may be evaluated twice, so
// callers must pass expressions without side effects.
#define MAX(a, b) (((b) < (a)) ? (a) : (b))

// Tunable constants
#define INITIAL_VEC_CAPACITY 16 // Initial capacity for dynamic arrays
#define TAB_WIDTH 8             // Number of spaces one tab counts for
|
|
20
|
+
|
|
21
|
+
/**
 * @brief Growable array of 16-bit signed integers
 *
 * Backing store for the scanner's indentation stack and its org-mode
 * section-level stack.
 */
typedef struct {
    uint32_t length;   // Number of elements currently stored
    uint32_t capacity; // Number of elements the allocation can hold
    int16_t *data;     // Element storage (NULL while nothing is allocated)
} vec;
|
|
12
32
|
|
|
13
|
-
|
|
14
|
-
|
|
33
|
+
/**
 * @brief Resize a dynamic array to a new capacity
 * @param vec  The vector to resize
 * @param _cap The new capacity in elements (0 frees the array)
 *
 * `_cap` is evaluated exactly once. A capacity of 0 releases the storage and
 * resets `data` to NULL. Otherwise the storage is reallocated; if realloc
 * fails, `data` and `capacity` are left untouched, so callers detect failure
 * by inspecting `capacity` afterwards. `length` is never modified.
 *
 * The macro's locals carry a `vec_resize_` prefix so they cannot shadow an
 * identifier that appears in the arguments (e.g. a caller variable `tmp`).
 */
#define VEC_RESIZE(vec, _cap)                                                  \
    do {                                                                       \
        const size_t vec_resize_cap_ = (size_t)(_cap);                         \
        if (vec_resize_cap_ == 0) {                                            \
            free((vec).data);                                                  \
            (vec).data = NULL;                                                 \
            (vec).capacity = 0;                                                \
            break;                                                             \
        }                                                                      \
        void *vec_resize_mem_ =                                                \
            realloc((vec).data, vec_resize_cap_ * sizeof((vec).data[0]));      \
        if (vec_resize_mem_ == NULL) {                                         \
            /* Allocation failed - keep existing data and capacity */          \
            break;                                                             \
        }                                                                      \
        (vec).data = vec_resize_mem_;                                          \
        (vec).capacity = (uint32_t)vec_resize_cap_;                            \
    } while (0)
|
|
57
|
+
|
|
58
|
+
/**
 * @brief Grow a vector to at least the specified capacity
 * @param vec  The vector to grow
 * @param _cap The minimum required capacity
 *
 * Resizes only when the current capacity is smaller than the requested one.
 * Wrapped in do/while(0) so the macro behaves as a single statement and can
 * be used safely in an unbraced if/else; invoke it with a trailing semicolon.
 */
#define VEC_GROW(vec, _cap)                                                    \
    do {                                                                       \
        if ((vec).capacity < (_cap)) {                                         \
            VEC_RESIZE((vec), (_cap));                                         \
        }                                                                      \
    } while (0)
|
|
24
69
|
|
|
70
|
+
/**
 * @brief Append an element to a vector
 * @param vec The vector to append to
 * @param el  The element to append
 *
 * When the vector is full, the storage is first resized to twice the current
 * length, but never to less than INITIAL_VEC_CAPACITY. If that resize does
 * not take effect (capacity stays below the requested size), the element is
 * dropped rather than written past the end of the allocation.
 */
#define VEC_PUSH(vec, el)                                                      \
    do {                                                                       \
        if ((vec).length == (vec).capacity) {                                  \
            uint32_t new_cap = MAX(INITIAL_VEC_CAPACITY, (vec).length * 2);    \
            VEC_RESIZE((vec), new_cap);                                        \
            if (new_cap > (vec).capacity) {                                    \
                /* Resize failed: drop the element instead of overflowing */   \
                break;                                                         \
            }                                                                  \
        }                                                                      \
        (vec).data[(vec).length++] = (el);                                     \
    } while (0)
|
|
91
|
+
|
|
92
|
+
/** @brief Drop the last element by decrementing the length (no emptiness check) */
#define VEC_POP(vec) (vec).length--;

/** @brief Initializer for an empty vector: no elements and no storage */
#define VEC_NEW                                                                \
    { .data = NULL, .capacity = 0, .length = 0 }

/** @brief Access the last element of a vector (the vector must not be empty) */
#define VEC_BACK(vec) ((vec).data[(vec).length - 1])
|
|
37
101
|
|
|
102
|
+
/**
 * @brief Release a vector's storage and reset it to the empty state
 *
 * free(NULL) is a no-op, so no NULL guard is needed. The fields are reset
 * after freeing so that a repeated VEC_FREE, or a later VEC_PUSH on the same
 * vector, cannot touch the released allocation. Wrapped in do/while(0) so it
 * behaves as a single statement; invoke it with a trailing semicolon.
 */
#define VEC_FREE(vec)                                                          \
    do {                                                                       \
        free((vec).data);                                                      \
        (vec).data = NULL;                                                     \
        (vec).length = 0;                                                      \
        (vec).capacity = 0;                                                    \
    } while (0)

/** @brief Remove all elements from a vector (the allocation is kept) */
#define VEC_CLEAR(vec) (vec).length = 0;
|
|
45
111
|
|
|
112
|
+
/**
 * @brief External tokens produced by this scanner
 *
 * The enumerators are used as indices into the `valid_symbols` array passed
 * to the scanner and as values for `lexer->result_symbol`.
 */
enum TokenType {
    SECTION = 0, // A headline ("*" or "#" markers followed by whitespace) opens a section
    SECTIONEND,  // The current section closes (headline of equal/shallower depth, or end of input)
    END_OF_FILE, // End of input
};
|
|
51
123
|
|
|
124
|
+
/**
|
|
125
|
+
* @brief Scanner state for tracking parsing context
|
|
126
|
+
*
|
|
127
|
+
* The scanner maintains two stacks to track the current parsing state:
|
|
128
|
+
* - indent_length_stack: Tracks indentation levels for proper nesting
|
|
129
|
+
* - org_section_stack: Tracks org-mode section nesting levels
|
|
130
|
+
*/
|
|
52
131
|
typedef struct {
|
|
53
|
-
vec indent_length_stack;
|
|
54
|
-
vec org_section_stack;
|
|
132
|
+
vec indent_length_stack; // Stack of indentation levels
|
|
133
|
+
vec org_section_stack; // Stack of org-mode section levels
|
|
55
134
|
} Scanner;
|
|
56
135
|
|
|
136
|
+
/**
|
|
137
|
+
* @brief Serialize scanner state into a buffer
|
|
138
|
+
* @param scanner The scanner state to serialize
|
|
139
|
+
* @param buffer The buffer to write the serialized data to
|
|
140
|
+
* @return The number of bytes written to the buffer
|
|
141
|
+
*
|
|
142
|
+
* Serializes the scanner's indentation and section stacks for later restoration.
|
|
143
|
+
* This is used by tree-sitter to maintain parsing state across incremental updates.
|
|
144
|
+
*
|
|
145
|
+
* Format: [indent_count][indent_data...][section_count][section_data...]
|
|
146
|
+
*/
|
|
57
147
|
unsigned serialize(Scanner *scanner, char *buffer) {
|
|
58
148
|
size_t i = 0;
|
|
59
149
|
|
|
150
|
+
// Serialize indentation stack
|
|
151
|
+
// Skip the first element (always 0) and limit to UINT8_MAX for safety
|
|
60
152
|
size_t indent_count = scanner->indent_length_stack.length - 1;
|
|
61
153
|
if (indent_count > UINT8_MAX)
|
|
62
154
|
indent_count = UINT8_MAX;
|
|
63
155
|
buffer[i++] = (char)indent_count;
|
|
64
156
|
|
|
157
|
+
// Write indentation stack data (starting from index 1)
|
|
65
158
|
int iter = 1;
|
|
66
159
|
for (; iter < scanner->indent_length_stack.length
|
|
67
160
|
&& i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
|
|
@@ -69,11 +162,13 @@ unsigned serialize(Scanner *scanner, char *buffer) {
|
|
|
69
162
|
buffer[i++] = (char)scanner->indent_length_stack.data[iter];
|
|
70
163
|
}
|
|
71
164
|
|
|
165
|
+
// Serialize org section stack
|
|
72
166
|
size_t org_section_count = scanner->org_section_stack.length - 1;
|
|
73
167
|
if (org_section_count > UINT8_MAX)
|
|
74
168
|
org_section_count = UINT8_MAX;
|
|
75
169
|
buffer[i++] = (char)org_section_count;
|
|
76
170
|
|
|
171
|
+
// Write org section stack data (starting from index 1)
|
|
77
172
|
iter = 1;
|
|
78
173
|
for (; iter < scanner->org_section_stack.length
|
|
79
174
|
&& i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
|
|
@@ -84,122 +179,305 @@ unsigned serialize(Scanner *scanner, char *buffer) {
|
|
|
84
179
|
return i;
|
|
85
180
|
}
|
|
86
181
|
|
|
182
|
+
/**
|
|
183
|
+
* @brief Deserialize scanner state from a buffer
|
|
184
|
+
* @param scanner The scanner to restore state into
|
|
185
|
+
* @param buffer The buffer containing serialized data
|
|
186
|
+
* @param length The length of the buffer in bytes
|
|
187
|
+
*
|
|
188
|
+
* Restores the scanner's indentation and section stacks from serialized data.
|
|
189
|
+
* This is used by tree-sitter to restore parsing state during incremental updates.
|
|
190
|
+
*
|
|
191
|
+
* The stacks are always initialized with a base element of 0.
|
|
192
|
+
*/
|
|
87
193
|
void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
|
|
194
|
+
// Reset scanner to initial state
|
|
88
195
|
VEC_CLEAR(scanner->org_section_stack);
|
|
89
|
-
VEC_PUSH(scanner->org_section_stack, 0);
|
|
90
196
|
VEC_CLEAR(scanner->indent_length_stack);
|
|
197
|
+
VEC_PUSH(scanner->org_section_stack, 0);
|
|
91
198
|
VEC_PUSH(scanner->indent_length_stack, 0);
|
|
92
199
|
|
|
200
|
+
// Handle empty buffer case
|
|
93
201
|
if (length == 0)
|
|
94
202
|
return;
|
|
95
203
|
|
|
96
204
|
size_t i = 0;
|
|
97
205
|
|
|
206
|
+
// Deserialize indentation stack
|
|
98
207
|
size_t indent_count = (unsigned char)buffer[i++];
|
|
99
|
-
|
|
208
|
+
size_t end_indent = i + indent_count;
|
|
209
|
+
for (; i < end_indent && i < length; i++) {
|
|
100
210
|
VEC_PUSH(scanner->indent_length_stack, (unsigned char)buffer[i]);
|
|
101
211
|
}
|
|
102
212
|
|
|
213
|
+
// Check if we have more data for org section stack
|
|
214
|
+
if (i >= length) return;
|
|
215
|
+
|
|
216
|
+
// Deserialize org section stack
|
|
103
217
|
size_t org_section_count = (unsigned char)buffer[i++];
|
|
104
|
-
|
|
218
|
+
size_t end_section = i + org_section_count;
|
|
219
|
+
for (; i < end_section && i < length; i++) {
|
|
105
220
|
VEC_PUSH(scanner->org_section_stack, (unsigned char)buffer[i]);
|
|
106
221
|
}
|
|
107
222
|
}
|
|
108
223
|
|
|
109
|
-
|
|
224
|
+
/** @brief Advance the lexer to the next character (include in parse result) */
|
|
225
|
+
static inline void advance(TSLexer *lexer) {
|
|
110
226
|
lexer->advance(lexer, false);
|
|
111
227
|
}
|
|
112
228
|
|
|
113
|
-
|
|
229
|
+
/** @brief Skip the current character (exclude from parse result) */
|
|
230
|
+
static inline void skip(TSLexer *lexer) {
|
|
114
231
|
lexer->advance(lexer, true);
|
|
115
232
|
}
|
|
116
233
|
|
|
234
|
+
/**
|
|
235
|
+
* @brief Check if the parser is in error recovery mode
|
|
236
|
+
* @param valid_symbols Array indicating which symbols are valid at this position
|
|
237
|
+
* @return true if all scanner tokens are valid (indicates error recovery)
|
|
238
|
+
*
|
|
239
|
+
* When the parser is in error recovery, it will accept any token we produce.
|
|
240
|
+
* We detect this by checking if all our token types are marked as valid.
|
|
241
|
+
*/
|
|
117
242
|
static bool in_error_recovery(const bool *valid_symbols) {
|
|
118
243
|
return (valid_symbols[SECTION] && valid_symbols[SECTIONEND]
|
|
119
244
|
&& valid_symbols[END_OF_FILE]);
|
|
120
245
|
}
|
|
121
246
|
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
247
|
+
/**
 * @brief Check if a character is a headline marker
 * @param c The code point to check (e.g. the lexer's 32-bit lookahead)
 * @return true if the character starts a headline ('*' or '#')
 *
 * Headlines start with org-mode '*' or markdown-style '#'. The parameter is
 * a full 32-bit code point: with a `char` parameter the lookahead would be
 * narrowed first, so unrelated code points whose low byte is 0x2A or 0x23
 * (e.g. U+012A, U+0123) would be mistaken for markers.
 */
static inline bool is_headline_marker(int32_t c) {
    return c == '*' || c == '#';
}
|
|
126
257
|
|
|
127
|
-
|
|
258
|
+
/**
|
|
259
|
+
* @brief Count leading whitespace characters
|
|
260
|
+
* @param lexer The tree-sitter lexer interface
|
|
261
|
+
* @return The indentation length in spaces (tabs converted to equivalent spaces)
|
|
262
|
+
*
|
|
263
|
+
* Counts spaces and tabs at the beginning of a line, converting tabs to spaces
|
|
264
|
+
* using the TAB_WIDTH constant. Stops at first non-whitespace character.
|
|
265
|
+
*/
|
|
266
|
+
static int16_t count_leading_whitespace(TSLexer *lexer) {
|
|
128
267
|
int16_t indent_length = 0;
|
|
129
|
-
|
|
130
|
-
|
|
268
|
+
|
|
269
|
+
while (lexer->lookahead == ' ' || lexer->lookahead == '\t') {
|
|
131
270
|
if (lexer->lookahead == ' ') {
|
|
132
271
|
indent_length++;
|
|
133
272
|
} else if (lexer->lookahead == '\t') {
|
|
134
|
-
indent_length +=
|
|
135
|
-
} else if (lexer->lookahead == '\0') {
|
|
136
|
-
|
|
137
|
-
if (valid_symbols[SECTIONEND]) {
|
|
138
|
-
lexer->result_symbol = SECTIONEND;
|
|
139
|
-
} else if (valid_symbols[END_OF_FILE]) {
|
|
140
|
-
lexer->result_symbol = END_OF_FILE;
|
|
141
|
-
} else {
|
|
142
|
-
return false;
|
|
143
|
-
}
|
|
144
|
-
|
|
145
|
-
return true;
|
|
146
|
-
} else {
|
|
147
|
-
break;
|
|
273
|
+
indent_length += TAB_WIDTH; // Convert tabs to equivalent spaces
|
|
148
274
|
}
|
|
149
|
-
skip(lexer);
|
|
275
|
+
skip(lexer); // Skip whitespace character
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
return indent_length;
|
|
279
|
+
}
|
|
280
|
+
|
|
281
|
+
/**
|
|
282
|
+
* @brief Handle end-of-file detection
|
|
283
|
+
* @param lexer The tree-sitter lexer interface
|
|
284
|
+
* @param valid_symbols Array indicating which tokens are valid
|
|
285
|
+
* @return true if EOF token was produced, false otherwise
|
|
286
|
+
*/
|
|
287
|
+
static bool handle_eof(TSLexer *lexer, const bool *valid_symbols) {
|
|
288
|
+
if (lexer->lookahead != '\0') {
|
|
289
|
+
return false;
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
if (valid_symbols[SECTIONEND]) {
|
|
293
|
+
lexer->result_symbol = SECTIONEND;
|
|
294
|
+
return true;
|
|
295
|
+
} else if (valid_symbols[END_OF_FILE]) {
|
|
296
|
+
lexer->result_symbol = END_OF_FILE;
|
|
297
|
+
return true;
|
|
150
298
|
}
|
|
151
299
|
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
300
|
+
return false;
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
/**
|
|
304
|
+
* @brief Parse section header and determine section boundaries
|
|
305
|
+
* @param scanner The scanner state
|
|
306
|
+
* @param lexer The tree-sitter lexer interface
|
|
307
|
+
* @param valid_symbols Array indicating which tokens are valid
|
|
308
|
+
* @return true if section token was produced, false otherwise
|
|
309
|
+
*/
|
|
310
|
+
static bool parse_section_header(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
|
|
311
|
+
if (!is_headline_marker(lexer->lookahead)) {
|
|
312
|
+
return false;
|
|
313
|
+
}
|
|
314
|
+
|
|
315
|
+
lexer->mark_end(lexer);
|
|
316
|
+
|
|
317
|
+
// Count consecutive headline markers (* or #)
|
|
318
|
+
int16_t stars = 1;
|
|
319
|
+
skip(lexer);
|
|
320
|
+
while (is_headline_marker(lexer->lookahead)) {
|
|
321
|
+
stars++;
|
|
155
322
|
skip(lexer);
|
|
156
|
-
|
|
157
|
-
stars++;
|
|
158
|
-
skip(lexer);
|
|
159
|
-
}
|
|
323
|
+
}
|
|
160
324
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
VEC_POP(scanner->org_section_stack);
|
|
164
|
-
lexer->result_symbol = SECTIONEND;
|
|
165
|
-
return true;
|
|
166
|
-
} else if (valid_symbols[SECTION] && iswspace(lexer->lookahead)) {
|
|
167
|
-
VEC_PUSH(scanner->org_section_stack, stars);
|
|
168
|
-
lexer->result_symbol = SECTION;
|
|
169
|
-
return true;
|
|
170
|
-
}
|
|
325
|
+
// Must be followed by whitespace to be a valid header
|
|
326
|
+
if (!iswspace(lexer->lookahead)) {
|
|
171
327
|
return false;
|
|
172
328
|
}
|
|
173
329
|
|
|
174
|
-
|
|
330
|
+
// Determine if this is a section end or section start
|
|
331
|
+
if (valid_symbols[SECTIONEND] && stars > 0
|
|
332
|
+
&& scanner->org_section_stack.length > 0
|
|
333
|
+
&& stars <= VEC_BACK(scanner->org_section_stack)) {
|
|
334
|
+
// This header closes a section (equal or higher level)
|
|
335
|
+
VEC_POP(scanner->org_section_stack);
|
|
336
|
+
lexer->result_symbol = SECTIONEND;
|
|
337
|
+
return true;
|
|
338
|
+
} else if (valid_symbols[SECTION]) {
|
|
339
|
+
// This header starts a new section
|
|
340
|
+
VEC_PUSH(scanner->org_section_stack, stars);
|
|
341
|
+
lexer->result_symbol = SECTION;
|
|
342
|
+
return true;
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
return false; // Header found but not at appropriate parsing state
|
|
175
346
|
}
|
|
176
347
|
|
|
348
|
+
/**
|
|
349
|
+
* @brief Main scanning function for the external scanner
|
|
350
|
+
* @param scanner The scanner state
|
|
351
|
+
* @param lexer The tree-sitter lexer interface
|
|
352
|
+
* @param valid_symbols Array indicating which tokens are valid at this position
|
|
353
|
+
* @return true if a token was successfully scanned, false otherwise
|
|
354
|
+
*
|
|
355
|
+
* This function handles context-sensitive parsing of:
|
|
356
|
+
* - Section boundaries (SECTION/SECTIONEND tokens)
|
|
357
|
+
* - End of file detection
|
|
358
|
+
* - Indentation tracking for proper nesting
|
|
359
|
+
*/
|
|
360
|
+
bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
|
|
361
|
+
|
|
362
|
+
// Don't produce tokens during error recovery
|
|
363
|
+
if (in_error_recovery(valid_symbols))
|
|
364
|
+
return false;
|
|
365
|
+
|
|
366
|
+
// Mark the current position for potential token end
|
|
367
|
+
lexer->mark_end(lexer);
|
|
368
|
+
|
|
369
|
+
// Count leading whitespace to determine indentation level
|
|
370
|
+
int16_t indent_length = count_leading_whitespace(lexer);
|
|
371
|
+
|
|
372
|
+
// Handle end of file
|
|
373
|
+
if (handle_eof(lexer, valid_symbols)) {
|
|
374
|
+
return true;
|
|
375
|
+
}
|
|
376
|
+
|
|
377
|
+
// Check for org-mode style section headers (must start at column 0)
|
|
378
|
+
if (indent_length == 0) {
|
|
379
|
+
return parse_section_header(scanner, lexer, valid_symbols);
|
|
380
|
+
}
|
|
381
|
+
|
|
382
|
+
return false; // No special tokens found
|
|
383
|
+
}
|
|
384
|
+
|
|
385
|
+
/**
|
|
386
|
+
* @brief Initialize a scanner with default state
|
|
387
|
+
* @param scanner The scanner to initialize
|
|
388
|
+
*
|
|
389
|
+
* Sets up the scanner with empty stacks containing the base element (0).
|
|
390
|
+
*/
|
|
391
|
+
static void init_scanner(Scanner *scanner) {
|
|
392
|
+
scanner->indent_length_stack = (vec)VEC_NEW;
|
|
393
|
+
scanner->org_section_stack = (vec)VEC_NEW;
|
|
394
|
+
|
|
395
|
+
// Initialize stacks with base element 0
|
|
396
|
+
VEC_PUSH(scanner->indent_length_stack, 0);
|
|
397
|
+
VEC_PUSH(scanner->org_section_stack, 0);
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
/**
|
|
401
|
+
* @brief Create a new scanner instance
|
|
402
|
+
* @return Pointer to the newly created scanner, or NULL if allocation fails
|
|
403
|
+
*
|
|
404
|
+
* This function is called by tree-sitter to create a new scanner instance.
|
|
405
|
+
* The scanner is initialized with empty stacks.
|
|
406
|
+
*/
|
|
177
407
|
void *tree_sitter_beancount_external_scanner_create() {
|
|
178
408
|
Scanner *scanner = (Scanner *)calloc(1, sizeof(Scanner));
|
|
179
|
-
|
|
409
|
+
if (scanner == NULL) {
|
|
410
|
+
return NULL; // Allocation failed
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
init_scanner(scanner);
|
|
180
414
|
return scanner;
|
|
181
415
|
}
|
|
182
416
|
|
|
417
|
+
/**
|
|
418
|
+
* @brief Entry point for scanning tokens
|
|
419
|
+
* @param payload Pointer to the scanner instance
|
|
420
|
+
* @param lexer The tree-sitter lexer interface
|
|
421
|
+
* @param valid_symbols Array indicating which tokens are valid
|
|
422
|
+
* @return true if a token was successfully scanned
|
|
423
|
+
*
|
|
424
|
+
* This is the main entry point called by tree-sitter for token scanning.
|
|
425
|
+
*/
|
|
183
426
|
bool tree_sitter_beancount_external_scanner_scan(void *payload,
|
|
184
427
|
TSLexer *lexer,
|
|
185
428
|
const bool *valid_symbols) {
|
|
429
|
+
if (payload == NULL || lexer == NULL || valid_symbols == NULL) {
|
|
430
|
+
return false; // Invalid parameters
|
|
431
|
+
}
|
|
432
|
+
|
|
186
433
|
Scanner *scanner = (Scanner *)payload;
|
|
187
434
|
return scan(scanner, lexer, valid_symbols);
|
|
188
435
|
}
|
|
189
436
|
|
|
437
|
+
/**
|
|
438
|
+
* @brief Serialize scanner state for incremental parsing
|
|
439
|
+
* @param payload Pointer to the scanner instance
|
|
440
|
+
* @param buffer Buffer to write serialized state to
|
|
441
|
+
* @return Number of bytes written to the buffer
|
|
442
|
+
*
|
|
443
|
+
* Called by tree-sitter to save scanner state for incremental parsing.
|
|
444
|
+
*/
|
|
190
445
|
unsigned tree_sitter_beancount_external_scanner_serialize(void *payload,
|
|
191
446
|
char *buffer) {
|
|
447
|
+
if (payload == NULL || buffer == NULL) {
|
|
448
|
+
return 0; // Invalid parameters
|
|
449
|
+
}
|
|
450
|
+
|
|
192
451
|
Scanner *scanner = (Scanner *)payload;
|
|
193
452
|
return serialize(scanner, buffer);
|
|
194
453
|
}
|
|
195
454
|
|
|
455
|
+
/**
|
|
456
|
+
* @brief Deserialize scanner state from buffer
|
|
457
|
+
* @param payload Pointer to the scanner instance
|
|
458
|
+
* @param buffer Buffer containing serialized state
|
|
459
|
+
* @param length Length of the buffer in bytes
|
|
460
|
+
*
|
|
461
|
+
* Called by tree-sitter to restore scanner state during incremental parsing.
|
|
462
|
+
*/
|
|
196
463
|
void tree_sitter_beancount_external_scanner_deserialize(void *payload,
|
|
197
464
|
const char *buffer,
|
|
198
465
|
unsigned length) {
|
|
466
|
+
if (payload == NULL) {
|
|
467
|
+
return; // Invalid parameters
|
|
468
|
+
}
|
|
469
|
+
|
|
199
470
|
Scanner *scanner = (Scanner *)payload;
|
|
200
471
|
deserialize(scanner, buffer, length);
|
|
201
472
|
}
|
|
202
473
|
|
|
474
|
+
/**
|
|
475
|
+
* @brief Destroy a scanner instance and free its memory
|
|
476
|
+
* @param payload Pointer to the scanner instance to destroy
|
|
477
|
+
*
|
|
478
|
+
* Called by tree-sitter when the scanner is no longer needed.
|
|
479
|
+
* Frees all allocated memory including the vector data arrays.
|
|
480
|
+
*/
|
|
203
481
|
void tree_sitter_beancount_external_scanner_destroy(void *payload) {
|
|
204
482
|
Scanner *scanner = (Scanner *)payload;
|
|
205
483
|
VEC_FREE(scanner->indent_length_stack);
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
#ifndef TREE_SITTER_ALLOC_H_
#define TREE_SITTER_ALLOC_H_

#ifdef __cplusplus
extern "C" {
#endif

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

// ts_malloc, ts_calloc, ts_realloc and ts_free can each be predefined by the
// client; only the names that are still undefined get a default below.
#ifdef TREE_SITTER_REUSE_ALLOCATOR

// Defaults go through allocator function pointers declared here as extern.
extern void *(*ts_current_malloc)(size_t size);
extern void *(*ts_current_calloc)(size_t count, size_t size);
extern void *(*ts_current_realloc)(void *ptr, size_t size);
extern void (*ts_current_free)(void *ptr);

#if !defined(ts_malloc)
#define ts_malloc ts_current_malloc
#endif
#if !defined(ts_calloc)
#define ts_calloc ts_current_calloc
#endif
#if !defined(ts_realloc)
#define ts_realloc ts_current_realloc
#endif
#if !defined(ts_free)
#define ts_free ts_current_free
#endif

#else // !TREE_SITTER_REUSE_ALLOCATOR

// Defaults are the C standard library allocation functions.
#if !defined(ts_malloc)
#define ts_malloc malloc
#endif
#if !defined(ts_calloc)
#define ts_calloc calloc
#endif
#if !defined(ts_realloc)
#define ts_realloc realloc
#endif
#if !defined(ts_free)
#define ts_free free
#endif

#endif // TREE_SITTER_REUSE_ALLOCATOR

#ifdef __cplusplus
}
#endif

#endif // TREE_SITTER_ALLOC_H_
|