tree-sitter-beancount 2.1.3 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/scanner.c ADDED
@@ -0,0 +1,208 @@
1
#include <stdio.h> // test
#include <stdlib.h>
#include <wctype.h>

#include <tree_sitter/parser.h>
4
+
5
/* Larger of two values. Each argument may be evaluated more than once. */
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif

/*
 * Minimal growable array of int16_t, manipulated through the VEC_* macros
 * below. A zero-initialised vec (length 0, capacity 0, data NULL) is a valid
 * empty vector.
 */
typedef struct {
    uint32_t length;   /* number of elements currently stored */
    uint32_t capacity; /* number of elements `data` has room for */
    int16_t *data;     /* heap storage; NULL while nothing is allocated */
} vec;

/*
 * Reallocate the storage so it holds `_cap` elements. `_cap` must be
 * non-zero (every user in this file passes at least 16). Aborts when the
 * allocation fails instead of overwriting `data` with NULL, which would leak
 * the old buffer and make the next element write dereference NULL.
 */
#define VEC_RESIZE(vec, _cap)                                                \
    do {                                                                     \
        int16_t *vec_resized_ =                                              \
            (int16_t *)realloc((vec).data, (_cap) * sizeof((vec).data[0]));  \
        if (vec_resized_ == NULL) {                                          \
            abort();                                                         \
        }                                                                    \
        (vec).data = vec_resized_;                                           \
        (vec).capacity = (_cap);                                             \
    } while (0)

/* Ensure room for at least `_cap` elements; never shrinks. */
#define VEC_GROW(vec, _cap)                \
    do {                                   \
        if ((vec).capacity < (_cap)) {     \
            VEC_RESIZE((vec), (_cap));     \
        }                                  \
    } while (0)

/* Append `el`, doubling the capacity (minimum 16) when the storage is full. */
#define VEC_PUSH(vec, el)                                      \
    do {                                                       \
        if ((vec).capacity == (vec).length) {                  \
            VEC_RESIZE((vec), MAX(16, (vec).length * 2));      \
        }                                                      \
        (vec).data[(vec).length++] = (el);                     \
    } while (0)

/* Drop the last element; a no-op on an empty vector (no unsigned wrap). */
#define VEC_POP(vec)                 \
    do {                             \
        if ((vec).length > 0) {      \
            (vec).length--;          \
        }                            \
    } while (0)

/* Initialiser for an empty vector. */
#define VEC_NEW \
    { .length = 0, .capacity = 0, .data = NULL }

/* Last element; the vector must not be empty. */
#define VEC_BACK(vec) ((vec).data[(vec).length - 1])

/* Release the storage and leave the vector in the valid empty state. */
#define VEC_FREE(vec)             \
    do {                          \
        free((vec).data);         \
        (vec).data = NULL;        \
        (vec).length = 0;         \
        (vec).capacity = 0;       \
    } while (0)

/* Forget all elements but keep the allocated storage for reuse. */
#define VEC_CLEAR(vec) ((vec).length = 0)
45
+
46
/*
 * Token kinds this scanner can produce. The values are used as indices into
 * the `valid_symbols` array and are assigned to `lexer->result_symbol`.
 */
enum TokenType {
    SECTION = 0,
    SECTIONEND = 1,
    END_OF_FILE = 2,
};
51
+
52
+ typedef struct {
53
+ vec indent_length_stack;
54
+ vec org_section_stack;
55
+ } Scanner;
56
+
57
+ unsigned serialize(Scanner *scanner, char *buffer) {
58
+ size_t i = 0;
59
+
60
+ size_t indent_count = scanner->indent_length_stack.length - 1;
61
+ if (indent_count > UINT8_MAX)
62
+ indent_count = UINT8_MAX;
63
+ buffer[i++] = (char)indent_count;
64
+
65
+ int iter = 1;
66
+ for (; iter < scanner->indent_length_stack.length
67
+ && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
68
+ ++iter) {
69
+ buffer[i++] = (char)scanner->indent_length_stack.data[iter];
70
+ }
71
+
72
+ size_t org_section_count = scanner->org_section_stack.length - 1;
73
+ if (org_section_count > UINT8_MAX)
74
+ org_section_count = UINT8_MAX;
75
+ buffer[i++] = (char)org_section_count;
76
+
77
+ iter = 1;
78
+ for (; iter < scanner->org_section_stack.length
79
+ && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
80
+ ++iter) {
81
+ buffer[i++] = (char)scanner->org_section_stack.data[iter];
82
+ }
83
+
84
+ return i;
85
+ }
86
+
87
+ void deserialize(Scanner *scanner, const char *buffer, unsigned length) {
88
+ VEC_CLEAR(scanner->org_section_stack);
89
+ VEC_PUSH(scanner->org_section_stack, 0);
90
+ VEC_CLEAR(scanner->indent_length_stack);
91
+ VEC_PUSH(scanner->indent_length_stack, 0);
92
+
93
+ if (length == 0)
94
+ return;
95
+
96
+ size_t i = 0;
97
+
98
+ size_t indent_count = (unsigned char)buffer[i++];
99
+ for (; i <= indent_count; i++) {
100
+ VEC_PUSH(scanner->indent_length_stack, (unsigned char)buffer[i]);
101
+ }
102
+
103
+ size_t org_section_count = (unsigned char)buffer[i++];
104
+ for (; i < length; i++) {
105
+ VEC_PUSH(scanner->org_section_stack, (unsigned char)buffer[i]);
106
+ }
107
+ }
108
+
109
+ void advance(TSLexer *lexer) {
110
+ lexer->advance(lexer, false);
111
+ }
112
+
113
+ void skip(TSLexer *lexer) {
114
+ lexer->advance(lexer, true);
115
+ }
116
+
117
+ static bool in_error_recovery(const bool *valid_symbols) {
118
+ return (valid_symbols[SECTION] && valid_symbols[SECTIONEND]
119
+ && valid_symbols[END_OF_FILE]);
120
+ }
121
+
122
+ bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) {
123
+
124
+ if (in_error_recovery(valid_symbols))
125
+ return false;
126
+
127
+ // - Section ends
128
+ int16_t indent_length = 0;
129
+ lexer->mark_end(lexer);
130
+ for (;;) {
131
+ if (lexer->lookahead == ' ') {
132
+ indent_length++;
133
+ } else if (lexer->lookahead == '\t') {
134
+ indent_length += 8;
135
+ } else if (lexer->lookahead == '\0') {
136
+
137
+ if (valid_symbols[SECTIONEND]) {
138
+ lexer->result_symbol = SECTIONEND;
139
+ } else if (valid_symbols[END_OF_FILE]) {
140
+ lexer->result_symbol = END_OF_FILE;
141
+ } else {
142
+ return false;
143
+ }
144
+
145
+ return true;
146
+ } else {
147
+ break;
148
+ }
149
+ skip(lexer);
150
+ }
151
+
152
+ if (indent_length == 0 && lexer->lookahead == '*') {
153
+ lexer->mark_end(lexer);
154
+ int16_t stars = 1;
155
+ skip(lexer);
156
+ while (lexer->lookahead == '*') {
157
+ stars++;
158
+ skip(lexer);
159
+ }
160
+
161
+ if (valid_symbols[SECTIONEND] && iswspace(lexer->lookahead) && stars > 0
162
+ && stars <= VEC_BACK(scanner->org_section_stack)) {
163
+ VEC_POP(scanner->org_section_stack);
164
+ lexer->result_symbol = SECTIONEND;
165
+ return true;
166
+ } else if (valid_symbols[SECTION] && iswspace(lexer->lookahead)) {
167
+ VEC_PUSH(scanner->org_section_stack, stars);
168
+ lexer->result_symbol = SECTION;
169
+ return true;
170
+ }
171
+ return false;
172
+ }
173
+
174
+ return false; // default
175
+ }
176
+
177
+ void *tree_sitter_beancount_external_scanner_create() {
178
+ Scanner *scanner = (Scanner *)calloc(1, sizeof(Scanner));
179
+ deserialize(scanner, NULL, 0);
180
+ return scanner;
181
+ }
182
+
183
+ bool tree_sitter_beancount_external_scanner_scan(void *payload,
184
+ TSLexer *lexer,
185
+ const bool *valid_symbols) {
186
+ Scanner *scanner = (Scanner *)payload;
187
+ return scan(scanner, lexer, valid_symbols);
188
+ }
189
+
190
+ unsigned tree_sitter_beancount_external_scanner_serialize(void *payload,
191
+ char *buffer) {
192
+ Scanner *scanner = (Scanner *)payload;
193
+ return serialize(scanner, buffer);
194
+ }
195
+
196
+ void tree_sitter_beancount_external_scanner_deserialize(void *payload,
197
+ const char *buffer,
198
+ unsigned length) {
199
+ Scanner *scanner = (Scanner *)payload;
200
+ deserialize(scanner, buffer, length);
201
+ }
202
+
203
+ void tree_sitter_beancount_external_scanner_destroy(void *payload) {
204
+ Scanner *scanner = (Scanner *)payload;
205
+ VEC_FREE(scanner->indent_length_stack);
206
+ VEC_FREE(scanner->org_section_stack);
207
+ free(scanner);
208
+ }
@@ -13,9 +13,8 @@ extern "C" {
13
13
  #define ts_builtin_sym_end 0
14
14
  #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024
15
15
 
16
- typedef uint16_t TSStateId;
17
-
18
16
  #ifndef TREE_SITTER_API_H_
17
+ typedef uint16_t TSStateId;
19
18
  typedef uint16_t TSSymbol;
20
19
  typedef uint16_t TSFieldId;
21
20
  typedef struct TSLanguage TSLanguage;
@@ -130,9 +129,16 @@ struct TSLanguage {
130
129
  * Lexer Macros
131
130
  */
132
131
 
132
+ #ifdef _MSC_VER
133
+ #define UNUSED __pragma(warning(suppress : 4101))
134
+ #else
135
+ #define UNUSED __attribute__((unused))
136
+ #endif
137
+
133
138
  #define START_LEXER() \
134
139
  bool result = false; \
135
140
  bool skip = false; \
141
+ UNUSED \
136
142
  bool eof = false; \
137
143
  int32_t lookahead; \
138
144
  goto start; \
@@ -166,7 +172,7 @@ struct TSLanguage {
166
172
  * Parse Table Macros
167
173
  */
168
174
 
169
- #define SMALL_STATE(id) id - LARGE_STATE_COUNT
175
+ #define SMALL_STATE(id) ((id) - LARGE_STATE_COUNT)
170
176
 
171
177
  #define STATE(id) id
172
178
 
@@ -176,7 +182,7 @@ struct TSLanguage {
176
182
  {{ \
177
183
  .shift = { \
178
184
  .type = TSParseActionTypeShift, \
179
- .state = state_value \
185
+ .state = (state_value) \
180
186
  } \
181
187
  }}
182
188
 
@@ -184,7 +190,7 @@ struct TSLanguage {
184
190
  {{ \
185
191
  .shift = { \
186
192
  .type = TSParseActionTypeShift, \
187
- .state = state_value, \
193
+ .state = (state_value), \
188
194
  .repetition = true \
189
195
  } \
190
196
  }}
package/src/scanner.cc DELETED
@@ -1,171 +0,0 @@
1
- #include <cwctype>
2
- #include <tree_sitter/parser.h>
3
- #include <vector>
4
-
5
- namespace {
6
-
7
- using std::iswspace;
8
- using std::vector;
9
-
10
- enum TokenType {
11
- SECTION,
12
- SECTIONEND,
13
- END_OF_FILE,
14
- };
15
-
16
- struct Scanner {
17
- vector<int16_t> indent_length_stack;
18
- vector<int16_t> org_section_stack;
19
-
20
- Scanner() {
21
- deserialize(NULL, 0);
22
- }
23
-
24
- unsigned serialize(char *buffer) {
25
- size_t i = 0;
26
-
27
- size_t indent_count = indent_length_stack.size() - 1;
28
- if (indent_count > UINT8_MAX)
29
- indent_count = UINT8_MAX;
30
- buffer[i++] = indent_count;
31
-
32
- vector<int16_t>::iterator iter = indent_length_stack.begin() + 1,
33
- end = indent_length_stack.end();
34
-
35
- for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
36
- ++iter) {
37
- buffer[i++] = *iter;
38
- }
39
-
40
- iter = org_section_stack.begin() + 1;
41
- end = org_section_stack.end();
42
-
43
- for (; iter != end && i < TREE_SITTER_SERIALIZATION_BUFFER_SIZE;
44
- ++iter) {
45
- buffer[i++] = *iter;
46
- }
47
-
48
- return i;
49
- }
50
-
51
- void deserialize(const char *buffer, unsigned length) {
52
- org_section_stack.clear();
53
- org_section_stack.push_back(0);
54
- indent_length_stack.clear();
55
- indent_length_stack.push_back(-1);
56
-
57
- if (length == 0)
58
- return;
59
-
60
- size_t i = 0;
61
-
62
- size_t indent_count = (uint8_t)buffer[i++];
63
-
64
- for (; i <= indent_count; i++)
65
- indent_length_stack.push_back(buffer[i]);
66
- for (; i < length; i++)
67
- org_section_stack.push_back(buffer[i]);
68
- }
69
-
70
- void advance(TSLexer *lexer) {
71
- lexer->advance(lexer, false);
72
- }
73
-
74
- void skip(TSLexer *lexer) {
75
- lexer->advance(lexer, true);
76
- }
77
-
78
- static bool in_error_recovery(const bool *valid_symbols) {
79
- return (valid_symbols[SECTION] && valid_symbols[SECTIONEND]
80
- && valid_symbols[END_OF_FILE]);
81
- }
82
-
83
- bool scan(TSLexer *lexer, const bool *valid_symbols) {
84
-
85
- if (in_error_recovery(valid_symbols))
86
- return false;
87
-
88
- // - Section ends
89
- int16_t indent_length = 0;
90
- lexer->mark_end(lexer);
91
- for (;;) {
92
- if (lexer->lookahead == ' ') {
93
- indent_length++;
94
- } else if (lexer->lookahead == '\t') {
95
- indent_length += 8;
96
- } else if (lexer->lookahead == '\0') {
97
-
98
- if (valid_symbols[SECTIONEND]) {
99
- lexer->result_symbol = SECTIONEND;
100
- } else if (valid_symbols[END_OF_FILE]) {
101
- lexer->result_symbol = END_OF_FILE;
102
- } else {
103
- return false;
104
- }
105
-
106
- return true;
107
- } else {
108
- break;
109
- }
110
- skip(lexer);
111
- }
112
-
113
- if (indent_length == 0 && lexer->lookahead == '*') {
114
- lexer->mark_end(lexer);
115
- int16_t stars = 1;
116
- skip(lexer);
117
- while (lexer->lookahead == '*') {
118
- stars++;
119
- skip(lexer);
120
- }
121
-
122
- if (valid_symbols[SECTIONEND] && iswspace(lexer->lookahead)
123
- && stars > 0 && stars <= org_section_stack.back()) {
124
- org_section_stack.pop_back();
125
- lexer->result_symbol = SECTIONEND;
126
- return true;
127
- } else if (valid_symbols[SECTION] && iswspace(lexer->lookahead)) {
128
- org_section_stack.push_back(stars);
129
- lexer->result_symbol = SECTION;
130
- return true;
131
- }
132
- return false;
133
- }
134
-
135
- return false; // default
136
- }
137
- };
138
-
139
- } // namespace
140
-
141
- extern "C" {
142
-
143
- void *tree_sitter_beancount_external_scanner_create() {
144
- return new Scanner();
145
- }
146
-
147
- bool tree_sitter_beancount_external_scanner_scan(void *payload,
148
- TSLexer *lexer,
149
- const bool *valid_symbols) {
150
- Scanner *scanner = static_cast<Scanner *>(payload);
151
- return scanner->scan(lexer, valid_symbols);
152
- }
153
-
154
- unsigned tree_sitter_beancount_external_scanner_serialize(void *payload,
155
- char *buffer) {
156
- Scanner *scanner = static_cast<Scanner *>(payload);
157
- return scanner->serialize(buffer);
158
- }
159
-
160
- void tree_sitter_beancount_external_scanner_deserialize(void *payload,
161
- const char *buffer,
162
- unsigned length) {
163
- Scanner *scanner = static_cast<Scanner *>(payload);
164
- scanner->deserialize(buffer, length);
165
- }
166
-
167
- void tree_sitter_beancount_external_scanner_destroy(void *payload) {
168
- Scanner *scanner = static_cast<Scanner *>(payload);
169
- delete scanner;
170
- }
171
- }