@ast-grep/lang-python 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -4
- package/prebuilds/prebuild-Linux-ARM64/parser.so +0 -0
- package/prebuilds/prebuild-Linux-X64/parser.so +0 -0
- package/prebuilds/prebuild-Windows-X64/parser.so +0 -0
- package/prebuilds/prebuild-macOS-ARM64/parser.so +0 -0
- package/src/grammar.json +231 -136
- package/src/node-types.json +6 -31
- package/src/parser.c +57605 -58061
- package/src/scanner.c +16 -12
- package/src/tree_sitter/parser.h +27 -7
- package/type.d.ts +6 -27
package/src/scanner.c
CHANGED
|
@@ -85,7 +85,7 @@ static inline void set_end_character(Delimiter *delimiter, int32_t character) {
|
|
|
85
85
|
typedef struct {
|
|
86
86
|
Array(uint16_t) indents;
|
|
87
87
|
Array(Delimiter) delimiters;
|
|
88
|
-
bool
|
|
88
|
+
bool inside_interpolated_string;
|
|
89
89
|
} Scanner;
|
|
90
90
|
|
|
91
91
|
static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
|
|
@@ -177,7 +177,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
|
|
|
177
177
|
lexer->mark_end(lexer);
|
|
178
178
|
array_pop(&scanner->delimiters);
|
|
179
179
|
lexer->result_symbol = STRING_END;
|
|
180
|
-
scanner->
|
|
180
|
+
scanner->inside_interpolated_string = false;
|
|
181
181
|
}
|
|
182
182
|
return true;
|
|
183
183
|
}
|
|
@@ -195,7 +195,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
|
|
|
195
195
|
advance(lexer);
|
|
196
196
|
array_pop(&scanner->delimiters);
|
|
197
197
|
lexer->result_symbol = STRING_END;
|
|
198
|
-
scanner->
|
|
198
|
+
scanner->inside_interpolated_string = false;
|
|
199
199
|
}
|
|
200
200
|
lexer->mark_end(lexer);
|
|
201
201
|
return true;
|
|
@@ -211,7 +211,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
|
|
|
211
211
|
lexer->mark_end(lexer);
|
|
212
212
|
|
|
213
213
|
bool found_end_of_line = false;
|
|
214
|
-
|
|
214
|
+
uint16_t indent_length = 0;
|
|
215
215
|
int32_t first_comment_indent_length = -1;
|
|
216
216
|
for (;;) {
|
|
217
217
|
if (lexer->lookahead == '\n') {
|
|
@@ -280,7 +280,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
|
|
|
280
280
|
if ((valid_symbols[DEDENT] ||
|
|
281
281
|
(!valid_symbols[NEWLINE] && !(valid_symbols[STRING_START] && next_tok_is_string_start) &&
|
|
282
282
|
!within_brackets)) &&
|
|
283
|
-
indent_length < current_indent_length && !scanner->
|
|
283
|
+
indent_length < current_indent_length && !scanner->inside_interpolated_string &&
|
|
284
284
|
|
|
285
285
|
// Wait to create a dedent token until we've consumed any
|
|
286
286
|
// comments
|
|
@@ -303,7 +303,8 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
|
|
|
303
303
|
|
|
304
304
|
bool has_flags = false;
|
|
305
305
|
while (lexer->lookahead) {
|
|
306
|
-
if (lexer->lookahead == 'f' || lexer->lookahead == 'F'
|
|
306
|
+
if (lexer->lookahead == 'f' || lexer->lookahead == 'F' || lexer->lookahead == 't' ||
|
|
307
|
+
lexer->lookahead == 'T') {
|
|
307
308
|
set_format(&delimiter);
|
|
308
309
|
} else if (lexer->lookahead == 'r' || lexer->lookahead == 'R') {
|
|
309
310
|
set_raw(&delimiter);
|
|
@@ -349,7 +350,7 @@ bool tree_sitter_python_external_scanner_scan(void *payload, TSLexer *lexer, con
|
|
|
349
350
|
if (end_character(&delimiter)) {
|
|
350
351
|
array_push(&scanner->delimiters, delimiter);
|
|
351
352
|
lexer->result_symbol = STRING_START;
|
|
352
|
-
scanner->
|
|
353
|
+
scanner->inside_interpolated_string = is_format(&delimiter);
|
|
353
354
|
return true;
|
|
354
355
|
}
|
|
355
356
|
if (has_flags) {
|
|
@@ -365,7 +366,7 @@ unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buff
|
|
|
365
366
|
|
|
366
367
|
size_t size = 0;
|
|
367
368
|
|
|
368
|
-
buffer[size++] = (char)scanner->
|
|
369
|
+
buffer[size++] = (char)scanner->inside_interpolated_string;
|
|
369
370
|
|
|
370
371
|
size_t delimiter_count = scanner->delimiters.size;
|
|
371
372
|
if (delimiter_count > UINT8_MAX) {
|
|
@@ -380,7 +381,9 @@ unsigned tree_sitter_python_external_scanner_serialize(void *payload, char *buff
|
|
|
380
381
|
|
|
381
382
|
uint32_t iter = 1;
|
|
382
383
|
for (; iter < scanner->indents.size && size < TREE_SITTER_SERIALIZATION_BUFFER_SIZE; ++iter) {
|
|
383
|
-
|
|
384
|
+
uint16_t indent_value = *array_get(&scanner->indents, iter);
|
|
385
|
+
buffer[size++] = (char)(indent_value & 0xFF);
|
|
386
|
+
buffer[size++] = (char)((indent_value >> 8) & 0xFF);
|
|
384
387
|
}
|
|
385
388
|
|
|
386
389
|
return size;
|
|
@@ -396,7 +399,7 @@ void tree_sitter_python_external_scanner_deserialize(void *payload, const char *
|
|
|
396
399
|
if (length > 0) {
|
|
397
400
|
size_t size = 0;
|
|
398
401
|
|
|
399
|
-
scanner->
|
|
402
|
+
scanner->inside_interpolated_string = (bool)buffer[size++];
|
|
400
403
|
|
|
401
404
|
size_t delimiter_count = (uint8_t)buffer[size++];
|
|
402
405
|
if (delimiter_count > 0) {
|
|
@@ -406,8 +409,9 @@ void tree_sitter_python_external_scanner_deserialize(void *payload, const char *
|
|
|
406
409
|
size += delimiter_count;
|
|
407
410
|
}
|
|
408
411
|
|
|
409
|
-
for (; size < length; size
|
|
410
|
-
|
|
412
|
+
for (; size + 1 < length; size += 2) {
|
|
413
|
+
uint16_t indent_value = (unsigned char)buffer[size] | ((unsigned char)buffer[size + 1] << 8);
|
|
414
|
+
array_push(&scanner->indents, indent_value);
|
|
411
415
|
}
|
|
412
416
|
}
|
|
413
417
|
}
|
package/src/tree_sitter/parser.h
CHANGED
|
@@ -18,6 +18,11 @@ typedef uint16_t TSStateId;
|
|
|
18
18
|
typedef uint16_t TSSymbol;
|
|
19
19
|
typedef uint16_t TSFieldId;
|
|
20
20
|
typedef struct TSLanguage TSLanguage;
|
|
21
|
+
typedef struct TSLanguageMetadata {
|
|
22
|
+
uint8_t major_version;
|
|
23
|
+
uint8_t minor_version;
|
|
24
|
+
uint8_t patch_version;
|
|
25
|
+
} TSLanguageMetadata;
|
|
21
26
|
#endif
|
|
22
27
|
|
|
23
28
|
typedef struct {
|
|
@@ -26,10 +31,11 @@ typedef struct {
|
|
|
26
31
|
bool inherited;
|
|
27
32
|
} TSFieldMapEntry;
|
|
28
33
|
|
|
34
|
+
// Used to index the field and supertype maps.
|
|
29
35
|
typedef struct {
|
|
30
36
|
uint16_t index;
|
|
31
37
|
uint16_t length;
|
|
32
|
-
}
|
|
38
|
+
} TSMapSlice;
|
|
33
39
|
|
|
34
40
|
typedef struct {
|
|
35
41
|
bool visible;
|
|
@@ -79,6 +85,12 @@ typedef struct {
|
|
|
79
85
|
uint16_t external_lex_state;
|
|
80
86
|
} TSLexMode;
|
|
81
87
|
|
|
88
|
+
typedef struct {
|
|
89
|
+
uint16_t lex_state;
|
|
90
|
+
uint16_t external_lex_state;
|
|
91
|
+
uint16_t reserved_word_set_id;
|
|
92
|
+
} TSLexerMode;
|
|
93
|
+
|
|
82
94
|
typedef union {
|
|
83
95
|
TSParseAction action;
|
|
84
96
|
struct {
|
|
@@ -93,7 +105,7 @@ typedef struct {
|
|
|
93
105
|
} TSCharacterRange;
|
|
94
106
|
|
|
95
107
|
struct TSLanguage {
|
|
96
|
-
uint32_t
|
|
108
|
+
uint32_t abi_version;
|
|
97
109
|
uint32_t symbol_count;
|
|
98
110
|
uint32_t alias_count;
|
|
99
111
|
uint32_t token_count;
|
|
@@ -109,13 +121,13 @@ struct TSLanguage {
|
|
|
109
121
|
const TSParseActionEntry *parse_actions;
|
|
110
122
|
const char * const *symbol_names;
|
|
111
123
|
const char * const *field_names;
|
|
112
|
-
const
|
|
124
|
+
const TSMapSlice *field_map_slices;
|
|
113
125
|
const TSFieldMapEntry *field_map_entries;
|
|
114
126
|
const TSSymbolMetadata *symbol_metadata;
|
|
115
127
|
const TSSymbol *public_symbol_map;
|
|
116
128
|
const uint16_t *alias_map;
|
|
117
129
|
const TSSymbol *alias_sequences;
|
|
118
|
-
const
|
|
130
|
+
const TSLexerMode *lex_modes;
|
|
119
131
|
bool (*lex_fn)(TSLexer *, TSStateId);
|
|
120
132
|
bool (*keyword_lex_fn)(TSLexer *, TSStateId);
|
|
121
133
|
TSSymbol keyword_capture_token;
|
|
@@ -129,15 +141,23 @@ struct TSLanguage {
|
|
|
129
141
|
void (*deserialize)(void *, const char *, unsigned);
|
|
130
142
|
} external_scanner;
|
|
131
143
|
const TSStateId *primary_state_ids;
|
|
144
|
+
const char *name;
|
|
145
|
+
const TSSymbol *reserved_words;
|
|
146
|
+
uint16_t max_reserved_word_set_size;
|
|
147
|
+
uint32_t supertype_count;
|
|
148
|
+
const TSSymbol *supertype_symbols;
|
|
149
|
+
const TSMapSlice *supertype_map_slices;
|
|
150
|
+
const TSSymbol *supertype_map_entries;
|
|
151
|
+
TSLanguageMetadata metadata;
|
|
132
152
|
};
|
|
133
153
|
|
|
134
|
-
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
|
|
154
|
+
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
|
|
135
155
|
uint32_t index = 0;
|
|
136
156
|
uint32_t size = len - index;
|
|
137
157
|
while (size > 1) {
|
|
138
158
|
uint32_t half_size = size / 2;
|
|
139
159
|
uint32_t mid_index = index + half_size;
|
|
140
|
-
TSCharacterRange *range = &ranges[mid_index];
|
|
160
|
+
const TSCharacterRange *range = &ranges[mid_index];
|
|
141
161
|
if (lookahead >= range->start && lookahead <= range->end) {
|
|
142
162
|
return true;
|
|
143
163
|
} else if (lookahead > range->end) {
|
|
@@ -145,7 +165,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
|
|
|
145
165
|
}
|
|
146
166
|
size -= half_size;
|
|
147
167
|
}
|
|
148
|
-
TSCharacterRange *range = &ranges[index];
|
|
168
|
+
const TSCharacterRange *range = &ranges[index];
|
|
149
169
|
return (lookahead >= range->start && lookahead <= range->end);
|
|
150
170
|
}
|
|
151
171
|
|
package/type.d.ts
CHANGED
|
@@ -1347,7 +1347,7 @@ type pythonTypes = {
|
|
|
1347
1347
|
]
|
|
1348
1348
|
},
|
|
1349
1349
|
"value": {
|
|
1350
|
-
"multiple":
|
|
1350
|
+
"multiple": true,
|
|
1351
1351
|
"required": false,
|
|
1352
1352
|
"types": [
|
|
1353
1353
|
{
|
|
@@ -1368,25 +1368,6 @@ type pythonTypes = {
|
|
|
1368
1368
|
]
|
|
1369
1369
|
}
|
|
1370
1370
|
},
|
|
1371
|
-
"except_group_clause": {
|
|
1372
|
-
"type": "except_group_clause",
|
|
1373
|
-
"named": true,
|
|
1374
|
-
"fields": {},
|
|
1375
|
-
"children": {
|
|
1376
|
-
"multiple": true,
|
|
1377
|
-
"required": true,
|
|
1378
|
-
"types": [
|
|
1379
|
-
{
|
|
1380
|
-
"type": "block",
|
|
1381
|
-
"named": true
|
|
1382
|
-
},
|
|
1383
|
-
{
|
|
1384
|
-
"type": "expression",
|
|
1385
|
-
"named": true
|
|
1386
|
-
}
|
|
1387
|
-
]
|
|
1388
|
-
}
|
|
1389
|
-
},
|
|
1390
1371
|
"exec_statement": {
|
|
1391
1372
|
"type": "exec_statement",
|
|
1392
1373
|
"named": true,
|
|
@@ -2729,7 +2710,7 @@ type pythonTypes = {
|
|
|
2729
2710
|
},
|
|
2730
2711
|
"children": {
|
|
2731
2712
|
"multiple": true,
|
|
2732
|
-
"required":
|
|
2713
|
+
"required": false,
|
|
2733
2714
|
"types": [
|
|
2734
2715
|
{
|
|
2735
2716
|
"type": "else_clause",
|
|
@@ -2739,10 +2720,6 @@ type pythonTypes = {
|
|
|
2739
2720
|
"type": "except_clause",
|
|
2740
2721
|
"named": true
|
|
2741
2722
|
},
|
|
2742
|
-
{
|
|
2743
|
-
"type": "except_group_clause",
|
|
2744
|
-
"named": true
|
|
2745
|
-
},
|
|
2746
2723
|
{
|
|
2747
2724
|
"type": "finally_clause",
|
|
2748
2725
|
"named": true
|
|
@@ -3168,7 +3145,8 @@ type pythonTypes = {
|
|
|
3168
3145
|
},
|
|
3169
3146
|
"comment": {
|
|
3170
3147
|
"type": "comment",
|
|
3171
|
-
"named": true
|
|
3148
|
+
"named": true,
|
|
3149
|
+
"extra": true
|
|
3172
3150
|
},
|
|
3173
3151
|
"ellipsis": {
|
|
3174
3152
|
"type": "ellipsis",
|
|
@@ -3200,7 +3178,8 @@ type pythonTypes = {
|
|
|
3200
3178
|
},
|
|
3201
3179
|
"line_continuation": {
|
|
3202
3180
|
"type": "line_continuation",
|
|
3203
|
-
"named": true
|
|
3181
|
+
"named": true,
|
|
3182
|
+
"extra": true
|
|
3204
3183
|
},
|
|
3205
3184
|
"none": {
|
|
3206
3185
|
"type": "none",
|