tree-sitter-batch 0.10.1 → 0.11.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -3
- package/grammar.js +68 -26
- package/package.json +4 -2
- package/prebuilds/darwin-arm64/tree-sitter-batch.node +0 -0
- package/prebuilds/darwin-x64/tree-sitter-batch.node +0 -0
- package/prebuilds/linux-arm64/tree-sitter-batch.node +0 -0
- package/prebuilds/linux-x64/tree-sitter-batch.node +0 -0
- package/prebuilds/win32-arm64/tree-sitter-batch.node +0 -0
- package/prebuilds/win32-x64/tree-sitter-batch.node +0 -0
- package/src/grammar.json +503 -165
- package/src/node-types.json +58 -3
- package/src/parser.c +16430 -9380
- package/tree-sitter-batch.wasm +0 -0
- package/tree-sitter.json +1 -1
package/README.md
CHANGED
|
@@ -7,11 +7,14 @@ Parses `.bat` and `.cmd` files into a concrete syntax tree for syntax highlighti
|
|
|
7
7
|
## Features
|
|
8
8
|
|
|
9
9
|
- **Control flow** — `IF`/`ELSE` (EXIST, DEFINED, ERRORLEVEL, comparison with NOT), `FOR` (/D /R /L /F), `GOTO`, `CALL`, `EXIT /B`
|
|
10
|
-
- **Variables** — `SET` (plain, `/A` arithmetic, `/P` prompt), `%VAR%`, `!VAR!`, `%%i`, `%~dp0`, `%VAR:old=new%`, escaped forms `%%VAR%%` `%%%%i`
|
|
10
|
+
- **Variables** — `SET` (plain, `/A` arithmetic, `/P` prompt, display-only without `=`), `%VAR%`, `!VAR!`, `%%i`, `%~dp0`, `%VAR:old=new%`, subscripted delayed expansion `!ARR[%%i]!`, for-variable modifiers `%%~dpnxf` `%%~zS` `%%~$PATH:F`, escaped forms `%%VAR%%` `%%%%i`
|
|
11
11
|
- **Echo** — free-form text with literal `(` `)` `!` `%`, inline strings, and variable references
|
|
12
|
-
- **Operators** — pipes `|`, redirects `>` `>>` `2>` `2>&1
|
|
13
|
-
- **
|
|
12
|
+
- **Operators** — pipes `|`, redirects `>` `>>` `<` `2>` `2>&1` (fds 0-9, including variable handles `>&%FD%` / `<&%FD%`), conditional `&&` `||`, separator `&`
|
|
13
|
+
- **Line continuation** — trailing caret `^` joins the current line with the next (e.g. `"%JAVACMD%" ^` followed by indented arguments)
|
|
14
|
+
- **Commands** — bare names, variable references as command, and quoted paths (`"C:\path\app.exe" args`, `call "%SCRIPT%" %*`)
|
|
15
|
+
- **Structure** — labels `:name`, comments `REM` `::`, parenthesized blocks (including `@(...)`), `@ECHO OFF`, macro invocations, DosTips idioms `(call,)` / `(call;)` / `(call)` for ERRORLEVEL manipulation
|
|
14
16
|
- **Scope** — `SETLOCAL`/`ENDLOCAL` with `ENABLEDELAYEDEXPANSION`
|
|
17
|
+
- **Polyglot headers** — tolerates batch/PowerShell `<# ... #>` header lines and batch/VBScript lines marked with a trailing `'VBS` so SysToolsLib-style dual-language scripts parse cleanly
|
|
15
18
|
- **Case-insensitive** — all keywords match regardless of casing
|
|
16
19
|
|
|
17
20
|
## Example
|
package/grammar.js
CHANGED
|
@@ -1,16 +1,21 @@
|
|
|
1
|
-
|
|
1
|
+
/// <reference types="tree-sitter-cli/dsl"/>
|
|
2
|
+
// @ts-check
|
|
3
|
+
|
|
4
|
+
/** @param {string} word */
|
|
5
|
+
const ci = (word) => {
  // Build the pattern one character at a time: every ASCII letter becomes a
  // two-member class (`a` -> `[aA]`); any other character is copied verbatim,
  // so regex metacharacters in `word` keep their regex meaning.
  let pattern = '';
  for (const ch of word.split('')) {
    pattern += /[a-zA-Z]/.test(ch) ? `[${ch.toLowerCase()}${ch.toUpperCase()}]` : ch;
  }
  return new RegExp(pattern);
};
|
|
6
|
+
/** @param {string} word */
|
|
2
7
|
const kw = (word) => {
  // Case-insensitive pattern for the keyword, wrapped as a single token with
  // lexical precedence 10.
  const pattern = ci(word);
  return token(prec(10, pattern));
};
|
|
3
8
|
const varRefChoice = () => choice(
|
|
4
9
|
seq('%%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%%'),
|
|
5
|
-
seq('%', /[
|
|
10
|
+
seq('%', /[$@+\-a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%'),
|
|
6
11
|
seq('%~', /[a-zA-Z]*/, /[0-9]/),
|
|
7
12
|
seq('%', /[0-9]/),
|
|
8
13
|
'%*',
|
|
9
|
-
seq('%%%%%%', optional('~'), /[a-zA-Z0-9]/),
|
|
10
|
-
seq('%%%%', optional('~'), /[a-zA-Z0-9]/),
|
|
11
|
-
seq('%%', optional('~'
|
|
14
|
+
seq('%%%%%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
|
|
15
|
+
seq('%%%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
|
|
16
|
+
seq('%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
|
|
12
17
|
seq('%%', /[0-9]/),
|
|
13
|
-
seq('!', /[$%a-zA-Z_][
|
|
18
|
+
seq('!', /[$%a-zA-Z_][$%a-zA-Z0-9_.#]*(?:\[[^!\r\n\]]*\])?/, '!'),
|
|
14
19
|
seq('%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, ':', /[^%\r\n]+/, '%'),
|
|
15
20
|
seq('%', /[^%=\s\r\n]/, '%'),
|
|
16
21
|
seq('!', /[%$a-zA-Z_][%$a-zA-Z0-9_.#()\[\]]*/, /:[^!\r\n]+/, '!'),
|
|
@@ -18,10 +23,11 @@ const varRefChoice = () => choice(
|
|
|
18
23
|
seq('%', /\\[@a-zA-Z_0-9.]+/, '%'),
|
|
19
24
|
seq('%', /"[^"%\r\n]+"/, '%'),
|
|
20
25
|
);
|
|
26
|
+
/** @param {GrammarSymbols<string>} $ */
|
|
21
27
|
const operand = ($) => [
|
|
22
28
|
$.cond_exec, $.pipe_stmt, $.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized,
|
|
23
29
|
$.variable_assignment, $.goto_stmt, $.exit_stmt, $.setlocal_stmt, $.endlocal_stmt,
|
|
24
|
-
$.if_stmt, $.for_stmt, $.macro_invocation,
|
|
30
|
+
$.if_stmt, $.for_stmt, $.macro_invocation, $.echo_off,
|
|
25
31
|
];
|
|
26
32
|
|
|
27
33
|
export default grammar({
|
|
@@ -45,6 +51,24 @@ export default grammar({
|
|
|
45
51
|
seq(optional('@'), /[rR][eE][mM]/, optional(seq(/[ \t]/, /[^\r\n]*/))),
|
|
46
52
|
seq(':', /[^$a-zA-Z_\r\n]/, /[^\r\n]*/),
|
|
47
53
|
seq('%#', /[^\r\n]*/, '#%'),
|
|
54
|
+
// Polyglot batch/PowerShell header line: `<# ...` is a PS block-comment open
|
|
55
|
+
// that batch interprets as a (failing) redirect; we treat it as a comment.
|
|
56
|
+
seq('<#', /[^\r\n]*/),
|
|
57
|
+
// Batch+VBScript polyglot: lines containing a whitespace-prefixed `'VBS`
|
|
58
|
+
// marker are extracted by the script itself as VBScript; the batch
|
|
59
|
+
// interpreter never runs them. Consume the rest of the line after the
|
|
60
|
+
// marker so any trailing VBScript tokens don't leak into the batch parse.
|
|
61
|
+
// Two branches are needed:
|
|
62
|
+
// - lines with code before the marker: `...content... [ \t]'VBS rest`
|
|
63
|
+
// (covers e.g. `Set foo = ... 'VBS`);
|
|
64
|
+
// - the blank-separator VBS line that is whitespace-only before `'VBS`
|
|
65
|
+
// — `extras` consumes the leading whitespace, so the token starts
|
|
66
|
+
// directly at `'VBS` (covers CheckEOL.bat-style polyglot separators).
|
|
67
|
+
// The first branch's leading char class must exclude ` ` and `\t` —
|
|
68
|
+
// otherwise this high-precedence branch starts tokens inside leading
|
|
69
|
+
// whitespace and bleeds into `command_name`'s span on indented lines.
|
|
70
|
+
seq(/[^ \t'\r\n][^\r\n]*[ \t]'VBS/, /[^\r\n]*/),
|
|
71
|
+
seq('\'VBS', /[^\r\n]*/),
|
|
48
72
|
))),
|
|
49
73
|
// Label line lexed as one token: ':' followed by a name that starts with '$',
// a letter or '_' (then letters, digits, '$', '_', '.', '#', '-'), optionally
// followed by one space/tab and any text up to the end of the line.
label: () => token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/, optional(seq(/[ \t]/, /[^\r\n]*/)))),
|
|
50
74
|
variable_assignment: ($) => prec(8, seq(
|
|
@@ -55,9 +79,9 @@ export default grammar({
|
|
|
55
79
|
seq(
|
|
56
80
|
/[ \t]+/,
|
|
57
81
|
choice(
|
|
58
|
-
seq('"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), '=', optional($.quoted_assignment_value), '"', optional($.argument_list)),
|
|
59
|
-
seq('^"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), '=', optional($.caret_quoted_assignment_value), optional('^"')),
|
|
60
|
-
seq(choice($.variable_reference, alias($._var_name_pattern, $.variable_name)), '=', optional($.assignment_value)),
|
|
82
|
+
seq('"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), optional(seq('=', optional($.quoted_assignment_value))), '"', optional($.argument_list)),
|
|
83
|
+
seq(token(prec(2, '^"')), repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), optional(seq('=', optional($.caret_quoted_assignment_value))), optional(token(prec(2, '^"')))),
|
|
84
|
+
seq(repeat1(choice($.variable_reference, alias($._var_name_pattern, $.variable_name))), optional(seq('=', optional($.assignment_value)))),
|
|
61
85
|
),
|
|
62
86
|
),
|
|
63
87
|
),
|
|
@@ -71,7 +95,7 @@ export default grammar({
|
|
|
71
95
|
alias('^', $.assignment_literal),
|
|
72
96
|
))),
|
|
73
97
|
_var_name_pattern: () => token(choice(
|
|
74
|
-
/[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/,
|
|
98
|
+
/[$@a-zA-Z_.][$@a-zA-Z0-9_.#()\[\]]*/,
|
|
75
99
|
/\/[@a-zA-Z_][@a-zA-Z0-9_.#()\[\]]+/,
|
|
76
100
|
)),
|
|
77
101
|
_quoted_var_name_pattern: () => token(prec(1, choice(/[^\s="%][^="%]*/, /%%[a-zA-Z]?/, /"[^="\r\n]+"/))),
|
|
@@ -82,7 +106,10 @@ export default grammar({
|
|
|
82
106
|
prompt_assignment: ($) => seq(
|
|
83
107
|
optional(/[ \t]+/), alias(ci('/p'), $.set_option),
|
|
84
108
|
optional(/[ \t]+/),
|
|
85
|
-
|
|
109
|
+
choice(
|
|
110
|
+
seq(optional(alias($._var_name_pattern, $.variable_name)), '=', optional($.assignment_value)),
|
|
111
|
+
seq('"', optional(alias($._var_name_pattern, $.variable_name)), '=', optional($.quoted_assignment_value), '"', optional($.argument_list)),
|
|
112
|
+
),
|
|
86
113
|
),
|
|
87
114
|
arithmetic_expression: () => token(choice(
|
|
88
115
|
seq('"', /[^"\r\n]*/, '"'),
|
|
@@ -94,6 +121,7 @@ export default grammar({
|
|
|
94
121
|
alias(/[^%!()\r\n]+/, $.assignment_literal),
|
|
95
122
|
alias('%', $.assignment_literal),
|
|
96
123
|
alias('!', $.assignment_literal),
|
|
124
|
+
$._line_continuation,
|
|
97
125
|
))),
|
|
98
126
|
assignment_paren_group: ($) => seq('(', repeat(choice(
|
|
99
127
|
$.variable_reference,
|
|
@@ -140,7 +168,8 @@ export default grammar({
|
|
|
140
168
|
$.redirect_stmt, $.pipe_stmt, $.comment,
|
|
141
169
|
),
|
|
142
170
|
_if_operand: ($) => choice(
|
|
143
|
-
$.string,
|
|
171
|
+
prec.right(repeat1($.string)),
|
|
172
|
+
$.bracketed_value, $.paren_expression,
|
|
144
173
|
prec.right(seq(
|
|
145
174
|
choice($.variable_reference, alias($._if_word, $.argument_value), $.integer),
|
|
146
175
|
repeat(choice(
|
|
@@ -162,8 +191,14 @@ export default grammar({
|
|
|
162
191
|
)),
|
|
163
192
|
call_stmt: ($) => prec(8, seq(
|
|
164
193
|
optional('@'), kw('call'),
|
|
165
|
-
choice(
|
|
166
|
-
|
|
194
|
+
optional(choice(
|
|
195
|
+
token.immediate(','),
|
|
196
|
+
token.immediate(';'),
|
|
197
|
+
seq(
|
|
198
|
+
choice(token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/)), $.command_name, $.variable_reference, $.string),
|
|
199
|
+
optional($.argument_list),
|
|
200
|
+
),
|
|
201
|
+
)),
|
|
167
202
|
)),
|
|
168
203
|
exit_stmt: ($) => prec(8, seq(
|
|
169
204
|
optional('@'), kw('exit'),
|
|
@@ -188,7 +223,7 @@ export default grammar({
|
|
|
188
223
|
choice($.parenthesized, $._body_stmt),
|
|
189
224
|
)),
|
|
190
225
|
for_options: () => token(prec(10, choice(ci('/d'), seq(ci('/r'), optional(seq(/[ \t]+/, /(%[^\s%]|[^\s%])+%?/))), ci('/l'), seq(ci('/f'), optional(seq(/[ \t]+/, '"', /[^"]*/, '"')))))),
|
|
191
|
-
for_variable: () => token(seq('%%',
|
|
226
|
+
// Single token: '%%' followed by exactly one letter, digit, or '.'.
for_variable: () => token(seq('%%', /[a-zA-Z0-9.]/)),
|
|
192
227
|
for_set: ($) => prec.right(repeat1(choice(
|
|
193
228
|
$.variable_reference,
|
|
194
229
|
$.for_set_group,
|
|
@@ -204,18 +239,21 @@ export default grammar({
|
|
|
204
239
|
alias('%', $.for_set_literal),
|
|
205
240
|
alias('!', $.for_set_literal),
|
|
206
241
|
)), ')'),
|
|
207
|
-
parenthesized: ($) => seq('(', repeat(choice(seq($._stmt, /\r?\n/), /\r?\n/)), optional($._stmt), ')'),
|
|
242
|
+
// Parenthesized block, optionally prefixed with '@'. The body is any number of
// newline-terminated statements or bare newlines, followed by an optional
// final statement that does not need a newline before the closing ')'.
parenthesized: ($) => seq(optional('@'), '(', repeat(choice(seq($._stmt, /\r?\n/), /\r?\n/)), optional($._stmt), ')'),
|
|
208
243
|
redirect_stmt: ($) => prec.right(4, choice(
|
|
209
|
-
seq(choice($.call_stmt, $.cmd, $.parenthesized), $.redirection),
|
|
210
|
-
seq($.redirection, choice($.call_stmt, $.cmd, $.parenthesized)),
|
|
244
|
+
seq(choice($.call_stmt, $.cmd, $.parenthesized, $.variable_assignment), $.redirection),
|
|
245
|
+
seq($.redirection, choice($.call_stmt, $.cmd, $.parenthesized, $.variable_assignment)),
|
|
211
246
|
)),
|
|
212
247
|
redirection: ($) => {
|
|
213
|
-
const file_redir = seq(
|
|
248
|
+
const file_redir = seq($.redirect_op, $.redirect_target);
|
|
214
249
|
const one_redir = choice(file_redir, $.fd_redirect);
|
|
215
250
|
return prec.right(repeat1(one_redir));
|
|
216
251
|
},
|
|
217
|
-
fd_redirect: () =>
|
|
218
|
-
|
|
252
|
+
fd_redirect: ($) => choice(
|
|
253
|
+
token(seq(optional(/[0-9]/), />&|<&/, /[0-9]/)),
|
|
254
|
+
seq(alias(token(seq(optional(/[0-9]/), />&|<&/)), $.fd_redirect_op), $.variable_reference),
|
|
255
|
+
),
|
|
256
|
+
// File-redirect operator token: '>>', '>' or '<', each optionally preceded by
// a single digit (presumably the handle number — confirm against cmd.exe docs).
redirect_op: () => token(choice(/[0-9]?>>/, /[0-9]?>/, /[0-9]?</)),
|
|
219
257
|
// Redirect target token: 'nul' or 'con' in any letter case, or any run of
// characters other than whitespace, '|', '&', '>' and '<'.
redirect_target: () => token(choice(ci('nul'), ci('con'), /[^\s|&><\r\n]+/)),
|
|
220
258
|
pipe_stmt: ($) => prec.left(3, seq(choice($.pipe_stmt, $.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized), '|', choice($.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized))),
|
|
221
259
|
cond_exec: ($) => choice(
|
|
@@ -233,21 +271,25 @@ export default grammar({
|
|
|
233
271
|
// '[' ... ']' containing any mix of variable references and literal runs.
// A literal run excludes '%', '!', '[', ']' and line breaks, and is exposed in
// the tree as `bracketed_literal`.
bracketed_value: ($) => seq('[', repeat(choice($.variable_reference, alias(token(/[^%!\[\]\r\n]+/), $.bracketed_literal))), ']'),
|
|
234
272
|
cmd: ($) => prec.right(5, choice(
|
|
235
273
|
seq(optional('@'), alias(kw('echo'), $.command_name), optional(alias($._echo_text, $.argument_list))),
|
|
236
|
-
seq(optional('@'), choice($.command_name, $.variable_reference), optional($.argument_list)),
|
|
274
|
+
seq(optional('@'), choice($.command_name, $.variable_reference, $.string), optional($.argument_list)),
|
|
237
275
|
)),
|
|
238
276
|
_echo_text: ($) => prec.right(repeat1(choice(
|
|
239
277
|
$.variable_reference,
|
|
240
278
|
$.string,
|
|
241
279
|
alias($._echo_literal, $.argument_value),
|
|
280
|
+
$._line_continuation,
|
|
242
281
|
))),
|
|
282
|
+
// Trailing `^` escapes the following newline, joining the next physical
|
|
283
|
+
// line onto the current logical one. Treated as whitespace between tokens.
|
|
284
|
+
_line_continuation: () => token(/\^\r?\n[ \t]*/),
|
|
243
285
|
_echo_literal: () => token(/(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])+|[()!%]/),
|
|
244
286
|
// A variable reference followed by a parenthesized block and an optional else
// clause; declared right-associative with precedence 6.
macro_invocation: ($) => prec.right(6, seq($.variable_reference, $.parenthesized, optional($.else_clause))),
|
|
245
|
-
command_name: () => /[$a-zA-
|
|
287
|
+
// Bare command name: first character is '$', a letter, '_' or a digit; the
// rest may also include '.', '#' and '-'.
command_name: () => /[$a-zA-Z_0-9][$a-zA-Z0-9_.#-]*/,
|
|
246
288
|
argument_list: ($) => prec.right(repeat1($._arg)),
|
|
247
|
-
_arg: ($) => choice($.string, $.variable_reference, $.command_option, $.paren_expression, $.argument_value),
|
|
289
|
+
_arg: ($) => choice($.string, $.variable_reference, $.command_option, $.paren_expression, $.argument_value, $._line_continuation),
|
|
248
290
|
// Slash-style option lexed as one token: '/' + a letter, '_' or '?', then any
// number of letters, digits, '_' or ':' (e.g. `/B`, `/?`).
command_option: () => token(seq('/', /[a-zA-Z_?][a-zA-Z0-9_:]*/)),
|
|
249
291
|
paren_expression: ($) => seq('(', repeat($._arg), ')'),
|
|
250
|
-
argument_value: () => /(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])(?:\^[&|<>^()]|[^\s|&><"\r\n()])
|
|
292
|
+
argument_value: () => /(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])(?:\^[&|<>^()]|[^\s|&><"\r\n()])*|[!%]/,
|
|
251
293
|
// One or more ASCII decimal digits (no sign, no radix prefix).
integer: () => /[0-9]+/,
|
|
252
294
|
},
|
|
253
295
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "tree-sitter-batch",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.11.1",
|
|
4
4
|
"description": "A Windows Batch/CMD grammar for tree-sitter",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"repository": {
|
|
@@ -39,7 +39,8 @@
|
|
|
39
39
|
"globals": "^17.5.0",
|
|
40
40
|
"prebuildify": "^6.0.1",
|
|
41
41
|
"tree-sitter-cli": "0.26.8",
|
|
42
|
-
"tree-sitter-go-types": "^0.1.0"
|
|
42
|
+
"tree-sitter-go-types": "^0.1.0",
|
|
43
|
+
"typescript": "^6.0.3"
|
|
43
44
|
},
|
|
44
45
|
"overrides": {
|
|
45
46
|
"eslint-plugin-jsdoc": "^62.9.0"
|
|
@@ -58,6 +59,7 @@
|
|
|
58
59
|
"start": "tree-sitter playground",
|
|
59
60
|
"generate": "tree-sitter generate --abi 14 && tree-sitter-go-types",
|
|
60
61
|
"lint": "eslint grammar.js",
|
|
62
|
+
"typecheck": "tsc --noEmit -p tsconfig.json",
|
|
61
63
|
"test": "node --test bindings/node/*_test.js"
|
|
62
64
|
}
|
|
63
65
|
}
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
Binary file
|