tree-sitter-batch 0.10.1 → 0.11.1

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,11 +7,14 @@ Parses `.bat` and `.cmd` files into a concrete syntax tree for syntax highlighti
7
7
  ## Features
8
8
 
9
9
  - **Control flow** — `IF`/`ELSE` (EXIST, DEFINED, ERRORLEVEL, comparison with NOT), `FOR` (/D /R /L /F), `GOTO`, `CALL`, `EXIT /B`
10
- - **Variables** — `SET` (plain, `/A` arithmetic, `/P` prompt), `%VAR%`, `!VAR!`, `%%i`, `%~dp0`, `%VAR:old=new%`, escaped forms `%%VAR%%` `%%%%i`
10
+ - **Variables** — `SET` (plain, `/A` arithmetic, `/P` prompt, display-only without `=`), `%VAR%`, `!VAR!`, `%%i`, `%~dp0`, `%VAR:old=new%`, subscripted delayed expansion `!ARR[%%i]!`, for-variable modifiers `%%~dpnxf` `%%~zS` `%%~$PATH:F`, escaped forms `%%VAR%%` `%%%%i`
11
11
  - **Echo** — free-form text with literal `(` `)` `!` `%`, inline strings, and variable references
12
- - **Operators** — pipes `|`, redirects `>` `>>` `2>` `2>&1`, conditional `&&` `||`, separator `&`
13
- - **Structure** — labels `:name`, comments `REM` `::`, parenthesized blocks, `@ECHO OFF`, macro invocations
12
+ - **Operators** &mdash; pipes `|`, redirects `>` `>>` `<` `2>` `2>&1` (fds 0-9, including variable handles `>&%FD%` / `<&%FD%`), conditional `&&` `||`, separator `&`
13
+ - **Line continuation** &mdash; trailing caret `^` joins the current line with the next (e.g. `"%JAVACMD%" ^` followed by indented arguments)
14
+ - **Commands** &mdash; bare names, variable references as command, and quoted paths (`"C:\path\app.exe" args`, `call "%SCRIPT%" %*`)
15
+ - **Structure** &mdash; labels `:name`, comments `REM` `::`, parenthesized blocks (including `@(...)`), `@ECHO OFF`, macro invocations, DosTips idioms `(call,)` / `(call;)` / `(call)` for ERRORLEVEL manipulation
14
16
  - **Scope** &mdash; `SETLOCAL`/`ENDLOCAL` with `ENABLEDELAYEDEXPANSION`
17
+ - **Polyglot headers** &mdash; tolerates batch/PowerShell `<# ... #>` header lines and batch/VBScript lines marked with a trailing `'VBS` so SysToolsLib-style dual-language scripts parse cleanly
15
18
  - **Case-insensitive** &mdash; all keywords match regardless of casing
16
19
 
17
20
  ## Example
package/grammar.js CHANGED
@@ -1,16 +1,21 @@
1
- const ci = (word) => new RegExp(word.split('').map((c) => /[a-zA-Z]/.test(c) ? `[${c.toLowerCase()}${c.toUpperCase()}]` : c).join(''));
1
+ /// <reference types="tree-sitter-cli/dsl"/>
2
+ // @ts-check
3
+
4
+ /** @param {string} word */
5
+ const ci = (word) => new RegExp(word.split('').map((/** @type {string} */ c) => /[a-zA-Z]/.test(c) ? `[${c.toLowerCase()}${c.toUpperCase()}]` : c).join(''));
6
+ /** @param {string} word */
2
7
  const kw = (word) => token(prec(10, ci(word)));
3
8
  const varRefChoice = () => choice(
4
9
  seq('%%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%%'),
5
- seq('%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%'),
10
+ seq('%', /[$@+\-a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%'),
6
11
  seq('%~', /[a-zA-Z]*/, /[0-9]/),
7
12
  seq('%', /[0-9]/),
8
13
  '%*',
9
- seq('%%%%%%', optional('~'), /[a-zA-Z0-9]/),
10
- seq('%%%%', optional('~'), /[a-zA-Z0-9]/),
11
- seq('%%', optional('~'), /[a-zA-Z]/),
14
+ seq('%%%%%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
15
+ seq('%%%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
16
+ seq('%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
12
17
  seq('%%', /[0-9]/),
13
- seq('!', /[$%a-zA-Z_][$a-zA-Z0-9_.#]*/, '!'),
18
+ seq('!', /[$%a-zA-Z_][$%a-zA-Z0-9_.#]*(?:\[[^!\r\n\]]*\])?/, '!'),
14
19
  seq('%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, ':', /[^%\r\n]+/, '%'),
15
20
  seq('%', /[^%=\s\r\n]/, '%'),
16
21
  seq('!', /[%$a-zA-Z_][%$a-zA-Z0-9_.#()\[\]]*/, /:[^!\r\n]+/, '!'),
@@ -18,10 +23,11 @@ const varRefChoice = () => choice(
18
23
  seq('%', /\\[@a-zA-Z_0-9.]+/, '%'),
19
24
  seq('%', /"[^"%\r\n]+"/, '%'),
20
25
  );
26
+ /** @param {GrammarSymbols<string>} $ */
21
27
  const operand = ($) => [
22
28
  $.cond_exec, $.pipe_stmt, $.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized,
23
29
  $.variable_assignment, $.goto_stmt, $.exit_stmt, $.setlocal_stmt, $.endlocal_stmt,
24
- $.if_stmt, $.for_stmt, $.macro_invocation,
30
+ $.if_stmt, $.for_stmt, $.macro_invocation, $.echo_off,
25
31
  ];
26
32
 
27
33
  export default grammar({
@@ -45,6 +51,24 @@ export default grammar({
45
51
  seq(optional('@'), /[rR][eE][mM]/, optional(seq(/[ \t]/, /[^\r\n]*/))),
46
52
  seq(':', /[^$a-zA-Z_\r\n]/, /[^\r\n]*/),
47
53
  seq('%#', /[^\r\n]*/, '#%'),
54
+ // Polyglot batch/PowerShell header line: `<# ...` opens a PowerShell block
55
+ // comment, which batch interprets as a (failing) redirect; we treat the rest of the line as a comment.
56
+ seq('<#', /[^\r\n]*/),
57
+ // Batch+VBScript polyglot: lines containing a whitespace-prefixed `'VBS`
58
+ // marker are extracted by the script itself as VBScript; the batch
59
+ // interpreter never runs them. Consume the rest of the line after the
60
+ // marker so any trailing VBScript tokens don't leak into the batch parse.
61
+ // Two branches are needed:
62
+ // - lines with code before the marker: `...content... [ \t]'VBS rest`
63
+ // (covers e.g. `Set foo = ... 'VBS`);
64
+ // - the blank-separator VBS line that is whitespace-only before `'VBS`
65
+ // — `extras` consumes the leading whitespace, so the token starts
66
+ // directly at `'VBS` (covers CheckEOL.bat-style polyglot separators).
67
+ // The first branch's leading char class must exclude ` ` and `\t` —
68
+ // otherwise this high-precedence branch starts tokens inside leading
69
+ // whitespace and bleeds into `command_name`'s span on indented lines.
70
+ seq(/[^ \t'\r\n][^\r\n]*[ \t]'VBS/, /[^\r\n]*/),
71
+ seq('\'VBS', /[^\r\n]*/),
48
72
  ))),
49
73
  label: () => token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/, optional(seq(/[ \t]/, /[^\r\n]*/)))),
50
74
  variable_assignment: ($) => prec(8, seq(
@@ -55,9 +79,9 @@ export default grammar({
55
79
  seq(
56
80
  /[ \t]+/,
57
81
  choice(
58
- seq('"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), '=', optional($.quoted_assignment_value), '"', optional($.argument_list)),
59
- seq('^"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), '=', optional($.caret_quoted_assignment_value), optional('^"')),
60
- seq(choice($.variable_reference, alias($._var_name_pattern, $.variable_name)), '=', optional($.assignment_value)),
82
+ seq('"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), optional(seq('=', optional($.quoted_assignment_value))), '"', optional($.argument_list)),
83
+ seq(token(prec(2, '^"')), repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), optional(seq('=', optional($.caret_quoted_assignment_value))), optional(token(prec(2, '^"')))),
84
+ seq(repeat1(choice($.variable_reference, alias($._var_name_pattern, $.variable_name))), optional(seq('=', optional($.assignment_value)))),
61
85
  ),
62
86
  ),
63
87
  ),
@@ -71,7 +95,7 @@ export default grammar({
71
95
  alias('^', $.assignment_literal),
72
96
  ))),
73
97
  _var_name_pattern: () => token(choice(
74
- /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/,
98
+ /[$@a-zA-Z_.][$@a-zA-Z0-9_.#()\[\]]*/,
75
99
  /\/[@a-zA-Z_][@a-zA-Z0-9_.#()\[\]]+/,
76
100
  )),
77
101
  _quoted_var_name_pattern: () => token(prec(1, choice(/[^\s="%][^="%]*/, /%%[a-zA-Z]?/, /"[^="\r\n]+"/))),
@@ -82,7 +106,10 @@ export default grammar({
82
106
  prompt_assignment: ($) => seq(
83
107
  optional(/[ \t]+/), alias(ci('/p'), $.set_option),
84
108
  optional(/[ \t]+/),
85
- alias(/[@a-zA-Z_][@a-zA-Z0-9_()\[\]]*/, $.variable_name), '=', optional($.assignment_value),
109
+ choice(
110
+ seq(optional(alias($._var_name_pattern, $.variable_name)), '=', optional($.assignment_value)),
111
+ seq('"', optional(alias($._var_name_pattern, $.variable_name)), '=', optional($.quoted_assignment_value), '"', optional($.argument_list)),
112
+ ),
86
113
  ),
87
114
  arithmetic_expression: () => token(choice(
88
115
  seq('"', /[^"\r\n]*/, '"'),
@@ -94,6 +121,7 @@ export default grammar({
94
121
  alias(/[^%!()\r\n]+/, $.assignment_literal),
95
122
  alias('%', $.assignment_literal),
96
123
  alias('!', $.assignment_literal),
124
+ $._line_continuation,
97
125
  ))),
98
126
  assignment_paren_group: ($) => seq('(', repeat(choice(
99
127
  $.variable_reference,
@@ -140,7 +168,8 @@ export default grammar({
140
168
  $.redirect_stmt, $.pipe_stmt, $.comment,
141
169
  ),
142
170
  _if_operand: ($) => choice(
143
- $.string, $.bracketed_value, $.paren_expression,
171
+ prec.right(repeat1($.string)),
172
+ $.bracketed_value, $.paren_expression,
144
173
  prec.right(seq(
145
174
  choice($.variable_reference, alias($._if_word, $.argument_value), $.integer),
146
175
  repeat(choice(
@@ -162,8 +191,14 @@ export default grammar({
162
191
  )),
163
192
  call_stmt: ($) => prec(8, seq(
164
193
  optional('@'), kw('call'),
165
- choice(token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/)), $.command_name, $.variable_reference),
166
- optional($.argument_list),
194
+ optional(choice(
195
+ token.immediate(','),
196
+ token.immediate(';'),
197
+ seq(
198
+ choice(token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/)), $.command_name, $.variable_reference, $.string),
199
+ optional($.argument_list),
200
+ ),
201
+ )),
167
202
  )),
168
203
  exit_stmt: ($) => prec(8, seq(
169
204
  optional('@'), kw('exit'),
@@ -188,7 +223,7 @@ export default grammar({
188
223
  choice($.parenthesized, $._body_stmt),
189
224
  )),
190
225
  for_options: () => token(prec(10, choice(ci('/d'), seq(ci('/r'), optional(seq(/[ \t]+/, /(%[^\s%]|[^\s%])+%?/))), ci('/l'), seq(ci('/f'), optional(seq(/[ \t]+/, '"', /[^"]*/, '"')))))),
191
- for_variable: () => token(seq('%%', optional('~'), /[a-zA-Z]/)),
226
+ for_variable: () => token(seq('%%', /[a-zA-Z0-9.]/)),
192
227
  for_set: ($) => prec.right(repeat1(choice(
193
228
  $.variable_reference,
194
229
  $.for_set_group,
@@ -204,18 +239,21 @@ export default grammar({
204
239
  alias('%', $.for_set_literal),
205
240
  alias('!', $.for_set_literal),
206
241
  )), ')'),
207
- parenthesized: ($) => seq('(', repeat(choice(seq($._stmt, /\r?\n/), /\r?\n/)), optional($._stmt), ')'),
242
+ parenthesized: ($) => seq(optional('@'), '(', repeat(choice(seq($._stmt, /\r?\n/), /\r?\n/)), optional($._stmt), ')'),
208
243
  redirect_stmt: ($) => prec.right(4, choice(
209
- seq(choice($.call_stmt, $.cmd, $.parenthesized), $.redirection),
210
- seq($.redirection, choice($.call_stmt, $.cmd, $.parenthesized)),
244
+ seq(choice($.call_stmt, $.cmd, $.parenthesized, $.variable_assignment), $.redirection),
245
+ seq($.redirection, choice($.call_stmt, $.cmd, $.parenthesized, $.variable_assignment)),
211
246
  )),
212
247
  redirection: ($) => {
213
- const file_redir = seq(optional(/[0-2]/), $.redirect_op, $.redirect_target);
248
+ const file_redir = seq($.redirect_op, $.redirect_target);
214
249
  const one_redir = choice(file_redir, $.fd_redirect);
215
250
  return prec.right(repeat1(one_redir));
216
251
  },
217
- fd_redirect: () => token(seq(optional(/[0-2]/), '>&', /[0-9]/)),
218
- redirect_op: () => token(choice('2>>', '2>', '>>', '>', '<')),
252
+ fd_redirect: ($) => choice(
253
+ token(seq(optional(/[0-9]/), />&|<&/, /[0-9]/)),
254
+ seq(alias(token(seq(optional(/[0-9]/), />&|<&/)), $.fd_redirect_op), $.variable_reference),
255
+ ),
256
+ redirect_op: () => token(choice(/[0-9]?>>/, /[0-9]?>/, /[0-9]?</)),
219
257
  redirect_target: () => token(choice(ci('nul'), ci('con'), /[^\s|&><\r\n]+/)),
220
258
  pipe_stmt: ($) => prec.left(3, seq(choice($.pipe_stmt, $.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized), '|', choice($.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized))),
221
259
  cond_exec: ($) => choice(
@@ -233,21 +271,25 @@ export default grammar({
233
271
  bracketed_value: ($) => seq('[', repeat(choice($.variable_reference, alias(token(/[^%!\[\]\r\n]+/), $.bracketed_literal))), ']'),
234
272
  cmd: ($) => prec.right(5, choice(
235
273
  seq(optional('@'), alias(kw('echo'), $.command_name), optional(alias($._echo_text, $.argument_list))),
236
- seq(optional('@'), choice($.command_name, $.variable_reference), optional($.argument_list)),
274
+ seq(optional('@'), choice($.command_name, $.variable_reference, $.string), optional($.argument_list)),
237
275
  )),
238
276
  _echo_text: ($) => prec.right(repeat1(choice(
239
277
  $.variable_reference,
240
278
  $.string,
241
279
  alias($._echo_literal, $.argument_value),
280
+ $._line_continuation,
242
281
  ))),
282
+ // Trailing `^` escapes the following newline, joining the next physical line
283
+ // onto the current logical one; the token also consumes that line's leading spaces/tabs. Treated as whitespace between tokens.
284
+ _line_continuation: () => token(/\^\r?\n[ \t]*/),
243
285
  _echo_literal: () => token(/(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])+|[()!%]/),
244
286
  macro_invocation: ($) => prec.right(6, seq($.variable_reference, $.parenthesized, optional($.else_clause))),
245
- command_name: () => /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/,
287
+ command_name: () => /[$a-zA-Z_0-9][$a-zA-Z0-9_.#-]*/,
246
288
  argument_list: ($) => prec.right(repeat1($._arg)),
247
- _arg: ($) => choice($.string, $.variable_reference, $.command_option, $.paren_expression, $.argument_value),
289
+ _arg: ($) => choice($.string, $.variable_reference, $.command_option, $.paren_expression, $.argument_value, $._line_continuation),
248
290
  command_option: () => token(seq('/', /[a-zA-Z_?][a-zA-Z0-9_:]*/)),
249
291
  paren_expression: ($) => seq('(', repeat($._arg), ')'),
250
- argument_value: () => /(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])(?:\^[&|<>^()]|[^\s|&><"\r\n()])*/,
292
+ argument_value: () => /(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])(?:\^[&|<>^()]|[^\s|&><"\r\n()])*|[!%]/,
251
293
  integer: () => /[0-9]+/,
252
294
  },
253
295
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tree-sitter-batch",
3
- "version": "0.10.1",
3
+ "version": "0.11.1",
4
4
  "description": "A Windows Batch/CMD grammar for tree-sitter",
5
5
  "type": "module",
6
6
  "repository": {
@@ -39,7 +39,8 @@
39
39
  "globals": "^17.5.0",
40
40
  "prebuildify": "^6.0.1",
41
41
  "tree-sitter-cli": "0.26.8",
42
- "tree-sitter-go-types": "^0.1.0"
42
+ "tree-sitter-go-types": "^0.1.0",
43
+ "typescript": "^6.0.3"
43
44
  },
44
45
  "overrides": {
45
46
  "eslint-plugin-jsdoc": "^62.9.0"
@@ -58,6 +59,7 @@
58
59
  "start": "tree-sitter playground",
59
60
  "generate": "tree-sitter generate --abi 14 && tree-sitter-go-types",
60
61
  "lint": "eslint grammar.js",
62
+ "typecheck": "tsc --noEmit -p tsconfig.json",
61
63
  "test": "node --test bindings/node/*_test.js"
62
64
  }
63
65
  }