tree-sitter-batch 0.10.1 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -7,11 +7,14 @@ Parses `.bat` and `.cmd` files into a concrete syntax tree for syntax highlighti
7
7
  ## Features
8
8
 
9
9
  - **Control flow** — `IF`/`ELSE` (EXIST, DEFINED, ERRORLEVEL, comparison with NOT), `FOR` (/D /R /L /F), `GOTO`, `CALL`, `EXIT /B`
10
- - **Variables** — `SET` (plain, `/A` arithmetic, `/P` prompt), `%VAR%`, `!VAR!`, `%%i`, `%~dp0`, `%VAR:old=new%`, escaped forms `%%VAR%%` `%%%%i`
10
+ - **Variables** — `SET` (plain, `/A` arithmetic, `/P` prompt, display-only without `=`), `%VAR%`, `!VAR!`, `%%i`, `%~dp0`, `%VAR:old=new%`, subscripted delayed expansion `!ARR[%%i]!`, for-variable modifiers `%%~dpnxf` `%%~zS` `%%~$PATH:F`, escaped forms `%%VAR%%` `%%%%i`
11
11
  - **Echo** — free-form text with literal `(` `)` `!` `%`, inline strings, and variable references
12
- - **Operators** — pipes `|`, redirects `>` `>>` `2>` `2>&1`, conditional `&&` `||`, separator `&`
13
- - **Structure** — labels `:name`, comments `REM` `::`, parenthesized blocks, `@ECHO OFF`, macro invocations
12
+ - **Operators** &mdash; pipes `|`, redirects `>` `>>` `<` `2>` `2>&1` (fds 0-9, including variable handles `>&%FD%` / `<&%FD%`), conditional `&&` `||`, separator `&`
13
+ - **Line continuation** &mdash; trailing caret `^` joins the current line with the next (e.g. `"%JAVACMD%" ^` followed by indented arguments)
14
+ - **Commands** &mdash; bare names, variable references as command, and quoted paths (`"C:\path\app.exe" args`, `call "%SCRIPT%" %*`)
15
+ - **Structure** &mdash; labels `:name`, comments `REM` `::`, parenthesized blocks (including `@(...)`), `@ECHO OFF`, macro invocations, DosTips idioms `(call,)` / `(call;)` / `(call)` for ERRORLEVEL manipulation
14
16
  - **Scope** &mdash; `SETLOCAL`/`ENDLOCAL` with `ENABLEDELAYEDEXPANSION`
17
+ - **Polyglot headers** &mdash; tolerates batch/PowerShell `<# ... #>` header lines and batch/VBScript lines marked with a trailing `'VBS` so SysToolsLib-style dual-language scripts parse cleanly
15
18
  - **Case-insensitive** &mdash; all keywords match regardless of casing
16
19
 
17
20
  ## Example
package/grammar.js CHANGED
@@ -1,16 +1,21 @@
1
- const ci = (word) => new RegExp(word.split('').map((c) => /[a-zA-Z]/.test(c) ? `[${c.toLowerCase()}${c.toUpperCase()}]` : c).join(''));
1
+ /// <reference types="tree-sitter-cli/dsl"/>
2
+ // @ts-check
3
+
4
+ /** @param {string} word */
5
+ const ci = (word) => new RegExp(word.split('').map((/** @type {string} */ c) => /[a-zA-Z]/.test(c) ? `[${c.toLowerCase()}${c.toUpperCase()}]` : c).join(''));
6
+ /** @param {string} word */
2
7
  const kw = (word) => token(prec(10, ci(word)));
3
8
  const varRefChoice = () => choice(
4
9
  seq('%%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%%'),
5
- seq('%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%'),
10
+ seq('%', /[$@+\-a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%'),
6
11
  seq('%~', /[a-zA-Z]*/, /[0-9]/),
7
12
  seq('%', /[0-9]/),
8
13
  '%*',
9
- seq('%%%%%%', optional('~'), /[a-zA-Z0-9]/),
10
- seq('%%%%', optional('~'), /[a-zA-Z0-9]/),
11
- seq('%%', optional('~'), /[a-zA-Z]/),
14
+ seq('%%%%%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
15
+ seq('%%%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
16
+ seq('%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
12
17
  seq('%%', /[0-9]/),
13
- seq('!', /[$%a-zA-Z_][$a-zA-Z0-9_.#]*/, '!'),
18
+ seq('!', /[$%a-zA-Z_][$%a-zA-Z0-9_.#]*(?:\[[^!\r\n\]]*\])?/, '!'),
14
19
  seq('%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, ':', /[^%\r\n]+/, '%'),
15
20
  seq('%', /[^%=\s\r\n]/, '%'),
16
21
  seq('!', /[%$a-zA-Z_][%$a-zA-Z0-9_.#()\[\]]*/, /:[^!\r\n]+/, '!'),
@@ -18,10 +23,11 @@ const varRefChoice = () => choice(
18
23
  seq('%', /\\[@a-zA-Z_0-9.]+/, '%'),
19
24
  seq('%', /"[^"%\r\n]+"/, '%'),
20
25
  );
26
+ /** @param {GrammarSymbols<string>} $ */
21
27
  const operand = ($) => [
22
28
  $.cond_exec, $.pipe_stmt, $.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized,
23
29
  $.variable_assignment, $.goto_stmt, $.exit_stmt, $.setlocal_stmt, $.endlocal_stmt,
24
- $.if_stmt, $.for_stmt, $.macro_invocation,
30
+ $.if_stmt, $.for_stmt, $.macro_invocation, $.echo_off,
25
31
  ];
26
32
 
27
33
  export default grammar({
@@ -45,6 +51,14 @@ export default grammar({
45
51
  seq(optional('@'), /[rR][eE][mM]/, optional(seq(/[ \t]/, /[^\r\n]*/))),
46
52
  seq(':', /[^$a-zA-Z_\r\n]/, /[^\r\n]*/),
47
53
  seq('%#', /[^\r\n]*/, '#%'),
54
+ // Polyglot batch/PowerShell header line: `<# ...` is a PowerShell block-comment opener
55
+ // that batch interprets as a (failing) redirect; we treat it as a comment.
56
+ seq('<#', /[^\r\n]*/),
57
+ // Batch+VBScript polyglot: lines containing a whitespace-prefixed `'VBS`
58
+ // marker are extracted by the script itself as VBScript; the batch
59
+ // interpreter never runs them. Consume the rest of the line after the
60
+ // marker so any trailing VBScript tokens don't leak into the batch parse.
61
+ seq(/[^'\r\n][^\r\n]*[ \t]'VBS/, /[^\r\n]*/),
48
62
  ))),
49
63
  label: () => token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/, optional(seq(/[ \t]/, /[^\r\n]*/)))),
50
64
  variable_assignment: ($) => prec(8, seq(
@@ -55,9 +69,9 @@ export default grammar({
55
69
  seq(
56
70
  /[ \t]+/,
57
71
  choice(
58
- seq('"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), '=', optional($.quoted_assignment_value), '"', optional($.argument_list)),
59
- seq('^"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), '=', optional($.caret_quoted_assignment_value), optional('^"')),
60
- seq(choice($.variable_reference, alias($._var_name_pattern, $.variable_name)), '=', optional($.assignment_value)),
72
+ seq('"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), optional(seq('=', optional($.quoted_assignment_value))), '"', optional($.argument_list)),
73
+ seq(token(prec(2, '^"')), repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), optional(seq('=', optional($.caret_quoted_assignment_value))), optional(token(prec(2, '^"')))),
74
+ seq(repeat1(choice($.variable_reference, alias($._var_name_pattern, $.variable_name))), optional(seq('=', optional($.assignment_value)))),
61
75
  ),
62
76
  ),
63
77
  ),
@@ -71,7 +85,7 @@ export default grammar({
71
85
  alias('^', $.assignment_literal),
72
86
  ))),
73
87
  _var_name_pattern: () => token(choice(
74
- /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/,
88
+ /[$@a-zA-Z_.][$@a-zA-Z0-9_.#()\[\]]*/,
75
89
  /\/[@a-zA-Z_][@a-zA-Z0-9_.#()\[\]]+/,
76
90
  )),
77
91
  _quoted_var_name_pattern: () => token(prec(1, choice(/[^\s="%][^="%]*/, /%%[a-zA-Z]?/, /"[^="\r\n]+"/))),
@@ -82,7 +96,10 @@ export default grammar({
82
96
  prompt_assignment: ($) => seq(
83
97
  optional(/[ \t]+/), alias(ci('/p'), $.set_option),
84
98
  optional(/[ \t]+/),
85
- alias(/[@a-zA-Z_][@a-zA-Z0-9_()\[\]]*/, $.variable_name), '=', optional($.assignment_value),
99
+ choice(
100
+ seq(optional(alias($._var_name_pattern, $.variable_name)), '=', optional($.assignment_value)),
101
+ seq('"', optional(alias($._var_name_pattern, $.variable_name)), '=', optional($.quoted_assignment_value), '"', optional($.argument_list)),
102
+ ),
86
103
  ),
87
104
  arithmetic_expression: () => token(choice(
88
105
  seq('"', /[^"\r\n]*/, '"'),
@@ -94,6 +111,7 @@ export default grammar({
94
111
  alias(/[^%!()\r\n]+/, $.assignment_literal),
95
112
  alias('%', $.assignment_literal),
96
113
  alias('!', $.assignment_literal),
114
+ $._line_continuation,
97
115
  ))),
98
116
  assignment_paren_group: ($) => seq('(', repeat(choice(
99
117
  $.variable_reference,
@@ -140,7 +158,8 @@ export default grammar({
140
158
  $.redirect_stmt, $.pipe_stmt, $.comment,
141
159
  ),
142
160
  _if_operand: ($) => choice(
143
- $.string, $.bracketed_value, $.paren_expression,
161
+ prec.right(repeat1($.string)),
162
+ $.bracketed_value, $.paren_expression,
144
163
  prec.right(seq(
145
164
  choice($.variable_reference, alias($._if_word, $.argument_value), $.integer),
146
165
  repeat(choice(
@@ -162,8 +181,14 @@ export default grammar({
162
181
  )),
163
182
  call_stmt: ($) => prec(8, seq(
164
183
  optional('@'), kw('call'),
165
- choice(token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/)), $.command_name, $.variable_reference),
166
- optional($.argument_list),
184
+ optional(choice(
185
+ token.immediate(','),
186
+ token.immediate(';'),
187
+ seq(
188
+ choice(token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/)), $.command_name, $.variable_reference, $.string),
189
+ optional($.argument_list),
190
+ ),
191
+ )),
167
192
  )),
168
193
  exit_stmt: ($) => prec(8, seq(
169
194
  optional('@'), kw('exit'),
@@ -188,7 +213,7 @@ export default grammar({
188
213
  choice($.parenthesized, $._body_stmt),
189
214
  )),
190
215
  for_options: () => token(prec(10, choice(ci('/d'), seq(ci('/r'), optional(seq(/[ \t]+/, /(%[^\s%]|[^\s%])+%?/))), ci('/l'), seq(ci('/f'), optional(seq(/[ \t]+/, '"', /[^"]*/, '"')))))),
191
- for_variable: () => token(seq('%%', optional('~'), /[a-zA-Z]/)),
216
+ for_variable: () => token(seq('%%', /[a-zA-Z0-9.]/)),
192
217
  for_set: ($) => prec.right(repeat1(choice(
193
218
  $.variable_reference,
194
219
  $.for_set_group,
@@ -204,18 +229,21 @@ export default grammar({
204
229
  alias('%', $.for_set_literal),
205
230
  alias('!', $.for_set_literal),
206
231
  )), ')'),
207
- parenthesized: ($) => seq('(', repeat(choice(seq($._stmt, /\r?\n/), /\r?\n/)), optional($._stmt), ')'),
232
+ parenthesized: ($) => seq(optional('@'), '(', repeat(choice(seq($._stmt, /\r?\n/), /\r?\n/)), optional($._stmt), ')'),
208
233
  redirect_stmt: ($) => prec.right(4, choice(
209
- seq(choice($.call_stmt, $.cmd, $.parenthesized), $.redirection),
210
- seq($.redirection, choice($.call_stmt, $.cmd, $.parenthesized)),
234
+ seq(choice($.call_stmt, $.cmd, $.parenthesized, $.variable_assignment), $.redirection),
235
+ seq($.redirection, choice($.call_stmt, $.cmd, $.parenthesized, $.variable_assignment)),
211
236
  )),
212
237
  redirection: ($) => {
213
- const file_redir = seq(optional(/[0-2]/), $.redirect_op, $.redirect_target);
238
+ const file_redir = seq($.redirect_op, $.redirect_target);
214
239
  const one_redir = choice(file_redir, $.fd_redirect);
215
240
  return prec.right(repeat1(one_redir));
216
241
  },
217
- fd_redirect: () => token(seq(optional(/[0-2]/), '>&', /[0-9]/)),
218
- redirect_op: () => token(choice('2>>', '2>', '>>', '>', '<')),
242
+ fd_redirect: ($) => choice(
243
+ token(seq(optional(/[0-9]/), />&|<&/, /[0-9]/)),
244
+ seq(alias(token(seq(optional(/[0-9]/), />&|<&/)), $.fd_redirect_op), $.variable_reference),
245
+ ),
246
+ redirect_op: () => token(choice(/[0-9]?>>/, /[0-9]?>/, /[0-9]?</)),
219
247
  redirect_target: () => token(choice(ci('nul'), ci('con'), /[^\s|&><\r\n]+/)),
220
248
  pipe_stmt: ($) => prec.left(3, seq(choice($.pipe_stmt, $.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized), '|', choice($.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized))),
221
249
  cond_exec: ($) => choice(
@@ -233,21 +261,25 @@ export default grammar({
233
261
  bracketed_value: ($) => seq('[', repeat(choice($.variable_reference, alias(token(/[^%!\[\]\r\n]+/), $.bracketed_literal))), ']'),
234
262
  cmd: ($) => prec.right(5, choice(
235
263
  seq(optional('@'), alias(kw('echo'), $.command_name), optional(alias($._echo_text, $.argument_list))),
236
- seq(optional('@'), choice($.command_name, $.variable_reference), optional($.argument_list)),
264
+ seq(optional('@'), choice($.command_name, $.variable_reference, $.string), optional($.argument_list)),
237
265
  )),
238
266
  _echo_text: ($) => prec.right(repeat1(choice(
239
267
  $.variable_reference,
240
268
  $.string,
241
269
  alias($._echo_literal, $.argument_value),
270
+ $._line_continuation,
242
271
  ))),
272
+ // Trailing `^` escapes the following newline, joining the next physical
273
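+ // line onto the current logical one. Matched as a hidden token that rules such as `_arg` and `_echo_text` accept as an alternative, so it behaves like whitespace between tokens.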
274
+ _line_continuation: () => token(/\^\r?\n[ \t]*/),
243
275
  _echo_literal: () => token(/(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])+|[()!%]/),
244
276
  macro_invocation: ($) => prec.right(6, seq($.variable_reference, $.parenthesized, optional($.else_clause))),
245
- command_name: () => /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/,
277
+ command_name: () => /[$a-zA-Z_0-9][$a-zA-Z0-9_.#-]*/,
246
278
  argument_list: ($) => prec.right(repeat1($._arg)),
247
- _arg: ($) => choice($.string, $.variable_reference, $.command_option, $.paren_expression, $.argument_value),
279
+ _arg: ($) => choice($.string, $.variable_reference, $.command_option, $.paren_expression, $.argument_value, $._line_continuation),
248
280
  command_option: () => token(seq('/', /[a-zA-Z_?][a-zA-Z0-9_:]*/)),
249
281
  paren_expression: ($) => seq('(', repeat($._arg), ')'),
250
- argument_value: () => /(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])(?:\^[&|<>^()]|[^\s|&><"\r\n()])*/,
282
+ argument_value: () => /(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])(?:\^[&|<>^()]|[^\s|&><"\r\n()])*|[!%]/,
251
283
  integer: () => /[0-9]+/,
252
284
  },
253
285
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tree-sitter-batch",
3
- "version": "0.10.1",
3
+ "version": "0.11.0",
4
4
  "description": "A Windows Batch/CMD grammar for tree-sitter",
5
5
  "type": "module",
6
6
  "repository": {
@@ -39,7 +39,8 @@
39
39
  "globals": "^17.5.0",
40
40
  "prebuildify": "^6.0.1",
41
41
  "tree-sitter-cli": "0.26.8",
42
- "tree-sitter-go-types": "^0.1.0"
42
+ "tree-sitter-go-types": "^0.1.0",
43
+ "typescript": "^6.0.3"
43
44
  },
44
45
  "overrides": {
45
46
  "eslint-plugin-jsdoc": "^62.9.0"
@@ -58,6 +59,7 @@
58
59
  "start": "tree-sitter playground",
59
60
  "generate": "tree-sitter generate --abi 14 && tree-sitter-go-types",
60
61
  "lint": "eslint grammar.js",
62
+ "typecheck": "tsc --noEmit -p tsconfig.json",
61
63
  "test": "node --test bindings/node/*_test.js"
62
64
  }
63
65
  }