tree-sitter-batch 0.8.0 → 0.11.0

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -6,11 +6,15 @@ Parses `.bat` and `.cmd` files into a concrete syntax tree for syntax highlighti
6
6
 
7
7
  ## Features
8
8
 
9
- - **Control flow** — `IF`/`ELSE` (EXIST, DEFINED, ERRORLEVEL, comparison with NOT), `FOR` (/D /R /L /F), `GOTO`, `CALL`
10
- - **Variables** — `SET` (plain, `/A` arithmetic, `/P` prompt), `%VAR%`, `!VAR!`, `%%i`, `%~dp0`, `%VAR:old=new%`
11
- - **Operators** — pipes `|`, redirects `>` `>>` `2>` `2>&1`, conditional `&&` `||`
12
- - **Structure** — labels `:name`, comments `REM` `::`, parenthesized blocks, `@ECHO OFF`
9
+ - **Control flow** — `IF`/`ELSE` (EXIST, DEFINED, ERRORLEVEL, comparison with NOT), `FOR` (/D /R /L /F), `GOTO`, `CALL`, `EXIT /B`
10
+ - **Variables** — `SET` (plain, `/A` arithmetic, `/P` prompt, display-only without `=`), `%VAR%`, `!VAR!`, `%%i`, `%~dp0`, `%VAR:old=new%`, subscripted delayed expansion `!ARR[%%i]!`, for-variable modifiers `%%~dpnxf` `%%~zS` `%%~$PATH:F`, escaped forms `%%VAR%%` `%%%%i`
11
+ - **Echo** — free-form text with literal `(` `)` `!` `%`, inline strings, and variable references
12
+ - **Operators** &mdash; pipes `|`, redirects `>` `>>` `<` `2>` `2>&1` (fds 0-9, including variable handles `>&%FD%` / `<&%FD%`), conditional `&&` `||`, separator `&`
13
+ - **Line continuation** &mdash; trailing caret `^` joins the current line with the next (e.g. `"%JAVACMD%" ^` followed by indented arguments)
14
+ - **Commands** &mdash; bare names, variable references as command, and quoted paths (`"C:\path\app.exe" args`, `call "%SCRIPT%" %*`)
15
+ - **Structure** &mdash; labels `:name`, comments `REM` `::`, parenthesized blocks (including `@(...)`), `@ECHO OFF`, macro invocations, DosTips idioms `(call,)` / `(call;)` / `(call)` for ERRORLEVEL manipulation
13
16
  - **Scope** &mdash; `SETLOCAL`/`ENDLOCAL` with `ENABLEDELAYEDEXPANSION`
17
+ - **Polyglot headers** &mdash; tolerates batch/PowerShell `<# ... #>` header lines and batch/VBScript lines marked with a trailing `'VBS` so SysToolsLib-style dual-language scripts parse cleanly
14
18
  - **Case-insensitive** &mdash; all keywords match regardless of casing
15
19
 
16
20
  ## Example
@@ -51,20 +55,20 @@ Parsed tree:
51
55
  (variable_assignment
52
56
  (set_keyword) (variable_name) (assignment_value))
53
57
  (variable_assignment
54
- (set_keyword) (set_option) (variable_name) (assignment_value))
58
+ (set_keyword) (arithmetic_assignment (set_option) (arithmetic_expression)))
55
59
  (if_stmt
56
60
  (string)
57
61
  (parenthesized
58
62
  (cmd (command_name) (argument_list (argument_value)))))
59
63
  (for_stmt
60
64
  (for_variable)
61
- (for_set)
65
+ (for_set (for_set_literal))
62
66
  (parenthesized
63
67
  (cmd (command_name) (argument_list (string) (string)))))
64
68
  (if_stmt
65
69
  (variable_reference)
66
70
  (comparison_op)
67
- (integer)
71
+ (argument_value)
68
72
  (parenthesized
69
73
  (cmd (command_name) (argument_list (argument_value) (argument_value))))
70
74
  (else_clause
package/grammar.js CHANGED
@@ -1,9 +1,33 @@
1
- const ci = (word) => new RegExp(word.split('').map((c) => /[a-zA-Z]/.test(c) ? `[${c.toLowerCase()}${c.toUpperCase()}]` : c).join(''));
1
+ /// <reference types="tree-sitter-cli/dsl"/>
2
+ // @ts-check
3
+
4
+ /** @param {string} word */
5
+ const ci = (word) => new RegExp(word.split('').map((/** @type {string} */ c) => /[a-zA-Z]/.test(c) ? `[${c.toLowerCase()}${c.toUpperCase()}]` : c).join(''));
6
+ /** @param {string} word */
2
7
  const kw = (word) => token(prec(10, ci(word)));
8
+ const varRefChoice = () => choice(
9
+ seq('%%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%%'),
10
+ seq('%', /[$@+\-a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%'),
11
+ seq('%~', /[a-zA-Z]*/, /[0-9]/),
12
+ seq('%', /[0-9]/),
13
+ '%*',
14
+ seq('%%%%%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
15
+ seq('%%%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
16
+ seq('%%', optional(seq('~', /[a-zA-Z$:_]*/)), /[a-zA-Z0-9.]/),
17
+ seq('%%', /[0-9]/),
18
+ seq('!', /[$%a-zA-Z_][$%a-zA-Z0-9_.#]*(?:\[[^!\r\n\]]*\])?/, '!'),
19
+ seq('%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, ':', /[^%\r\n]+/, '%'),
20
+ seq('%', /[^%=\s\r\n]/, '%'),
21
+ seq('!', /[%$a-zA-Z_][%$a-zA-Z0-9_.#()\[\]]*/, /:[^!\r\n]+/, '!'),
22
+ seq('%', /[<>\/]+[@a-zA-Z_0-9.]*/, '%'),
23
+ seq('%', /\\[@a-zA-Z_0-9.]+/, '%'),
24
+ seq('%', /"[^"%\r\n]+"/, '%'),
25
+ );
26
+ /** @param {GrammarSymbols<string>} $ */
3
27
  const operand = ($) => [
4
28
  $.cond_exec, $.pipe_stmt, $.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized,
5
29
  $.variable_assignment, $.goto_stmt, $.exit_stmt, $.setlocal_stmt, $.endlocal_stmt,
6
- $.if_stmt, $.for_stmt, $.macro_invocation,
30
+ $.if_stmt, $.for_stmt, $.macro_invocation, $.echo_off,
7
31
  ];
8
32
 
9
33
  export default grammar({
@@ -27,6 +51,14 @@ export default grammar({
27
51
  seq(optional('@'), /[rR][eE][mM]/, optional(seq(/[ \t]/, /[^\r\n]*/))),
28
52
  seq(':', /[^$a-zA-Z_\r\n]/, /[^\r\n]*/),
29
53
  seq('%#', /[^\r\n]*/, '#%'),
54
+ // Polyglot batch/PowerShell header line: `<# ...` is a PS block-comment open
55
+ // that batch interprets as a (failing) redirect; we treat it as a comment.
56
+ seq('<#', /[^\r\n]*/),
57
+ // Batch+VBScript polyglot: lines containing a whitespace-prefixed `'VBS`
58
+ // marker are extracted by the script itself as VBScript; the batch
59
+ // interpreter never runs them. Consume the rest of the line after the
60
+ // marker so any trailing VBScript tokens don't leak into the batch parse.
61
+ seq(/[^'\r\n][^\r\n]*[ \t]'VBS/, /[^\r\n]*/),
30
62
  ))),
31
63
  label: () => token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/, optional(seq(/[ \t]/, /[^\r\n]*/)))),
32
64
  variable_assignment: ($) => prec(8, seq(
@@ -37,9 +69,9 @@ export default grammar({
37
69
  seq(
38
70
  /[ \t]+/,
39
71
  choice(
40
- seq('"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), '=', optional($.quoted_assignment_value), '"', optional($.argument_list)),
41
- seq('^"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), '=', optional($.caret_quoted_assignment_value), optional('^"')),
42
- seq(choice($.variable_reference, alias($._var_name_pattern, $.variable_name)), '=', optional($.assignment_value)),
72
+ seq('"', repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), optional(seq('=', optional($.quoted_assignment_value))), '"', optional($.argument_list)),
73
+ seq(token(prec(2, '^"')), repeat1(choice($.variable_reference, alias($._quoted_var_name_pattern, $.variable_name))), optional(seq('=', optional($.caret_quoted_assignment_value))), optional(token(prec(2, '^"')))),
74
+ seq(repeat1(choice($.variable_reference, alias($._var_name_pattern, $.variable_name))), optional(seq('=', optional($.assignment_value)))),
43
75
  ),
44
76
  ),
45
77
  ),
@@ -53,7 +85,7 @@ export default grammar({
53
85
  alias('^', $.assignment_literal),
54
86
  ))),
55
87
  _var_name_pattern: () => token(choice(
56
- /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/,
88
+ /[$@a-zA-Z_.][$@a-zA-Z0-9_.#()\[\]]*/,
57
89
  /\/[@a-zA-Z_][@a-zA-Z0-9_.#()\[\]]+/,
58
90
  )),
59
91
  _quoted_var_name_pattern: () => token(prec(1, choice(/[^\s="%][^="%]*/, /%%[a-zA-Z]?/, /"[^="\r\n]+"/))),
@@ -64,7 +96,10 @@ export default grammar({
64
96
  prompt_assignment: ($) => seq(
65
97
  optional(/[ \t]+/), alias(ci('/p'), $.set_option),
66
98
  optional(/[ \t]+/),
67
- alias(/[@a-zA-Z_][@a-zA-Z0-9_()\[\]]*/, $.variable_name), '=', optional($.assignment_value),
99
+ choice(
100
+ seq(optional(alias($._var_name_pattern, $.variable_name)), '=', optional($.assignment_value)),
101
+ seq('"', optional(alias($._var_name_pattern, $.variable_name)), '=', optional($.quoted_assignment_value), '"', optional($.argument_list)),
102
+ ),
68
103
  ),
69
104
  arithmetic_expression: () => token(choice(
70
105
  seq('"', /[^"\r\n]*/, '"'),
@@ -76,6 +111,7 @@ export default grammar({
76
111
  alias(/[^%!()\r\n]+/, $.assignment_literal),
77
112
  alias('%', $.assignment_literal),
78
113
  alias('!', $.assignment_literal),
114
+ $._line_continuation,
79
115
  ))),
80
116
  assignment_paren_group: ($) => seq('(', repeat(choice(
81
117
  $.variable_reference,
@@ -93,11 +129,11 @@ export default grammar({
93
129
  ))),
94
130
  if_stmt: ($) => prec.right(8, seq(
95
131
  optional('@'), kw('if'),
96
- optional(alias(ci('/i'), $.if_option)),
132
+ optional(alias(token(prec(2, ci('/i'))), $.if_option)),
97
133
  optional(kw('not')),
98
134
  choice(
99
- seq(kw('exist'), choice($.string, $.variable_reference, $.argument_value)),
100
- seq(kw('defined'), choice(/[$a-zA-Z_][$a-zA-Z0-9_.]*/, $.string, $.variable_reference)),
135
+ seq(kw('exist'), $._if_operand),
136
+ seq(kw('defined'), choice(/[$a-zA-Z_][$a-zA-Z0-9_.]*/, $._if_operand)),
101
137
  seq(kw('errorlevel'), $.integer),
102
138
  seq(
103
139
  $._if_operand,
@@ -122,10 +158,18 @@ export default grammar({
122
158
  $.redirect_stmt, $.pipe_stmt, $.comment,
123
159
  ),
124
160
  _if_operand: ($) => choice(
125
- $.string, $.variable_reference, $.integer, $.bracketed_value,
126
- alias($._if_word, $.argument_value),
161
+ prec.right(repeat1($.string)),
162
+ $.bracketed_value, $.paren_expression,
163
+ prec.right(seq(
164
+ choice($.variable_reference, alias($._if_word, $.argument_value), $.integer),
165
+ repeat(choice(
166
+ alias($._variable_reference_immediate, $.variable_reference),
167
+ alias($._if_word_rest, $.argument_value),
168
+ )),
169
+ )),
127
170
  ),
128
- _if_word: () => token(prec(-1, /[^=<>\s\[\]"|&()][^=<>\s"|&()]*/)),
171
+ _if_word: () => token(prec(1, /[^=<>\s\[\]"|&()%!][^=<>\s"|&()%!]*/)),
172
+ _if_word_rest: () => token.immediate(/[^=<>\s\[\]"|&()%!][^=<>\s"|&()%!]*/),
129
173
  comparison_op: () => token(prec(10, choice('==', ci('equ'), ci('neq'), ci('lss'), ci('leq'), ci('gtr'), ci('geq')))),
130
174
  goto_stmt: ($) => prec(8, seq(
131
175
  optional('@'), kw('goto'),
@@ -137,8 +181,14 @@ export default grammar({
137
181
  )),
138
182
  call_stmt: ($) => prec(8, seq(
139
183
  optional('@'), kw('call'),
140
- choice(token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/)), $.command_name, $.variable_reference),
141
- optional($.argument_list),
184
+ optional(choice(
185
+ token.immediate(','),
186
+ token.immediate(';'),
187
+ seq(
188
+ choice(token(seq(':', /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/)), $.command_name, $.variable_reference, $.string),
189
+ optional($.argument_list),
190
+ ),
191
+ )),
142
192
  )),
143
193
  exit_stmt: ($) => prec(8, seq(
144
194
  optional('@'), kw('exit'),
@@ -163,7 +213,7 @@ export default grammar({
163
213
  choice($.parenthesized, $._body_stmt),
164
214
  )),
165
215
  for_options: () => token(prec(10, choice(ci('/d'), seq(ci('/r'), optional(seq(/[ \t]+/, /(%[^\s%]|[^\s%])+%?/))), ci('/l'), seq(ci('/f'), optional(seq(/[ \t]+/, '"', /[^"]*/, '"')))))),
166
- for_variable: () => token(seq('%%', optional('~'), /[a-zA-Z]/)),
216
+ for_variable: () => token(seq('%%', /[a-zA-Z0-9.]/)),
167
217
  for_set: ($) => prec.right(repeat1(choice(
168
218
  $.variable_reference,
169
219
  $.for_set_group,
@@ -179,18 +229,21 @@ export default grammar({
179
229
  alias('%', $.for_set_literal),
180
230
  alias('!', $.for_set_literal),
181
231
  )), ')'),
182
- parenthesized: ($) => seq('(', repeat(choice(seq($._stmt, /\r?\n/), /\r?\n/)), optional($._stmt), ')'),
232
+ parenthesized: ($) => seq(optional('@'), '(', repeat(choice(seq($._stmt, /\r?\n/), /\r?\n/)), optional($._stmt), ')'),
183
233
  redirect_stmt: ($) => prec.right(4, choice(
184
- seq(choice($.call_stmt, $.cmd, $.parenthesized), $.redirection),
185
- seq($.redirection, choice($.call_stmt, $.cmd, $.parenthesized)),
234
+ seq(choice($.call_stmt, $.cmd, $.parenthesized, $.variable_assignment), $.redirection),
235
+ seq($.redirection, choice($.call_stmt, $.cmd, $.parenthesized, $.variable_assignment)),
186
236
  )),
187
237
  redirection: ($) => {
188
- const file_redir = seq(optional(/[0-2]/), $.redirect_op, $.redirect_target);
238
+ const file_redir = seq($.redirect_op, $.redirect_target);
189
239
  const one_redir = choice(file_redir, $.fd_redirect);
190
240
  return prec.right(repeat1(one_redir));
191
241
  },
192
- fd_redirect: () => token(seq(optional(/[0-2]/), '>&', /[0-9]/)),
193
- redirect_op: () => token(choice('2>>', '2>', '>>', '>', '<')),
242
+ fd_redirect: ($) => choice(
243
+ token(seq(optional(/[0-9]/), />&|<&/, /[0-9]/)),
244
+ seq(alias(token(seq(optional(/[0-9]/), />&|<&/)), $.fd_redirect_op), $.variable_reference),
245
+ ),
246
+ redirect_op: () => token(choice(/[0-9]?>>/, /[0-9]?>/, /[0-9]?</)),
194
247
  redirect_target: () => token(choice(ci('nul'), ci('con'), /[^\s|&><\r\n]+/)),
195
248
  pipe_stmt: ($) => prec.left(3, seq(choice($.pipe_stmt, $.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized), '|', choice($.redirect_stmt, $.call_stmt, $.cmd, $.parenthesized))),
196
249
  cond_exec: ($) => choice(
@@ -202,30 +255,31 @@ export default grammar({
202
255
  '&',
203
256
  choice(...operand($), $.comment),
204
257
  )),
205
- variable_reference: () => token(choice(
206
- seq('%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, '%'),
207
- seq('%~', /[a-zA-Z]*/, /[0-9]/),
208
- seq('%', /[0-9]/),
209
- '%*',
210
- seq('%%', optional('~'), /[a-zA-Z]/),
211
- seq('!', /[$%a-zA-Z_][$a-zA-Z0-9_.#]*/, '!'),
212
- seq('%', /[$@a-zA-Z_][$@a-zA-Z0-9_.#()\[\]]*/, ':', /[^%\r\n]+/, '%'),
213
- seq('%', /[^%=\s\r\n]/, '%'),
214
- seq('!', /[%$a-zA-Z_][%$a-zA-Z0-9_.#()\[\]]*/, /:[^!\r\n]+/, '!'),
215
- seq('%', /[<>\/]+[@a-zA-Z_0-9.]*/, '%'),
216
- seq('%', /\\[@a-zA-Z_0-9.]+/, '%'),
217
- seq('%', /"[^"%\r\n]+"/, '%'),
218
- )),
258
+ variable_reference: () => token(varRefChoice()),
259
+ _variable_reference_immediate: () => token.immediate(varRefChoice()),
219
260
  string: () => token(seq('"', /[^"\r\n]*/, '"')),
220
261
  bracketed_value: ($) => seq('[', repeat(choice($.variable_reference, alias(token(/[^%!\[\]\r\n]+/), $.bracketed_literal))), ']'),
221
- cmd: ($) => prec.right(5, seq(optional('@'), choice($.command_name, $.variable_reference), optional($.argument_list))),
262
+ cmd: ($) => prec.right(5, choice(
263
+ seq(optional('@'), alias(kw('echo'), $.command_name), optional(alias($._echo_text, $.argument_list))),
264
+ seq(optional('@'), choice($.command_name, $.variable_reference, $.string), optional($.argument_list)),
265
+ )),
266
+ _echo_text: ($) => prec.right(repeat1(choice(
267
+ $.variable_reference,
268
+ $.string,
269
+ alias($._echo_literal, $.argument_value),
270
+ $._line_continuation,
271
+ ))),
272
+ // Trailing `^` escapes the following newline, joining the next physical
273
+ // line onto the current logical one. Treated as whitespace between tokens.
274
+ _line_continuation: () => token(/\^\r?\n[ \t]*/),
275
+ _echo_literal: () => token(/(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])+|[()!%]/),
222
276
  macro_invocation: ($) => prec.right(6, seq($.variable_reference, $.parenthesized, optional($.else_clause))),
223
- command_name: () => /[$a-zA-Z_][$a-zA-Z0-9_.#-]*/,
277
+ command_name: () => /[$a-zA-Z_0-9][$a-zA-Z0-9_.#-]*/,
224
278
  argument_list: ($) => prec.right(repeat1($._arg)),
225
- _arg: ($) => choice($.string, $.variable_reference, $.command_option, $.paren_expression, $.argument_value),
279
+ _arg: ($) => choice($.string, $.variable_reference, $.command_option, $.paren_expression, $.argument_value, $._line_continuation),
226
280
  command_option: () => token(seq('/', /[a-zA-Z_?][a-zA-Z0-9_:]*/)),
227
281
  paren_expression: ($) => seq('(', repeat($._arg), ')'),
228
- argument_value: () => /(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])(?:\^[&|<>^()]|[^\s|&><"\r\n()])*/,
282
+ argument_value: () => /(?:\^[&|<>^()]|[^\s|&><"\r\n%!()])(?:\^[&|<>^()]|[^\s|&><"\r\n()])*|[!%]/,
229
283
  integer: () => /[0-9]+/,
230
284
  },
231
285
  });
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "tree-sitter-batch",
3
- "version": "0.8.0",
3
+ "version": "0.11.0",
4
4
  "description": "A Windows Batch/CMD grammar for tree-sitter",
5
5
  "type": "module",
6
6
  "repository": {
@@ -34,11 +34,16 @@
34
34
  "node-gyp-build": "^4.8.4"
35
35
  },
36
36
  "devDependencies": {
37
- "eslint": "^9.15.0",
37
+ "eslint": "^10.2.0",
38
38
  "eslint-config-treesitter": "^1.0.2",
39
+ "globals": "^17.5.0",
39
40
  "prebuildify": "^6.0.1",
40
- "tree-sitter-cli": "0.24.7",
41
- "tree-sitter-go-types": "^0.1.0"
41
+ "tree-sitter-cli": "0.26.8",
42
+ "tree-sitter-go-types": "^0.1.0",
43
+ "typescript": "^6.0.3"
44
+ },
45
+ "overrides": {
46
+ "eslint-plugin-jsdoc": "^62.9.0"
42
47
  },
43
48
  "peerDependencies": {
44
49
  "tree-sitter": ">=0.25.0"
@@ -52,8 +57,9 @@
52
57
  "install": "node-gyp-build",
53
58
  "prestart": "tree-sitter build --wasm",
54
59
  "start": "tree-sitter playground",
55
- "generate": "tree-sitter generate && tree-sitter-go-types",
60
+ "generate": "tree-sitter generate --abi 14 && tree-sitter-go-types",
56
61
  "lint": "eslint grammar.js",
62
+ "typecheck": "tsc --noEmit -p tsconfig.json",
57
63
  "test": "node --test bindings/node/*_test.js"
58
64
  }
59
65
  }