@tishlang/tish 1.13.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (106)
  1. package/Cargo.toml +2 -0
  2. package/bin/tish +0 -0
  3. package/crates/js_to_tish/src/transform/expr.rs +1 -0
  4. package/crates/tish/Cargo.toml +11 -3
  5. package/crates/tish/build.rs +21 -0
  6. package/crates/tish/src/cli_help.rs +15 -4
  7. package/crates/tish/src/main.rs +93 -21
  8. package/crates/tish/src/repl_completion.rs +0 -1
  9. package/crates/tish/tests/error_source_location.rs +36 -0
  10. package/crates/tish/tests/fixtures/runtime_error_location.tish +5 -0
  11. package/crates/tish/tests/fixtures/trycatch_runtime_errors.tish +15 -0
  12. package/crates/tish/tests/fixtures/tty_capability.tish +9 -0
  13. package/crates/tish/tests/integration_test.rs +402 -91
  14. package/crates/tish/tests/trycatch_runtime_errors.rs +45 -0
  15. package/crates/tish/tests/tty_capability.rs +43 -0
  16. package/crates/tish_ast/src/ast.rs +37 -8
  17. package/crates/tish_builtins/Cargo.toml +2 -0
  18. package/crates/tish_builtins/src/array.rs +375 -13
  19. package/crates/tish_builtins/src/collections.rs +481 -0
  20. package/crates/tish_builtins/src/construct.rs +59 -19
  21. package/crates/tish_builtins/src/date.rs +538 -0
  22. package/crates/tish_builtins/src/globals.rs +86 -6
  23. package/crates/tish_builtins/src/iterator.rs +129 -0
  24. package/crates/tish_builtins/src/lib.rs +5 -0
  25. package/crates/tish_builtins/src/number.rs +96 -0
  26. package/crates/tish_builtins/src/object.rs +2 -2
  27. package/crates/tish_builtins/src/string.rs +19 -20
  28. package/crates/tish_builtins/src/symbol.rs +1 -1
  29. package/crates/tish_builtins/src/typedarrays.rs +298 -0
  30. package/crates/tish_bytecode/src/chunk.rs +69 -1
  31. package/crates/tish_bytecode/src/compiler.rs +933 -89
  32. package/crates/tish_bytecode/src/encoding.rs +2 -0
  33. package/crates/tish_bytecode/src/lib.rs +2 -1
  34. package/crates/tish_bytecode/src/opcode.rs +47 -4
  35. package/crates/tish_bytecode/src/serialize.rs +31 -1
  36. package/crates/tish_compile/Cargo.toml +1 -0
  37. package/crates/tish_compile/src/check.rs +774 -0
  38. package/crates/tish_compile/src/codegen.rs +2334 -349
  39. package/crates/tish_compile/src/infer.rs +1395 -6
  40. package/crates/tish_compile/src/lib.rs +50 -8
  41. package/crates/tish_compile/src/resolve.rs +584 -21
  42. package/crates/tish_compile/src/types.rs +106 -2
  43. package/crates/tish_compile_js/src/codegen.rs +67 -0
  44. package/crates/tish_compile_js/src/tests_jsx.rs +64 -0
  45. package/crates/tish_core/Cargo.toml +7 -1
  46. package/crates/tish_core/src/console_style.rs +11 -1
  47. package/crates/tish_core/src/json.rs +81 -38
  48. package/crates/tish_core/src/lib.rs +3 -0
  49. package/crates/tish_core/src/shape.rs +85 -0
  50. package/crates/tish_core/src/value.rs +679 -25
  51. package/crates/tish_core/src/vmref.rs +13 -8
  52. package/crates/tish_cranelift/src/link.rs +17 -4
  53. package/crates/tish_cranelift_runtime/Cargo.toml +1 -0
  54. package/crates/tish_eval/Cargo.toml +6 -0
  55. package/crates/tish_eval/src/eval.rs +665 -117
  56. package/crates/tish_eval/src/http.rs +4 -1
  57. package/crates/tish_eval/src/natives.rs +165 -13
  58. package/crates/tish_eval/src/value.rs +31 -13
  59. package/crates/tish_eval/src/value_convert.rs +10 -4
  60. package/crates/tish_ffi/Cargo.toml +26 -0
  61. package/crates/tish_ffi/src/lib.rs +518 -0
  62. package/crates/tish_ffi/tests/fixtures/testmod/Cargo.toml +18 -0
  63. package/crates/tish_ffi/tests/fixtures/testmod/src/lib.rs +46 -0
  64. package/crates/tish_ffi/tests/loader.rs +65 -0
  65. package/crates/tish_fmt/src/lib.rs +43 -5
  66. package/crates/tish_lexer/src/lib.rs +397 -9
  67. package/crates/tish_lexer/src/token.rs +7 -0
  68. package/crates/tish_lint/src/lib.rs +2 -10
  69. package/crates/tish_lsp/src/import_goto.rs +2 -0
  70. package/crates/tish_lsp/src/main.rs +439 -26
  71. package/crates/tish_native/src/build.rs +55 -1
  72. package/crates/tish_opt/src/lib.rs +126 -23
  73. package/crates/tish_parser/src/lib.rs +55 -1
  74. package/crates/tish_parser/src/parser.rs +456 -34
  75. package/crates/tish_pg/src/lib.rs +3 -3
  76. package/crates/tish_resolve/src/lib.rs +99 -59
  77. package/crates/tish_runtime/Cargo.toml +4 -0
  78. package/crates/tish_runtime/src/http.rs +66 -17
  79. package/crates/tish_runtime/src/http_fetch.rs +29 -8
  80. package/crates/tish_runtime/src/http_hyper.rs +25 -2
  81. package/crates/tish_runtime/src/lib.rs +299 -44
  82. package/crates/tish_runtime/src/promise.rs +328 -18
  83. package/crates/tish_runtime/src/timers.rs +13 -7
  84. package/crates/tish_runtime/src/tty.rs +226 -0
  85. package/crates/tish_runtime/src/ws.rs +35 -18
  86. package/crates/tish_runtime/tests/fetch_readable_stream.rs +2 -2
  87. package/crates/tish_ui/src/jsx.rs +10 -0
  88. package/crates/tish_ui/src/runtime/hooks.rs +19 -15
  89. package/crates/tish_ui/src/runtime/mod.rs +15 -12
  90. package/crates/tish_vm/Cargo.toml +14 -1
  91. package/crates/tish_vm/src/jit.rs +1050 -0
  92. package/crates/tish_vm/src/lib.rs +2 -0
  93. package/crates/tish_vm/src/vm.rs +1546 -202
  94. package/crates/tish_vm/tests/concurrent_shared_state.rs +140 -0
  95. package/crates/tish_wasm/src/lib.rs +6 -2
  96. package/crates/tish_wasm_runtime/src/gpu.rs +17 -1
  97. package/crates/tishlang_cargo_bindgen/src/classify.rs +1 -3
  98. package/crates/tishlang_cargo_bindgen/src/lib.rs +2 -2
  99. package/crates/tishlang_cargo_bindgen/src/metadata.rs +1 -1
  100. package/justfile +8 -0
  101. package/package.json +1 -1
  102. package/platform/darwin-arm64/tish +0 -0
  103. package/platform/darwin-x64/tish +0 -0
  104. package/platform/linux-arm64/tish +0 -0
  105. package/platform/linux-x64/tish +0 -0
  106. package/platform/win32-x64/tish.exe +0 -0
@@ -508,6 +508,16 @@ impl Printer {
508
508
  self.depth = level;
509
509
  match s {
510
510
  Statement::Block { statements, span } => self.block(statements, *span, level, true),
511
+ // Comma-declarators: render each as its own statement line. The caller
512
+ // (print_seq) emits the trailing newline, so only separate internally.
513
+ Statement::Multi { statements, .. } => {
514
+ for (i, st) in statements.iter().enumerate() {
515
+ if i > 0 {
516
+ self.buf.push('\n');
517
+ }
518
+ self.stmt(st, level);
519
+ }
520
+ }
511
521
  Statement::VarDecl {
512
522
  name,
513
523
  mutable,
@@ -1072,6 +1082,33 @@ impl Printer {
1072
1082
  self.type_ann(x);
1073
1083
  }
1074
1084
  }
1085
+ TypeAnnotation::Tuple(elems) => {
1086
+ self.buf.push('[');
1087
+ for (i, x) in elems.iter().enumerate() {
1088
+ if i > 0 {
1089
+ self.buf.push_str(", ");
1090
+ }
1091
+ self.type_ann(x);
1092
+ }
1093
+ self.buf.push(']');
1094
+ }
1095
+ TypeAnnotation::Literal(lit) => match lit {
1096
+ tishlang_ast::TypeLiteral::Str(s) => {
1097
+ self.buf.push('"');
1098
+ self.buf.push_str(s.as_ref());
1099
+ self.buf.push('"');
1100
+ }
1101
+ tishlang_ast::TypeLiteral::Num(n) => self.buf.push_str(&n.to_string()),
1102
+ tishlang_ast::TypeLiteral::Bool(b) => self.buf.push_str(&b.to_string()),
1103
+ },
1104
+ TypeAnnotation::Intersection(parts) => {
1105
+ for (i, x) in parts.iter().enumerate() {
1106
+ if i > 0 {
1107
+ self.buf.push_str(" & ");
1108
+ }
1109
+ self.type_ann(x);
1110
+ }
1111
+ }
1075
1112
  }
1076
1113
  }
1077
1114
 
@@ -1118,10 +1155,10 @@ impl Printer {
1118
1155
  }
1119
1156
  Expr::Unary { op, operand, .. } => {
1120
1157
  match op {
1121
- UnaryOp::Not => self.buf.push_str("!"),
1122
- UnaryOp::Neg => self.buf.push_str("-"),
1123
- UnaryOp::Pos => self.buf.push_str("+"),
1124
- UnaryOp::BitNot => self.buf.push_str("~"),
1158
+ UnaryOp::Not => self.buf.push('!'),
1159
+ UnaryOp::Neg => self.buf.push('-'),
1160
+ UnaryOp::Pos => self.buf.push('+'),
1161
+ UnaryOp::BitNot => self.buf.push('~'),
1125
1162
  UnaryOp::Void => self.buf.push_str("void "),
1126
1163
  }
1127
1164
  self.child(operand, PREC_POSTFIX);
@@ -1460,7 +1497,7 @@ fn binop_prec(op: BinOp) -> u8 {
1460
1497
  BinOp::BitOr => 5,
1461
1498
  BinOp::BitXor => 6,
1462
1499
  BinOp::BitAnd => 7,
1463
- BinOp::Shl | BinOp::Shr => 8,
1500
+ BinOp::Shl | BinOp::Shr | BinOp::UShr => 8,
1464
1501
  BinOp::Eq | BinOp::Ne | BinOp::StrictEq | BinOp::StrictNe => 9,
1465
1502
  BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge | BinOp::In => 10,
1466
1503
  BinOp::Add | BinOp::Sub => 11,
@@ -1519,6 +1556,7 @@ fn binop(op: BinOp) -> &'static str {
1519
1556
  BinOp::BitXor => "^",
1520
1557
  BinOp::Shl => "<<",
1521
1558
  BinOp::Shr => ">>",
1559
+ BinOp::UShr => ">>>",
1522
1560
  BinOp::In => "in",
1523
1561
  }
1524
1562
  }
@@ -24,6 +24,37 @@ struct JsxEl {
24
24
  attr_value_braces: i32,
25
25
  }
26
26
 
27
+ /// Lexer configuration.
28
+ #[derive(Debug, Clone, Copy, Default)]
29
+ pub struct LexerOptions {
30
+ /// When true, suppress the virtual `Indent`/`Dedent` tokens so blocks are delimited
31
+ /// **only** by braces. Indentation is treated as ordinary whitespace, so off-side
32
+ /// (brace-less) blocks no longer form. Useful for debugging how nested blocks
33
+ /// transpile — see the `TISH_IGNORE_INDENT` environment variable for a global toggle.
34
+ pub ignore_indent: bool,
35
+ }
36
+
37
+ impl LexerOptions {
38
+ /// Build options from the environment. `TISH_IGNORE_INDENT=1` (or `true`/`yes`) sets
39
+ /// `ignore_indent`, so every parse path (run/build/dump-ast/fmt/lint/lsp) honors it
40
+ /// without threading a flag through the whole pipeline.
41
+ pub fn from_env() -> Self {
42
+ Self {
43
+ ignore_indent: env_truthy(std::env::var_os("TISH_IGNORE_INDENT")),
44
+ }
45
+ }
46
+ }
47
+
48
+ /// Interpret an environment-variable value as a boolean flag: `1`, `true`, or `yes`
49
+ /// (exact, case-sensitive) enable it; anything else — including unset — leaves it off.
50
+ /// Split out from the `std::env` read so the rule is unit-testable without mutating
51
+ /// process-global state (which `Lexer::new` reads, so env-mutating tests would race).
52
+ fn env_truthy(value: Option<std::ffi::OsString>) -> bool {
53
+ value
54
+ .map(|v| v == "1" || v == "true" || v == "yes")
55
+ .unwrap_or(false)
56
+ }
57
+
27
58
  #[derive(Debug, Clone)]
28
59
  pub struct Lexer<'a> {
29
60
  chars: Peekable<Chars<'a>>,
@@ -41,10 +72,20 @@ pub struct Lexer<'a> {
41
72
  jsx_depth: i32,
42
73
  jsx_child_brace_depth: i32,
43
74
  jsx_in_closing_tag: bool,
75
+ ignore_indent: bool,
76
+ /// Kind of the last token returned by `next_token` (virtual `Indent`/`Dedent` tokens included), for `<` disambiguation: after a *value* position
77
+ /// (ident, `)`, `]`, literal) a `<` is a comparison / generic-args opener (`Lt`), never a JSX tag.
78
+ last_significant_kind: Option<TokenKind>,
44
79
  }
45
80
 
46
81
  impl<'a> Lexer<'a> {
82
+ /// Create a lexer, reading options from the environment (e.g. `TISH_IGNORE_INDENT`).
47
83
  pub fn new(source: &'a str) -> Self {
84
+ Self::with_options(source, LexerOptions::from_env())
85
+ }
86
+
87
+ /// Create a lexer with explicit options, bypassing the environment.
88
+ pub fn with_options(source: &'a str, options: LexerOptions) -> Self {
48
89
  Self {
49
90
  chars: source.chars().peekable(),
50
91
  pos: 0,
@@ -61,9 +102,29 @@ impl<'a> Lexer<'a> {
61
102
  jsx_depth: 0,
62
103
  jsx_child_brace_depth: 0,
63
104
  jsx_in_closing_tag: false,
105
+ ignore_indent: options.ignore_indent,
106
+ last_significant_kind: None,
64
107
  }
65
108
  }
66
109
 
110
+ /// True when the previous significant token ends a value, so a following `<` is `Lt`
111
+ /// (comparison / generic args), not the start of a JSX element.
112
+ fn last_is_value(&self) -> bool {
113
+ matches!(
114
+ self.last_significant_kind,
115
+ Some(
116
+ TokenKind::Ident
117
+ | TokenKind::RParen
118
+ | TokenKind::RBracket
119
+ | TokenKind::Number
120
+ | TokenKind::String
121
+ | TokenKind::True
122
+ | TokenKind::False
123
+ | TokenKind::Null
124
+ )
125
+ )
126
+ }
127
+
67
128
  #[inline]
68
129
  fn jsx_sync_in_opening_tag(&mut self) {
69
130
  self.jsx_in_opening_tag = self.jsx_stack.last().map(|e| e.in_opener).unwrap_or(false);
@@ -172,12 +233,59 @@ impl<'a> Lexer<'a> {
172
233
  }
173
234
 
174
235
  fn read_number(&mut self, first: char) -> String {
236
+ // Radix-prefixed integer literals: `0x`/`0X` (hex), `0o`/`0O` (octal), `0b`/`0B`
237
+ // (binary), with optional `_` digit separators. JS semantics — a non-negative
238
+ // integer. Convert to a decimal string here so every downstream consumer (the
239
+ // parser's `parse::<f64>()`, the formatter, …) sees a plain number, unchanged.
240
+ if first == '0' {
241
+ if let Some(radix) = self.radix_prefix() {
242
+ self.advance(); // consume the x/o/b marker
243
+ let mut digits = String::with_capacity(16);
244
+ while let Some(c) = self.peek() {
245
+ if c == '_' {
246
+ self.advance(); // digit separator
247
+ } else if c.is_digit(radix) {
248
+ digits.push(c);
249
+ self.advance();
250
+ } else {
251
+ break;
252
+ }
253
+ }
254
+ return Self::radix_digits_to_decimal(&digits, radix);
255
+ }
256
+ }
257
+
175
258
  let mut s = String::with_capacity(16);
176
259
  s.push(first);
177
260
  while let Some(c) = self.peek() {
178
261
  if c.is_ascii_digit() || c == '.' {
179
262
  s.push(c);
180
263
  self.advance();
264
+ } else if c == '_' && Self::ends_with_digit(&s) && self.underscore_between_digits() {
265
+ self.advance(); // numeric separator (`15_000`) — drop it, JS-style
266
+ } else if (c == 'e' || c == 'E') && self.exponent_follows() {
267
+ // Scientific notation: `e`/`E` then optional sign then digits.
268
+ // Guarded by lookahead so `3em` lexes as `3` + `em`, not a bad number.
269
+ s.push(c);
270
+ self.advance(); // consume e/E
271
+ if matches!(self.peek(), Some('+') | Some('-')) {
272
+ s.push(self.peek().unwrap());
273
+ self.advance();
274
+ }
275
+ while let Some(d) = self.peek() {
276
+ if d.is_ascii_digit() {
277
+ s.push(d);
278
+ self.advance();
279
+ } else if d == '_'
280
+ && Self::ends_with_digit(&s)
281
+ && self.underscore_between_digits()
282
+ {
283
+ self.advance(); // numeric separator inside the exponent (`1e1_0`)
284
+ } else {
285
+ break;
286
+ }
287
+ }
288
+ break; // the exponent terminates the numeric literal
181
289
  } else {
182
290
  break;
183
291
  }
@@ -185,6 +293,68 @@ impl<'a> Lexer<'a> {
185
293
  s
186
294
  }
187
295
 
296
+ /// True iff the literal accumulated so far ends in a decimal digit — used to reject a
297
+ /// `_` separator that isn't preceded by a digit (e.g. leading `_5` or post-`.` `1._5`).
298
+ fn ends_with_digit(s: &str) -> bool {
299
+ s.chars().last().is_some_and(|c| c.is_ascii_digit())
300
+ }
301
+
302
+ /// With `peek()` positioned at a `_`, look ahead (without consuming) to confirm the
303
+ /// next character is a decimal digit, i.e. the `_` sits between two digits and is a
304
+ /// valid JS numeric separator (rejects trailing `5_` and doubled `1__0`).
305
+ fn underscore_between_digits(&self) -> bool {
306
+ let mut la = self.chars.clone();
307
+ la.next(); // skip the `_` currently under peek()
308
+ la.next().is_some_and(|c| c.is_ascii_digit())
309
+ }
310
+
311
+ /// With the current peek positioned at an `e`/`E`, decide (without consuming)
312
+ /// whether a valid exponent `[+-]?\d` follows. `Chars` is `Clone`, so we look
313
+ /// ahead on a throwaway clone of the iterator.
314
+ fn exponent_follows(&self) -> bool {
315
+ let mut la = self.chars.clone();
316
+ la.next(); // skip the e/E currently under peek()
317
+ match la.next() {
318
+ Some(d) if d.is_ascii_digit() => true,
319
+ Some('+') | Some('-') => la.next().is_some_and(|d| d.is_ascii_digit()),
320
+ _ => false,
321
+ }
322
+ }
323
+
324
+ /// With a leading `0` already consumed and `peek()` at the radix marker, return the
325
+ /// radix (16 / 8 / 2) iff this is a valid `0x` / `0o` / `0b` prefix followed by at
326
+ /// least one valid digit. Returns `None` otherwise, so `0`, `0.5`, `0e3`, `0xZ`, and
327
+ /// `0x_1` all stay on the decimal path. Looks ahead on a clone of the `Chars` iterator
328
+ /// (`Chars: Clone`) without consuming.
329
+ fn radix_prefix(&self) -> Option<u32> {
330
+ let mut la = self.chars.clone();
331
+ let radix = match la.next()? {
332
+ 'x' | 'X' => 16,
333
+ 'o' | 'O' => 8,
334
+ 'b' | 'B' => 2,
335
+ _ => return None,
336
+ };
337
+ match la.next() {
338
+ Some(c) if c.is_digit(radix) => Some(radix),
339
+ _ => None,
340
+ }
341
+ }
342
+
343
+ /// Convert the (separator-free) digits of a radix-prefixed literal to the decimal
344
+ /// string the `Number` token carries. `u128` is exact for ≤128-bit literals — far
345
+ /// beyond any real input; the `f64` fallback only triggers for absurdly long ones and
346
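+ /// loses precision past 2^53, much as JS's conversion to a double would (the per-digit `f64` accumulation rounds at each step, so the last bit may differ from a single correctly rounded conversion).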
347
+ fn radix_digits_to_decimal(digits: &str, radix: u32) -> String {
348
+ if let Ok(v) = u128::from_str_radix(digits, radix) {
349
+ return v.to_string();
350
+ }
351
+ let mut v = 0.0_f64;
352
+ for c in digits.chars() {
353
+ v = v * radix as f64 + c.to_digit(radix).unwrap_or(0) as f64;
354
+ }
355
+ format!("{v}")
356
+ }
357
+
188
358
  /// Handle escape sequence, returning the unescaped character.
189
359
  /// `extra_allowed` contains additional characters that can be escaped in this context.
190
360
  fn handle_escape(&mut self, extra_allowed: &[char]) -> Result<char, String> {
@@ -193,12 +363,71 @@ impl<'a> Lexer<'a> {
193
363
  'n' => Ok('\n'),
194
364
  'r' => Ok('\r'),
195
365
  't' => Ok('\t'),
366
+ 'b' => Ok('\u{0008}'),
367
+ 'f' => Ok('\u{000C}'),
368
+ 'v' => Ok('\u{000B}'),
369
+ '0' => Ok('\0'),
196
370
  '\\' => Ok('\\'),
371
+ // `\xNN` — exactly two hex digits → code point 0x00..=0xFF (JS/TS).
372
+ 'x' => {
373
+ let cp = self.read_hex_digits(2)?;
374
+ char::from_u32(cp).ok_or_else(|| format!("Invalid \\x escape: \\x{:02X}", cp))
375
+ }
376
+ // `\uNNNN` (exactly four hex digits) or `\u{N..}` (1-6 hex digits, ES6).
377
+ 'u' => {
378
+ let cp = if self.peek() == Some('{') {
379
+ self.advance(); // consume '{'
380
+ let cp = self.read_hex_until_brace()?;
381
+ match self.advance() {
382
+ Some('}') => cp,
383
+ _ => return Err("Unterminated \\u{...} escape (expected '}')".to_string()),
384
+ }
385
+ } else {
386
+ self.read_hex_digits(4)?
387
+ };
388
+ // Lone surrogates (0xD800..=0xDFFF) are valid UTF-16 code units in JS but
389
+ // not Unicode scalar values; tish strings are UTF-8, so reject them.
390
+ char::from_u32(cp)
391
+ .ok_or_else(|| format!("Invalid \\u escape: code point U+{:04X}", cp))
392
+ }
197
393
  c if extra_allowed.contains(&c) => Ok(c),
198
394
  _ => Err(format!("Unknown escape: \\{}", escaped)),
199
395
  }
200
396
  }
201
397
 
398
+ /// Read exactly `n` hex digits and return the parsed code point.
399
+ fn read_hex_digits(&mut self, n: usize) -> Result<u32, String> {
400
+ let mut value: u32 = 0;
401
+ for _ in 0..n {
402
+ let c = self.advance().ok_or("Unterminated hex escape")?;
403
+ let digit = c
404
+ .to_digit(16)
405
+ .ok_or_else(|| format!("Invalid hex digit in escape: '{}'", c))?;
406
+ value = value * 16 + digit;
407
+ }
408
+ Ok(value)
409
+ }
410
+
411
+ /// Read 1-6 hex digits for a `\u{...}` escape (stops at the first non-hex character; the caller checks for the closing `}`); validates the count
412
+ /// and that the value is within the Unicode range.
413
+ fn read_hex_until_brace(&mut self) -> Result<u32, String> {
414
+ let mut value: u32 = 0;
415
+ let mut count = 0;
416
+ while let Some(c) = self.peek() {
417
+ let Some(digit) = c.to_digit(16) else { break };
418
+ self.advance();
419
+ value = value * 16 + digit;
420
+ count += 1;
421
+ if count > 6 || value > 0x10_FFFF {
422
+ return Err("Invalid \\u{...} escape: code point out of range".to_string());
423
+ }
424
+ }
425
+ if count == 0 {
426
+ return Err("Empty \\u{} escape (expected hex digits)".to_string());
427
+ }
428
+ Ok(value)
429
+ }
430
+
202
431
  fn read_string(&mut self, quote: char) -> Result<String, String> {
203
432
  let mut s = String::with_capacity(32);
204
433
  let extra = if quote == '"' {
@@ -307,6 +536,14 @@ impl<'a> Lexer<'a> {
307
536
  }
308
537
 
309
538
  pub fn next_token(&mut self) -> Result<Option<Token>, String> {
539
+ let tok = self.next_token_inner()?;
540
+ if let Some(t) = &tok {
541
+ self.last_significant_kind = Some(t.kind);
542
+ }
543
+ Ok(tok)
544
+ }
545
+
546
+ fn next_token_inner(&mut self) -> Result<Option<Token>, String> {
310
547
  if let Some(tok) = self.pending_dedents.pop_front() {
311
548
  return Ok(Some(tok));
312
549
  }
@@ -323,8 +560,13 @@ impl<'a> Lexer<'a> {
323
560
 
324
561
  if self.at_line_start {
325
562
  self.at_line_start = false;
563
+ // Always consume the leading whitespace; only *emit* Indent/Dedent when indentation
564
+ // is significant. With `ignore_indent`, the level is discarded so the indent stack
565
+ // stays at `[0]` and no virtual tokens are produced (brace-only blocks).
326
566
  let level = self.read_indent_level();
327
- if level > 0 || self.peek().map(|c| c != '\n').unwrap_or(false) {
567
+ if !self.ignore_indent
568
+ && (level > 0 || self.peek().map(|c| c != '\n').unwrap_or(false))
569
+ {
328
570
  if let Some(tok) = self.emit_indent_or_dedent(level) {
329
571
  return Ok(Some(tok));
330
572
  }
@@ -458,12 +700,15 @@ impl<'a> Lexer<'a> {
458
700
  } else if self.peek() == Some('/') {
459
701
  self.jsx_in_closing_tag = true;
460
702
  TokenKind::Lt
461
- } else if self.peek() == Some('>')
703
+ } else if (self.peek() == Some('>')
462
704
  || self
463
705
  .peek()
464
706
  .map(|c| c.is_ascii_alphabetic() || c == '_')
465
- .unwrap_or(false)
707
+ .unwrap_or(false))
708
+ && !self.last_is_value()
466
709
  {
710
+ // JSX open tag — only in expression position. After a value (`ident<`, `)<`,
711
+ // `]<`, literal) this is `Lt`: a comparison or generic-args opener.
467
712
  self.jsx_depth += 1;
468
713
  self.jsx_stack.push(JsxEl {
469
714
  in_opener: true,
@@ -481,16 +726,34 @@ impl<'a> Lexer<'a> {
481
726
  TokenKind::Ge
482
727
  } else if self.peek() == Some('>') {
483
728
  self.advance();
484
- TokenKind::Shr
729
+ if self.peek() == Some('>') {
730
+ self.advance();
731
+ TokenKind::UShr // `>>>`
732
+ } else {
733
+ TokenKind::Shr
734
+ }
485
735
  } else {
486
- if self.jsx_in_closing_tag {
487
- self.jsx_depth = (self.jsx_depth - 1).max(0);
488
- self.jsx_stack.pop();
489
- self.jsx_sync_in_opening_tag();
490
- } else if self.jsx_in_opening_tag && self.jsx_saw_slash_before_gt {
736
+ if self.jsx_in_closing_tag
737
+ || (self.jsx_in_opening_tag && self.jsx_saw_slash_before_gt)
738
+ {
491
739
  self.jsx_depth = (self.jsx_depth - 1).max(0);
492
740
  self.jsx_stack.pop();
493
741
  self.jsx_sync_in_opening_tag();
742
+ // A child element just closed (`</span>` or `<br/>`). If a parent element
743
+ // is still open and past its opening tag, we're back in that parent's
744
+ // children region, so the following run is JSX text — re-enter text mode.
745
+ // Without this, trailing text after a child element ("… as JSON") is lexed
746
+ // as code and a bare keyword (`as`, `in`, `if`, …) breaks the parse (#108).
747
+ //
748
+ // Guard on `jsx_child_brace_depth == 0`: if the closed element lived inside a
749
+ // `{…}` expression container (e.g. `<div>{items.map(x => <span/>)}</div>`),
750
+ // we're still in that expression, not the parent's text children — entering
751
+ // text mode there would swallow the following `)`/`,` as JsxText.
752
+ if self.jsx_child_brace_depth == 0
753
+ && self.jsx_stack.last().map(|e| !e.in_opener).unwrap_or(false)
754
+ {
755
+ self.jsx_after_gt = true;
756
+ }
494
757
  } else if let Some(top) = self.jsx_stack.last_mut() {
495
758
  if top.in_opener && top.attr_value_braces > 0 {
496
759
  // `>` is a comparison (or shift) token inside `{ ... }`, not end of opening tag.
@@ -698,6 +961,80 @@ mod tests {
698
961
  assert_eq!(string_tok.literal.as_deref(), Some("H"));
699
962
  }
700
963
 
964
+ #[test]
965
+ fn radix_integer_literals() {
966
+ // Hex / octal / binary prefixes (any case) convert to a decimal `Number` literal,
967
+ // honoring `_` digit separators.
968
+ let cases = [
969
+ ("0xff", "255"),
970
+ ("0xFF", "255"),
971
+ ("0X1a", "26"),
972
+ ("0o17", "15"),
973
+ ("0O7", "7"),
974
+ ("0b1010", "10"),
975
+ ("0B0", "0"),
976
+ ("0xdeadbeef", "3735928559"),
977
+ ("0xFF_FF", "65535"),
978
+ ("0b1111_0000", "240"),
979
+ ];
980
+ for (src, expected) in cases {
981
+ let tokens = Lexer::new(src).collect::<Result<Vec<_>, _>>().unwrap();
982
+ let num = tokens
983
+ .iter()
984
+ .find(|t| t.kind == TokenKind::Number)
985
+ .unwrap_or_else(|| panic!("no Number token for {src}"));
986
+ assert_eq!(num.literal.as_deref(), Some(expected), "for {src}");
987
+ }
988
+ }
989
+
990
+ #[test]
991
+ fn decimal_numeric_separators() {
992
+ // `_` between digits is a JS numeric separator: dropped from the literal value.
993
+ // Issue #57.
994
+ let only_number = |src: &str| -> String {
995
+ let tokens = Lexer::new(src).collect::<Result<Vec<_>, _>>().unwrap();
996
+ let nums: Vec<_> = tokens
997
+ .iter()
998
+ .filter(|t| t.kind == TokenKind::Number)
999
+ .collect();
1000
+ assert_eq!(nums.len(), 1, "expected exactly one Number token for {src}");
1001
+ // No stray identifier should be produced from the separated digits.
1002
+ assert!(
1003
+ !tokens.iter().any(|t| t.kind == TokenKind::Ident),
1004
+ "unexpected Ident token while lexing {src}"
1005
+ );
1006
+ nums[0].literal.as_deref().unwrap().to_string()
1007
+ };
1008
+ assert_eq!(only_number("15_000"), "15000");
1009
+ assert_eq!(only_number("1_000_000"), "1000000");
1010
+ assert_eq!(only_number("3.14_159"), "3.14159");
1011
+ assert_eq!(only_number("1e1_0"), "1e10");
1012
+ }
1013
+
1014
+ #[test]
1015
+ fn non_radix_zero_prefixed_stays_decimal() {
1016
+ // A leading zero is NOT legacy octal; an invalid prefix is not a radix literal.
1017
+ let num_literal = |src: &str| -> String {
1018
+ Lexer::new(src)
1019
+ .collect::<Result<Vec<_>, _>>()
1020
+ .unwrap()
1021
+ .into_iter()
1022
+ .find(|t| t.kind == TokenKind::Number)
1023
+ .unwrap()
1024
+ .literal
1025
+ .as_deref()
1026
+ .unwrap()
1027
+ .to_string()
1028
+ };
1029
+ assert_eq!(num_literal("07"), "07"); // decimal, not octal
1030
+ assert_eq!(num_literal("0"), "0");
1031
+ // `0xZ` → the Number token is just `0`, then `xZ` lexes as an identifier.
1032
+ let toks = Lexer::new("0xZ").collect::<Result<Vec<_>, _>>().unwrap();
1033
+ assert_eq!(toks[0].kind, TokenKind::Number);
1034
+ assert_eq!(toks[0].literal.as_deref(), Some("0"));
1035
+ assert_eq!(toks[1].kind, TokenKind::Ident);
1036
+ }
1037
+
701
1038
  #[test]
702
1039
  fn line_comment_does_not_emit_spurious_indent_before_next_line() {
703
1040
  let with_comment = "fn f() {\n return {\n a: 1, // c\n b: 2\n }\n}\n";
@@ -713,4 +1050,55 @@ mod tests {
713
1050
  .collect::<Vec<_>>()
714
1051
  );
715
1052
  }
1053
+
1054
+ /// A leading-indented line is what actually drives the lexer to emit virtual tokens:
1055
+ /// ` a()` opens an indent level (Indent) and the dedented `b()` closes it (Dedent).
1056
+ const INDENTED_SRC: &str = " a()\nb()\n";
1057
+
1058
+ #[test]
1059
+ fn default_options_still_emit_indent_and_dedent() {
1060
+ let tokens: Vec<_> = Lexer::with_options(INDENTED_SRC, LexerOptions::default())
1061
+ .collect::<Result<Vec<_>, _>>()
1062
+ .unwrap();
1063
+ assert!(
1064
+ tokens.iter().any(|t| t.kind == TokenKind::Indent),
1065
+ "expected an Indent token in the default (indentation-significant) mode"
1066
+ );
1067
+ assert!(
1068
+ tokens.iter().any(|t| t.kind == TokenKind::Dedent),
1069
+ "expected a Dedent token in the default (indentation-significant) mode"
1070
+ );
1071
+ }
1072
+
1073
+ #[test]
1074
+ fn ignore_indent_emits_no_virtual_tokens() {
1075
+ let tokens: Vec<_> =
1076
+ Lexer::with_options(INDENTED_SRC, LexerOptions { ignore_indent: true })
1077
+ .collect::<Result<Vec<_>, _>>()
1078
+ .unwrap();
1079
+ assert!(
1080
+ !tokens
1081
+ .iter()
1082
+ .any(|t| matches!(t.kind, TokenKind::Indent | TokenKind::Dedent)),
1083
+ "expected no Indent/Dedent with ignore_indent, got: {:?}",
1084
+ tokens.iter().map(|t| t.kind).collect::<Vec<_>>()
1085
+ );
1086
+ }
1087
+
1088
+ #[test]
1089
+ fn env_truthy_enables_only_on_recognized_values() {
1090
+ use std::ffi::OsString;
1091
+ let v = |s: &str| env_truthy(Some(OsString::from(s)));
1092
+ // Recognized truthy values turn the flag on.
1093
+ assert!(v("1"));
1094
+ assert!(v("true"));
1095
+ assert!(v("yes"));
1096
+ // Everything else leaves it off, including unset, empty, and near-misses.
1097
+ assert!(!env_truthy(None));
1098
+ assert!(!v(""));
1099
+ assert!(!v("0"));
1100
+ assert!(!v("false"));
1101
+ assert!(!v("no"));
1102
+ assert!(!v("TRUE")); // exact match only — case-sensitive by design
1103
+ }
716
1104
  }
@@ -50,6 +50,7 @@ pub enum TokenKind {
50
50
  Do,
51
51
  TypeOf,
52
52
  Void,
53
+ Delete,
53
54
  Of,
54
55
  In,
55
56
  Async,
@@ -59,6 +60,8 @@ pub enum TokenKind {
59
60
  Export,
60
61
  Type,
61
62
  Declare,
63
+ Interface,
64
+ As,
62
65
 
63
66
  // Punctuation
64
67
  LParen,
@@ -108,6 +111,7 @@ pub enum TokenKind {
108
111
  BitNot,
109
112
  Shl,
110
113
  Shr,
114
+ UShr,
111
115
  OptionalChain,
112
116
  NullishCoalesce,
113
117
  Question,
@@ -148,6 +152,7 @@ impl TokenKind {
148
152
  "do" => TokenKind::Do,
149
153
  "typeof" => TokenKind::TypeOf,
150
154
  "void" => TokenKind::Void,
155
+ "delete" => TokenKind::Delete,
151
156
  "of" => TokenKind::Of,
152
157
  "in" => TokenKind::In,
153
158
  "async" => TokenKind::Async,
@@ -157,6 +162,8 @@ impl TokenKind {
157
162
  "export" => TokenKind::Export,
158
163
  "type" => TokenKind::Type,
159
164
  "declare" => TokenKind::Declare,
165
+ "interface" => TokenKind::Interface,
166
+ "as" => TokenKind::As,
160
167
  _ => TokenKind::Ident,
161
168
  }
162
169
  }
@@ -109,17 +109,9 @@ fn lint_stmt(s: &Statement, out: &mut Vec<LintDiagnostic>) {
109
109
  }
110
110
  }
111
111
  Statement::ExprStmt { expr, .. } => lint_expr(expr, out),
112
- Statement::VarDecl { init, .. } => {
113
- if let Some(e) = init {
114
- lint_expr(e, out);
115
- }
116
- }
112
+ Statement::VarDecl { init: Some(e), .. } => lint_expr(e, out),
117
113
  Statement::VarDeclDestructure { init, .. } => lint_expr(init, out),
118
- Statement::Return { value, .. } => {
119
- if let Some(e) = value {
120
- lint_expr(e, out);
121
- }
122
- }
114
+ Statement::Return { value: Some(e), .. } => lint_expr(e, out),
123
115
  Statement::Throw { value, .. } => lint_expr(value, out),
124
116
  _ => {}
125
117
  }
@@ -304,6 +304,7 @@ pub struct NativeMemberDefinition {
304
304
 
305
305
  /// Static member chain `root.a.b` where `root` is an import: resolve the leaf to a Rust `pub fn`,
306
306
  /// else to `lsp-pragmas.d.tish` in the native package (e.g. `tish-macos`).
307
+ #[allow(clippy::too_many_arguments)] // LSP request context (program/file/text/roots/cache/position/word)
307
308
  pub fn native_member_definition(
308
309
  program: &Program,
309
310
  file_path: &Path,
@@ -411,6 +412,7 @@ pub fn native_member_definition(
411
412
 
412
413
  /// Static member chain `root.a.b` where `root` is an import: resolve the leaf name to a Rust `pub fn`
413
414
  /// (native / `cargo:`) or a single-level export in a relative `.tish` module.
415
+ #[allow(clippy::too_many_arguments)] // LSP request context (program/file/text/roots/cache/position/word)
414
416
  pub fn definition_for_native_receiver_member(
415
417
  program: &Program,
416
418
  file_path: &Path,