npm - @tishlang/tish-format - Versions diffs - 1.0.13 → 2.0.1 - Mend

@tishlang/tish-format 1.0.13 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (108)

package/Cargo.toml +2 -0
package/bin/tish-format +0 -0
package/crates/js_to_tish/src/transform/expr.rs +1 -0
package/crates/tish/Cargo.toml +10 -2
package/crates/tish/build.rs +21 -0
package/crates/tish/src/cli_help.rs +15 -4
package/crates/tish/src/main.rs +93 -21
package/crates/tish/src/repl_completion.rs +0 -1
package/crates/tish/tests/error_source_location.rs +36 -0
package/crates/tish/tests/fixtures/runtime_error_location.tish +5 -0
package/crates/tish/tests/fixtures/trycatch_runtime_errors.tish +15 -0
package/crates/tish/tests/fixtures/tty_capability.tish +9 -0
package/crates/tish/tests/integration_test.rs +402 -91
package/crates/tish/tests/trycatch_runtime_errors.rs +45 -0
package/crates/tish/tests/tty_capability.rs +43 -0
package/crates/tish_ast/src/ast.rs +37 -8
package/crates/tish_builtins/Cargo.toml +2 -0
package/crates/tish_builtins/src/array.rs +375 -13
package/crates/tish_builtins/src/collections.rs +481 -0
package/crates/tish_builtins/src/construct.rs +59 -19
package/crates/tish_builtins/src/date.rs +538 -0
package/crates/tish_builtins/src/globals.rs +86 -6
package/crates/tish_builtins/src/iterator.rs +129 -0
package/crates/tish_builtins/src/lib.rs +5 -0
package/crates/tish_builtins/src/number.rs +96 -0
package/crates/tish_builtins/src/object.rs +2 -2
package/crates/tish_builtins/src/string.rs +19 -20
package/crates/tish_builtins/src/symbol.rs +1 -1
package/crates/tish_builtins/src/typedarrays.rs +298 -0
package/crates/tish_bytecode/src/chunk.rs +69 -1
package/crates/tish_bytecode/src/compiler.rs +933 -89
package/crates/tish_bytecode/src/encoding.rs +2 -0
package/crates/tish_bytecode/src/lib.rs +2 -1
package/crates/tish_bytecode/src/opcode.rs +47 -4
package/crates/tish_bytecode/src/serialize.rs +31 -1
package/crates/tish_compile/Cargo.toml +1 -0
package/crates/tish_compile/src/check.rs +774 -0
package/crates/tish_compile/src/codegen.rs +2334 -349
package/crates/tish_compile/src/infer.rs +1395 -6
package/crates/tish_compile/src/lib.rs +50 -8
package/crates/tish_compile/src/resolve.rs +584 -21
package/crates/tish_compile/src/types.rs +106 -2
package/crates/tish_compile_js/src/codegen.rs +67 -0
package/crates/tish_compile_js/src/tests_jsx.rs +64 -0
package/crates/tish_core/Cargo.toml +7 -1
package/crates/tish_core/src/console_style.rs +11 -1
package/crates/tish_core/src/json.rs +81 -38
package/crates/tish_core/src/lib.rs +3 -0
package/crates/tish_core/src/shape.rs +85 -0
package/crates/tish_core/src/value.rs +679 -25
package/crates/tish_core/src/vmref.rs +13 -8
package/crates/tish_cranelift/src/link.rs +17 -4
package/crates/tish_cranelift_runtime/Cargo.toml +1 -0
package/crates/tish_eval/Cargo.toml +6 -0
package/crates/tish_eval/src/eval.rs +665 -117
package/crates/tish_eval/src/http.rs +4 -1
package/crates/tish_eval/src/natives.rs +165 -13
package/crates/tish_eval/src/value.rs +31 -13
package/crates/tish_eval/src/value_convert.rs +10 -4
package/crates/tish_ffi/Cargo.toml +26 -0
package/crates/tish_ffi/src/lib.rs +518 -0
package/crates/tish_ffi/tests/fixtures/testmod/Cargo.toml +18 -0
package/crates/tish_ffi/tests/fixtures/testmod/src/lib.rs +46 -0
package/crates/tish_ffi/tests/loader.rs +65 -0
package/crates/tish_fmt/Cargo.toml +1 -1
package/crates/tish_fmt/src/lib.rs +61 -5
package/crates/tish_lexer/src/lib.rs +397 -9
package/crates/tish_lexer/src/token.rs +7 -0
package/crates/tish_lint/src/lib.rs +2 -10
package/crates/tish_lsp/src/import_goto.rs +2 -0
package/crates/tish_lsp/src/main.rs +439 -26
package/crates/tish_native/src/build.rs +55 -1
package/crates/tish_opt/src/lib.rs +126 -23
package/crates/tish_parser/src/lib.rs +55 -1
package/crates/tish_parser/src/parser.rs +456 -34
package/crates/tish_pg/src/lib.rs +3 -3
package/crates/tish_resolve/src/lib.rs +99 -59
package/crates/tish_runtime/Cargo.toml +4 -0
package/crates/tish_runtime/src/http.rs +66 -17
package/crates/tish_runtime/src/http_fetch.rs +29 -8
package/crates/tish_runtime/src/http_hyper.rs +25 -2
package/crates/tish_runtime/src/lib.rs +299 -44
package/crates/tish_runtime/src/promise.rs +328 -18
package/crates/tish_runtime/src/timers.rs +13 -7
package/crates/tish_runtime/src/tty.rs +226 -0
package/crates/tish_runtime/src/ws.rs +35 -18
package/crates/tish_runtime/tests/fetch_readable_stream.rs +2 -2
package/crates/tish_ui/src/jsx.rs +10 -0
package/crates/tish_ui/src/runtime/hooks.rs +19 -15
package/crates/tish_ui/src/runtime/mod.rs +15 -12
package/crates/tish_vm/Cargo.toml +14 -1
package/crates/tish_vm/src/jit.rs +1050 -0
package/crates/tish_vm/src/lib.rs +2 -0
package/crates/tish_vm/src/vm.rs +1546 -202
package/crates/tish_vm/tests/concurrent_shared_state.rs +140 -0
package/crates/tish_wasm/src/lib.rs +6 -2
package/crates/tish_wasm_runtime/src/gpu.rs +17 -1
package/crates/tishlang_cargo_bindgen/src/classify.rs +1 -3
package/crates/tishlang_cargo_bindgen/src/lib.rs +2 -2
package/crates/tishlang_cargo_bindgen/src/metadata.rs +1 -1
package/justfile +8 -0
package/package.json +2 -2
package/platform/darwin-arm64/tish-fmt +0 -0
package/platform/darwin-x64/tish-fmt +0 -0
package/platform/linux-arm64/tish-fmt +0 -0
package/platform/linux-x64/tish-fmt +0 -0
package/platform/win32-x64/tish-fmt.exe +0 -0
package/README.md +0 -138

package/crates/tish_fmt/src/lib.rs CHANGED Viewed

@@ -508,6 +508,16 @@ impl Printer {
         self.depth = level;
         match s {
             Statement::Block { statements, span } => self.block(statements, *span, level, true),
+            // Comma-declarators: render each as its own statement line. The caller
+            // (print_seq) emits the trailing newline, so only separate internally.
+            Statement::Multi { statements, .. } => {
+                for (i, st) in statements.iter().enumerate() {
+                    if i > 0 {
+                        self.buf.push('\n');
+                    }
+                    self.stmt(st, level);
+                }
+            }
             Statement::VarDecl {
                 name,
                 mutable,
@@ -1072,6 +1082,33 @@ impl Printer {
                     self.type_ann(x);
                 }
             }
+            TypeAnnotation::Tuple(elems) => {
+                self.buf.push('[');
+                for (i, x) in elems.iter().enumerate() {
+                    if i > 0 {
+                        self.buf.push_str(", ");
+                    }
+                    self.type_ann(x);
+                }
+                self.buf.push(']');
+            }
+            TypeAnnotation::Literal(lit) => match lit {
+                tishlang_ast::TypeLiteral::Str(s) => {
+                    self.buf.push('"');
+                    self.buf.push_str(s.as_ref());
+                    self.buf.push('"');
+                }
+                tishlang_ast::TypeLiteral::Num(n) => self.buf.push_str(&n.to_string()),
+                tishlang_ast::TypeLiteral::Bool(b) => self.buf.push_str(&b.to_string()),
+            },
+            TypeAnnotation::Intersection(parts) => {
+                for (i, x) in parts.iter().enumerate() {
+                    if i > 0 {
+                        self.buf.push_str(" & ");
+                    }
+                    self.type_ann(x);
+                }
+            }
         }
     }
@@ -1118,10 +1155,10 @@ impl Printer {
             }
             Expr::Unary { op, operand, .. } => {
                 match op {
-                    UnaryOp::Not => self.buf.push_str("!"),
-                    UnaryOp::Neg => self.buf.push_str("-"),
-                    UnaryOp::Pos => self.buf.push_str("+"),
-                    UnaryOp::BitNot => self.buf.push_str("~"),
+                    UnaryOp::Not => self.buf.push('!'),
+                    UnaryOp::Neg => self.buf.push('-'),
+                    UnaryOp::Pos => self.buf.push('+'),
+                    UnaryOp::BitNot => self.buf.push('~'),
                     UnaryOp::Void => self.buf.push_str("void "),
                 }
                 self.child(operand, PREC_POSTFIX);
@@ -1202,6 +1239,10 @@ impl Printer {
                 self.buf.push_str("typeof ");
                 self.child(operand, PREC_POSTFIX);
             }
+            Expr::Delete { target, .. } => {
+                self.buf.push_str("delete ");
+                self.child(target, PREC_POSTFIX);
+            }
             Expr::PostfixInc { name, .. } => {
                 self.buf.push_str(name.as_ref());
                 self.buf.push_str("++");
@@ -1460,7 +1501,7 @@ fn binop_prec(op: BinOp) -> u8 {
         BinOp::BitOr => 5,
         BinOp::BitXor => 6,
         BinOp::BitAnd => 7,
-        BinOp::Shl | BinOp::Shr => 8,
+        BinOp::Shl | BinOp::Shr | BinOp::UShr => 8,
         BinOp::Eq | BinOp::Ne | BinOp::StrictEq | BinOp::StrictNe => 9,
         BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge | BinOp::In => 10,
         BinOp::Add | BinOp::Sub => 11,
@@ -1482,6 +1523,7 @@ fn expr_prec(e: &Expr) -> u8 {
         Expr::Binary { op, .. } => binop_prec(*op),
         Expr::Unary { .. }
         | Expr::TypeOf { .. }
+        | Expr::Delete { .. }
         | Expr::Await { .. }
         | Expr::PrefixInc { .. }
         | Expr::PrefixDec { .. } => 14,
@@ -1519,6 +1561,7 @@ fn binop(op: BinOp) -> &'static str {
         BinOp::BitXor => "^",
         BinOp::Shl => "<<",
         BinOp::Shr => ">>",
+        BinOp::UShr => ">>>",
         BinOp::In => "in",
     }
 }
@@ -2098,4 +2141,17 @@ let x = add(1, 2)
         let out = format_source(src).unwrap();
         assert_eq!(out, src, "{out:?}");
     }
+    #[test]
+    fn formats_delete_expression() {
+        // Regression: Expr::Delete (the `delete` operator) must be handled by the formatter —
+        // a non-exhaustive `match` here broke the `tish-format` build once the delete feature landed.
+        let src = "fn f(o, k) {\ndelete o.a\ndelete o[\"b\"]\nlet x = delete o[k]\nreturn x\n}\n"; // statement position (dot + bracket access) and initializer position
+        let out = format_source(src).unwrap();
+        assert!(out.contains("delete o.a"), "{out}");
+        assert!(out.contains("delete o[\"b\"]"), "{out}");
+        assert!(out.contains("delete o[k]"), "{out}");
+        tishlang_parser::parse(&out).unwrap(); // the formatted output must itself parse
+        assert_eq!(format_source(&out).unwrap(), out, "not idempotent:\n{out}"); // formatting the output again must change nothing
+    }
 }

package/crates/tish_lexer/src/lib.rs CHANGED Viewed

@@ -24,6 +24,37 @@ struct JsxEl {
     attr_value_braces: i32,
 }
+/// Lexer configuration, passed to `Lexer::with_options`. The derived `Default` leaves every option off.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct LexerOptions {
+    /// When true, suppress the virtual `Indent`/`Dedent` tokens so blocks are delimited
+    /// **only** by braces. Indentation is treated as ordinary whitespace, so off-side
+    /// (brace-less) blocks no longer form. Useful for debugging how nested blocks
+    /// transpile — see the `TISH_IGNORE_INDENT` environment variable for a global toggle.
+    pub ignore_indent: bool,
+}
+impl LexerOptions {
+    /// Build options from the environment. `TISH_IGNORE_INDENT=1` (or `true`/`yes`) sets
+    /// `ignore_indent`, so every parse path (run/build/dump-ast/fmt/lint/lsp) honors it
+    /// without threading a flag through the whole pipeline. Read on every call; nothing is cached here.
+    pub fn from_env() -> Self {
+        Self {
+            ignore_indent: env_truthy(std::env::var_os("TISH_IGNORE_INDENT")), // `var_os` yields `None` when the variable is unset
+        }
+    }
+}
+/// Interpret an environment-variable value as a boolean flag: `1`, `true`, or `yes`
+/// (exact, case-sensitive) enable it; anything else — including unset — leaves it off.
+/// Split out from the `std::env` read so the rule is unit-testable without mutating
+/// process-global state (which `Lexer::new` reads, so env-mutating tests would race).
+fn env_truthy(value: Option<std::ffi::OsString>) -> bool {
+    value
+        .map(|v| v == "1" || v == "true" || v == "yes") // whole-value comparison: no trimming, no case folding
+        .unwrap_or(false) // `None` (variable unset) => flag off
+}
 #[derive(Debug, Clone)]
 pub struct Lexer<'a> {
     chars: Peekable<Chars<'a>>,
@@ -41,10 +72,20 @@ pub struct Lexer<'a> {
     jsx_depth: i32,
     jsx_child_brace_depth: i32,
     jsx_in_closing_tag: bool,
+    ignore_indent: bool,
+    /// Kind of the last emitted significant token, for `<` disambiguation: after a *value* position
+    /// (ident, `)`, `]`, literal) a `<` is a comparison / generic-args opener (`Lt`), never a JSX tag.
+    last_significant_kind: Option<TokenKind>,
 }
 impl<'a> Lexer<'a> {
+    /// Create a lexer, reading options from the environment (e.g. `TISH_IGNORE_INDENT`) on every call.
     pub fn new(source: &'a str) -> Self {
+        Self::with_options(source, LexerOptions::from_env()) // use `with_options` directly for environment-independent behavior
+    }
+    /// Create a lexer with explicit options, bypassing the environment.
+    pub fn with_options(source: &'a str, options: LexerOptions) -> Self {
         Self {
             chars: source.chars().peekable(),
             pos: 0,
@@ -61,9 +102,29 @@ impl<'a> Lexer<'a> {
             jsx_depth: 0,
             jsx_child_brace_depth: 0,
             jsx_in_closing_tag: false,
+            ignore_indent: options.ignore_indent,
+            last_significant_kind: None,
         }
     }
+    /// True when the previous significant token ends a value, so a following `<` is `Lt`
+    /// (comparison / generic args), not the start of a JSX element.
+    fn last_is_value(&self) -> bool {
+        matches!(
+            self.last_significant_kind, // `None` (no token emitted yet) matches nothing, so the result is false
+            Some(
+                TokenKind::Ident
+                    | TokenKind::RParen
+                    | TokenKind::RBracket
+                    | TokenKind::Number
+                    | TokenKind::String
+                    | TokenKind::True
+                    | TokenKind::False
+                    | TokenKind::Null // every other kind (keywords, operators, `RBrace`, ...) is treated as a non-value here
+            )
+        )
+    }
     #[inline]
     fn jsx_sync_in_opening_tag(&mut self) {
         self.jsx_in_opening_tag = self.jsx_stack.last().map(|e| e.in_opener).unwrap_or(false);
@@ -172,12 +233,59 @@ impl<'a> Lexer<'a> {
     }
     fn read_number(&mut self, first: char) -> String {
+        // Radix-prefixed integer literals: `0x`/`0X` (hex), `0o`/`0O` (octal), `0b`/`0B`
+        // (binary), with optional `_` digit separators. JS semantics — a non-negative
+        // integer. Convert to a decimal string here so every downstream consumer (the
+        // parser's `parse::<f64>()`, the formatter, …) sees a plain number, unchanged.
+        if first == '0' {
+            if let Some(radix) = self.radix_prefix() {
+                self.advance(); // consume the x/o/b marker
+                let mut digits = String::with_capacity(16);
+                while let Some(c) = self.peek() {
+                    if c == '_' {
+                        self.advance(); // digit separator
+                    } else if c.is_digit(radix) {
+                        digits.push(c);
+                        self.advance();
+                    } else {
+                        break;
+                    }
+                }
+                return Self::radix_digits_to_decimal(&digits, radix);
+            }
+        }
         let mut s = String::with_capacity(16);
         s.push(first);
         while let Some(c) = self.peek() {
             if c.is_ascii_digit() || c == '.' {
                 s.push(c);
                 self.advance();
+            } else if c == '_' && Self::ends_with_digit(&s) && self.underscore_between_digits() {
+                self.advance(); // numeric separator (`15_000`) — drop it, JS-style
+            } else if (c == 'e' || c == 'E') && self.exponent_follows() {
+                // Scientific notation: `e`/`E` then optional sign then digits.
+                // Guarded by lookahead so `3em` lexes as `3` + `em`, not a bad number.
+                s.push(c);
+                self.advance(); // consume e/E
+                if matches!(self.peek(), Some('+') | Some('-')) {
+                    s.push(self.peek().unwrap());
+                    self.advance();
+                }
+                while let Some(d) = self.peek() {
+                    if d.is_ascii_digit() {
+                        s.push(d);
+                        self.advance();
+                    } else if d == '_'
+                        && Self::ends_with_digit(&s)
+                        && self.underscore_between_digits()
+                    {
+                        self.advance(); // numeric separator inside the exponent (`1e1_0`)
+                    } else {
+                        break;
+                    }
+                }
+                break; // the exponent terminates the numeric literal
             } else {
                 break;
             }
@@ -185,6 +293,68 @@ impl<'a> Lexer<'a> {
         s
     }
+    /// True iff the literal accumulated so far ends in a decimal digit — used to reject a
+    /// `_` separator that isn't preceded by a digit (e.g. leading `_5` or post-`.` `1._5`).
+    fn ends_with_digit(s: &str) -> bool {
+        s.chars().last().is_some_and(|c| c.is_ascii_digit()) // empty `s` => `last()` is `None` => false
+    }
+    /// With `peek()` positioned at a `_`, look ahead (without consuming) to confirm the
+    /// next character is a decimal digit. Only the *following* side is checked here (rejecting
+    /// trailing `5_` and doubled `1__0`); callers pair it with `ends_with_digit` for the preceding side.
+    fn underscore_between_digits(&self) -> bool {
+        let mut la = self.chars.clone();
+        la.next(); // skip the `_` currently under peek()
+        la.next().is_some_and(|c| c.is_ascii_digit())
+    }
+    /// With the current peek positioned at an `e`/`E`, decide (without consuming)
+    /// whether a valid exponent `[+-]?\d` follows. `Chars` is `Clone`, so we look
+    /// ahead on a throwaway clone of the iterator.
+    fn exponent_follows(&self) -> bool {
+        let mut la = self.chars.clone();
+        la.next(); // skip the e/E currently under peek()
+        match la.next() {
+            Some(d) if d.is_ascii_digit() => true, // `e5`
+            Some('+') | Some('-') => la.next().is_some_and(|d| d.is_ascii_digit()), // `e+5` / `e-5`; a bare sign is not enough
+            _ => false, // end of input or any other character, e.g. the `m` in `3em`
+        }
+    }
+    /// With a leading `0` already consumed and `peek()` at the radix marker, return the
+    /// radix (16 / 8 / 2) iff this is a valid `0x` / `0o` / `0b` prefix followed by at
+    /// least one valid digit. Returns `None` otherwise, so `0`, `0.5`, `0e3`, `0xZ`, and
+    /// `0x_1` all stay on the decimal path. Looks ahead on a clone of the `Chars` iterator
+    /// (`Chars: Clone`) without consuming.
+    fn radix_prefix(&self) -> Option<u32> {
+        let mut la = self.chars.clone();
+        let radix = match la.next()? { // `?`: end of input right after the `0` => `None`
+            'x' | 'X' => 16,
+            'o' | 'O' => 8,
+            'b' | 'B' => 2,
+            _ => return None,
+        };
+        match la.next() {
+            Some(c) if c.is_digit(radix) => Some(radix), // validity is radix-specific: `0b2` and `0o8` fall through to `None`
+            _ => None,
+        }
+    }
+    /// Convert the (separator-free) digits of a radix-prefixed literal to the decimal
+    /// string the `Number` token carries. `u128` is exact for ≤128-bit literals — far
+    /// beyond any real input; the `f64` fallback only triggers for absurdly long ones and
+    /// loses precision past 2^53, exactly as JS's conversion to a double would.
+    fn radix_digits_to_decimal(digits: &str, radix: u32) -> String {
+        if let Ok(v) = u128::from_str_radix(digits, radix) { // with digits pre-validated by the caller, `Err` means overflow
+            return v.to_string();
+        }
+        let mut v = 0.0_f64;
+        for c in digits.chars() {
+            v = v * radix as f64 + c.to_digit(radix).unwrap_or(0) as f64; // `unwrap_or(0)` is defensive: an invalid digit would count as zero
+        }
+        format!("{v}")
+    }
     /// Handle escape sequence, returning the unescaped character.
     /// `extra_allowed` contains additional characters that can be escaped in this context.
     fn handle_escape(&mut self, extra_allowed: &[char]) -> Result<char, String> {
@@ -193,12 +363,71 @@ impl<'a> Lexer<'a> {
             'n' => Ok('\n'),
             'r' => Ok('\r'),
             't' => Ok('\t'),
+            'b' => Ok('\u{0008}'),
+            'f' => Ok('\u{000C}'),
+            'v' => Ok('\u{000B}'),
+            '0' => Ok('\0'),
             '\\' => Ok('\\'),
+            // `\xNN` — exactly two hex digits → code point 0x00..=0xFF (JS/TS).
+            'x' => {
+                let cp = self.read_hex_digits(2)?;
+                char::from_u32(cp).ok_or_else(|| format!("Invalid \\x escape: \\x{:02X}", cp))
+            }
+            // `\uNNNN` (exactly four hex digits) or `\u{N..}` (1-6 hex digits, ES6).
+            'u' => {
+                let cp = if self.peek() == Some('{') {
+                    self.advance(); // consume '{'
+                    let cp = self.read_hex_until_brace()?;
+                    match self.advance() {
+                        Some('}') => cp,
+                        _ => return Err("Unterminated \\u{...} escape (expected '}')".to_string()),
+                    }
+                } else {
+                    self.read_hex_digits(4)?
+                };
+                // Lone surrogates (0xD800..=0xDFFF) are valid UTF-16 code units in JS but
+                // not Unicode scalar values; tish strings are UTF-8, so reject them.
+                char::from_u32(cp)
+                    .ok_or_else(|| format!("Invalid \\u escape: code point U+{:04X}", cp))
+            }
             c if extra_allowed.contains(&c) => Ok(c),
             _ => Err(format!("Unknown escape: \\{}", escaped)),
         }
     }
+    /// Read exactly `n` hex digits and return their value; `Err` if input ends or a non-hex char appears.
+    fn read_hex_digits(&mut self, n: usize) -> Result<u32, String> {
+        let mut value: u32 = 0;
+        for _ in 0..n {
+            let c = self.advance().ok_or("Unterminated hex escape")?; // the character is consumed before it is validated
+            let digit = c
+                .to_digit(16)
+                .ok_or_else(|| format!("Invalid hex digit in escape: '{}'", c))?;
+            value = value * 16 + digit; // fits in u32 for n <= 8; the visible callers pass 2 and 4
+        }
+        Ok(value)
+    }
+    /// Read the hex digits of a `\u{...}` escape, stopping *before* the first non-hex char (the caller
+    /// checks for the closing `}`). Errors on zero digits, more than six digits, or a value above 0x10_FFFF.
+    fn read_hex_until_brace(&mut self) -> Result<u32, String> {
+        let mut value: u32 = 0;
+        let mut count = 0;
+        while let Some(c) = self.peek() {
+            let Some(digit) = c.to_digit(16) else { break }; // non-hex char (including `}`) is left unconsumed
+            self.advance();
+            value = value * 16 + digit;
+            count += 1;
+            if count > 6 || value > 0x10_FFFF { // leading zeros count toward the six-digit limit
+                return Err("Invalid \\u{...} escape: code point out of range".to_string());
+            }
+        }
+        if count == 0 {
+            return Err("Empty \\u{} escape (expected hex digits)".to_string());
+        }
+        Ok(value)
+    }
     fn read_string(&mut self, quote: char) -> Result<String, String> {
         let mut s = String::with_capacity(32);
         let extra = if quote == '"' {
@@ -307,6 +536,14 @@ impl<'a> Lexer<'a> {
     }
     pub fn next_token(&mut self) -> Result<Option<Token>, String> {
+        let tok = self.next_token_inner()?; // on `Err` we return early and `last_significant_kind` keeps its old value
+        if let Some(t) = &tok {
+            self.last_significant_kind = Some(t.kind); // recorded for every emitted token, virtual Indent/Dedent included
+        }
+        Ok(tok)
+    }
+    fn next_token_inner(&mut self) -> Result<Option<Token>, String> {
         if let Some(tok) = self.pending_dedents.pop_front() {
             return Ok(Some(tok));
         }
@@ -323,8 +560,13 @@ impl<'a> Lexer<'a> {
         if self.at_line_start {
             self.at_line_start = false;
+            // Always consume the leading whitespace; only *emit* Indent/Dedent when indentation
+            // is significant. With `ignore_indent`, the level is discarded so the indent stack
+            // stays at `[0]` and no virtual tokens are produced (brace-only blocks).
             let level = self.read_indent_level();
-            if level > 0 || self.peek().map(|c| c != '\n').unwrap_or(false) {
+            if !self.ignore_indent
+                && (level > 0 || self.peek().map(|c| c != '\n').unwrap_or(false))
+            {
                 if let Some(tok) = self.emit_indent_or_dedent(level) {
                     return Ok(Some(tok));
                 }
@@ -458,12 +700,15 @@ impl<'a> Lexer<'a> {
                 } else if self.peek() == Some('/') {
                     self.jsx_in_closing_tag = true;
                     TokenKind::Lt
-                } else if self.peek() == Some('>')
+                } else if (self.peek() == Some('>')
                     || self
                         .peek()
                         .map(|c| c.is_ascii_alphabetic() || c == '_')
-                        .unwrap_or(false)
+                        .unwrap_or(false))
+                    && !self.last_is_value()
                 {
+                    // JSX open tag — only in expression position. After a value (`ident<`, `)<`,
+                    // `]<`, literal) this is `Lt`: a comparison or generic-args opener.
                     self.jsx_depth += 1;
                     self.jsx_stack.push(JsxEl {
                         in_opener: true,
@@ -481,16 +726,34 @@ impl<'a> Lexer<'a> {
                     TokenKind::Ge
                 } else if self.peek() == Some('>') {
                     self.advance();
-                    TokenKind::Shr
+                    if self.peek() == Some('>') {
+                        self.advance();
+                        TokenKind::UShr // `>>>`
+                    } else {
+                        TokenKind::Shr
+                    }
                 } else {
-                    if self.jsx_in_closing_tag {
-                        self.jsx_depth = (self.jsx_depth - 1).max(0);
-                        self.jsx_stack.pop();
-                        self.jsx_sync_in_opening_tag();
-                    } else if self.jsx_in_opening_tag && self.jsx_saw_slash_before_gt {
+                    if self.jsx_in_closing_tag
+                        || (self.jsx_in_opening_tag && self.jsx_saw_slash_before_gt)
+                    {
                         self.jsx_depth = (self.jsx_depth - 1).max(0);
                         self.jsx_stack.pop();
                         self.jsx_sync_in_opening_tag();
+                        // A child element just closed (`</span>` or `<br/>`). If a parent element
+                        // is still open and past its opening tag, we're back in that parent's
+                        // children region, so the following run is JSX text — re-enter text mode.
+                        // Without this, trailing text after a child element ("… as JSON") is lexed
+                        // as code and a bare keyword (`as`, `in`, `if`, …) breaks the parse (#108).
+                        //
+                        // Guard on `jsx_child_brace_depth == 0`: if the closed element lived inside a
+                        // `{…}` expression container (e.g. `<div>{items.map(x => <span/>)}</div>`),
+                        // we're still in that expression, not the parent's text children — entering
+                        // text mode there would swallow the following `)`/`,` as JsxText.
+                        if self.jsx_child_brace_depth == 0
+                            && self.jsx_stack.last().map(|e| !e.in_opener).unwrap_or(false)
+                        {
+                            self.jsx_after_gt = true;
+                        }
                     } else if let Some(top) = self.jsx_stack.last_mut() {
                         if top.in_opener && top.attr_value_braces > 0 {
                             // `>` is a comparison (or shift) token inside `{ ... }`, not end of opening tag.
@@ -698,6 +961,80 @@ mod tests {
         assert_eq!(string_tok.literal.as_deref(), Some("H"));
     }
+    #[test]
+    fn radix_integer_literals() {
+        // Hex / octal / binary prefixes (any case) convert to a decimal `Number` literal,
+        // honoring `_` digit separators.
+        let cases = [
+            ("0xff", "255"),
+            ("0xFF", "255"),
+            ("0X1a", "26"),
+            ("0o17", "15"),
+            ("0O7", "7"),
+            ("0b1010", "10"),
+            ("0B0", "0"),
+            ("0xdeadbeef", "3735928559"), // larger than i32::MAX, still converted exactly
+            ("0xFF_FF", "65535"),
+            ("0b1111_0000", "240"),
+        ];
+        for (src, expected) in cases {
+            let tokens = Lexer::new(src).collect::<Result<Vec<_>, _>>().unwrap(); // `Lexer::new` takes its options from the environment
+            let num = tokens
+                .iter()
+                .find(|t| t.kind == TokenKind::Number)
+                .unwrap_or_else(|| panic!("no Number token for {src}"));
+            assert_eq!(num.literal.as_deref(), Some(expected), "for {src}");
+        }
+    }
+    #[test]
+    fn decimal_numeric_separators() {
+        // `_` between digits is a JS numeric separator: dropped from the literal value.
+        // Issue #57.
+        let only_number = |src: &str| -> String {
+            let tokens = Lexer::new(src).collect::<Result<Vec<_>, _>>().unwrap();
+            let nums: Vec<_> = tokens
+                .iter()
+                .filter(|t| t.kind == TokenKind::Number)
+                .collect();
+            assert_eq!(nums.len(), 1, "expected exactly one Number token for {src}");
+            // No stray identifier should be produced from the separated digits.
+            assert!(
+                !tokens.iter().any(|t| t.kind == TokenKind::Ident),
+                "unexpected Ident token while lexing {src}"
+            );
+            nums[0].literal.as_deref().unwrap().to_string()
+        };
+        assert_eq!(only_number("15_000"), "15000");
+        assert_eq!(only_number("1_000_000"), "1000000"); // more than one separator in a single literal
+        assert_eq!(only_number("3.14_159"), "3.14159"); // separator after the decimal point
+        assert_eq!(only_number("1e1_0"), "1e10"); // separator inside the exponent digits
+    }
+    #[test]
+    fn non_radix_zero_prefixed_stays_decimal() {
+        // A leading zero is NOT legacy octal; an invalid prefix is not a radix literal.
+        let num_literal = |src: &str| -> String {
+            Lexer::new(src)
+                .collect::<Result<Vec<_>, _>>()
+                .unwrap()
+                .into_iter()
+                .find(|t| t.kind == TokenKind::Number) // first Number token only
+                .unwrap()
+                .literal
+                .as_deref()
+                .unwrap()
+                .to_string()
+        };
+        assert_eq!(num_literal("07"), "07"); // decimal, not octal
+        assert_eq!(num_literal("0"), "0"); // lone zero: nothing follows that could be a radix marker
+        // `0xZ` → the Number token is just `0`, then `xZ` lexes as an identifier.
+        let toks = Lexer::new("0xZ").collect::<Result<Vec<_>, _>>().unwrap();
+        assert_eq!(toks[0].kind, TokenKind::Number);
+        assert_eq!(toks[0].literal.as_deref(), Some("0"));
+        assert_eq!(toks[1].kind, TokenKind::Ident);
+    }
     #[test]
     fn line_comment_does_not_emit_spurious_indent_before_next_line() {
         let with_comment = "fn f() {\n  return {\n    a: 1, // c\n    b: 2\n  }\n}\n";
@@ -713,4 +1050,55 @@ mod tests {
                 .collect::<Vec<_>>()
         );
     }
+    /// A leading-indented line is what actually drives the lexer to emit virtual tokens:
+    /// `  a()` opens an indent level (Indent) and the dedented `b()` closes it (Dedent).
+    const INDENTED_SRC: &str = "  a()\nb()\n"; // shared input for the default-mode and ignore_indent tests
+    #[test]
+    fn default_options_still_emit_indent_and_dedent() {
+        let tokens: Vec<_> = Lexer::with_options(INDENTED_SRC, LexerOptions::default()) // explicit options: `TISH_IGNORE_INDENT` in the environment is not consulted
+            .collect::<Result<Vec<_>, _>>()
+            .unwrap();
+        assert!(
+            tokens.iter().any(|t| t.kind == TokenKind::Indent),
+            "expected an Indent token in the default (indentation-significant) mode"
+        );
+        assert!(
+            tokens.iter().any(|t| t.kind == TokenKind::Dedent),
+            "expected a Dedent token in the default (indentation-significant) mode"
+        );
+    }
+    #[test]
+    fn ignore_indent_emits_no_virtual_tokens() {
+        let tokens: Vec<_> =
+            Lexer::with_options(INDENTED_SRC, LexerOptions { ignore_indent: true }) // same input as the default-mode test, only the option differs
+                .collect::<Result<Vec<_>, _>>()
+                .unwrap();
+        assert!(
+            !tokens
+                .iter()
+                .any(|t| matches!(t.kind, TokenKind::Indent | TokenKind::Dedent)),
+            "expected no Indent/Dedent with ignore_indent, got: {:?}",
+            tokens.iter().map(|t| t.kind).collect::<Vec<_>>()
+        );
+    }
+    #[test]
+    fn env_truthy_enables_only_on_recognized_values() {
+        use std::ffi::OsString;
+        let v = |s: &str| env_truthy(Some(OsString::from(s))); // feeds the value directly, so the process environment is never touched
+        // Recognized truthy values turn the flag on.
+        assert!(v("1"));
+        assert!(v("true"));
+        assert!(v("yes"));
+        // Everything else leaves it off, including unset, empty, and near-misses.
+        assert!(!env_truthy(None));
+        assert!(!v(""));
+        assert!(!v("0"));
+        assert!(!v("false"));
+        assert!(!v("no"));
+        assert!(!v("TRUE")); // exact match only — case-sensitive by design
+    }
 }

package/crates/tish_lexer/src/token.rs CHANGED Viewed

@@ -50,6 +50,7 @@ pub enum TokenKind {
     Do,
     TypeOf,
     Void,
+    Delete,
     Of,
     In,
     Async,
@@ -59,6 +60,8 @@ pub enum TokenKind {
     Export,
     Type,
     Declare,
+    Interface,
+    As,
     // Punctuation
     LParen,
@@ -108,6 +111,7 @@ pub enum TokenKind {
     BitNot,
     Shl,
     Shr,
+    UShr,
     OptionalChain,
     NullishCoalesce,
     Question,
@@ -148,6 +152,7 @@ impl TokenKind {
             "do" => TokenKind::Do,
             "typeof" => TokenKind::TypeOf,
             "void" => TokenKind::Void,
+            "delete" => TokenKind::Delete,
             "of" => TokenKind::Of,
             "in" => TokenKind::In,
             "async" => TokenKind::Async,
@@ -157,6 +162,8 @@ impl TokenKind {
             "export" => TokenKind::Export,
             "type" => TokenKind::Type,
             "declare" => TokenKind::Declare,
+            "interface" => TokenKind::Interface,
+            "as" => TokenKind::As,
             _ => TokenKind::Ident,
         }
     }