@tishlang/tish-format 1.0.13 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +2 -0
- package/bin/tish-format +0 -0
- package/crates/js_to_tish/src/transform/expr.rs +1 -0
- package/crates/tish/Cargo.toml +10 -2
- package/crates/tish/build.rs +21 -0
- package/crates/tish/src/cli_help.rs +15 -4
- package/crates/tish/src/main.rs +93 -21
- package/crates/tish/src/repl_completion.rs +0 -1
- package/crates/tish/tests/error_source_location.rs +36 -0
- package/crates/tish/tests/fixtures/runtime_error_location.tish +5 -0
- package/crates/tish/tests/fixtures/trycatch_runtime_errors.tish +15 -0
- package/crates/tish/tests/fixtures/tty_capability.tish +9 -0
- package/crates/tish/tests/integration_test.rs +402 -91
- package/crates/tish/tests/trycatch_runtime_errors.rs +45 -0
- package/crates/tish/tests/tty_capability.rs +43 -0
- package/crates/tish_ast/src/ast.rs +37 -8
- package/crates/tish_builtins/Cargo.toml +2 -0
- package/crates/tish_builtins/src/array.rs +375 -13
- package/crates/tish_builtins/src/collections.rs +481 -0
- package/crates/tish_builtins/src/construct.rs +59 -19
- package/crates/tish_builtins/src/date.rs +538 -0
- package/crates/tish_builtins/src/globals.rs +86 -6
- package/crates/tish_builtins/src/iterator.rs +129 -0
- package/crates/tish_builtins/src/lib.rs +5 -0
- package/crates/tish_builtins/src/number.rs +96 -0
- package/crates/tish_builtins/src/object.rs +2 -2
- package/crates/tish_builtins/src/string.rs +19 -20
- package/crates/tish_builtins/src/symbol.rs +1 -1
- package/crates/tish_builtins/src/typedarrays.rs +298 -0
- package/crates/tish_bytecode/src/chunk.rs +69 -1
- package/crates/tish_bytecode/src/compiler.rs +933 -89
- package/crates/tish_bytecode/src/encoding.rs +2 -0
- package/crates/tish_bytecode/src/lib.rs +2 -1
- package/crates/tish_bytecode/src/opcode.rs +47 -4
- package/crates/tish_bytecode/src/serialize.rs +31 -1
- package/crates/tish_compile/Cargo.toml +1 -0
- package/crates/tish_compile/src/check.rs +774 -0
- package/crates/tish_compile/src/codegen.rs +2334 -349
- package/crates/tish_compile/src/infer.rs +1395 -6
- package/crates/tish_compile/src/lib.rs +50 -8
- package/crates/tish_compile/src/resolve.rs +584 -21
- package/crates/tish_compile/src/types.rs +106 -2
- package/crates/tish_compile_js/src/codegen.rs +67 -0
- package/crates/tish_compile_js/src/tests_jsx.rs +64 -0
- package/crates/tish_core/Cargo.toml +7 -1
- package/crates/tish_core/src/console_style.rs +11 -1
- package/crates/tish_core/src/json.rs +81 -38
- package/crates/tish_core/src/lib.rs +3 -0
- package/crates/tish_core/src/shape.rs +85 -0
- package/crates/tish_core/src/value.rs +679 -25
- package/crates/tish_core/src/vmref.rs +13 -8
- package/crates/tish_cranelift/src/link.rs +17 -4
- package/crates/tish_cranelift_runtime/Cargo.toml +1 -0
- package/crates/tish_eval/Cargo.toml +6 -0
- package/crates/tish_eval/src/eval.rs +665 -117
- package/crates/tish_eval/src/http.rs +4 -1
- package/crates/tish_eval/src/natives.rs +165 -13
- package/crates/tish_eval/src/value.rs +31 -13
- package/crates/tish_eval/src/value_convert.rs +10 -4
- package/crates/tish_ffi/Cargo.toml +26 -0
- package/crates/tish_ffi/src/lib.rs +518 -0
- package/crates/tish_ffi/tests/fixtures/testmod/Cargo.toml +18 -0
- package/crates/tish_ffi/tests/fixtures/testmod/src/lib.rs +46 -0
- package/crates/tish_ffi/tests/loader.rs +65 -0
- package/crates/tish_fmt/Cargo.toml +1 -1
- package/crates/tish_fmt/src/lib.rs +61 -5
- package/crates/tish_lexer/src/lib.rs +397 -9
- package/crates/tish_lexer/src/token.rs +7 -0
- package/crates/tish_lint/src/lib.rs +2 -10
- package/crates/tish_lsp/src/import_goto.rs +2 -0
- package/crates/tish_lsp/src/main.rs +439 -26
- package/crates/tish_native/src/build.rs +55 -1
- package/crates/tish_opt/src/lib.rs +126 -23
- package/crates/tish_parser/src/lib.rs +55 -1
- package/crates/tish_parser/src/parser.rs +456 -34
- package/crates/tish_pg/src/lib.rs +3 -3
- package/crates/tish_resolve/src/lib.rs +99 -59
- package/crates/tish_runtime/Cargo.toml +4 -0
- package/crates/tish_runtime/src/http.rs +66 -17
- package/crates/tish_runtime/src/http_fetch.rs +29 -8
- package/crates/tish_runtime/src/http_hyper.rs +25 -2
- package/crates/tish_runtime/src/lib.rs +299 -44
- package/crates/tish_runtime/src/promise.rs +328 -18
- package/crates/tish_runtime/src/timers.rs +13 -7
- package/crates/tish_runtime/src/tty.rs +226 -0
- package/crates/tish_runtime/src/ws.rs +35 -18
- package/crates/tish_runtime/tests/fetch_readable_stream.rs +2 -2
- package/crates/tish_ui/src/jsx.rs +10 -0
- package/crates/tish_ui/src/runtime/hooks.rs +19 -15
- package/crates/tish_ui/src/runtime/mod.rs +15 -12
- package/crates/tish_vm/Cargo.toml +14 -1
- package/crates/tish_vm/src/jit.rs +1050 -0
- package/crates/tish_vm/src/lib.rs +2 -0
- package/crates/tish_vm/src/vm.rs +1546 -202
- package/crates/tish_vm/tests/concurrent_shared_state.rs +140 -0
- package/crates/tish_wasm/src/lib.rs +6 -2
- package/crates/tish_wasm_runtime/src/gpu.rs +17 -1
- package/crates/tishlang_cargo_bindgen/src/classify.rs +1 -3
- package/crates/tishlang_cargo_bindgen/src/lib.rs +2 -2
- package/crates/tishlang_cargo_bindgen/src/metadata.rs +1 -1
- package/justfile +8 -0
- package/package.json +2 -2
- package/platform/darwin-arm64/tish-fmt +0 -0
- package/platform/darwin-x64/tish-fmt +0 -0
- package/platform/linux-arm64/tish-fmt +0 -0
- package/platform/linux-x64/tish-fmt +0 -0
- package/platform/win32-x64/tish-fmt.exe +0 -0
- package/README.md +0 -138
|
@@ -508,6 +508,16 @@ impl Printer {
|
|
|
508
508
|
self.depth = level;
|
|
509
509
|
match s {
|
|
510
510
|
Statement::Block { statements, span } => self.block(statements, *span, level, true),
|
|
511
|
+
// Comma-declarators: render each as its own statement line. The caller
|
|
512
|
+
// (print_seq) emits the trailing newline, so only separate internally.
|
|
513
|
+
Statement::Multi { statements, .. } => {
|
|
514
|
+
for (i, st) in statements.iter().enumerate() {
|
|
515
|
+
if i > 0 {
|
|
516
|
+
self.buf.push('\n');
|
|
517
|
+
}
|
|
518
|
+
self.stmt(st, level);
|
|
519
|
+
}
|
|
520
|
+
}
|
|
511
521
|
Statement::VarDecl {
|
|
512
522
|
name,
|
|
513
523
|
mutable,
|
|
@@ -1072,6 +1082,33 @@ impl Printer {
|
|
|
1072
1082
|
self.type_ann(x);
|
|
1073
1083
|
}
|
|
1074
1084
|
}
|
|
1085
|
+
TypeAnnotation::Tuple(elems) => {
|
|
1086
|
+
self.buf.push('[');
|
|
1087
|
+
for (i, x) in elems.iter().enumerate() {
|
|
1088
|
+
if i > 0 {
|
|
1089
|
+
self.buf.push_str(", ");
|
|
1090
|
+
}
|
|
1091
|
+
self.type_ann(x);
|
|
1092
|
+
}
|
|
1093
|
+
self.buf.push(']');
|
|
1094
|
+
}
|
|
1095
|
+
TypeAnnotation::Literal(lit) => match lit {
|
|
1096
|
+
tishlang_ast::TypeLiteral::Str(s) => {
|
|
1097
|
+
self.buf.push('"');
|
|
1098
|
+
self.buf.push_str(s.as_ref());
|
|
1099
|
+
self.buf.push('"');
|
|
1100
|
+
}
|
|
1101
|
+
tishlang_ast::TypeLiteral::Num(n) => self.buf.push_str(&n.to_string()),
|
|
1102
|
+
tishlang_ast::TypeLiteral::Bool(b) => self.buf.push_str(&b.to_string()),
|
|
1103
|
+
},
|
|
1104
|
+
TypeAnnotation::Intersection(parts) => {
|
|
1105
|
+
for (i, x) in parts.iter().enumerate() {
|
|
1106
|
+
if i > 0 {
|
|
1107
|
+
self.buf.push_str(" & ");
|
|
1108
|
+
}
|
|
1109
|
+
self.type_ann(x);
|
|
1110
|
+
}
|
|
1111
|
+
}
|
|
1075
1112
|
}
|
|
1076
1113
|
}
|
|
1077
1114
|
|
|
@@ -1118,10 +1155,10 @@ impl Printer {
|
|
|
1118
1155
|
}
|
|
1119
1156
|
Expr::Unary { op, operand, .. } => {
|
|
1120
1157
|
match op {
|
|
1121
|
-
UnaryOp::Not => self.buf.
|
|
1122
|
-
UnaryOp::Neg => self.buf.
|
|
1123
|
-
UnaryOp::Pos => self.buf.
|
|
1124
|
-
UnaryOp::BitNot => self.buf.
|
|
1158
|
+
UnaryOp::Not => self.buf.push('!'),
|
|
1159
|
+
UnaryOp::Neg => self.buf.push('-'),
|
|
1160
|
+
UnaryOp::Pos => self.buf.push('+'),
|
|
1161
|
+
UnaryOp::BitNot => self.buf.push('~'),
|
|
1125
1162
|
UnaryOp::Void => self.buf.push_str("void "),
|
|
1126
1163
|
}
|
|
1127
1164
|
self.child(operand, PREC_POSTFIX);
|
|
@@ -1202,6 +1239,10 @@ impl Printer {
|
|
|
1202
1239
|
self.buf.push_str("typeof ");
|
|
1203
1240
|
self.child(operand, PREC_POSTFIX);
|
|
1204
1241
|
}
|
|
1242
|
+
Expr::Delete { target, .. } => {
|
|
1243
|
+
self.buf.push_str("delete ");
|
|
1244
|
+
self.child(target, PREC_POSTFIX);
|
|
1245
|
+
}
|
|
1205
1246
|
Expr::PostfixInc { name, .. } => {
|
|
1206
1247
|
self.buf.push_str(name.as_ref());
|
|
1207
1248
|
self.buf.push_str("++");
|
|
@@ -1460,7 +1501,7 @@ fn binop_prec(op: BinOp) -> u8 {
|
|
|
1460
1501
|
BinOp::BitOr => 5,
|
|
1461
1502
|
BinOp::BitXor => 6,
|
|
1462
1503
|
BinOp::BitAnd => 7,
|
|
1463
|
-
BinOp::Shl | BinOp::Shr => 8,
|
|
1504
|
+
BinOp::Shl | BinOp::Shr | BinOp::UShr => 8,
|
|
1464
1505
|
BinOp::Eq | BinOp::Ne | BinOp::StrictEq | BinOp::StrictNe => 9,
|
|
1465
1506
|
BinOp::Lt | BinOp::Le | BinOp::Gt | BinOp::Ge | BinOp::In => 10,
|
|
1466
1507
|
BinOp::Add | BinOp::Sub => 11,
|
|
@@ -1482,6 +1523,7 @@ fn expr_prec(e: &Expr) -> u8 {
|
|
|
1482
1523
|
Expr::Binary { op, .. } => binop_prec(*op),
|
|
1483
1524
|
Expr::Unary { .. }
|
|
1484
1525
|
| Expr::TypeOf { .. }
|
|
1526
|
+
| Expr::Delete { .. }
|
|
1485
1527
|
| Expr::Await { .. }
|
|
1486
1528
|
| Expr::PrefixInc { .. }
|
|
1487
1529
|
| Expr::PrefixDec { .. } => 14,
|
|
@@ -1519,6 +1561,7 @@ fn binop(op: BinOp) -> &'static str {
|
|
|
1519
1561
|
BinOp::BitXor => "^",
|
|
1520
1562
|
BinOp::Shl => "<<",
|
|
1521
1563
|
BinOp::Shr => ">>",
|
|
1564
|
+
BinOp::UShr => ">>>",
|
|
1522
1565
|
BinOp::In => "in",
|
|
1523
1566
|
}
|
|
1524
1567
|
}
|
|
@@ -2098,4 +2141,17 @@ let x = add(1, 2)
|
|
|
2098
2141
|
let out = format_source(src).unwrap();
|
|
2099
2142
|
assert_eq!(out, src, "{out:?}");
|
|
2100
2143
|
}
|
|
2144
|
+
|
|
2145
|
+
#[test]
|
|
2146
|
+
fn formats_delete_expression() {
|
|
2147
|
+
// Regression: Expr::Delete (the `delete` operator) must be handled by the formatter —
|
|
2148
|
+
// a non-exhaustive `match` here broke the `tish-format` build once the delete feature landed.
|
|
2149
|
+
let src = "fn f(o, k) {\ndelete o.a\ndelete o[\"b\"]\nlet x = delete o[k]\nreturn x\n}\n";
|
|
2150
|
+
let out = format_source(src).unwrap();
|
|
2151
|
+
assert!(out.contains("delete o.a"), "{out}");
|
|
2152
|
+
assert!(out.contains("delete o[\"b\"]"), "{out}");
|
|
2153
|
+
assert!(out.contains("delete o[k]"), "{out}");
|
|
2154
|
+
tishlang_parser::parse(&out).unwrap();
|
|
2155
|
+
assert_eq!(format_source(&out).unwrap(), out, "not idempotent:\n{out}");
|
|
2156
|
+
}
|
|
2101
2157
|
}
|
|
@@ -24,6 +24,37 @@ struct JsxEl {
|
|
|
24
24
|
attr_value_braces: i32,
|
|
25
25
|
}
|
|
26
26
|
|
|
27
|
+
/// Lexer configuration.
|
|
28
|
+
#[derive(Debug, Clone, Copy, Default)]
|
|
29
|
+
pub struct LexerOptions {
|
|
30
|
+
/// When true, suppress the virtual `Indent`/`Dedent` tokens so blocks are delimited
|
|
31
|
+
/// **only** by braces. Indentation is treated as ordinary whitespace, so off-side
|
|
32
|
+
/// (brace-less) blocks no longer form. Useful for debugging how nested blocks
|
|
33
|
+
/// transpile — see the `TISH_IGNORE_INDENT` environment variable for a global toggle.
|
|
34
|
+
pub ignore_indent: bool,
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
impl LexerOptions {
|
|
38
|
+
/// Build options from the environment. `TISH_IGNORE_INDENT=1` (or `true`/`yes`) sets
|
|
39
|
+
/// `ignore_indent`, so every parse path (run/build/dump-ast/fmt/lint/lsp) honors it
|
|
40
|
+
/// without threading a flag through the whole pipeline.
|
|
41
|
+
pub fn from_env() -> Self {
|
|
42
|
+
Self {
|
|
43
|
+
ignore_indent: env_truthy(std::env::var_os("TISH_IGNORE_INDENT")),
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/// Interpret an environment-variable value as a boolean flag: `1`, `true`, or `yes`
|
|
49
|
+
/// (exact, case-sensitive) enable it; anything else — including unset — leaves it off.
|
|
50
|
+
/// Split out from the `std::env` read so the rule is unit-testable without mutating
|
|
51
|
+
/// process-global state (which `Lexer::new` reads, so env-mutating tests would race).
|
|
52
|
+
fn env_truthy(value: Option<std::ffi::OsString>) -> bool {
|
|
53
|
+
value
|
|
54
|
+
.map(|v| v == "1" || v == "true" || v == "yes")
|
|
55
|
+
.unwrap_or(false)
|
|
56
|
+
}
|
|
57
|
+
|
|
27
58
|
#[derive(Debug, Clone)]
|
|
28
59
|
pub struct Lexer<'a> {
|
|
29
60
|
chars: Peekable<Chars<'a>>,
|
|
@@ -41,10 +72,20 @@ pub struct Lexer<'a> {
|
|
|
41
72
|
jsx_depth: i32,
|
|
42
73
|
jsx_child_brace_depth: i32,
|
|
43
74
|
jsx_in_closing_tag: bool,
|
|
75
|
+
ignore_indent: bool,
|
|
76
|
+
/// Kind of the last emitted significant token, for `<` disambiguation: after a *value* position
|
|
77
|
+
/// (ident, `)`, `]`, literal) a `<` is a comparison / generic-args opener (`Lt`), never a JSX tag.
|
|
78
|
+
last_significant_kind: Option<TokenKind>,
|
|
44
79
|
}
|
|
45
80
|
|
|
46
81
|
impl<'a> Lexer<'a> {
|
|
82
|
+
/// Create a lexer, reading options from the environment (e.g. `TISH_IGNORE_INDENT`).
|
|
47
83
|
pub fn new(source: &'a str) -> Self {
|
|
84
|
+
Self::with_options(source, LexerOptions::from_env())
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/// Create a lexer with explicit options, bypassing the environment.
|
|
88
|
+
pub fn with_options(source: &'a str, options: LexerOptions) -> Self {
|
|
48
89
|
Self {
|
|
49
90
|
chars: source.chars().peekable(),
|
|
50
91
|
pos: 0,
|
|
@@ -61,9 +102,29 @@ impl<'a> Lexer<'a> {
|
|
|
61
102
|
jsx_depth: 0,
|
|
62
103
|
jsx_child_brace_depth: 0,
|
|
63
104
|
jsx_in_closing_tag: false,
|
|
105
|
+
ignore_indent: options.ignore_indent,
|
|
106
|
+
last_significant_kind: None,
|
|
64
107
|
}
|
|
65
108
|
}
|
|
66
109
|
|
|
110
|
+
/// True when the previous significant token ends a value, so a following `<` is `Lt`
|
|
111
|
+
/// (comparison / generic args), not the start of a JSX element.
|
|
112
|
+
fn last_is_value(&self) -> bool {
|
|
113
|
+
matches!(
|
|
114
|
+
self.last_significant_kind,
|
|
115
|
+
Some(
|
|
116
|
+
TokenKind::Ident
|
|
117
|
+
| TokenKind::RParen
|
|
118
|
+
| TokenKind::RBracket
|
|
119
|
+
| TokenKind::Number
|
|
120
|
+
| TokenKind::String
|
|
121
|
+
| TokenKind::True
|
|
122
|
+
| TokenKind::False
|
|
123
|
+
| TokenKind::Null
|
|
124
|
+
)
|
|
125
|
+
)
|
|
126
|
+
}
|
|
127
|
+
|
|
67
128
|
#[inline]
|
|
68
129
|
fn jsx_sync_in_opening_tag(&mut self) {
|
|
69
130
|
self.jsx_in_opening_tag = self.jsx_stack.last().map(|e| e.in_opener).unwrap_or(false);
|
|
@@ -172,12 +233,59 @@ impl<'a> Lexer<'a> {
|
|
|
172
233
|
}
|
|
173
234
|
|
|
174
235
|
fn read_number(&mut self, first: char) -> String {
|
|
236
|
+
// Radix-prefixed integer literals: `0x`/`0X` (hex), `0o`/`0O` (octal), `0b`/`0B`
|
|
237
|
+
// (binary), with optional `_` digit separators. JS semantics — a non-negative
|
|
238
|
+
// integer. Convert to a decimal string here so every downstream consumer (the
|
|
239
|
+
// parser's `parse::<f64>()`, the formatter, …) sees a plain number, unchanged.
|
|
240
|
+
if first == '0' {
|
|
241
|
+
if let Some(radix) = self.radix_prefix() {
|
|
242
|
+
self.advance(); // consume the x/o/b marker
|
|
243
|
+
let mut digits = String::with_capacity(16);
|
|
244
|
+
while let Some(c) = self.peek() {
|
|
245
|
+
if c == '_' {
|
|
246
|
+
self.advance(); // digit separator
|
|
247
|
+
} else if c.is_digit(radix) {
|
|
248
|
+
digits.push(c);
|
|
249
|
+
self.advance();
|
|
250
|
+
} else {
|
|
251
|
+
break;
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
return Self::radix_digits_to_decimal(&digits, radix);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
175
258
|
let mut s = String::with_capacity(16);
|
|
176
259
|
s.push(first);
|
|
177
260
|
while let Some(c) = self.peek() {
|
|
178
261
|
if c.is_ascii_digit() || c == '.' {
|
|
179
262
|
s.push(c);
|
|
180
263
|
self.advance();
|
|
264
|
+
} else if c == '_' && Self::ends_with_digit(&s) && self.underscore_between_digits() {
|
|
265
|
+
self.advance(); // numeric separator (`15_000`) — drop it, JS-style
|
|
266
|
+
} else if (c == 'e' || c == 'E') && self.exponent_follows() {
|
|
267
|
+
// Scientific notation: `e`/`E` then optional sign then digits.
|
|
268
|
+
// Guarded by lookahead so `3em` lexes as `3` + `em`, not a bad number.
|
|
269
|
+
s.push(c);
|
|
270
|
+
self.advance(); // consume e/E
|
|
271
|
+
if matches!(self.peek(), Some('+') | Some('-')) {
|
|
272
|
+
s.push(self.peek().unwrap());
|
|
273
|
+
self.advance();
|
|
274
|
+
}
|
|
275
|
+
while let Some(d) = self.peek() {
|
|
276
|
+
if d.is_ascii_digit() {
|
|
277
|
+
s.push(d);
|
|
278
|
+
self.advance();
|
|
279
|
+
} else if d == '_'
|
|
280
|
+
&& Self::ends_with_digit(&s)
|
|
281
|
+
&& self.underscore_between_digits()
|
|
282
|
+
{
|
|
283
|
+
self.advance(); // numeric separator inside the exponent (`1e1_0`)
|
|
284
|
+
} else {
|
|
285
|
+
break;
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
break; // the exponent terminates the numeric literal
|
|
181
289
|
} else {
|
|
182
290
|
break;
|
|
183
291
|
}
|
|
@@ -185,6 +293,68 @@ impl<'a> Lexer<'a> {
|
|
|
185
293
|
s
|
|
186
294
|
}
|
|
187
295
|
|
|
296
|
+
/// True iff the literal accumulated so far ends in a decimal digit — used to reject a
|
|
297
|
+
/// `_` separator that isn't preceded by a digit (e.g. leading `_5` or post-`.` `1._5`).
|
|
298
|
+
fn ends_with_digit(s: &str) -> bool {
|
|
299
|
+
s.chars().last().is_some_and(|c| c.is_ascii_digit())
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
/// With `peek()` positioned at a `_`, look ahead (without consuming) to confirm the
|
|
303
|
+
/// next character is a decimal digit, i.e. the `_` sits between two digits and is a
|
|
304
|
+
/// valid JS numeric separator (rejects trailing `5_` and doubled `1__0`).
|
|
305
|
+
fn underscore_between_digits(&self) -> bool {
|
|
306
|
+
let mut la = self.chars.clone();
|
|
307
|
+
la.next(); // skip the `_` currently under peek()
|
|
308
|
+
la.next().is_some_and(|c| c.is_ascii_digit())
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
/// With the current peek positioned at an `e`/`E`, decide (without consuming)
|
|
312
|
+
/// whether a valid exponent `[+-]?\d` follows. `Chars` is `Clone`, so we look
|
|
313
|
+
/// ahead on a throwaway clone of the iterator.
|
|
314
|
+
fn exponent_follows(&self) -> bool {
|
|
315
|
+
let mut la = self.chars.clone();
|
|
316
|
+
la.next(); // skip the e/E currently under peek()
|
|
317
|
+
match la.next() {
|
|
318
|
+
Some(d) if d.is_ascii_digit() => true,
|
|
319
|
+
Some('+') | Some('-') => la.next().is_some_and(|d| d.is_ascii_digit()),
|
|
320
|
+
_ => false,
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/// With a leading `0` already consumed and `peek()` at the radix marker, return the
|
|
325
|
+
/// radix (16 / 8 / 2) iff this is a valid `0x` / `0o` / `0b` prefix followed by at
|
|
326
|
+
/// least one valid digit. Returns `None` otherwise, so `0`, `0.5`, `0e3`, `0xZ`, and
|
|
327
|
+
/// `0x_1` all stay on the decimal path. Looks ahead on a clone of the `Chars` iterator
|
|
328
|
+
/// (`Chars: Clone`) without consuming.
|
|
329
|
+
fn radix_prefix(&self) -> Option<u32> {
|
|
330
|
+
let mut la = self.chars.clone();
|
|
331
|
+
let radix = match la.next()? {
|
|
332
|
+
'x' | 'X' => 16,
|
|
333
|
+
'o' | 'O' => 8,
|
|
334
|
+
'b' | 'B' => 2,
|
|
335
|
+
_ => return None,
|
|
336
|
+
};
|
|
337
|
+
match la.next() {
|
|
338
|
+
Some(c) if c.is_digit(radix) => Some(radix),
|
|
339
|
+
_ => None,
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
/// Convert the (separator-free) digits of a radix-prefixed literal to the decimal
|
|
344
|
+
/// string the `Number` token carries. `u128` is exact for ≤128-bit literals — far
|
|
345
|
+
/// beyond any real input; the `f64` fallback only triggers for absurdly long ones and
|
|
346
|
+
/// loses precision past 2^53, exactly as JS's conversion to a double would.
|
|
347
|
+
fn radix_digits_to_decimal(digits: &str, radix: u32) -> String {
|
|
348
|
+
if let Ok(v) = u128::from_str_radix(digits, radix) {
|
|
349
|
+
return v.to_string();
|
|
350
|
+
}
|
|
351
|
+
let mut v = 0.0_f64;
|
|
352
|
+
for c in digits.chars() {
|
|
353
|
+
v = v * radix as f64 + c.to_digit(radix).unwrap_or(0) as f64;
|
|
354
|
+
}
|
|
355
|
+
format!("{v}")
|
|
356
|
+
}
|
|
357
|
+
|
|
188
358
|
/// Handle escape sequence, returning the unescaped character.
|
|
189
359
|
/// `extra_allowed` contains additional characters that can be escaped in this context.
|
|
190
360
|
fn handle_escape(&mut self, extra_allowed: &[char]) -> Result<char, String> {
|
|
@@ -193,12 +363,71 @@ impl<'a> Lexer<'a> {
|
|
|
193
363
|
'n' => Ok('\n'),
|
|
194
364
|
'r' => Ok('\r'),
|
|
195
365
|
't' => Ok('\t'),
|
|
366
|
+
'b' => Ok('\u{0008}'),
|
|
367
|
+
'f' => Ok('\u{000C}'),
|
|
368
|
+
'v' => Ok('\u{000B}'),
|
|
369
|
+
'0' => Ok('\0'),
|
|
196
370
|
'\\' => Ok('\\'),
|
|
371
|
+
// `\xNN` — exactly two hex digits → code point 0x00..=0xFF (JS/TS).
|
|
372
|
+
'x' => {
|
|
373
|
+
let cp = self.read_hex_digits(2)?;
|
|
374
|
+
char::from_u32(cp).ok_or_else(|| format!("Invalid \\x escape: \\x{:02X}", cp))
|
|
375
|
+
}
|
|
376
|
+
// `\uNNNN` (exactly four hex digits) or `\u{N..}` (1-6 hex digits, ES6).
|
|
377
|
+
'u' => {
|
|
378
|
+
let cp = if self.peek() == Some('{') {
|
|
379
|
+
self.advance(); // consume '{'
|
|
380
|
+
let cp = self.read_hex_until_brace()?;
|
|
381
|
+
match self.advance() {
|
|
382
|
+
Some('}') => cp,
|
|
383
|
+
_ => return Err("Unterminated \\u{...} escape (expected '}')".to_string()),
|
|
384
|
+
}
|
|
385
|
+
} else {
|
|
386
|
+
self.read_hex_digits(4)?
|
|
387
|
+
};
|
|
388
|
+
// Lone surrogates (0xD800..=0xDFFF) are valid UTF-16 code units in JS but
|
|
389
|
+
// not Unicode scalar values; tish strings are UTF-8, so reject them.
|
|
390
|
+
char::from_u32(cp)
|
|
391
|
+
.ok_or_else(|| format!("Invalid \\u escape: code point U+{:04X}", cp))
|
|
392
|
+
}
|
|
197
393
|
c if extra_allowed.contains(&c) => Ok(c),
|
|
198
394
|
_ => Err(format!("Unknown escape: \\{}", escaped)),
|
|
199
395
|
}
|
|
200
396
|
}
|
|
201
397
|
|
|
398
|
+
/// Read exactly `n` hex digits and return the parsed code point.
|
|
399
|
+
fn read_hex_digits(&mut self, n: usize) -> Result<u32, String> {
|
|
400
|
+
let mut value: u32 = 0;
|
|
401
|
+
for _ in 0..n {
|
|
402
|
+
let c = self.advance().ok_or("Unterminated hex escape")?;
|
|
403
|
+
let digit = c
|
|
404
|
+
.to_digit(16)
|
|
405
|
+
.ok_or_else(|| format!("Invalid hex digit in escape: '{}'", c))?;
|
|
406
|
+
value = value * 16 + digit;
|
|
407
|
+
}
|
|
408
|
+
Ok(value)
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
/// Read 1-6 hex digits for a `\u{...}` escape (stops at `}`); validates the count
|
|
412
|
+
/// and that the value is within the Unicode range.
|
|
413
|
+
fn read_hex_until_brace(&mut self) -> Result<u32, String> {
|
|
414
|
+
let mut value: u32 = 0;
|
|
415
|
+
let mut count = 0;
|
|
416
|
+
while let Some(c) = self.peek() {
|
|
417
|
+
let Some(digit) = c.to_digit(16) else { break };
|
|
418
|
+
self.advance();
|
|
419
|
+
value = value * 16 + digit;
|
|
420
|
+
count += 1;
|
|
421
|
+
if count > 6 || value > 0x10_FFFF {
|
|
422
|
+
return Err("Invalid \\u{...} escape: code point out of range".to_string());
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
if count == 0 {
|
|
426
|
+
return Err("Empty \\u{} escape (expected hex digits)".to_string());
|
|
427
|
+
}
|
|
428
|
+
Ok(value)
|
|
429
|
+
}
|
|
430
|
+
|
|
202
431
|
fn read_string(&mut self, quote: char) -> Result<String, String> {
|
|
203
432
|
let mut s = String::with_capacity(32);
|
|
204
433
|
let extra = if quote == '"' {
|
|
@@ -307,6 +536,14 @@ impl<'a> Lexer<'a> {
|
|
|
307
536
|
}
|
|
308
537
|
|
|
309
538
|
pub fn next_token(&mut self) -> Result<Option<Token>, String> {
|
|
539
|
+
let tok = self.next_token_inner()?;
|
|
540
|
+
if let Some(t) = &tok {
|
|
541
|
+
self.last_significant_kind = Some(t.kind);
|
|
542
|
+
}
|
|
543
|
+
Ok(tok)
|
|
544
|
+
}
|
|
545
|
+
|
|
546
|
+
fn next_token_inner(&mut self) -> Result<Option<Token>, String> {
|
|
310
547
|
if let Some(tok) = self.pending_dedents.pop_front() {
|
|
311
548
|
return Ok(Some(tok));
|
|
312
549
|
}
|
|
@@ -323,8 +560,13 @@ impl<'a> Lexer<'a> {
|
|
|
323
560
|
|
|
324
561
|
if self.at_line_start {
|
|
325
562
|
self.at_line_start = false;
|
|
563
|
+
// Always consume the leading whitespace; only *emit* Indent/Dedent when indentation
|
|
564
|
+
// is significant. With `ignore_indent`, the level is discarded so the indent stack
|
|
565
|
+
// stays at `[0]` and no virtual tokens are produced (brace-only blocks).
|
|
326
566
|
let level = self.read_indent_level();
|
|
327
|
-
if
|
|
567
|
+
if !self.ignore_indent
|
|
568
|
+
&& (level > 0 || self.peek().map(|c| c != '\n').unwrap_or(false))
|
|
569
|
+
{
|
|
328
570
|
if let Some(tok) = self.emit_indent_or_dedent(level) {
|
|
329
571
|
return Ok(Some(tok));
|
|
330
572
|
}
|
|
@@ -458,12 +700,15 @@ impl<'a> Lexer<'a> {
|
|
|
458
700
|
} else if self.peek() == Some('/') {
|
|
459
701
|
self.jsx_in_closing_tag = true;
|
|
460
702
|
TokenKind::Lt
|
|
461
|
-
} else if self.peek() == Some('>')
|
|
703
|
+
} else if (self.peek() == Some('>')
|
|
462
704
|
|| self
|
|
463
705
|
.peek()
|
|
464
706
|
.map(|c| c.is_ascii_alphabetic() || c == '_')
|
|
465
|
-
.unwrap_or(false)
|
|
707
|
+
.unwrap_or(false))
|
|
708
|
+
&& !self.last_is_value()
|
|
466
709
|
{
|
|
710
|
+
// JSX open tag — only in expression position. After a value (`ident<`, `)<`,
|
|
711
|
+
// `]<`, literal) this is `Lt`: a comparison or generic-args opener.
|
|
467
712
|
self.jsx_depth += 1;
|
|
468
713
|
self.jsx_stack.push(JsxEl {
|
|
469
714
|
in_opener: true,
|
|
@@ -481,16 +726,34 @@ impl<'a> Lexer<'a> {
|
|
|
481
726
|
TokenKind::Ge
|
|
482
727
|
} else if self.peek() == Some('>') {
|
|
483
728
|
self.advance();
|
|
484
|
-
|
|
729
|
+
if self.peek() == Some('>') {
|
|
730
|
+
self.advance();
|
|
731
|
+
TokenKind::UShr // `>>>`
|
|
732
|
+
} else {
|
|
733
|
+
TokenKind::Shr
|
|
734
|
+
}
|
|
485
735
|
} else {
|
|
486
|
-
if self.jsx_in_closing_tag
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
self.jsx_sync_in_opening_tag();
|
|
490
|
-
} else if self.jsx_in_opening_tag && self.jsx_saw_slash_before_gt {
|
|
736
|
+
if self.jsx_in_closing_tag
|
|
737
|
+
|| (self.jsx_in_opening_tag && self.jsx_saw_slash_before_gt)
|
|
738
|
+
{
|
|
491
739
|
self.jsx_depth = (self.jsx_depth - 1).max(0);
|
|
492
740
|
self.jsx_stack.pop();
|
|
493
741
|
self.jsx_sync_in_opening_tag();
|
|
742
|
+
// A child element just closed (`</span>` or `<br/>`). If a parent element
|
|
743
|
+
// is still open and past its opening tag, we're back in that parent's
|
|
744
|
+
// children region, so the following run is JSX text — re-enter text mode.
|
|
745
|
+
// Without this, trailing text after a child element ("… as JSON") is lexed
|
|
746
|
+
// as code and a bare keyword (`as`, `in`, `if`, …) breaks the parse (#108).
|
|
747
|
+
//
|
|
748
|
+
// Guard on `jsx_child_brace_depth == 0`: if the closed element lived inside a
|
|
749
|
+
// `{…}` expression container (e.g. `<div>{items.map(x => <span/>)}</div>`),
|
|
750
|
+
// we're still in that expression, not the parent's text children — entering
|
|
751
|
+
// text mode there would swallow the following `)`/`,` as JsxText.
|
|
752
|
+
if self.jsx_child_brace_depth == 0
|
|
753
|
+
&& self.jsx_stack.last().map(|e| !e.in_opener).unwrap_or(false)
|
|
754
|
+
{
|
|
755
|
+
self.jsx_after_gt = true;
|
|
756
|
+
}
|
|
494
757
|
} else if let Some(top) = self.jsx_stack.last_mut() {
|
|
495
758
|
if top.in_opener && top.attr_value_braces > 0 {
|
|
496
759
|
// `>` is a comparison (or shift) token inside `{ ... }`, not end of opening tag.
|
|
@@ -698,6 +961,80 @@ mod tests {
|
|
|
698
961
|
assert_eq!(string_tok.literal.as_deref(), Some("H"));
|
|
699
962
|
}
|
|
700
963
|
|
|
964
|
+
#[test]
|
|
965
|
+
fn radix_integer_literals() {
|
|
966
|
+
// Hex / octal / binary prefixes (any case) convert to a decimal `Number` literal,
|
|
967
|
+
// honoring `_` digit separators.
|
|
968
|
+
let cases = [
|
|
969
|
+
("0xff", "255"),
|
|
970
|
+
("0xFF", "255"),
|
|
971
|
+
("0X1a", "26"),
|
|
972
|
+
("0o17", "15"),
|
|
973
|
+
("0O7", "7"),
|
|
974
|
+
("0b1010", "10"),
|
|
975
|
+
("0B0", "0"),
|
|
976
|
+
("0xdeadbeef", "3735928559"),
|
|
977
|
+
("0xFF_FF", "65535"),
|
|
978
|
+
("0b1111_0000", "240"),
|
|
979
|
+
];
|
|
980
|
+
for (src, expected) in cases {
|
|
981
|
+
let tokens = Lexer::new(src).collect::<Result<Vec<_>, _>>().unwrap();
|
|
982
|
+
let num = tokens
|
|
983
|
+
.iter()
|
|
984
|
+
.find(|t| t.kind == TokenKind::Number)
|
|
985
|
+
.unwrap_or_else(|| panic!("no Number token for {src}"));
|
|
986
|
+
assert_eq!(num.literal.as_deref(), Some(expected), "for {src}");
|
|
987
|
+
}
|
|
988
|
+
}
|
|
989
|
+
|
|
990
|
+
#[test]
|
|
991
|
+
fn decimal_numeric_separators() {
|
|
992
|
+
// `_` between digits is a JS numeric separator: dropped from the literal value.
|
|
993
|
+
// Issue #57.
|
|
994
|
+
let only_number = |src: &str| -> String {
|
|
995
|
+
let tokens = Lexer::new(src).collect::<Result<Vec<_>, _>>().unwrap();
|
|
996
|
+
let nums: Vec<_> = tokens
|
|
997
|
+
.iter()
|
|
998
|
+
.filter(|t| t.kind == TokenKind::Number)
|
|
999
|
+
.collect();
|
|
1000
|
+
assert_eq!(nums.len(), 1, "expected exactly one Number token for {src}");
|
|
1001
|
+
// No stray identifier should be produced from the separated digits.
|
|
1002
|
+
assert!(
|
|
1003
|
+
!tokens.iter().any(|t| t.kind == TokenKind::Ident),
|
|
1004
|
+
"unexpected Ident token while lexing {src}"
|
|
1005
|
+
);
|
|
1006
|
+
nums[0].literal.as_deref().unwrap().to_string()
|
|
1007
|
+
};
|
|
1008
|
+
assert_eq!(only_number("15_000"), "15000");
|
|
1009
|
+
assert_eq!(only_number("1_000_000"), "1000000");
|
|
1010
|
+
assert_eq!(only_number("3.14_159"), "3.14159");
|
|
1011
|
+
assert_eq!(only_number("1e1_0"), "1e10");
|
|
1012
|
+
}
|
|
1013
|
+
|
|
1014
|
+
#[test]
|
|
1015
|
+
fn non_radix_zero_prefixed_stays_decimal() {
|
|
1016
|
+
// A leading zero is NOT legacy octal; an invalid prefix is not a radix literal.
|
|
1017
|
+
let num_literal = |src: &str| -> String {
|
|
1018
|
+
Lexer::new(src)
|
|
1019
|
+
.collect::<Result<Vec<_>, _>>()
|
|
1020
|
+
.unwrap()
|
|
1021
|
+
.into_iter()
|
|
1022
|
+
.find(|t| t.kind == TokenKind::Number)
|
|
1023
|
+
.unwrap()
|
|
1024
|
+
.literal
|
|
1025
|
+
.as_deref()
|
|
1026
|
+
.unwrap()
|
|
1027
|
+
.to_string()
|
|
1028
|
+
};
|
|
1029
|
+
assert_eq!(num_literal("07"), "07"); // decimal, not octal
|
|
1030
|
+
assert_eq!(num_literal("0"), "0");
|
|
1031
|
+
// `0xZ` → the Number token is just `0`, then `xZ` lexes as an identifier.
|
|
1032
|
+
let toks = Lexer::new("0xZ").collect::<Result<Vec<_>, _>>().unwrap();
|
|
1033
|
+
assert_eq!(toks[0].kind, TokenKind::Number);
|
|
1034
|
+
assert_eq!(toks[0].literal.as_deref(), Some("0"));
|
|
1035
|
+
assert_eq!(toks[1].kind, TokenKind::Ident);
|
|
1036
|
+
}
|
|
1037
|
+
|
|
701
1038
|
#[test]
|
|
702
1039
|
fn line_comment_does_not_emit_spurious_indent_before_next_line() {
|
|
703
1040
|
let with_comment = "fn f() {\n return {\n a: 1, // c\n b: 2\n }\n}\n";
|
|
@@ -713,4 +1050,55 @@ mod tests {
|
|
|
713
1050
|
.collect::<Vec<_>>()
|
|
714
1051
|
);
|
|
715
1052
|
}
|
|
1053
|
+
|
|
1054
|
+
/// A leading-indented line is what actually drives the lexer to emit virtual tokens:
|
|
1055
|
+
/// ` a()` opens an indent level (Indent) and the dedented `b()` closes it (Dedent).
|
|
1056
|
+
const INDENTED_SRC: &str = " a()\nb()\n";
|
|
1057
|
+
|
|
1058
|
+
#[test]
|
|
1059
|
+
fn default_options_still_emit_indent_and_dedent() {
|
|
1060
|
+
let tokens: Vec<_> = Lexer::with_options(INDENTED_SRC, LexerOptions::default())
|
|
1061
|
+
.collect::<Result<Vec<_>, _>>()
|
|
1062
|
+
.unwrap();
|
|
1063
|
+
assert!(
|
|
1064
|
+
tokens.iter().any(|t| t.kind == TokenKind::Indent),
|
|
1065
|
+
"expected an Indent token in the default (indentation-significant) mode"
|
|
1066
|
+
);
|
|
1067
|
+
assert!(
|
|
1068
|
+
tokens.iter().any(|t| t.kind == TokenKind::Dedent),
|
|
1069
|
+
"expected a Dedent token in the default (indentation-significant) mode"
|
|
1070
|
+
);
|
|
1071
|
+
}
|
|
1072
|
+
|
|
1073
|
+
#[test]
|
|
1074
|
+
fn ignore_indent_emits_no_virtual_tokens() {
|
|
1075
|
+
let tokens: Vec<_> =
|
|
1076
|
+
Lexer::with_options(INDENTED_SRC, LexerOptions { ignore_indent: true })
|
|
1077
|
+
.collect::<Result<Vec<_>, _>>()
|
|
1078
|
+
.unwrap();
|
|
1079
|
+
assert!(
|
|
1080
|
+
!tokens
|
|
1081
|
+
.iter()
|
|
1082
|
+
.any(|t| matches!(t.kind, TokenKind::Indent | TokenKind::Dedent)),
|
|
1083
|
+
"expected no Indent/Dedent with ignore_indent, got: {:?}",
|
|
1084
|
+
tokens.iter().map(|t| t.kind).collect::<Vec<_>>()
|
|
1085
|
+
);
|
|
1086
|
+
}
|
|
1087
|
+
|
|
1088
|
+
#[test]
|
|
1089
|
+
fn env_truthy_enables_only_on_recognized_values() {
|
|
1090
|
+
use std::ffi::OsString;
|
|
1091
|
+
let v = |s: &str| env_truthy(Some(OsString::from(s)));
|
|
1092
|
+
// Recognized truthy values turn the flag on.
|
|
1093
|
+
assert!(v("1"));
|
|
1094
|
+
assert!(v("true"));
|
|
1095
|
+
assert!(v("yes"));
|
|
1096
|
+
// Everything else leaves it off, including unset, empty, and near-misses.
|
|
1097
|
+
assert!(!env_truthy(None));
|
|
1098
|
+
assert!(!v(""));
|
|
1099
|
+
assert!(!v("0"));
|
|
1100
|
+
assert!(!v("false"));
|
|
1101
|
+
assert!(!v("no"));
|
|
1102
|
+
assert!(!v("TRUE")); // exact match only — case-sensitive by design
|
|
1103
|
+
}
|
|
716
1104
|
}
|
|
@@ -50,6 +50,7 @@ pub enum TokenKind {
|
|
|
50
50
|
Do,
|
|
51
51
|
TypeOf,
|
|
52
52
|
Void,
|
|
53
|
+
Delete,
|
|
53
54
|
Of,
|
|
54
55
|
In,
|
|
55
56
|
Async,
|
|
@@ -59,6 +60,8 @@ pub enum TokenKind {
|
|
|
59
60
|
Export,
|
|
60
61
|
Type,
|
|
61
62
|
Declare,
|
|
63
|
+
Interface,
|
|
64
|
+
As,
|
|
62
65
|
|
|
63
66
|
// Punctuation
|
|
64
67
|
LParen,
|
|
@@ -108,6 +111,7 @@ pub enum TokenKind {
|
|
|
108
111
|
BitNot,
|
|
109
112
|
Shl,
|
|
110
113
|
Shr,
|
|
114
|
+
UShr,
|
|
111
115
|
OptionalChain,
|
|
112
116
|
NullishCoalesce,
|
|
113
117
|
Question,
|
|
@@ -148,6 +152,7 @@ impl TokenKind {
|
|
|
148
152
|
"do" => TokenKind::Do,
|
|
149
153
|
"typeof" => TokenKind::TypeOf,
|
|
150
154
|
"void" => TokenKind::Void,
|
|
155
|
+
"delete" => TokenKind::Delete,
|
|
151
156
|
"of" => TokenKind::Of,
|
|
152
157
|
"in" => TokenKind::In,
|
|
153
158
|
"async" => TokenKind::Async,
|
|
@@ -157,6 +162,8 @@ impl TokenKind {
|
|
|
157
162
|
"export" => TokenKind::Export,
|
|
158
163
|
"type" => TokenKind::Type,
|
|
159
164
|
"declare" => TokenKind::Declare,
|
|
165
|
+
"interface" => TokenKind::Interface,
|
|
166
|
+
"as" => TokenKind::As,
|
|
160
167
|
_ => TokenKind::Ident,
|
|
161
168
|
}
|
|
162
169
|
}
|