@tishlang/tish-format 1.0.12 → 2.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +51 -0
- package/LICENSE +13 -0
- package/bin/tish-format +0 -0
- package/crates/js_to_tish/Cargo.toml +11 -0
- package/crates/js_to_tish/README.md +18 -0
- package/crates/js_to_tish/src/error.rs +55 -0
- package/crates/js_to_tish/src/lib.rs +11 -0
- package/crates/js_to_tish/src/span_util.rs +35 -0
- package/crates/js_to_tish/src/transform/expr.rs +611 -0
- package/crates/js_to_tish/src/transform/stmt.rs +503 -0
- package/crates/js_to_tish/src/transform.rs +60 -0
- package/crates/tish/Cargo.toml +62 -0
- package/crates/tish/build.rs +21 -0
- package/crates/tish/src/cargo_native_registry.rs +32 -0
- package/crates/tish/src/cli_help.rs +576 -0
- package/crates/tish/src/main.rs +853 -0
- package/crates/tish/src/repl_completion.rs +199 -0
- package/crates/tish/tests/cargo_example_compile.rs +67 -0
- package/crates/tish/tests/error_source_location.rs +36 -0
- package/crates/tish/tests/fixtures/cargo_example_project/Cargo.toml +3 -0
- package/crates/tish/tests/fixtures/cargo_example_project/crates/demo-shim/Cargo.toml +11 -0
- package/crates/tish/tests/fixtures/cargo_example_project/crates/demo-shim/src/lib.rs +12 -0
- package/crates/tish/tests/fixtures/cargo_example_project/package.json +10 -0
- package/crates/tish/tests/fixtures/cargo_example_project/src/main.tish +3 -0
- package/crates/tish/tests/fixtures/runtime_error_location.tish +5 -0
- package/crates/tish/tests/fixtures/trycatch_runtime_errors.tish +15 -0
- package/crates/tish/tests/fixtures/tty_capability.tish +9 -0
- package/crates/tish/tests/integration_test.rs +1406 -0
- package/crates/tish/tests/run_optimize_stdout_parity.rs +50 -0
- package/crates/tish/tests/shortcircuit.rs +65 -0
- package/crates/tish/tests/trycatch_runtime_errors.rs +45 -0
- package/crates/tish/tests/tty_capability.rs +43 -0
- package/crates/tish_ast/Cargo.toml +9 -0
- package/crates/tish_ast/src/ast.rs +649 -0
- package/crates/tish_ast/src/lib.rs +5 -0
- package/crates/tish_build_utils/Cargo.toml +11 -0
- package/crates/tish_build_utils/src/lib.rs +577 -0
- package/crates/tish_builtins/Cargo.toml +22 -0
- package/crates/tish_builtins/src/array.rs +803 -0
- package/crates/tish_builtins/src/collections.rs +481 -0
- package/crates/tish_builtins/src/construct.rs +199 -0
- package/crates/tish_builtins/src/date.rs +538 -0
- package/crates/tish_builtins/src/globals.rs +293 -0
- package/crates/tish_builtins/src/helpers.rs +35 -0
- package/crates/tish_builtins/src/iterator.rs +129 -0
- package/crates/tish_builtins/src/lib.rs +21 -0
- package/crates/tish_builtins/src/math.rs +89 -0
- package/crates/tish_builtins/src/number.rs +96 -0
- package/crates/tish_builtins/src/object.rs +36 -0
- package/crates/tish_builtins/src/string.rs +646 -0
- package/crates/tish_builtins/src/symbol.rs +83 -0
- package/crates/tish_builtins/src/typedarrays.rs +298 -0
- package/crates/tish_bytecode/Cargo.toml +17 -0
- package/crates/tish_bytecode/src/chunk.rs +164 -0
- package/crates/tish_bytecode/src/compiler.rs +2604 -0
- package/crates/tish_bytecode/src/encoding.rs +102 -0
- package/crates/tish_bytecode/src/lib.rs +20 -0
- package/crates/tish_bytecode/src/opcode.rs +185 -0
- package/crates/tish_bytecode/src/peephole.rs +189 -0
- package/crates/tish_bytecode/src/serialize.rs +193 -0
- package/crates/tish_bytecode/tests/break_continue_bytecode.rs +44 -0
- package/crates/tish_bytecode/tests/constant_folding.rs +84 -0
- package/crates/tish_bytecode/tests/sort_optimization.rs +31 -0
- package/crates/tish_compile/Cargo.toml +27 -0
- package/crates/tish_compile/src/check.rs +774 -0
- package/crates/tish_compile/src/codegen.rs +7317 -0
- package/crates/tish_compile/src/infer.rs +1681 -0
- package/crates/tish_compile/src/lib.rs +206 -0
- package/crates/tish_compile/src/resolve.rs +1951 -0
- package/crates/tish_compile/src/types.rs +605 -0
- package/crates/tish_compile_js/Cargo.toml +18 -0
- package/crates/tish_compile_js/examples/jsx_vdom_smoke.tish +8 -0
- package/crates/tish_compile_js/src/codegen.rs +938 -0
- package/crates/tish_compile_js/src/error.rs +20 -0
- package/crates/tish_compile_js/src/lib.rs +26 -0
- package/crates/tish_compile_js/src/tests_jsx.rs +414 -0
- package/crates/tish_compiler_wasm/Cargo.toml +21 -0
- package/crates/tish_compiler_wasm/src/lib.rs +57 -0
- package/crates/tish_compiler_wasm/src/resolve_virtual.rs +473 -0
- package/crates/tish_core/Cargo.toml +32 -0
- package/crates/tish_core/src/console_style.rs +170 -0
- package/crates/tish_core/src/json.rs +430 -0
- package/crates/tish_core/src/lib.rs +20 -0
- package/crates/tish_core/src/macros.rs +36 -0
- package/crates/tish_core/src/shape.rs +85 -0
- package/crates/tish_core/src/uri.rs +118 -0
- package/crates/tish_core/src/value.rs +1350 -0
- package/crates/tish_core/src/vmref.rs +183 -0
- package/crates/tish_cranelift/Cargo.toml +19 -0
- package/crates/tish_cranelift/src/lib.rs +43 -0
- package/crates/tish_cranelift/src/link.rs +130 -0
- package/crates/tish_cranelift/src/lower.rs +85 -0
- package/crates/tish_cranelift_runtime/Cargo.toml +26 -0
- package/crates/tish_cranelift_runtime/src/lib.rs +45 -0
- package/crates/tish_eval/Cargo.toml +51 -0
- package/crates/tish_eval/src/eval.rs +4265 -0
- package/crates/tish_eval/src/http.rs +191 -0
- package/crates/tish_eval/src/lib.rs +99 -0
- package/crates/tish_eval/src/natives.rs +551 -0
- package/crates/tish_eval/src/promise.rs +179 -0
- package/crates/tish_eval/src/regex.rs +299 -0
- package/crates/tish_eval/src/timers.rs +120 -0
- package/crates/tish_eval/src/value.rs +336 -0
- package/crates/tish_eval/src/value_convert.rs +117 -0
- package/crates/tish_ffi/Cargo.toml +26 -0
- package/crates/tish_ffi/src/lib.rs +518 -0
- package/crates/tish_ffi/tests/fixtures/testmod/Cargo.toml +18 -0
- package/crates/tish_ffi/tests/fixtures/testmod/src/lib.rs +46 -0
- package/crates/tish_ffi/tests/loader.rs +65 -0
- package/crates/tish_fmt/Cargo.toml +16 -0
- package/crates/tish_fmt/src/bin/tish-fmt.rs +41 -0
- package/crates/tish_fmt/src/lib.rs +2157 -0
- package/crates/tish_jsx_web/Cargo.toml +9 -0
- package/crates/tish_jsx_web/README.md +5 -0
- package/crates/tish_jsx_web/src/lib.rs +2 -0
- package/crates/tish_lexer/Cargo.toml +9 -0
- package/crates/tish_lexer/src/lib.rs +1104 -0
- package/crates/tish_lexer/src/token.rs +170 -0
- package/crates/tish_lint/Cargo.toml +18 -0
- package/crates/tish_lint/src/bin/tish-lint.rs +195 -0
- package/crates/tish_lint/src/lib.rs +281 -0
- package/crates/tish_llvm/Cargo.toml +13 -0
- package/crates/tish_llvm/src/lib.rs +115 -0
- package/crates/tish_lsp/Cargo.toml +25 -0
- package/crates/tish_lsp/README.md +26 -0
- package/crates/tish_lsp/src/builtin_goto.rs +362 -0
- package/crates/tish_lsp/src/import_goto.rs +564 -0
- package/crates/tish_lsp/src/main.rs +1459 -0
- package/crates/tish_native/Cargo.toml +16 -0
- package/crates/tish_native/src/build.rs +481 -0
- package/crates/tish_native/src/config.rs +48 -0
- package/crates/tish_native/src/lib.rs +416 -0
- package/crates/tish_opt/Cargo.toml +13 -0
- package/crates/tish_opt/src/lib.rs +1046 -0
- package/crates/tish_parser/Cargo.toml +11 -0
- package/crates/tish_parser/src/lib.rs +386 -0
- package/crates/tish_parser/src/parser.rs +2726 -0
- package/crates/tish_pg/Cargo.toml +34 -0
- package/crates/tish_pg/README.md +38 -0
- package/crates/tish_pg/src/error.rs +52 -0
- package/crates/tish_pg/src/lib.rs +955 -0
- package/crates/tish_resolve/Cargo.toml +13 -0
- package/crates/tish_resolve/src/lib.rs +3601 -0
- package/crates/tish_resolve/src/pos.rs +141 -0
- package/crates/tish_runtime/Cargo.toml +100 -0
- package/crates/tish_runtime/src/http.rs +1347 -0
- package/crates/tish_runtime/src/http_fetch.rs +492 -0
- package/crates/tish_runtime/src/http_hyper.rs +441 -0
- package/crates/tish_runtime/src/http_prefork.rs +189 -0
- package/crates/tish_runtime/src/lib.rs +1447 -0
- package/crates/tish_runtime/src/native_promise.rs +15 -0
- package/crates/tish_runtime/src/promise.rs +558 -0
- package/crates/tish_runtime/src/promise_io.rs +38 -0
- package/crates/tish_runtime/src/timers.rs +172 -0
- package/crates/tish_runtime/src/tty.rs +226 -0
- package/crates/tish_runtime/src/ws.rs +778 -0
- package/crates/tish_runtime/tests/fetch_readable_stream.rs +102 -0
- package/crates/tish_ui/Cargo.toml +17 -0
- package/crates/tish_ui/src/jsx.rs +692 -0
- package/crates/tish_ui/src/lib.rs +20 -0
- package/crates/tish_ui/src/runtime/hooks.rs +573 -0
- package/crates/tish_ui/src/runtime/mod.rs +183 -0
- package/crates/tish_vm/Cargo.toml +60 -0
- package/crates/tish_vm/src/jit.rs +1050 -0
- package/crates/tish_vm/src/lib.rs +41 -0
- package/crates/tish_vm/src/vm.rs +3536 -0
- package/crates/tish_vm/tests/concurrent_shared_state.rs +140 -0
- package/crates/tish_vm/tests/fixtures/or_string_cmd.tish +2 -0
- package/crates/tish_vm/tests/lexical_scope_declare.rs +34 -0
- package/crates/tish_vm/tests/peephole_jump_chain_logical_or.rs +150 -0
- package/crates/tish_wasm/Cargo.toml +15 -0
- package/crates/tish_wasm/src/lib.rs +428 -0
- package/crates/tish_wasm_runtime/Cargo.toml +37 -0
- package/crates/tish_wasm_runtime/src/gpu.rs +429 -0
- package/crates/tish_wasm_runtime/src/lib.rs +42 -0
- package/crates/tishlang_cargo_bindgen/Cargo.toml +26 -0
- package/crates/tishlang_cargo_bindgen/src/classify.rs +261 -0
- package/crates/tishlang_cargo_bindgen/src/discover.rs +125 -0
- package/crates/tishlang_cargo_bindgen/src/infer.rs +382 -0
- package/crates/tishlang_cargo_bindgen/src/lib.rs +349 -0
- package/crates/tishlang_cargo_bindgen/src/main.rs +167 -0
- package/crates/tishlang_cargo_bindgen/src/metadata.rs +117 -0
- package/justfile +276 -0
- package/package.json +2 -2
- package/platform/darwin-arm64/tish-fmt +0 -0
- package/platform/darwin-x64/tish-fmt +0 -0
- package/platform/linux-arm64/tish-fmt +0 -0
- package/platform/linux-x64/tish-fmt +0 -0
- package/platform/win32-x64/tish-fmt.exe +0 -0
|
@@ -0,0 +1,1104 @@
|
|
|
1
|
+
//! Tish lexer with indent normalization and tab/space handling.
|
|
2
|
+
//!
|
|
3
|
+
//! Normalizes tabs and spaces to a single indent level so both styles work.
|
|
4
|
+
//! Emits virtual Indent/Dedent tokens for optional-brace blocks.
|
|
5
|
+
|
|
6
|
+
mod token;
|
|
7
|
+
|
|
8
|
+
pub use token::{Span, Token, TokenKind};
|
|
9
|
+
|
|
10
|
+
use std::collections::VecDeque;
|
|
11
|
+
use std::iter::Peekable;
|
|
12
|
+
use std::str::Chars;
|
|
13
|
+
|
|
14
|
+
const INDENT_WIDTH: usize = 2;
|
|
15
|
+
const TAB_AS_LEVELS: usize = 1;
|
|
16
|
+
|
|
17
|
+
/// Bookkeeping record for a single JSX element held on the lexer's JSX stack.
///
/// * `in_opener` — true while the lexer is still inside the element's opening tag
///   (`<Tag ...`).
/// * `attr_value_braces` — number of `{` currently open inside this element's
///   **attribute values** (embedded JS).
///
/// Tracking the brace count per element lets `>` act as a comparison operator inside
/// `{...}` while still closing `<span>` once `attr_value_braces == 0` for the innermost
/// element (React-like).
#[derive(Debug, Clone)]
struct JsxEl {
    in_opener: bool,
    attr_value_braces: i32,
}
|
|
26
|
+
|
|
27
|
+
/// Configuration knobs for the lexer.
#[derive(Debug, Clone, Copy, Default)]
pub struct LexerOptions {
    /// Brace-only mode. When set, the virtual `Indent`/`Dedent` tokens are suppressed,
    /// so blocks are delimited **only** by braces and indentation is treated as ordinary
    /// whitespace; off-side (brace-less) blocks no longer form. Handy when debugging how
    /// nested blocks transpile. The `TISH_IGNORE_INDENT` environment variable provides a
    /// global toggle for the same behavior.
    pub ignore_indent: bool,
}
|
|
36
|
+
|
|
37
|
+
impl LexerOptions {
|
|
38
|
+
/// Build options from the environment. `TISH_IGNORE_INDENT=1` (or `true`/`yes`) sets
|
|
39
|
+
/// `ignore_indent`, so every parse path (run/build/dump-ast/fmt/lint/lsp) honors it
|
|
40
|
+
/// without threading a flag through the whole pipeline.
|
|
41
|
+
pub fn from_env() -> Self {
|
|
42
|
+
Self {
|
|
43
|
+
ignore_indent: env_truthy(std::env::var_os("TISH_IGNORE_INDENT")),
|
|
44
|
+
}
|
|
45
|
+
}
|
|
46
|
+
}
|
|
47
|
+
|
|
48
|
+
/// Decide whether an environment-variable value switches a boolean flag on.
///
/// Only the exact, case-sensitive strings `1`, `true`, and `yes` enable the flag. Every
/// other value — and an unset variable (`None`) — leaves it off.
///
/// Kept separate from the `std::env` read so the rule can be unit-tested without
/// mutating process-global state (`Lexer::new` reads the environment, so tests that
/// mutate it would race).
fn env_truthy(value: Option<std::ffi::OsString>) -> bool {
    let Some(raw) = value else {
        return false;
    };
    // Values that are not valid UTF-8 yield `None` here and therefore never match.
    matches!(raw.to_str(), Some("1" | "true" | "yes"))
}
|
|
57
|
+
|
|
58
|
+
#[derive(Debug, Clone)]
|
|
59
|
+
pub struct Lexer<'a> {
|
|
60
|
+
chars: Peekable<Chars<'a>>,
|
|
61
|
+
pos: usize,
|
|
62
|
+
line: usize,
|
|
63
|
+
col: usize,
|
|
64
|
+
indent_stack: Vec<usize>,
|
|
65
|
+
at_line_start: bool,
|
|
66
|
+
pending_dedents: VecDeque<Token>,
|
|
67
|
+
template_brace_stack: Vec<usize>,
|
|
68
|
+
jsx_after_gt: bool,
|
|
69
|
+
jsx_in_opening_tag: bool,
|
|
70
|
+
jsx_saw_slash_before_gt: bool,
|
|
71
|
+
jsx_stack: Vec<JsxEl>,
|
|
72
|
+
jsx_depth: i32,
|
|
73
|
+
jsx_child_brace_depth: i32,
|
|
74
|
+
jsx_in_closing_tag: bool,
|
|
75
|
+
ignore_indent: bool,
|
|
76
|
+
/// Kind of the last emitted significant token, for `<` disambiguation: after a *value* position
|
|
77
|
+
/// (ident, `)`, `]`, literal) a `<` is a comparison / generic-args opener (`Lt`), never a JSX tag.
|
|
78
|
+
last_significant_kind: Option<TokenKind>,
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
impl<'a> Lexer<'a> {
|
|
82
|
+
/// Create a lexer, reading options from the environment (e.g. `TISH_IGNORE_INDENT`).
|
|
83
|
+
pub fn new(source: &'a str) -> Self {
|
|
84
|
+
Self::with_options(source, LexerOptions::from_env())
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
/// Create a lexer with explicit options, bypassing the environment.
|
|
88
|
+
pub fn with_options(source: &'a str, options: LexerOptions) -> Self {
|
|
89
|
+
Self {
|
|
90
|
+
chars: source.chars().peekable(),
|
|
91
|
+
pos: 0,
|
|
92
|
+
line: 1,
|
|
93
|
+
col: 1,
|
|
94
|
+
indent_stack: vec![0],
|
|
95
|
+
at_line_start: true,
|
|
96
|
+
pending_dedents: VecDeque::new(),
|
|
97
|
+
template_brace_stack: Vec::new(),
|
|
98
|
+
jsx_after_gt: false,
|
|
99
|
+
jsx_in_opening_tag: false,
|
|
100
|
+
jsx_saw_slash_before_gt: false,
|
|
101
|
+
jsx_stack: Vec::new(),
|
|
102
|
+
jsx_depth: 0,
|
|
103
|
+
jsx_child_brace_depth: 0,
|
|
104
|
+
jsx_in_closing_tag: false,
|
|
105
|
+
ignore_indent: options.ignore_indent,
|
|
106
|
+
last_significant_kind: None,
|
|
107
|
+
}
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
/// True when the previous significant token ends a value, so a following `<` is `Lt`
|
|
111
|
+
/// (comparison / generic args), not the start of a JSX element.
|
|
112
|
+
fn last_is_value(&self) -> bool {
|
|
113
|
+
matches!(
|
|
114
|
+
self.last_significant_kind,
|
|
115
|
+
Some(
|
|
116
|
+
TokenKind::Ident
|
|
117
|
+
| TokenKind::RParen
|
|
118
|
+
| TokenKind::RBracket
|
|
119
|
+
| TokenKind::Number
|
|
120
|
+
| TokenKind::String
|
|
121
|
+
| TokenKind::True
|
|
122
|
+
| TokenKind::False
|
|
123
|
+
| TokenKind::Null
|
|
124
|
+
)
|
|
125
|
+
)
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
#[inline]
|
|
129
|
+
fn jsx_sync_in_opening_tag(&mut self) {
|
|
130
|
+
self.jsx_in_opening_tag = self.jsx_stack.last().map(|e| e.in_opener).unwrap_or(false);
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
fn read_jsx_text(&mut self, start: (usize, usize)) -> Result<Option<Token>, String> {
|
|
134
|
+
let mut s = String::new();
|
|
135
|
+
loop {
|
|
136
|
+
match self.peek() {
|
|
137
|
+
None | Some('{') | Some('<') => break,
|
|
138
|
+
Some(c) => {
|
|
139
|
+
self.advance();
|
|
140
|
+
s.push(c);
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
if s.is_empty() {
|
|
145
|
+
Ok(None)
|
|
146
|
+
} else {
|
|
147
|
+
let end = self.span_start();
|
|
148
|
+
Ok(Some(Token {
|
|
149
|
+
kind: TokenKind::JsxText,
|
|
150
|
+
span: Span { start, end },
|
|
151
|
+
literal: Some(s.into()),
|
|
152
|
+
}))
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
fn peek(&mut self) -> Option<char> {
|
|
157
|
+
self.chars.peek().copied()
|
|
158
|
+
}
|
|
159
|
+
|
|
160
|
+
fn advance(&mut self) -> Option<char> {
|
|
161
|
+
let c = self.chars.next()?;
|
|
162
|
+
self.pos += c.len_utf8();
|
|
163
|
+
if c == '\n' {
|
|
164
|
+
self.line += 1;
|
|
165
|
+
self.col = 1;
|
|
166
|
+
self.at_line_start = true;
|
|
167
|
+
} else {
|
|
168
|
+
self.col += 1;
|
|
169
|
+
}
|
|
170
|
+
Some(c)
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
fn span_start(&self) -> (usize, usize) {
|
|
174
|
+
(self.line, self.col)
|
|
175
|
+
}
|
|
176
|
+
|
|
177
|
+
fn read_indent_level(&mut self) -> usize {
|
|
178
|
+
let mut level = 0;
|
|
179
|
+
loop {
|
|
180
|
+
match self.peek() {
|
|
181
|
+
Some(' ') => {
|
|
182
|
+
self.advance();
|
|
183
|
+
level += 1;
|
|
184
|
+
}
|
|
185
|
+
Some('\t') => {
|
|
186
|
+
self.advance();
|
|
187
|
+
level += TAB_AS_LEVELS;
|
|
188
|
+
}
|
|
189
|
+
_ => break,
|
|
190
|
+
}
|
|
191
|
+
}
|
|
192
|
+
level.div_ceil(INDENT_WIDTH)
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
fn skip_whitespace(&mut self) {
|
|
196
|
+
while let Some(c) = self.peek() {
|
|
197
|
+
if c == ' ' || c == '\t' || c == '\r' {
|
|
198
|
+
self.advance();
|
|
199
|
+
} else if c == '\n' {
|
|
200
|
+
self.advance();
|
|
201
|
+
self.at_line_start = true;
|
|
202
|
+
} else {
|
|
203
|
+
break;
|
|
204
|
+
}
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
fn skip_line_comment(&mut self) {
|
|
209
|
+
while let Some(c) = self.advance() {
|
|
210
|
+
if c == '\n' {
|
|
211
|
+
break;
|
|
212
|
+
}
|
|
213
|
+
}
|
|
214
|
+
}
|
|
215
|
+
|
|
216
|
+
fn skip_block_comment(&mut self) -> Result<(), String> {
|
|
217
|
+
let mut depth = 1;
|
|
218
|
+
while depth > 0 {
|
|
219
|
+
match self.advance() {
|
|
220
|
+
Some('*') if self.peek() == Some('/') => {
|
|
221
|
+
self.advance();
|
|
222
|
+
depth -= 1;
|
|
223
|
+
}
|
|
224
|
+
Some('/') if self.peek() == Some('*') => {
|
|
225
|
+
self.advance();
|
|
226
|
+
depth += 1;
|
|
227
|
+
}
|
|
228
|
+
None => return Err("Unterminated block comment".to_string()),
|
|
229
|
+
_ => {}
|
|
230
|
+
}
|
|
231
|
+
}
|
|
232
|
+
Ok(())
|
|
233
|
+
}
|
|
234
|
+
|
|
235
|
+
fn read_number(&mut self, first: char) -> String {
|
|
236
|
+
// Radix-prefixed integer literals: `0x`/`0X` (hex), `0o`/`0O` (octal), `0b`/`0B`
|
|
237
|
+
// (binary), with optional `_` digit separators. JS semantics — a non-negative
|
|
238
|
+
// integer. Convert to a decimal string here so every downstream consumer (the
|
|
239
|
+
// parser's `parse::<f64>()`, the formatter, …) sees a plain number, unchanged.
|
|
240
|
+
if first == '0' {
|
|
241
|
+
if let Some(radix) = self.radix_prefix() {
|
|
242
|
+
self.advance(); // consume the x/o/b marker
|
|
243
|
+
let mut digits = String::with_capacity(16);
|
|
244
|
+
while let Some(c) = self.peek() {
|
|
245
|
+
if c == '_' {
|
|
246
|
+
self.advance(); // digit separator
|
|
247
|
+
} else if c.is_digit(radix) {
|
|
248
|
+
digits.push(c);
|
|
249
|
+
self.advance();
|
|
250
|
+
} else {
|
|
251
|
+
break;
|
|
252
|
+
}
|
|
253
|
+
}
|
|
254
|
+
return Self::radix_digits_to_decimal(&digits, radix);
|
|
255
|
+
}
|
|
256
|
+
}
|
|
257
|
+
|
|
258
|
+
let mut s = String::with_capacity(16);
|
|
259
|
+
s.push(first);
|
|
260
|
+
while let Some(c) = self.peek() {
|
|
261
|
+
if c.is_ascii_digit() || c == '.' {
|
|
262
|
+
s.push(c);
|
|
263
|
+
self.advance();
|
|
264
|
+
} else if c == '_' && Self::ends_with_digit(&s) && self.underscore_between_digits() {
|
|
265
|
+
self.advance(); // numeric separator (`15_000`) — drop it, JS-style
|
|
266
|
+
} else if (c == 'e' || c == 'E') && self.exponent_follows() {
|
|
267
|
+
// Scientific notation: `e`/`E` then optional sign then digits.
|
|
268
|
+
// Guarded by lookahead so `3em` lexes as `3` + `em`, not a bad number.
|
|
269
|
+
s.push(c);
|
|
270
|
+
self.advance(); // consume e/E
|
|
271
|
+
if matches!(self.peek(), Some('+') | Some('-')) {
|
|
272
|
+
s.push(self.peek().unwrap());
|
|
273
|
+
self.advance();
|
|
274
|
+
}
|
|
275
|
+
while let Some(d) = self.peek() {
|
|
276
|
+
if d.is_ascii_digit() {
|
|
277
|
+
s.push(d);
|
|
278
|
+
self.advance();
|
|
279
|
+
} else if d == '_'
|
|
280
|
+
&& Self::ends_with_digit(&s)
|
|
281
|
+
&& self.underscore_between_digits()
|
|
282
|
+
{
|
|
283
|
+
self.advance(); // numeric separator inside the exponent (`1e1_0`)
|
|
284
|
+
} else {
|
|
285
|
+
break;
|
|
286
|
+
}
|
|
287
|
+
}
|
|
288
|
+
break; // the exponent terminates the numeric literal
|
|
289
|
+
} else {
|
|
290
|
+
break;
|
|
291
|
+
}
|
|
292
|
+
}
|
|
293
|
+
s
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
/// Reports whether `s` ends in an ASCII decimal digit.
///
/// Used to refuse a `_` separator that is not preceded by a digit (e.g. a leading `_5`,
/// or `1._5` right after the `.`). An empty string yields `false`.
fn ends_with_digit(s: &str) -> bool {
    matches!(s.chars().next_back(), Some(last) if last.is_ascii_digit())
}
|
|
301
|
+
|
|
302
|
+
/// With `peek()` positioned at a `_`, look ahead (without consuming) to confirm the
|
|
303
|
+
/// next character is a decimal digit, i.e. the `_` sits between two digits and is a
|
|
304
|
+
/// valid JS numeric separator (rejects trailing `5_` and doubled `1__0`).
|
|
305
|
+
fn underscore_between_digits(&self) -> bool {
|
|
306
|
+
let mut la = self.chars.clone();
|
|
307
|
+
la.next(); // skip the `_` currently under peek()
|
|
308
|
+
la.next().is_some_and(|c| c.is_ascii_digit())
|
|
309
|
+
}
|
|
310
|
+
|
|
311
|
+
/// With the current peek positioned at an `e`/`E`, decide (without consuming)
|
|
312
|
+
/// whether a valid exponent `[+-]?\d` follows. `Chars` is `Clone`, so we look
|
|
313
|
+
/// ahead on a throwaway clone of the iterator.
|
|
314
|
+
fn exponent_follows(&self) -> bool {
|
|
315
|
+
let mut la = self.chars.clone();
|
|
316
|
+
la.next(); // skip the e/E currently under peek()
|
|
317
|
+
match la.next() {
|
|
318
|
+
Some(d) if d.is_ascii_digit() => true,
|
|
319
|
+
Some('+') | Some('-') => la.next().is_some_and(|d| d.is_ascii_digit()),
|
|
320
|
+
_ => false,
|
|
321
|
+
}
|
|
322
|
+
}
|
|
323
|
+
|
|
324
|
+
/// With a leading `0` already consumed and `peek()` at the radix marker, return the
|
|
325
|
+
/// radix (16 / 8 / 2) iff this is a valid `0x` / `0o` / `0b` prefix followed by at
|
|
326
|
+
/// least one valid digit. Returns `None` otherwise, so `0`, `0.5`, `0e3`, `0xZ`, and
|
|
327
|
+
/// `0x_1` all stay on the decimal path. Looks ahead on a clone of the `Chars` iterator
|
|
328
|
+
/// (`Chars: Clone`) without consuming.
|
|
329
|
+
fn radix_prefix(&self) -> Option<u32> {
|
|
330
|
+
let mut la = self.chars.clone();
|
|
331
|
+
let radix = match la.next()? {
|
|
332
|
+
'x' | 'X' => 16,
|
|
333
|
+
'o' | 'O' => 8,
|
|
334
|
+
'b' | 'B' => 2,
|
|
335
|
+
_ => return None,
|
|
336
|
+
};
|
|
337
|
+
match la.next() {
|
|
338
|
+
Some(c) if c.is_digit(radix) => Some(radix),
|
|
339
|
+
_ => None,
|
|
340
|
+
}
|
|
341
|
+
}
|
|
342
|
+
|
|
343
|
+
/// Turn the separator-free digits of a radix-prefixed literal into the decimal string
/// that the `Number` token carries.
///
/// Values that fit in a `u128` are converted exactly. Anything that does not parse as a
/// `u128` (in practice: absurdly long literals) is accumulated as an `f64` instead,
/// which loses precision past 2^53 — the same as JS's conversion to a double.
fn radix_digits_to_decimal(digits: &str, radix: u32) -> String {
    match u128::from_str_radix(digits, radix) {
        Ok(exact) => exact.to_string(),
        Err(_) => {
            let base = radix as f64;
            let approx = digits.chars().fold(0.0_f64, |acc, ch| {
                acc * base + ch.to_digit(radix).unwrap_or(0) as f64
            });
            format!("{approx}")
        }
    }
}
|
|
357
|
+
|
|
358
|
+
/// Handle escape sequence, returning the unescaped character.
|
|
359
|
+
/// `extra_allowed` contains additional characters that can be escaped in this context.
|
|
360
|
+
fn handle_escape(&mut self, extra_allowed: &[char]) -> Result<char, String> {
|
|
361
|
+
let escaped = self.advance().ok_or("Unterminated escape")?;
|
|
362
|
+
match escaped {
|
|
363
|
+
'n' => Ok('\n'),
|
|
364
|
+
'r' => Ok('\r'),
|
|
365
|
+
't' => Ok('\t'),
|
|
366
|
+
'b' => Ok('\u{0008}'),
|
|
367
|
+
'f' => Ok('\u{000C}'),
|
|
368
|
+
'v' => Ok('\u{000B}'),
|
|
369
|
+
'0' => Ok('\0'),
|
|
370
|
+
'\\' => Ok('\\'),
|
|
371
|
+
// `\xNN` — exactly two hex digits → code point 0x00..=0xFF (JS/TS).
|
|
372
|
+
'x' => {
|
|
373
|
+
let cp = self.read_hex_digits(2)?;
|
|
374
|
+
char::from_u32(cp).ok_or_else(|| format!("Invalid \\x escape: \\x{:02X}", cp))
|
|
375
|
+
}
|
|
376
|
+
// `\uNNNN` (exactly four hex digits) or `\u{N..}` (1-6 hex digits, ES6).
|
|
377
|
+
'u' => {
|
|
378
|
+
let cp = if self.peek() == Some('{') {
|
|
379
|
+
self.advance(); // consume '{'
|
|
380
|
+
let cp = self.read_hex_until_brace()?;
|
|
381
|
+
match self.advance() {
|
|
382
|
+
Some('}') => cp,
|
|
383
|
+
_ => return Err("Unterminated \\u{...} escape (expected '}')".to_string()),
|
|
384
|
+
}
|
|
385
|
+
} else {
|
|
386
|
+
self.read_hex_digits(4)?
|
|
387
|
+
};
|
|
388
|
+
// Lone surrogates (0xD800..=0xDFFF) are valid UTF-16 code units in JS but
|
|
389
|
+
// not Unicode scalar values; tish strings are UTF-8, so reject them.
|
|
390
|
+
char::from_u32(cp)
|
|
391
|
+
.ok_or_else(|| format!("Invalid \\u escape: code point U+{:04X}", cp))
|
|
392
|
+
}
|
|
393
|
+
c if extra_allowed.contains(&c) => Ok(c),
|
|
394
|
+
_ => Err(format!("Unknown escape: \\{}", escaped)),
|
|
395
|
+
}
|
|
396
|
+
}
|
|
397
|
+
|
|
398
|
+
/// Read exactly `n` hex digits and return the parsed code point.
|
|
399
|
+
fn read_hex_digits(&mut self, n: usize) -> Result<u32, String> {
|
|
400
|
+
let mut value: u32 = 0;
|
|
401
|
+
for _ in 0..n {
|
|
402
|
+
let c = self.advance().ok_or("Unterminated hex escape")?;
|
|
403
|
+
let digit = c
|
|
404
|
+
.to_digit(16)
|
|
405
|
+
.ok_or_else(|| format!("Invalid hex digit in escape: '{}'", c))?;
|
|
406
|
+
value = value * 16 + digit;
|
|
407
|
+
}
|
|
408
|
+
Ok(value)
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
/// Read 1-6 hex digits for a `\u{...}` escape (stops at `}`); validates the count
|
|
412
|
+
/// and that the value is within the Unicode range.
|
|
413
|
+
fn read_hex_until_brace(&mut self) -> Result<u32, String> {
|
|
414
|
+
let mut value: u32 = 0;
|
|
415
|
+
let mut count = 0;
|
|
416
|
+
while let Some(c) = self.peek() {
|
|
417
|
+
let Some(digit) = c.to_digit(16) else { break };
|
|
418
|
+
self.advance();
|
|
419
|
+
value = value * 16 + digit;
|
|
420
|
+
count += 1;
|
|
421
|
+
if count > 6 || value > 0x10_FFFF {
|
|
422
|
+
return Err("Invalid \\u{...} escape: code point out of range".to_string());
|
|
423
|
+
}
|
|
424
|
+
}
|
|
425
|
+
if count == 0 {
|
|
426
|
+
return Err("Empty \\u{} escape (expected hex digits)".to_string());
|
|
427
|
+
}
|
|
428
|
+
Ok(value)
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
fn read_string(&mut self, quote: char) -> Result<String, String> {
|
|
432
|
+
let mut s = String::with_capacity(32);
|
|
433
|
+
let extra = if quote == '"' {
|
|
434
|
+
&['"', '\''][..]
|
|
435
|
+
} else {
|
|
436
|
+
&['\'', '"'][..]
|
|
437
|
+
};
|
|
438
|
+
loop {
|
|
439
|
+
match self.advance() {
|
|
440
|
+
None => return Err("Unterminated string".to_string()),
|
|
441
|
+
Some(c) if c == quote => break,
|
|
442
|
+
Some('\\') => s.push(self.handle_escape(extra)?),
|
|
443
|
+
Some(c) => s.push(c),
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
Ok(s)
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
fn read_ident_or_keyword(&mut self, first: char) -> String {
|
|
450
|
+
let mut s = String::with_capacity(16);
|
|
451
|
+
s.push(first);
|
|
452
|
+
while let Some(c) = self.peek() {
|
|
453
|
+
if c.is_ascii_alphanumeric() || c == '_' {
|
|
454
|
+
s.push(c);
|
|
455
|
+
self.advance();
|
|
456
|
+
} else {
|
|
457
|
+
break;
|
|
458
|
+
}
|
|
459
|
+
}
|
|
460
|
+
s
|
|
461
|
+
}
|
|
462
|
+
|
|
463
|
+
/// Read a template literal. If `is_continuation` is true, we're continuing after a `}`.
|
|
464
|
+
fn read_template(
|
|
465
|
+
&mut self,
|
|
466
|
+
start: (usize, usize),
|
|
467
|
+
is_continuation: bool,
|
|
468
|
+
) -> Result<Option<Token>, String> {
|
|
469
|
+
let mut s = String::with_capacity(if is_continuation { 32 } else { 64 });
|
|
470
|
+
let extra = &['`', '$', '{'][..];
|
|
471
|
+
|
|
472
|
+
loop {
|
|
473
|
+
match self.advance() {
|
|
474
|
+
None => return Err("Unterminated template literal".to_string()),
|
|
475
|
+
Some('`') => {
|
|
476
|
+
let end = self.span_start();
|
|
477
|
+
let kind = if is_continuation {
|
|
478
|
+
TokenKind::TemplateTail
|
|
479
|
+
} else {
|
|
480
|
+
TokenKind::TemplateNoSub
|
|
481
|
+
};
|
|
482
|
+
return Ok(Some(Token {
|
|
483
|
+
kind,
|
|
484
|
+
span: Span { start, end },
|
|
485
|
+
literal: Some(s.into()),
|
|
486
|
+
}));
|
|
487
|
+
}
|
|
488
|
+
Some('$') if self.peek() == Some('{') => {
|
|
489
|
+
self.advance();
|
|
490
|
+
self.template_brace_stack.push(1);
|
|
491
|
+
let end = self.span_start();
|
|
492
|
+
let kind = if is_continuation {
|
|
493
|
+
TokenKind::TemplateMiddle
|
|
494
|
+
} else {
|
|
495
|
+
TokenKind::TemplateHead
|
|
496
|
+
};
|
|
497
|
+
return Ok(Some(Token {
|
|
498
|
+
kind,
|
|
499
|
+
span: Span { start, end },
|
|
500
|
+
literal: Some(s.into()),
|
|
501
|
+
}));
|
|
502
|
+
}
|
|
503
|
+
Some('\\') => s.push(self.handle_escape(extra)?),
|
|
504
|
+
Some(c) => s.push(c),
|
|
505
|
+
}
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
fn emit_indent_or_dedent(&mut self, level: usize) -> Option<Token> {
|
|
510
|
+
let top = *self.indent_stack.last().unwrap();
|
|
511
|
+
let start = self.span_start();
|
|
512
|
+
|
|
513
|
+
if level > top {
|
|
514
|
+
self.indent_stack.push(level);
|
|
515
|
+
Some(Token {
|
|
516
|
+
kind: TokenKind::Indent,
|
|
517
|
+
span: Span { start, end: start },
|
|
518
|
+
literal: None,
|
|
519
|
+
})
|
|
520
|
+
} else if level < top {
|
|
521
|
+
while self.indent_stack.len() > 1 && *self.indent_stack.last().unwrap() > level {
|
|
522
|
+
self.indent_stack.pop();
|
|
523
|
+
self.pending_dedents.push_back(Token {
|
|
524
|
+
kind: TokenKind::Dedent,
|
|
525
|
+
span: Span { start, end: start },
|
|
526
|
+
literal: None,
|
|
527
|
+
});
|
|
528
|
+
}
|
|
529
|
+
if *self.indent_stack.last().unwrap_or(&0) != level {
|
|
530
|
+
self.indent_stack.push(level);
|
|
531
|
+
}
|
|
532
|
+
self.pending_dedents.pop_front()
|
|
533
|
+
} else {
|
|
534
|
+
None
|
|
535
|
+
}
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
/// Produce the next token, or `Ok(None)` when the lexer yields no further token.
///
/// Wraps `next_token_inner` and records the kind of each token returned in
/// `last_significant_kind`; the recorded kind is left untouched when no token is
/// produced.
///
/// # Errors
/// Propagates any lexing error from `next_token_inner`.
pub fn next_token(&mut self) -> Result<Option<Token>, String> {
    let produced = self.next_token_inner()?;
    if let Some(kind) = produced.as_ref().map(|token| token.kind) {
        self.last_significant_kind = Some(kind);
    }
    Ok(produced)
}
|
|
545
|
+
|
|
546
|
+
/// Produces the next token without recording it in `last_significant_kind`
/// (the public `next_token` wrapper does that).
///
/// Work done per call, in order:
/// 1. hand out a `Dedent` queued by an earlier call, if any;
/// 2. if the previous token left `jsx_after_gt` set, try to read a run of JSX text;
/// 3. at the start of a line, measure indentation and possibly emit `Indent`/`Dedent`;
/// 4. skip whitespace, then dispatch on a single character to build an operator,
///    punctuation, string, template, number, identifier or keyword token.
///
/// Comments and newlines produce no token of their own: those paths restart lexing by
/// calling `next_token` again.
///
/// Returns `Ok(None)` at end of input, after all open indent levels have been closed with
/// `Dedent` tokens (one per call).
///
/// # Errors
/// Returns `Err` for a bare `..`, for a character that starts no token, and for any error
/// propagated from `read_jsx_text`, `skip_block_comment`, `read_string` or `read_template`.
fn next_token_inner(&mut self) -> Result<Option<Token>, String> {
    // Dedents queued by `emit_indent_or_dedent` are delivered one per call, before any new input.
    if let Some(tok) = self.pending_dedents.pop_front() {
        return Ok(Some(tok));
    }

    // One-shot flag set after a `>` / `}` that puts us in an element's children region.
    // `{`, `<` and end of input are left to the normal dispatch below; anything else is
    // handed to `read_jsx_text`.
    if self.jsx_after_gt {
        self.jsx_after_gt = false;
        if !matches!(self.peek(), Some('{') | Some('<') | None) {
            let start = self.span_start();
            // `read_jsx_text` may yield no token; in that case fall through to normal lexing.
            if let Some(tok) = self.read_jsx_text(start)? {
                return Ok(Some(tok));
            }
        }
    }

    if self.at_line_start {
        self.at_line_start = false;
        // Always consume the leading whitespace; only *emit* Indent/Dedent when indentation
        // is significant. With `ignore_indent`, the level is discarded so the indent stack
        // stays at `[0]` and no virtual tokens are produced (brace-only blocks).
        let level = self.read_indent_level();
        // A level-0 line whose next character is a newline, or that is at end of input,
        // does not touch the indent stack.
        if !self.ignore_indent
            && (level > 0 || self.peek().map(|c| c != '\n').unwrap_or(false))
        {
            if let Some(tok) = self.emit_indent_or_dedent(level) {
                return Ok(Some(tok));
            }
        }
    }

    self.skip_whitespace();
    // If skipping whitespace moved us onto a new line, restart so the line-start logic runs.
    if self.at_line_start {
        return self.next_token();
    }

    let start = self.span_start();
    let c = match self.advance() {
        Some(c) => c,
        None => {
            // End of input: flush queued dedents, then close each still-open indent level
            // with one zero-width `Dedent` per call, and finally report exhaustion.
            if let Some(tok) = self.pending_dedents.pop_front() {
                return Ok(Some(tok));
            }
            if self.indent_stack.len() > 1 {
                self.indent_stack.pop();
                return Ok(Some(Token {
                    kind: TokenKind::Dedent,
                    span: Span {
                        start: (self.line, self.col),
                        end: (self.line, self.col),
                    },
                    literal: None,
                }));
            }
            return Ok(None);
        }
    };

    // Single-character dispatch. Arms that build a token carrying a literal (strings,
    // numbers, identifiers, templates) return directly; the remaining arms evaluate to a
    // `TokenKind` and share the token construction at the bottom.
    let kind = match c {
        '(' => TokenKind::LParen,
        ')' => TokenKind::RParen,
        '{' => {
            // Inside a JSX opening tag the brace is counted on the innermost element
            // (`attr_value_braces`); otherwise, while any JSX element is open, it is
            // counted in `jsx_child_brace_depth`.
            if self.jsx_in_opening_tag {
                if let Some(top) = self.jsx_stack.last_mut() {
                    top.attr_value_braces += 1;
                }
            } else if self.jsx_depth > 0 {
                self.jsx_child_brace_depth += 1;
            }
            // Independently, track brace nesting for the innermost entry of
            // `template_brace_stack` (presumably an open template-literal interpolation).
            if let Some(depth) = self.template_brace_stack.last_mut() {
                *depth += 1;
            }
            TokenKind::LBrace
        }
        '}' => {
            // Mirror of the `{` arm: close an attribute-value brace first if one is open,
            // otherwise a child-expression brace.
            let mut handled = false;
            if let Some(top) = self.jsx_stack.last() {
                if top.in_opener && top.attr_value_braces > 0 {
                    if let Some(top) = self.jsx_stack.last_mut() {
                        top.attr_value_braces -= 1;
                    }
                    handled = true;
                }
            }
            if !handled && self.jsx_child_brace_depth > 0 {
                self.jsx_child_brace_depth -= 1;
                // Leaving the outermost child brace: the next call should try JSX text again.
                if self.jsx_child_brace_depth == 0 {
                    self.jsx_after_gt = true;
                }
            }
            if let Some(depth) = self.template_brace_stack.last_mut() {
                *depth -= 1;
                // Brace count for the tracked template reached zero: drop the entry and let
                // `read_template` take over from this `}` (second argument `true`;
                // presumably "continue an already-open template" — confirm in `read_template`).
                if *depth == 0 {
                    self.template_brace_stack.pop();
                    return self.read_template(start, true);
                }
            }
            TokenKind::RBrace
        }
        '[' => TokenKind::LBracket,
        ']' => TokenKind::RBracket,
        ';' => TokenKind::Semicolon,
        ',' => TokenKind::Comma,
        '.' => {
            // `.?` is accepted as an optional-chain token here, in addition to `?.`
            // handled in the `?` arm.
            if self.peek() == Some('?') {
                self.advance();
                TokenKind::OptionalChain
            } else if self.peek() == Some('.') {
                self.advance();
                // Only the three-dot form is valid; two dots are rejected.
                if self.peek() == Some('.') {
                    self.advance();
                    TokenKind::Spread
                } else {
                    return Err("Unexpected .. (use ... for rest params)".to_string());
                }
            } else {
                TokenKind::Dot
            }
        }
        '=' => {
            // `===`, `==`, `=>`, `=`
            if self.peek() == Some('=') {
                self.advance();
                if self.peek() == Some('=') {
                    self.advance();
                    TokenKind::StrictEq
                } else {
                    TokenKind::Eq
                }
            } else if self.peek() == Some('>') {
                self.advance();
                TokenKind::Arrow
            } else {
                TokenKind::Assign
            }
        }
        '!' => {
            // `!==`, `!=`, `!`
            if self.peek() == Some('=') {
                self.advance();
                if self.peek() == Some('=') {
                    self.advance();
                    TokenKind::StrictNe
                } else {
                    TokenKind::Ne
                }
            } else {
                TokenKind::Not
            }
        }
        '<' => {
            // `<=`, `<<`, then the JSX cases. Every JSX case still yields a plain `Lt`
            // token; only lexer state changes.
            if self.peek() == Some('=') {
                self.advance();
                TokenKind::Le
            } else if self.peek() == Some('<') {
                self.advance();
                TokenKind::Shl
            } else if self.peek() == Some('/') {
                // `</` — remember that the matching `>` ends a closing tag. The `/` itself
                // is not consumed here.
                self.jsx_in_closing_tag = true;
                TokenKind::Lt
            } else if (self.peek() == Some('>')
                || self
                    .peek()
                    .map(|c| c.is_ascii_alphabetic() || c == '_')
                    .unwrap_or(false))
                && !self.last_is_value()
            {
                // JSX open tag — only in expression position. After a value (`ident<`, `)<`,
                // `]<`, literal) this is `Lt`: a comparison or generic-args opener.
                self.jsx_depth += 1;
                self.jsx_stack.push(JsxEl {
                    in_opener: true,
                    attr_value_braces: 0,
                });
                self.jsx_in_opening_tag = true;
                TokenKind::Lt
            } else {
                TokenKind::Lt
            }
        }
        '>' => {
            // `>=`, `>>>`, `>>` are matched before any JSX interpretation of `>`.
            // NOTE(review): this means JSX text that begins with `=` or `>` right after a
            // tag's `>` would be lexed as an operator — confirm whether that matters.
            if self.peek() == Some('=') {
                self.advance();
                TokenKind::Ge
            } else if self.peek() == Some('>') {
                self.advance();
                if self.peek() == Some('>') {
                    self.advance();
                    TokenKind::UShr // `>>>`
                } else {
                    TokenKind::Shr
                }
            } else {
                // This `>` ends an element when it terminates a closing tag (`</x>`) or a
                // self-closing opening tag (`<x />`).
                if self.jsx_in_closing_tag
                    || (self.jsx_in_opening_tag && self.jsx_saw_slash_before_gt)
                {
                    // NOTE(review): `.max(0)` only clamps if `jsx_depth` is a signed
                    // integer; with an unsigned field the subtraction itself could
                    // underflow — confirm the field type.
                    self.jsx_depth = (self.jsx_depth - 1).max(0);
                    self.jsx_stack.pop();
                    self.jsx_sync_in_opening_tag();
                    // A child element just closed (`</span>` or `<br/>`). If a parent element
                    // is still open and past its opening tag, we're back in that parent's
                    // children region, so the following run is JSX text — re-enter text mode.
                    // Without this, trailing text after a child element ("… as JSON") is lexed
                    // as code and a bare keyword (`as`, `in`, `if`, …) breaks the parse (#108).
                    //
                    // Guard on `jsx_child_brace_depth == 0`: if the closed element lived inside a
                    // `{…}` expression container (e.g. `<div>{items.map(x => <span/>)}</div>`),
                    // we're still in that expression, not the parent's text children — entering
                    // text mode there would swallow the following `)`/`,` as JsxText.
                    if self.jsx_child_brace_depth == 0
                        && self.jsx_stack.last().map(|e| !e.in_opener).unwrap_or(false)
                    {
                        self.jsx_after_gt = true;
                    }
                } else if let Some(top) = self.jsx_stack.last_mut() {
                    if top.in_opener && top.attr_value_braces > 0 {
                        // `>` is a comparison (or shift) token inside `{ ... }`, not end of opening tag.
                    } else if top.in_opener && !self.jsx_saw_slash_before_gt {
                        // End of a non-self-closing opening tag: the element's children
                        // start here, so the next call should try JSX text.
                        top.in_opener = false;
                        self.jsx_after_gt = true;
                        self.jsx_sync_in_opening_tag();
                    }
                }
                // Both per-tag flags are reset by any plain `>`.
                self.jsx_in_closing_tag = false;
                self.jsx_saw_slash_before_gt = false;
                TokenKind::Gt
            }
        }
        '^' => TokenKind::BitXor,
        '~' => TokenKind::BitNot,
        '+' => {
            // `++`, `+=`, `+`
            if self.peek() == Some('+') {
                self.advance();
                TokenKind::PlusPlus
            } else if self.peek() == Some('=') {
                self.advance();
                TokenKind::PlusAssign
            } else {
                TokenKind::Plus
            }
        }
        '-' => {
            // `--`, `-=`, `-`
            if self.peek() == Some('-') {
                self.advance();
                TokenKind::MinusMinus
            } else if self.peek() == Some('=') {
                self.advance();
                TokenKind::MinusAssign
            } else {
                TokenKind::Minus
            }
        }
        '*' => {
            // `**`, `*=`, `*`
            if self.peek() == Some('*') {
                self.advance();
                TokenKind::StarStar
            } else if self.peek() == Some('=') {
                self.advance();
                TokenKind::StarAssign
            } else {
                TokenKind::Star
            }
        }
        '/' => {
            // `//` line comment, `/* */` block comment, `/=`, or a plain `/`.
            if self.peek() == Some('/') {
                self.advance();
                self.skip_line_comment();
                // `skip_line_comment` consumes the newline via `advance()`, which sets
                // `at_line_start` before we would normally run `skip_whitespace()`. Without
                // stripping the next line's leading spaces here, `read_indent_level` would see
                // physical indentation and emit a spurious `Indent` (breaks e.g. object
                // literals with trailing `//` comments). Newlines handled in `skip_whitespace`
                // eat those spaces before the indent pass; match that behavior.
                self.skip_whitespace();
                // A comment yields no token: restart lexing after it.
                return self.next_token();
            } else if self.peek() == Some('*') {
                self.advance();
                self.skip_block_comment()?;
                return self.next_token();
            } else if self.peek() == Some('=') {
                self.advance();
                TokenKind::SlashAssign
            } else {
                // Inside a JSX opening tag, remember the `/` so the following `>` is
                // treated as the end of a self-closing element.
                if self.jsx_in_opening_tag {
                    self.jsx_saw_slash_before_gt = true;
                }
                TokenKind::Slash
            }
        }
        '%' => {
            // `%=`, `%`
            if self.peek() == Some('=') {
                self.advance();
                TokenKind::PercentAssign
            } else {
                TokenKind::Percent
            }
        }
        '&' => {
            // `&&=`, `&&`, `&`
            if self.peek() == Some('&') {
                self.advance();
                if self.peek() == Some('=') {
                    self.advance();
                    TokenKind::AndAndAssign
                } else {
                    TokenKind::And
                }
            } else {
                TokenKind::BitAnd
            }
        }
        '|' => {
            // `||=`, `||`, `|`
            if self.peek() == Some('|') {
                self.advance();
                if self.peek() == Some('=') {
                    self.advance();
                    TokenKind::OrOrAssign
                } else {
                    TokenKind::Or
                }
            } else {
                TokenKind::BitOr
            }
        }
        '?' => {
            // `??=`, `??`, `?.`, `?`
            if self.peek() == Some('?') {
                self.advance();
                if self.peek() == Some('=') {
                    self.advance();
                    TokenKind::NullishAssign
                } else {
                    TokenKind::NullishCoalesce
                }
            } else if self.peek() == Some('.') {
                self.advance();
                TokenKind::OptionalChain
            } else {
                TokenKind::Question
            }
        }
        ':' => TokenKind::Colon,
        '"' | '\'' => {
            // The opening quote character is passed on so `read_string` knows the delimiter.
            let s = self.read_string(c)?;
            let end = self.span_start();
            return Ok(Some(Token {
                kind: TokenKind::String,
                span: Span { start, end },
                literal: Some(s.into()),
            }));
        }
        // Start of a template literal (second argument `false`, as opposed to the `true`
        // passed from the `}` arm).
        '`' => return self.read_template(start, false),
        '0'..='9' => {
            let num = self.read_number(c);
            let end = self.span_start();
            return Ok(Some(Token {
                kind: TokenKind::Number,
                span: Span { start, end },
                literal: Some(num.into()),
            }));
        }
        'a'..='z' | 'A'..='Z' | '_' => {
            let ident = self.read_ident_or_keyword(c);
            let end = self.span_start();
            let kind = TokenKind::keyword_or_ident(&ident);
            return Ok(Some(Token {
                kind,
                span: Span { start, end },
                // Spelling is useful for keywords too (e.g. object keys, type names like `type`).
                literal: Some(ident.into()),
            }));
        }
        '\n' => {
            // A newline that reaches the dispatch yields no token: mark the line start and restart.
            self.at_line_start = true;
            return self.next_token();
        }
        _ => return Err(format!("Unexpected character: {:?}", c)),
    };

    // Shared tail for operator/punctuation tokens: they carry no literal text.
    let end = self.span_start();
    Ok(Some(Token {
        kind,
        span: Span { start, end },
        literal: None,
    }))
}
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
impl<'a> Iterator for Lexer<'a> {
|
|
930
|
+
type Item = Result<Token, String>;
|
|
931
|
+
|
|
932
|
+
fn next(&mut self) -> Option<Self::Item> {
|
|
933
|
+
match self.next_token() {
|
|
934
|
+
Ok(Some(t)) => Some(Ok(t)),
|
|
935
|
+
Ok(None) => None,
|
|
936
|
+
Err(e) => Some(Err(e)),
|
|
937
|
+
}
|
|
938
|
+
}
|
|
939
|
+
}
|
|
940
|
+
|
|
941
|
+
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `src` with the default constructor, panicking on the first lexer error.
    fn lex(src: &str) -> Vec<Token> {
        Lexer::new(src).collect::<Result<Vec<_>, _>>().unwrap()
    }

    #[test]
    fn test_string_literal() {
        let lexed = lex(r#""H""#);
        assert_eq!(lexed.len(), 1);
        let only = &lexed[0];
        assert_eq!(only.kind, TokenKind::String);
        assert_eq!(only.literal.as_deref(), Some("H"));
    }

    #[test]
    fn test_print_string() {
        let lexed = lex(r#"print("H")"#);
        let string_tok = lexed
            .iter()
            .find(|tok| tok.kind == TokenKind::String)
            .unwrap();
        assert_eq!(string_tok.literal.as_deref(), Some("H"));
    }

    #[test]
    fn radix_integer_literals() {
        // `0x` / `0o` / `0b` literals (prefix in either case) are stored as their decimal
        // value; `_` digit separators are accepted.
        let table = [
            ("0xff", "255"),
            ("0xFF", "255"),
            ("0X1a", "26"),
            ("0o17", "15"),
            ("0O7", "7"),
            ("0b1010", "10"),
            ("0B0", "0"),
            ("0xdeadbeef", "3735928559"),
            ("0xFF_FF", "65535"),
            ("0b1111_0000", "240"),
        ];
        for (src, expected) in table {
            let lexed = lex(src);
            let Some(num) = lexed.iter().find(|tok| tok.kind == TokenKind::Number) else {
                panic!("no Number token for {src}");
            };
            assert_eq!(num.literal.as_deref(), Some(expected), "for {src}");
        }
    }

    #[test]
    fn decimal_numeric_separators() {
        // Issue #57: `_` between digits is a JS numeric separator and must not appear in
        // the literal value.
        fn only_number(src: &str) -> String {
            let lexed = lex(src);
            let numbers: Vec<&Token> = lexed
                .iter()
                .filter(|tok| tok.kind == TokenKind::Number)
                .collect();
            assert_eq!(
                numbers.len(),
                1,
                "expected exactly one Number token for {src}"
            );
            // The digits after a separator must not leak out as an identifier.
            let has_ident = lexed.iter().any(|tok| tok.kind == TokenKind::Ident);
            assert!(!has_ident, "unexpected Ident token while lexing {src}");
            numbers[0].literal.as_deref().unwrap().to_string()
        }

        for (src, expected) in [
            ("15_000", "15000"),
            ("1_000_000", "1000000"),
            ("3.14_159", "3.14159"),
            ("1e1_0", "1e10"),
        ] {
            assert_eq!(only_number(src), expected);
        }
    }

    #[test]
    fn non_radix_zero_prefixed_stays_decimal() {
        // Leading zeros do not mean legacy octal, and an invalid prefix does not start a
        // radix literal.
        fn num_literal(src: &str) -> String {
            let first_number = lex(src)
                .into_iter()
                .find(|tok| tok.kind == TokenKind::Number)
                .unwrap();
            first_number.literal.as_deref().unwrap().to_string()
        }

        assert_eq!(num_literal("07"), "07"); // stays decimal text, not octal
        assert_eq!(num_literal("0"), "0");

        // `0xZ`: the Number token is only `0`; `xZ` follows as an identifier.
        let lexed = lex("0xZ");
        assert_eq!(lexed[0].kind, TokenKind::Number);
        assert_eq!(lexed[0].literal.as_deref(), Some("0"));
        assert_eq!(lexed[1].kind, TokenKind::Ident);
    }

    #[test]
    fn line_comment_does_not_emit_spurious_indent_before_next_line() {
        let with_comment = "fn f() {\n return {\n a: 1, // c\n b: 2\n }\n}\n";
        let lexed = lex(with_comment);
        let saw_indent = lexed.iter().any(|tok| tok.kind == TokenKind::Indent);
        assert!(
            !saw_indent,
            "unexpected Indent after line comment: {:?}",
            lexed
                .iter()
                .map(|tok| format!("{:?}", tok.kind))
                .collect::<Vec<_>>()
        );
    }

    /// Input whose first line starts with whitespace and whose second line does not;
    /// the default-mode test below expects both an Indent and a Dedent from it.
    const INDENTED_SRC: &str = " a()\nb()\n";

    #[test]
    fn default_options_still_emit_indent_and_dedent() {
        let lexed = Lexer::with_options(INDENTED_SRC, LexerOptions::default())
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        let contains = |wanted: TokenKind| lexed.iter().any(|tok| tok.kind == wanted);
        assert!(
            contains(TokenKind::Indent),
            "expected an Indent token in the default (indentation-significant) mode"
        );
        assert!(
            contains(TokenKind::Dedent),
            "expected a Dedent token in the default (indentation-significant) mode"
        );
    }

    #[test]
    fn ignore_indent_emits_no_virtual_tokens() {
        let options = LexerOptions { ignore_indent: true };
        let lexed = Lexer::with_options(INDENTED_SRC, options)
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        let has_virtual = lexed
            .iter()
            .any(|tok| matches!(tok.kind, TokenKind::Indent | TokenKind::Dedent));
        assert!(
            !has_virtual,
            "expected no Indent/Dedent with ignore_indent, got: {:?}",
            lexed.iter().map(|tok| tok.kind).collect::<Vec<_>>()
        );
    }

    #[test]
    fn env_truthy_enables_only_on_recognized_values() {
        use std::ffi::OsString;
        let flag = |raw: &str| env_truthy(Some(OsString::from(raw)));

        // The recognized truthy spellings switch the flag on.
        for on in ["1", "true", "yes"] {
            assert!(flag(on), "{on:?} should enable the flag");
        }

        // An unset variable leaves it off...
        assert!(!env_truthy(None));
        // ...as do empty and falsy values, and `TRUE`: matching is exact and case-sensitive
        // by design.
        for off in ["", "0", "false", "no", "TRUE"] {
            assert!(!flag(off), "{off:?} should leave the flag off");
        }
    }
}
|