@tishlang/tish-format 1.0.12 → 2.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (189) hide show
  1. package/Cargo.toml +51 -0
  2. package/LICENSE +13 -0
  3. package/bin/tish-format +0 -0
  4. package/crates/js_to_tish/Cargo.toml +11 -0
  5. package/crates/js_to_tish/README.md +18 -0
  6. package/crates/js_to_tish/src/error.rs +55 -0
  7. package/crates/js_to_tish/src/lib.rs +11 -0
  8. package/crates/js_to_tish/src/span_util.rs +35 -0
  9. package/crates/js_to_tish/src/transform/expr.rs +611 -0
  10. package/crates/js_to_tish/src/transform/stmt.rs +503 -0
  11. package/crates/js_to_tish/src/transform.rs +60 -0
  12. package/crates/tish/Cargo.toml +62 -0
  13. package/crates/tish/build.rs +21 -0
  14. package/crates/tish/src/cargo_native_registry.rs +32 -0
  15. package/crates/tish/src/cli_help.rs +576 -0
  16. package/crates/tish/src/main.rs +853 -0
  17. package/crates/tish/src/repl_completion.rs +199 -0
  18. package/crates/tish/tests/cargo_example_compile.rs +67 -0
  19. package/crates/tish/tests/error_source_location.rs +36 -0
  20. package/crates/tish/tests/fixtures/cargo_example_project/Cargo.toml +3 -0
  21. package/crates/tish/tests/fixtures/cargo_example_project/crates/demo-shim/Cargo.toml +11 -0
  22. package/crates/tish/tests/fixtures/cargo_example_project/crates/demo-shim/src/lib.rs +12 -0
  23. package/crates/tish/tests/fixtures/cargo_example_project/package.json +10 -0
  24. package/crates/tish/tests/fixtures/cargo_example_project/src/main.tish +3 -0
  25. package/crates/tish/tests/fixtures/runtime_error_location.tish +5 -0
  26. package/crates/tish/tests/fixtures/trycatch_runtime_errors.tish +15 -0
  27. package/crates/tish/tests/fixtures/tty_capability.tish +9 -0
  28. package/crates/tish/tests/integration_test.rs +1406 -0
  29. package/crates/tish/tests/run_optimize_stdout_parity.rs +50 -0
  30. package/crates/tish/tests/shortcircuit.rs +65 -0
  31. package/crates/tish/tests/trycatch_runtime_errors.rs +45 -0
  32. package/crates/tish/tests/tty_capability.rs +43 -0
  33. package/crates/tish_ast/Cargo.toml +9 -0
  34. package/crates/tish_ast/src/ast.rs +649 -0
  35. package/crates/tish_ast/src/lib.rs +5 -0
  36. package/crates/tish_build_utils/Cargo.toml +11 -0
  37. package/crates/tish_build_utils/src/lib.rs +577 -0
  38. package/crates/tish_builtins/Cargo.toml +22 -0
  39. package/crates/tish_builtins/src/array.rs +803 -0
  40. package/crates/tish_builtins/src/collections.rs +481 -0
  41. package/crates/tish_builtins/src/construct.rs +199 -0
  42. package/crates/tish_builtins/src/date.rs +538 -0
  43. package/crates/tish_builtins/src/globals.rs +293 -0
  44. package/crates/tish_builtins/src/helpers.rs +35 -0
  45. package/crates/tish_builtins/src/iterator.rs +129 -0
  46. package/crates/tish_builtins/src/lib.rs +21 -0
  47. package/crates/tish_builtins/src/math.rs +89 -0
  48. package/crates/tish_builtins/src/number.rs +96 -0
  49. package/crates/tish_builtins/src/object.rs +36 -0
  50. package/crates/tish_builtins/src/string.rs +646 -0
  51. package/crates/tish_builtins/src/symbol.rs +83 -0
  52. package/crates/tish_builtins/src/typedarrays.rs +298 -0
  53. package/crates/tish_bytecode/Cargo.toml +17 -0
  54. package/crates/tish_bytecode/src/chunk.rs +164 -0
  55. package/crates/tish_bytecode/src/compiler.rs +2604 -0
  56. package/crates/tish_bytecode/src/encoding.rs +102 -0
  57. package/crates/tish_bytecode/src/lib.rs +20 -0
  58. package/crates/tish_bytecode/src/opcode.rs +185 -0
  59. package/crates/tish_bytecode/src/peephole.rs +189 -0
  60. package/crates/tish_bytecode/src/serialize.rs +193 -0
  61. package/crates/tish_bytecode/tests/break_continue_bytecode.rs +44 -0
  62. package/crates/tish_bytecode/tests/constant_folding.rs +84 -0
  63. package/crates/tish_bytecode/tests/sort_optimization.rs +31 -0
  64. package/crates/tish_compile/Cargo.toml +27 -0
  65. package/crates/tish_compile/src/check.rs +774 -0
  66. package/crates/tish_compile/src/codegen.rs +7317 -0
  67. package/crates/tish_compile/src/infer.rs +1681 -0
  68. package/crates/tish_compile/src/lib.rs +206 -0
  69. package/crates/tish_compile/src/resolve.rs +1951 -0
  70. package/crates/tish_compile/src/types.rs +605 -0
  71. package/crates/tish_compile_js/Cargo.toml +18 -0
  72. package/crates/tish_compile_js/examples/jsx_vdom_smoke.tish +8 -0
  73. package/crates/tish_compile_js/src/codegen.rs +938 -0
  74. package/crates/tish_compile_js/src/error.rs +20 -0
  75. package/crates/tish_compile_js/src/lib.rs +26 -0
  76. package/crates/tish_compile_js/src/tests_jsx.rs +414 -0
  77. package/crates/tish_compiler_wasm/Cargo.toml +21 -0
  78. package/crates/tish_compiler_wasm/src/lib.rs +57 -0
  79. package/crates/tish_compiler_wasm/src/resolve_virtual.rs +473 -0
  80. package/crates/tish_core/Cargo.toml +32 -0
  81. package/crates/tish_core/src/console_style.rs +170 -0
  82. package/crates/tish_core/src/json.rs +430 -0
  83. package/crates/tish_core/src/lib.rs +20 -0
  84. package/crates/tish_core/src/macros.rs +36 -0
  85. package/crates/tish_core/src/shape.rs +85 -0
  86. package/crates/tish_core/src/uri.rs +118 -0
  87. package/crates/tish_core/src/value.rs +1350 -0
  88. package/crates/tish_core/src/vmref.rs +183 -0
  89. package/crates/tish_cranelift/Cargo.toml +19 -0
  90. package/crates/tish_cranelift/src/lib.rs +43 -0
  91. package/crates/tish_cranelift/src/link.rs +130 -0
  92. package/crates/tish_cranelift/src/lower.rs +85 -0
  93. package/crates/tish_cranelift_runtime/Cargo.toml +26 -0
  94. package/crates/tish_cranelift_runtime/src/lib.rs +45 -0
  95. package/crates/tish_eval/Cargo.toml +51 -0
  96. package/crates/tish_eval/src/eval.rs +4265 -0
  97. package/crates/tish_eval/src/http.rs +191 -0
  98. package/crates/tish_eval/src/lib.rs +99 -0
  99. package/crates/tish_eval/src/natives.rs +551 -0
  100. package/crates/tish_eval/src/promise.rs +179 -0
  101. package/crates/tish_eval/src/regex.rs +299 -0
  102. package/crates/tish_eval/src/timers.rs +120 -0
  103. package/crates/tish_eval/src/value.rs +336 -0
  104. package/crates/tish_eval/src/value_convert.rs +117 -0
  105. package/crates/tish_ffi/Cargo.toml +26 -0
  106. package/crates/tish_ffi/src/lib.rs +518 -0
  107. package/crates/tish_ffi/tests/fixtures/testmod/Cargo.toml +18 -0
  108. package/crates/tish_ffi/tests/fixtures/testmod/src/lib.rs +46 -0
  109. package/crates/tish_ffi/tests/loader.rs +65 -0
  110. package/crates/tish_fmt/Cargo.toml +16 -0
  111. package/crates/tish_fmt/src/bin/tish-fmt.rs +41 -0
  112. package/crates/tish_fmt/src/lib.rs +2157 -0
  113. package/crates/tish_jsx_web/Cargo.toml +9 -0
  114. package/crates/tish_jsx_web/README.md +5 -0
  115. package/crates/tish_jsx_web/src/lib.rs +2 -0
  116. package/crates/tish_lexer/Cargo.toml +9 -0
  117. package/crates/tish_lexer/src/lib.rs +1104 -0
  118. package/crates/tish_lexer/src/token.rs +170 -0
  119. package/crates/tish_lint/Cargo.toml +18 -0
  120. package/crates/tish_lint/src/bin/tish-lint.rs +195 -0
  121. package/crates/tish_lint/src/lib.rs +281 -0
  122. package/crates/tish_llvm/Cargo.toml +13 -0
  123. package/crates/tish_llvm/src/lib.rs +115 -0
  124. package/crates/tish_lsp/Cargo.toml +25 -0
  125. package/crates/tish_lsp/README.md +26 -0
  126. package/crates/tish_lsp/src/builtin_goto.rs +362 -0
  127. package/crates/tish_lsp/src/import_goto.rs +564 -0
  128. package/crates/tish_lsp/src/main.rs +1459 -0
  129. package/crates/tish_native/Cargo.toml +16 -0
  130. package/crates/tish_native/src/build.rs +481 -0
  131. package/crates/tish_native/src/config.rs +48 -0
  132. package/crates/tish_native/src/lib.rs +416 -0
  133. package/crates/tish_opt/Cargo.toml +13 -0
  134. package/crates/tish_opt/src/lib.rs +1046 -0
  135. package/crates/tish_parser/Cargo.toml +11 -0
  136. package/crates/tish_parser/src/lib.rs +386 -0
  137. package/crates/tish_parser/src/parser.rs +2726 -0
  138. package/crates/tish_pg/Cargo.toml +34 -0
  139. package/crates/tish_pg/README.md +38 -0
  140. package/crates/tish_pg/src/error.rs +52 -0
  141. package/crates/tish_pg/src/lib.rs +955 -0
  142. package/crates/tish_resolve/Cargo.toml +13 -0
  143. package/crates/tish_resolve/src/lib.rs +3601 -0
  144. package/crates/tish_resolve/src/pos.rs +141 -0
  145. package/crates/tish_runtime/Cargo.toml +100 -0
  146. package/crates/tish_runtime/src/http.rs +1347 -0
  147. package/crates/tish_runtime/src/http_fetch.rs +492 -0
  148. package/crates/tish_runtime/src/http_hyper.rs +441 -0
  149. package/crates/tish_runtime/src/http_prefork.rs +189 -0
  150. package/crates/tish_runtime/src/lib.rs +1447 -0
  151. package/crates/tish_runtime/src/native_promise.rs +15 -0
  152. package/crates/tish_runtime/src/promise.rs +558 -0
  153. package/crates/tish_runtime/src/promise_io.rs +38 -0
  154. package/crates/tish_runtime/src/timers.rs +172 -0
  155. package/crates/tish_runtime/src/tty.rs +226 -0
  156. package/crates/tish_runtime/src/ws.rs +778 -0
  157. package/crates/tish_runtime/tests/fetch_readable_stream.rs +102 -0
  158. package/crates/tish_ui/Cargo.toml +17 -0
  159. package/crates/tish_ui/src/jsx.rs +692 -0
  160. package/crates/tish_ui/src/lib.rs +20 -0
  161. package/crates/tish_ui/src/runtime/hooks.rs +573 -0
  162. package/crates/tish_ui/src/runtime/mod.rs +183 -0
  163. package/crates/tish_vm/Cargo.toml +60 -0
  164. package/crates/tish_vm/src/jit.rs +1050 -0
  165. package/crates/tish_vm/src/lib.rs +41 -0
  166. package/crates/tish_vm/src/vm.rs +3536 -0
  167. package/crates/tish_vm/tests/concurrent_shared_state.rs +140 -0
  168. package/crates/tish_vm/tests/fixtures/or_string_cmd.tish +2 -0
  169. package/crates/tish_vm/tests/lexical_scope_declare.rs +34 -0
  170. package/crates/tish_vm/tests/peephole_jump_chain_logical_or.rs +150 -0
  171. package/crates/tish_wasm/Cargo.toml +15 -0
  172. package/crates/tish_wasm/src/lib.rs +428 -0
  173. package/crates/tish_wasm_runtime/Cargo.toml +37 -0
  174. package/crates/tish_wasm_runtime/src/gpu.rs +429 -0
  175. package/crates/tish_wasm_runtime/src/lib.rs +42 -0
  176. package/crates/tishlang_cargo_bindgen/Cargo.toml +26 -0
  177. package/crates/tishlang_cargo_bindgen/src/classify.rs +261 -0
  178. package/crates/tishlang_cargo_bindgen/src/discover.rs +125 -0
  179. package/crates/tishlang_cargo_bindgen/src/infer.rs +382 -0
  180. package/crates/tishlang_cargo_bindgen/src/lib.rs +349 -0
  181. package/crates/tishlang_cargo_bindgen/src/main.rs +167 -0
  182. package/crates/tishlang_cargo_bindgen/src/metadata.rs +117 -0
  183. package/justfile +276 -0
  184. package/package.json +2 -2
  185. package/platform/darwin-arm64/tish-fmt +0 -0
  186. package/platform/darwin-x64/tish-fmt +0 -0
  187. package/platform/linux-arm64/tish-fmt +0 -0
  188. package/platform/linux-x64/tish-fmt +0 -0
  189. package/platform/win32-x64/tish-fmt.exe +0 -0
@@ -0,0 +1,1104 @@
1
+ //! Tish lexer with indent normalization and tab/space handling.
2
+ //!
3
+ //! Normalizes tabs and spaces to a single indent level so both styles work.
4
+ //! Emits virtual Indent/Dedent tokens for optional-brace blocks.
5
+
6
+ mod token;
7
+
8
+ pub use token::{Span, Token, TokenKind};
9
+
10
+ use std::collections::VecDeque;
11
+ use std::iter::Peekable;
12
+ use std::str::Chars;
13
+
14
/// Number of leading-whitespace units that make up one indent level.
const INDENT_WIDTH: usize = 2;
/// Indentation contribution of a single leading tab character (consumed by `read_indent_level`).
const TAB_AS_LEVELS: usize = 1;
16
+
17
/// Per-element JSX state kept on the lexer's JSX stack.
///
/// `>` is ambiguous inside JSX: within an attribute value's embedded expression (`{...}`)
/// it is a comparison operator, while outside it ends the opening tag. Tracking the open
/// braces of the innermost element lets the lexer tell the two apart (React-like).
#[derive(Debug, Clone)]
struct JsxEl {
    /// True while the lexer is still inside this element's opening tag (`<Tag ...`).
    in_opener: bool,
    /// Count of `{` currently open inside this element's attribute values.
    attr_value_braces: i32,
}
26
+
27
/// Knobs that alter how the lexer tokenizes a source file.
#[derive(Debug, Clone, Copy, Default)]
pub struct LexerOptions {
    /// Disable the virtual `Indent`/`Dedent` tokens, leaving braces as the **only** block
    /// delimiter. Leading whitespace then counts as plain whitespace and brace-less
    /// (off-side) blocks stop forming. Handy when debugging how nested blocks transpile;
    /// the `TISH_IGNORE_INDENT` environment variable flips this globally.
    pub ignore_indent: bool,
}
36
+
37
+ impl LexerOptions {
38
+ /// Build options from the environment. `TISH_IGNORE_INDENT=1` (or `true`/`yes`) sets
39
+ /// `ignore_indent`, so every parse path (run/build/dump-ast/fmt/lint/lsp) honors it
40
+ /// without threading a flag through the whole pipeline.
41
+ pub fn from_env() -> Self {
42
+ Self {
43
+ ignore_indent: env_truthy(std::env::var_os("TISH_IGNORE_INDENT")),
44
+ }
45
+ }
46
+ }
47
+
48
/// Decide whether an environment-variable value switches a boolean flag on.
///
/// Only the exact, case-sensitive strings `1`, `true`, and `yes` count as "on"; every
/// other value, a non-UTF-8 value, and an unset variable (`None`) all count as "off".
/// Kept separate from the actual `std::env` lookup so the rule can be unit-tested without
/// touching process-global state (which `Lexer::new` reads, so env-mutating tests would race).
fn env_truthy(value: Option<std::ffi::OsString>) -> bool {
    let text = value.as_deref().and_then(std::ffi::OsStr::to_str);
    matches!(text, Some("1" | "true" | "yes"))
}
57
+
58
+ #[derive(Debug, Clone)]
59
+ pub struct Lexer<'a> {
60
+ chars: Peekable<Chars<'a>>,
61
+ pos: usize,
62
+ line: usize,
63
+ col: usize,
64
+ indent_stack: Vec<usize>,
65
+ at_line_start: bool,
66
+ pending_dedents: VecDeque<Token>,
67
+ template_brace_stack: Vec<usize>,
68
+ jsx_after_gt: bool,
69
+ jsx_in_opening_tag: bool,
70
+ jsx_saw_slash_before_gt: bool,
71
+ jsx_stack: Vec<JsxEl>,
72
+ jsx_depth: i32,
73
+ jsx_child_brace_depth: i32,
74
+ jsx_in_closing_tag: bool,
75
+ ignore_indent: bool,
76
+ /// Kind of the last emitted significant token, for `<` disambiguation: after a *value* position
77
+ /// (ident, `)`, `]`, literal) a `<` is a comparison / generic-args opener (`Lt`), never a JSX tag.
78
+ last_significant_kind: Option<TokenKind>,
79
+ }
80
+
81
+ impl<'a> Lexer<'a> {
82
+ /// Create a lexer, reading options from the environment (e.g. `TISH_IGNORE_INDENT`).
83
+ pub fn new(source: &'a str) -> Self {
84
+ Self::with_options(source, LexerOptions::from_env())
85
+ }
86
+
87
+ /// Create a lexer with explicit options, bypassing the environment.
88
+ pub fn with_options(source: &'a str, options: LexerOptions) -> Self {
89
+ Self {
90
+ chars: source.chars().peekable(),
91
+ pos: 0,
92
+ line: 1,
93
+ col: 1,
94
+ indent_stack: vec![0],
95
+ at_line_start: true,
96
+ pending_dedents: VecDeque::new(),
97
+ template_brace_stack: Vec::new(),
98
+ jsx_after_gt: false,
99
+ jsx_in_opening_tag: false,
100
+ jsx_saw_slash_before_gt: false,
101
+ jsx_stack: Vec::new(),
102
+ jsx_depth: 0,
103
+ jsx_child_brace_depth: 0,
104
+ jsx_in_closing_tag: false,
105
+ ignore_indent: options.ignore_indent,
106
+ last_significant_kind: None,
107
+ }
108
+ }
109
+
110
+ /// True when the previous significant token ends a value, so a following `<` is `Lt`
111
+ /// (comparison / generic args), not the start of a JSX element.
112
+ fn last_is_value(&self) -> bool {
113
+ matches!(
114
+ self.last_significant_kind,
115
+ Some(
116
+ TokenKind::Ident
117
+ | TokenKind::RParen
118
+ | TokenKind::RBracket
119
+ | TokenKind::Number
120
+ | TokenKind::String
121
+ | TokenKind::True
122
+ | TokenKind::False
123
+ | TokenKind::Null
124
+ )
125
+ )
126
+ }
127
+
128
+ #[inline]
129
+ fn jsx_sync_in_opening_tag(&mut self) {
130
+ self.jsx_in_opening_tag = self.jsx_stack.last().map(|e| e.in_opener).unwrap_or(false);
131
+ }
132
+
133
+ fn read_jsx_text(&mut self, start: (usize, usize)) -> Result<Option<Token>, String> {
134
+ let mut s = String::new();
135
+ loop {
136
+ match self.peek() {
137
+ None | Some('{') | Some('<') => break,
138
+ Some(c) => {
139
+ self.advance();
140
+ s.push(c);
141
+ }
142
+ }
143
+ }
144
+ if s.is_empty() {
145
+ Ok(None)
146
+ } else {
147
+ let end = self.span_start();
148
+ Ok(Some(Token {
149
+ kind: TokenKind::JsxText,
150
+ span: Span { start, end },
151
+ literal: Some(s.into()),
152
+ }))
153
+ }
154
+ }
155
+
156
+ fn peek(&mut self) -> Option<char> {
157
+ self.chars.peek().copied()
158
+ }
159
+
160
+ fn advance(&mut self) -> Option<char> {
161
+ let c = self.chars.next()?;
162
+ self.pos += c.len_utf8();
163
+ if c == '\n' {
164
+ self.line += 1;
165
+ self.col = 1;
166
+ self.at_line_start = true;
167
+ } else {
168
+ self.col += 1;
169
+ }
170
+ Some(c)
171
+ }
172
+
173
+ fn span_start(&self) -> (usize, usize) {
174
+ (self.line, self.col)
175
+ }
176
+
177
+ fn read_indent_level(&mut self) -> usize {
178
+ let mut level = 0;
179
+ loop {
180
+ match self.peek() {
181
+ Some(' ') => {
182
+ self.advance();
183
+ level += 1;
184
+ }
185
+ Some('\t') => {
186
+ self.advance();
187
+ level += TAB_AS_LEVELS;
188
+ }
189
+ _ => break,
190
+ }
191
+ }
192
+ level.div_ceil(INDENT_WIDTH)
193
+ }
194
+
195
+ fn skip_whitespace(&mut self) {
196
+ while let Some(c) = self.peek() {
197
+ if c == ' ' || c == '\t' || c == '\r' {
198
+ self.advance();
199
+ } else if c == '\n' {
200
+ self.advance();
201
+ self.at_line_start = true;
202
+ } else {
203
+ break;
204
+ }
205
+ }
206
+ }
207
+
208
+ fn skip_line_comment(&mut self) {
209
+ while let Some(c) = self.advance() {
210
+ if c == '\n' {
211
+ break;
212
+ }
213
+ }
214
+ }
215
+
216
+ fn skip_block_comment(&mut self) -> Result<(), String> {
217
+ let mut depth = 1;
218
+ while depth > 0 {
219
+ match self.advance() {
220
+ Some('*') if self.peek() == Some('/') => {
221
+ self.advance();
222
+ depth -= 1;
223
+ }
224
+ Some('/') if self.peek() == Some('*') => {
225
+ self.advance();
226
+ depth += 1;
227
+ }
228
+ None => return Err("Unterminated block comment".to_string()),
229
+ _ => {}
230
+ }
231
+ }
232
+ Ok(())
233
+ }
234
+
235
+ fn read_number(&mut self, first: char) -> String {
236
+ // Radix-prefixed integer literals: `0x`/`0X` (hex), `0o`/`0O` (octal), `0b`/`0B`
237
+ // (binary), with optional `_` digit separators. JS semantics — a non-negative
238
+ // integer. Convert to a decimal string here so every downstream consumer (the
239
+ // parser's `parse::<f64>()`, the formatter, …) sees a plain number, unchanged.
240
+ if first == '0' {
241
+ if let Some(radix) = self.radix_prefix() {
242
+ self.advance(); // consume the x/o/b marker
243
+ let mut digits = String::with_capacity(16);
244
+ while let Some(c) = self.peek() {
245
+ if c == '_' {
246
+ self.advance(); // digit separator
247
+ } else if c.is_digit(radix) {
248
+ digits.push(c);
249
+ self.advance();
250
+ } else {
251
+ break;
252
+ }
253
+ }
254
+ return Self::radix_digits_to_decimal(&digits, radix);
255
+ }
256
+ }
257
+
258
+ let mut s = String::with_capacity(16);
259
+ s.push(first);
260
+ while let Some(c) = self.peek() {
261
+ if c.is_ascii_digit() || c == '.' {
262
+ s.push(c);
263
+ self.advance();
264
+ } else if c == '_' && Self::ends_with_digit(&s) && self.underscore_between_digits() {
265
+ self.advance(); // numeric separator (`15_000`) — drop it, JS-style
266
+ } else if (c == 'e' || c == 'E') && self.exponent_follows() {
267
+ // Scientific notation: `e`/`E` then optional sign then digits.
268
+ // Guarded by lookahead so `3em` lexes as `3` + `em`, not a bad number.
269
+ s.push(c);
270
+ self.advance(); // consume e/E
271
+ if matches!(self.peek(), Some('+') | Some('-')) {
272
+ s.push(self.peek().unwrap());
273
+ self.advance();
274
+ }
275
+ while let Some(d) = self.peek() {
276
+ if d.is_ascii_digit() {
277
+ s.push(d);
278
+ self.advance();
279
+ } else if d == '_'
280
+ && Self::ends_with_digit(&s)
281
+ && self.underscore_between_digits()
282
+ {
283
+ self.advance(); // numeric separator inside the exponent (`1e1_0`)
284
+ } else {
285
+ break;
286
+ }
287
+ }
288
+ break; // the exponent terminates the numeric literal
289
+ } else {
290
+ break;
291
+ }
292
+ }
293
+ s
294
+ }
295
+
296
/// Whether the literal text collected so far finishes with an ASCII decimal digit.
///
/// Used to refuse a `_` separator that has no digit in front of it (a leading `_5`,
/// or `1._5` right after the decimal point).
fn ends_with_digit(s: &str) -> bool {
    match s.chars().next_back() {
        Some(tail) => tail.is_ascii_digit(),
        None => false,
    }
}
301
+
302
+ /// With `peek()` positioned at a `_`, look ahead (without consuming) to confirm the
303
+ /// next character is a decimal digit, i.e. the `_` sits between two digits and is a
304
+ /// valid JS numeric separator (rejects trailing `5_` and doubled `1__0`).
305
+ fn underscore_between_digits(&self) -> bool {
306
+ let mut la = self.chars.clone();
307
+ la.next(); // skip the `_` currently under peek()
308
+ la.next().is_some_and(|c| c.is_ascii_digit())
309
+ }
310
+
311
+ /// With the current peek positioned at an `e`/`E`, decide (without consuming)
312
+ /// whether a valid exponent `[+-]?\d` follows. `Chars` is `Clone`, so we look
313
+ /// ahead on a throwaway clone of the iterator.
314
+ fn exponent_follows(&self) -> bool {
315
+ let mut la = self.chars.clone();
316
+ la.next(); // skip the e/E currently under peek()
317
+ match la.next() {
318
+ Some(d) if d.is_ascii_digit() => true,
319
+ Some('+') | Some('-') => la.next().is_some_and(|d| d.is_ascii_digit()),
320
+ _ => false,
321
+ }
322
+ }
323
+
324
+ /// With a leading `0` already consumed and `peek()` at the radix marker, return the
325
+ /// radix (16 / 8 / 2) iff this is a valid `0x` / `0o` / `0b` prefix followed by at
326
+ /// least one valid digit. Returns `None` otherwise, so `0`, `0.5`, `0e3`, `0xZ`, and
327
+ /// `0x_1` all stay on the decimal path. Looks ahead on a clone of the `Chars` iterator
328
+ /// (`Chars: Clone`) without consuming.
329
+ fn radix_prefix(&self) -> Option<u32> {
330
+ let mut la = self.chars.clone();
331
+ let radix = match la.next()? {
332
+ 'x' | 'X' => 16,
333
+ 'o' | 'O' => 8,
334
+ 'b' | 'B' => 2,
335
+ _ => return None,
336
+ };
337
+ match la.next() {
338
+ Some(c) if c.is_digit(radix) => Some(radix),
339
+ _ => None,
340
+ }
341
+ }
342
+
343
/// Render the separator-free digits of a radix-prefixed literal as a decimal string.
///
/// Values that fit in a `u128` are converted exactly. Anything `u128` cannot parse is
/// accumulated digit by digit in an `f64` instead, which loses precision past 2^53 —
/// the same as a JS conversion to a double would.
fn radix_digits_to_decimal(digits: &str, radix: u32) -> String {
    match u128::from_str_radix(digits, radix) {
        Ok(exact) => exact.to_string(),
        Err(_) => {
            let base = f64::from(radix);
            let approx = digits.chars().fold(0.0_f64, |acc, c| {
                acc * base + f64::from(c.to_digit(radix).unwrap_or(0))
            });
            format!("{approx}")
        }
    }
}
357
+
358
+ /// Handle escape sequence, returning the unescaped character.
359
+ /// `extra_allowed` contains additional characters that can be escaped in this context.
360
+ fn handle_escape(&mut self, extra_allowed: &[char]) -> Result<char, String> {
361
+ let escaped = self.advance().ok_or("Unterminated escape")?;
362
+ match escaped {
363
+ 'n' => Ok('\n'),
364
+ 'r' => Ok('\r'),
365
+ 't' => Ok('\t'),
366
+ 'b' => Ok('\u{0008}'),
367
+ 'f' => Ok('\u{000C}'),
368
+ 'v' => Ok('\u{000B}'),
369
+ '0' => Ok('\0'),
370
+ '\\' => Ok('\\'),
371
+ // `\xNN` — exactly two hex digits → code point 0x00..=0xFF (JS/TS).
372
+ 'x' => {
373
+ let cp = self.read_hex_digits(2)?;
374
+ char::from_u32(cp).ok_or_else(|| format!("Invalid \\x escape: \\x{:02X}", cp))
375
+ }
376
+ // `\uNNNN` (exactly four hex digits) or `\u{N..}` (1-6 hex digits, ES6).
377
+ 'u' => {
378
+ let cp = if self.peek() == Some('{') {
379
+ self.advance(); // consume '{'
380
+ let cp = self.read_hex_until_brace()?;
381
+ match self.advance() {
382
+ Some('}') => cp,
383
+ _ => return Err("Unterminated \\u{...} escape (expected '}')".to_string()),
384
+ }
385
+ } else {
386
+ self.read_hex_digits(4)?
387
+ };
388
+ // Lone surrogates (0xD800..=0xDFFF) are valid UTF-16 code units in JS but
389
+ // not Unicode scalar values; tish strings are UTF-8, so reject them.
390
+ char::from_u32(cp)
391
+ .ok_or_else(|| format!("Invalid \\u escape: code point U+{:04X}", cp))
392
+ }
393
+ c if extra_allowed.contains(&c) => Ok(c),
394
+ _ => Err(format!("Unknown escape: \\{}", escaped)),
395
+ }
396
+ }
397
+
398
+ /// Read exactly `n` hex digits and return the parsed code point.
399
+ fn read_hex_digits(&mut self, n: usize) -> Result<u32, String> {
400
+ let mut value: u32 = 0;
401
+ for _ in 0..n {
402
+ let c = self.advance().ok_or("Unterminated hex escape")?;
403
+ let digit = c
404
+ .to_digit(16)
405
+ .ok_or_else(|| format!("Invalid hex digit in escape: '{}'", c))?;
406
+ value = value * 16 + digit;
407
+ }
408
+ Ok(value)
409
+ }
410
+
411
+ /// Read 1-6 hex digits for a `\u{...}` escape (stops at `}`); validates the count
412
+ /// and that the value is within the Unicode range.
413
+ fn read_hex_until_brace(&mut self) -> Result<u32, String> {
414
+ let mut value: u32 = 0;
415
+ let mut count = 0;
416
+ while let Some(c) = self.peek() {
417
+ let Some(digit) = c.to_digit(16) else { break };
418
+ self.advance();
419
+ value = value * 16 + digit;
420
+ count += 1;
421
+ if count > 6 || value > 0x10_FFFF {
422
+ return Err("Invalid \\u{...} escape: code point out of range".to_string());
423
+ }
424
+ }
425
+ if count == 0 {
426
+ return Err("Empty \\u{} escape (expected hex digits)".to_string());
427
+ }
428
+ Ok(value)
429
+ }
430
+
431
+ fn read_string(&mut self, quote: char) -> Result<String, String> {
432
+ let mut s = String::with_capacity(32);
433
+ let extra = if quote == '"' {
434
+ &['"', '\''][..]
435
+ } else {
436
+ &['\'', '"'][..]
437
+ };
438
+ loop {
439
+ match self.advance() {
440
+ None => return Err("Unterminated string".to_string()),
441
+ Some(c) if c == quote => break,
442
+ Some('\\') => s.push(self.handle_escape(extra)?),
443
+ Some(c) => s.push(c),
444
+ }
445
+ }
446
+ Ok(s)
447
+ }
448
+
449
+ fn read_ident_or_keyword(&mut self, first: char) -> String {
450
+ let mut s = String::with_capacity(16);
451
+ s.push(first);
452
+ while let Some(c) = self.peek() {
453
+ if c.is_ascii_alphanumeric() || c == '_' {
454
+ s.push(c);
455
+ self.advance();
456
+ } else {
457
+ break;
458
+ }
459
+ }
460
+ s
461
+ }
462
+
463
+ /// Read a template literal. If `is_continuation` is true, we're continuing after a `}`.
464
+ fn read_template(
465
+ &mut self,
466
+ start: (usize, usize),
467
+ is_continuation: bool,
468
+ ) -> Result<Option<Token>, String> {
469
+ let mut s = String::with_capacity(if is_continuation { 32 } else { 64 });
470
+ let extra = &['`', '$', '{'][..];
471
+
472
+ loop {
473
+ match self.advance() {
474
+ None => return Err("Unterminated template literal".to_string()),
475
+ Some('`') => {
476
+ let end = self.span_start();
477
+ let kind = if is_continuation {
478
+ TokenKind::TemplateTail
479
+ } else {
480
+ TokenKind::TemplateNoSub
481
+ };
482
+ return Ok(Some(Token {
483
+ kind,
484
+ span: Span { start, end },
485
+ literal: Some(s.into()),
486
+ }));
487
+ }
488
+ Some('$') if self.peek() == Some('{') => {
489
+ self.advance();
490
+ self.template_brace_stack.push(1);
491
+ let end = self.span_start();
492
+ let kind = if is_continuation {
493
+ TokenKind::TemplateMiddle
494
+ } else {
495
+ TokenKind::TemplateHead
496
+ };
497
+ return Ok(Some(Token {
498
+ kind,
499
+ span: Span { start, end },
500
+ literal: Some(s.into()),
501
+ }));
502
+ }
503
+ Some('\\') => s.push(self.handle_escape(extra)?),
504
+ Some(c) => s.push(c),
505
+ }
506
+ }
507
+ }
508
+
509
+ fn emit_indent_or_dedent(&mut self, level: usize) -> Option<Token> {
510
+ let top = *self.indent_stack.last().unwrap();
511
+ let start = self.span_start();
512
+
513
+ if level > top {
514
+ self.indent_stack.push(level);
515
+ Some(Token {
516
+ kind: TokenKind::Indent,
517
+ span: Span { start, end: start },
518
+ literal: None,
519
+ })
520
+ } else if level < top {
521
+ while self.indent_stack.len() > 1 && *self.indent_stack.last().unwrap() > level {
522
+ self.indent_stack.pop();
523
+ self.pending_dedents.push_back(Token {
524
+ kind: TokenKind::Dedent,
525
+ span: Span { start, end: start },
526
+ literal: None,
527
+ });
528
+ }
529
+ if *self.indent_stack.last().unwrap_or(&0) != level {
530
+ self.indent_stack.push(level);
531
+ }
532
+ self.pending_dedents.pop_front()
533
+ } else {
534
+ None
535
+ }
536
+ }
537
+
538
+ pub fn next_token(&mut self) -> Result<Option<Token>, String> {
539
+ let tok = self.next_token_inner()?;
540
+ if let Some(t) = &tok {
541
+ self.last_significant_kind = Some(t.kind);
542
+ }
543
+ Ok(tok)
544
+ }
545
+
546
+ fn next_token_inner(&mut self) -> Result<Option<Token>, String> {
547
+ if let Some(tok) = self.pending_dedents.pop_front() {
548
+ return Ok(Some(tok));
549
+ }
550
+
551
+ if self.jsx_after_gt {
552
+ self.jsx_after_gt = false;
553
+ if !matches!(self.peek(), Some('{') | Some('<') | None) {
554
+ let start = self.span_start();
555
+ if let Some(tok) = self.read_jsx_text(start)? {
556
+ return Ok(Some(tok));
557
+ }
558
+ }
559
+ }
560
+
561
+ if self.at_line_start {
562
+ self.at_line_start = false;
563
+ // Always consume the leading whitespace; only *emit* Indent/Dedent when indentation
564
+ // is significant. With `ignore_indent`, the level is discarded so the indent stack
565
+ // stays at `[0]` and no virtual tokens are produced (brace-only blocks).
566
+ let level = self.read_indent_level();
567
+ if !self.ignore_indent
568
+ && (level > 0 || self.peek().map(|c| c != '\n').unwrap_or(false))
569
+ {
570
+ if let Some(tok) = self.emit_indent_or_dedent(level) {
571
+ return Ok(Some(tok));
572
+ }
573
+ }
574
+ }
575
+
576
+ self.skip_whitespace();
577
+ if self.at_line_start {
578
+ return self.next_token();
579
+ }
580
+
581
+ let start = self.span_start();
582
+ let c = match self.advance() {
583
+ Some(c) => c,
584
+ None => {
585
+ if let Some(tok) = self.pending_dedents.pop_front() {
586
+ return Ok(Some(tok));
587
+ }
588
+ if self.indent_stack.len() > 1 {
589
+ self.indent_stack.pop();
590
+ return Ok(Some(Token {
591
+ kind: TokenKind::Dedent,
592
+ span: Span {
593
+ start: (self.line, self.col),
594
+ end: (self.line, self.col),
595
+ },
596
+ literal: None,
597
+ }));
598
+ }
599
+ return Ok(None);
600
+ }
601
+ };
602
+
603
+ let kind = match c {
604
+ '(' => TokenKind::LParen,
605
+ ')' => TokenKind::RParen,
606
+ '{' => {
607
+ if self.jsx_in_opening_tag {
608
+ if let Some(top) = self.jsx_stack.last_mut() {
609
+ top.attr_value_braces += 1;
610
+ }
611
+ } else if self.jsx_depth > 0 {
612
+ self.jsx_child_brace_depth += 1;
613
+ }
614
+ if let Some(depth) = self.template_brace_stack.last_mut() {
615
+ *depth += 1;
616
+ }
617
+ TokenKind::LBrace
618
+ }
619
+ '}' => {
620
+ let mut handled = false;
621
+ if let Some(top) = self.jsx_stack.last() {
622
+ if top.in_opener && top.attr_value_braces > 0 {
623
+ if let Some(top) = self.jsx_stack.last_mut() {
624
+ top.attr_value_braces -= 1;
625
+ }
626
+ handled = true;
627
+ }
628
+ }
629
+ if !handled && self.jsx_child_brace_depth > 0 {
630
+ self.jsx_child_brace_depth -= 1;
631
+ if self.jsx_child_brace_depth == 0 {
632
+ self.jsx_after_gt = true;
633
+ }
634
+ }
635
+ if let Some(depth) = self.template_brace_stack.last_mut() {
636
+ *depth -= 1;
637
+ if *depth == 0 {
638
+ self.template_brace_stack.pop();
639
+ return self.read_template(start, true);
640
+ }
641
+ }
642
+ TokenKind::RBrace
643
+ }
644
+ '[' => TokenKind::LBracket,
645
+ ']' => TokenKind::RBracket,
646
+ ';' => TokenKind::Semicolon,
647
+ ',' => TokenKind::Comma,
648
+ '.' => {
649
+ if self.peek() == Some('?') {
650
+ self.advance();
651
+ TokenKind::OptionalChain
652
+ } else if self.peek() == Some('.') {
653
+ self.advance();
654
+ if self.peek() == Some('.') {
655
+ self.advance();
656
+ TokenKind::Spread
657
+ } else {
658
+ return Err("Unexpected .. (use ... for rest params)".to_string());
659
+ }
660
+ } else {
661
+ TokenKind::Dot
662
+ }
663
+ }
664
+ '=' => {
665
+ if self.peek() == Some('=') {
666
+ self.advance();
667
+ if self.peek() == Some('=') {
668
+ self.advance();
669
+ TokenKind::StrictEq
670
+ } else {
671
+ TokenKind::Eq
672
+ }
673
+ } else if self.peek() == Some('>') {
674
+ self.advance();
675
+ TokenKind::Arrow
676
+ } else {
677
+ TokenKind::Assign
678
+ }
679
+ }
680
+ '!' => {
681
+ if self.peek() == Some('=') {
682
+ self.advance();
683
+ if self.peek() == Some('=') {
684
+ self.advance();
685
+ TokenKind::StrictNe
686
+ } else {
687
+ TokenKind::Ne
688
+ }
689
+ } else {
690
+ TokenKind::Not
691
+ }
692
+ }
693
+ '<' => {
694
+ if self.peek() == Some('=') {
695
+ self.advance();
696
+ TokenKind::Le
697
+ } else if self.peek() == Some('<') {
698
+ self.advance();
699
+ TokenKind::Shl
700
+ } else if self.peek() == Some('/') {
701
+ self.jsx_in_closing_tag = true;
702
+ TokenKind::Lt
703
+ } else if (self.peek() == Some('>')
704
+ || self
705
+ .peek()
706
+ .map(|c| c.is_ascii_alphabetic() || c == '_')
707
+ .unwrap_or(false))
708
+ && !self.last_is_value()
709
+ {
710
+ // JSX open tag — only in expression position. After a value (`ident<`, `)<`,
711
+ // `]<`, literal) this is `Lt`: a comparison or generic-args opener.
712
+ self.jsx_depth += 1;
713
+ self.jsx_stack.push(JsxEl {
714
+ in_opener: true,
715
+ attr_value_braces: 0,
716
+ });
717
+ self.jsx_in_opening_tag = true;
718
+ TokenKind::Lt
719
+ } else {
720
+ TokenKind::Lt
721
+ }
722
+ }
723
+ '>' => {
724
+ if self.peek() == Some('=') {
725
+ self.advance();
726
+ TokenKind::Ge
727
+ } else if self.peek() == Some('>') {
728
+ self.advance();
729
+ if self.peek() == Some('>') {
730
+ self.advance();
731
+ TokenKind::UShr // `>>>`
732
+ } else {
733
+ TokenKind::Shr
734
+ }
735
+ } else {
736
+ if self.jsx_in_closing_tag
737
+ || (self.jsx_in_opening_tag && self.jsx_saw_slash_before_gt)
738
+ {
739
+ self.jsx_depth = (self.jsx_depth - 1).max(0);
740
+ self.jsx_stack.pop();
741
+ self.jsx_sync_in_opening_tag();
742
+ // A child element just closed (`</span>` or `<br/>`). If a parent element
743
+ // is still open and past its opening tag, we're back in that parent's
744
+ // children region, so the following run is JSX text — re-enter text mode.
745
+ // Without this, trailing text after a child element ("… as JSON") is lexed
746
+ // as code and a bare keyword (`as`, `in`, `if`, …) breaks the parse (#108).
747
+ //
748
+ // Guard on `jsx_child_brace_depth == 0`: if the closed element lived inside a
749
+ // `{…}` expression container (e.g. `<div>{items.map(x => <span/>)}</div>`),
750
+ // we're still in that expression, not the parent's text children — entering
751
+ // text mode there would swallow the following `)`/`,` as JsxText.
752
+ if self.jsx_child_brace_depth == 0
753
+ && self.jsx_stack.last().map(|e| !e.in_opener).unwrap_or(false)
754
+ {
755
+ self.jsx_after_gt = true;
756
+ }
757
+ } else if let Some(top) = self.jsx_stack.last_mut() {
758
+ if top.in_opener && top.attr_value_braces > 0 {
759
+ // `>` is a comparison (or shift) token inside `{ ... }`, not end of opening tag.
760
+ } else if top.in_opener && !self.jsx_saw_slash_before_gt {
761
+ top.in_opener = false;
762
+ self.jsx_after_gt = true;
763
+ self.jsx_sync_in_opening_tag();
764
+ }
765
+ }
766
+ self.jsx_in_closing_tag = false;
767
+ self.jsx_saw_slash_before_gt = false;
768
+ TokenKind::Gt
769
+ }
770
+ }
771
+ '^' => TokenKind::BitXor,
772
+ '~' => TokenKind::BitNot,
773
+ '+' => {
774
+ if self.peek() == Some('+') {
775
+ self.advance();
776
+ TokenKind::PlusPlus
777
+ } else if self.peek() == Some('=') {
778
+ self.advance();
779
+ TokenKind::PlusAssign
780
+ } else {
781
+ TokenKind::Plus
782
+ }
783
+ }
784
+ '-' => {
785
+ if self.peek() == Some('-') {
786
+ self.advance();
787
+ TokenKind::MinusMinus
788
+ } else if self.peek() == Some('=') {
789
+ self.advance();
790
+ TokenKind::MinusAssign
791
+ } else {
792
+ TokenKind::Minus
793
+ }
794
+ }
795
+ '*' => {
796
+ if self.peek() == Some('*') {
797
+ self.advance();
798
+ TokenKind::StarStar
799
+ } else if self.peek() == Some('=') {
800
+ self.advance();
801
+ TokenKind::StarAssign
802
+ } else {
803
+ TokenKind::Star
804
+ }
805
+ }
806
+ '/' => {
807
+ if self.peek() == Some('/') {
808
+ self.advance();
809
+ self.skip_line_comment();
810
+ // `skip_line_comment` consumes the newline via `advance()`, which sets
811
+ // `at_line_start` before we would normally run `skip_whitespace()`. Without
812
+ // stripping the next line's leading spaces here, `read_indent_level` would see
813
+ // physical indentation and emit a spurious `Indent` (breaks e.g. object
814
+ // literals with trailing `//` comments). Newlines handled in `skip_whitespace`
815
+ // eat those spaces before the indent pass; match that behavior.
816
+ self.skip_whitespace();
817
+ return self.next_token();
818
+ } else if self.peek() == Some('*') {
819
+ self.advance();
820
+ self.skip_block_comment()?;
821
+ return self.next_token();
822
+ } else if self.peek() == Some('=') {
823
+ self.advance();
824
+ TokenKind::SlashAssign
825
+ } else {
826
+ if self.jsx_in_opening_tag {
827
+ self.jsx_saw_slash_before_gt = true;
828
+ }
829
+ TokenKind::Slash
830
+ }
831
+ }
832
+ '%' => {
833
+ if self.peek() == Some('=') {
834
+ self.advance();
835
+ TokenKind::PercentAssign
836
+ } else {
837
+ TokenKind::Percent
838
+ }
839
+ }
840
+ '&' => {
841
+ if self.peek() == Some('&') {
842
+ self.advance();
843
+ if self.peek() == Some('=') {
844
+ self.advance();
845
+ TokenKind::AndAndAssign
846
+ } else {
847
+ TokenKind::And
848
+ }
849
+ } else {
850
+ TokenKind::BitAnd
851
+ }
852
+ }
853
+ '|' => {
854
+ if self.peek() == Some('|') {
855
+ self.advance();
856
+ if self.peek() == Some('=') {
857
+ self.advance();
858
+ TokenKind::OrOrAssign
859
+ } else {
860
+ TokenKind::Or
861
+ }
862
+ } else {
863
+ TokenKind::BitOr
864
+ }
865
+ }
866
+ '?' => {
867
+ if self.peek() == Some('?') {
868
+ self.advance();
869
+ if self.peek() == Some('=') {
870
+ self.advance();
871
+ TokenKind::NullishAssign
872
+ } else {
873
+ TokenKind::NullishCoalesce
874
+ }
875
+ } else if self.peek() == Some('.') {
876
+ self.advance();
877
+ TokenKind::OptionalChain
878
+ } else {
879
+ TokenKind::Question
880
+ }
881
+ }
882
+ ':' => TokenKind::Colon,
883
+ '"' | '\'' => {
884
+ let s = self.read_string(c)?;
885
+ let end = self.span_start();
886
+ return Ok(Some(Token {
887
+ kind: TokenKind::String,
888
+ span: Span { start, end },
889
+ literal: Some(s.into()),
890
+ }));
891
+ }
892
+ '`' => return self.read_template(start, false),
893
+ '0'..='9' => {
894
+ let num = self.read_number(c);
895
+ let end = self.span_start();
896
+ return Ok(Some(Token {
897
+ kind: TokenKind::Number,
898
+ span: Span { start, end },
899
+ literal: Some(num.into()),
900
+ }));
901
+ }
902
+ 'a'..='z' | 'A'..='Z' | '_' => {
903
+ let ident = self.read_ident_or_keyword(c);
904
+ let end = self.span_start();
905
+ let kind = TokenKind::keyword_or_ident(&ident);
906
+ return Ok(Some(Token {
907
+ kind,
908
+ span: Span { start, end },
909
+ // Spelling is useful for keywords too (e.g. object keys, type names like `type`).
910
+ literal: Some(ident.into()),
911
+ }));
912
+ }
913
+ '\n' => {
914
+ self.at_line_start = true;
915
+ return self.next_token();
916
+ }
917
+ _ => return Err(format!("Unexpected character: {:?}", c)),
918
+ };
919
+
920
+ let end = self.span_start();
921
+ Ok(Some(Token {
922
+ kind,
923
+ span: Span { start, end },
924
+ literal: None,
925
+ }))
926
+ }
927
+ }
928
+
929
+ impl<'a> Iterator for Lexer<'a> {
930
+ type Item = Result<Token, String>;
931
+
932
+ fn next(&mut self) -> Option<Self::Item> {
933
+ match self.next_token() {
934
+ Ok(Some(t)) => Some(Ok(t)),
935
+ Ok(None) => None,
936
+ Err(e) => Some(Err(e)),
937
+ }
938
+ }
939
+ }
940
+
941
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs a default-configured lexer over `src` and returns all tokens, panicking on the
    /// first lexer error.
    fn lex_all(src: &str) -> Vec<Token> {
        Lexer::new(src).collect::<Result<Vec<_>, _>>().unwrap()
    }

    /// Like `lex_all`, but builds the lexer through `Lexer::with_options`.
    fn lex_all_with(src: &str, options: LexerOptions) -> Vec<Token> {
        Lexer::with_options(src, options)
            .collect::<Result<Vec<_>, _>>()
            .unwrap()
    }

    /// A lone double-quoted string lexes to exactly one `String` token whose literal is the
    /// unquoted text.
    #[test]
    fn test_string_literal() {
        let toks = lex_all(r#""H""#);
        assert_eq!(toks.len(), 1);
        assert_eq!(toks[0].kind, TokenKind::String);
        assert_eq!(toks[0].literal.as_deref(), Some("H"));
    }

    /// A string passed as a call argument keeps its literal text.
    #[test]
    fn test_print_string() {
        let toks = lex_all(r#"print("H")"#);
        let string_tok = toks
            .iter()
            .find(|tok| tok.kind == TokenKind::String)
            .unwrap();
        assert_eq!(string_tok.literal.as_deref(), Some("H"));
    }

    /// Hex / octal / binary prefixes (either case) are stored as a decimal `Number` literal,
    /// with `_` digit separators dropped.
    #[test]
    fn radix_integer_literals() {
        const CASES: &[(&str, &str)] = &[
            ("0xff", "255"),
            ("0xFF", "255"),
            ("0X1a", "26"),
            ("0o17", "15"),
            ("0O7", "7"),
            ("0b1010", "10"),
            ("0B0", "0"),
            ("0xdeadbeef", "3735928559"),
            ("0xFF_FF", "65535"),
            ("0b1111_0000", "240"),
        ];
        for &(src, expected) in CASES {
            let toks = lex_all(src);
            let num = match toks.iter().find(|tok| tok.kind == TokenKind::Number) {
                Some(tok) => tok,
                None => panic!("no Number token for {src}"),
            };
            assert_eq!(num.literal.as_deref(), Some(expected), "for {src}");
        }
    }

    /// `_` between decimal digits is a numeric separator and is removed from the literal
    /// value (issue #57).
    #[test]
    fn decimal_numeric_separators() {
        /// Lexes `src`, checks it produced exactly one `Number` and no `Ident`, and returns
        /// the number's literal text.
        fn only_number(src: &str) -> String {
            let toks = lex_all(src);
            let numbers: Vec<_> = toks
                .iter()
                .filter(|tok| tok.kind == TokenKind::Number)
                .collect();
            assert_eq!(
                numbers.len(),
                1,
                "expected exactly one Number token for {src}"
            );
            // The separated digits must not leak out as a stray identifier.
            assert!(
                toks.iter().all(|tok| tok.kind != TokenKind::Ident),
                "unexpected Ident token while lexing {src}"
            );
            numbers[0].literal.as_deref().unwrap().to_string()
        }

        assert_eq!(only_number("15_000"), "15000");
        assert_eq!(only_number("1_000_000"), "1000000");
        assert_eq!(only_number("3.14_159"), "3.14159");
        assert_eq!(only_number("1e1_0"), "1e10");
    }

    /// A leading zero does not mean legacy octal, and an invalid prefix is not treated as a
    /// radix literal.
    #[test]
    fn non_radix_zero_prefixed_stays_decimal() {
        /// Literal text of the first `Number` token in `src`.
        fn num_literal(src: &str) -> String {
            let first_number = lex_all(src)
                .into_iter()
                .find(|tok| tok.kind == TokenKind::Number)
                .unwrap();
            first_number.literal.as_deref().unwrap().to_string()
        }

        assert_eq!(num_literal("07"), "07"); // stays decimal text, not octal
        assert_eq!(num_literal("0"), "0");

        // `0xZ`: the Number token is only `0`; the remaining `xZ` lexes as an identifier.
        let toks = lex_all("0xZ");
        assert_eq!(toks[0].kind, TokenKind::Number);
        assert_eq!(toks[0].literal.as_deref(), Some("0"));
        assert_eq!(toks[1].kind, TokenKind::Ident);
    }

    /// A trailing `//` comment must not cause an `Indent` token for the following line.
    #[test]
    fn line_comment_does_not_emit_spurious_indent_before_next_line() {
        let with_comment = "fn f() {\n return {\n a: 1, // c\n b: 2\n }\n}\n";
        let toks = lex_all(with_comment);
        assert!(
            toks.iter().all(|tok| tok.kind != TokenKind::Indent),
            "unexpected Indent after line comment: {:?}",
            toks.iter()
                .map(|tok| format!("{:?}", tok.kind))
                .collect::<Vec<_>>()
        );
    }

    /// Input whose first line is indented and whose second line is not. The tests below use
    /// it to check when the virtual `Indent` / `Dedent` tokens are produced.
    const INDENTED_SRC: &str = " a()\nb()\n";

    /// With default options, the indented sample yields both an `Indent` and a `Dedent`.
    #[test]
    fn default_options_still_emit_indent_and_dedent() {
        let toks = lex_all_with(INDENTED_SRC, LexerOptions::default());
        let saw_indent = toks.iter().any(|tok| tok.kind == TokenKind::Indent);
        let saw_dedent = toks.iter().any(|tok| tok.kind == TokenKind::Dedent);
        assert!(
            saw_indent,
            "expected an Indent token in the default (indentation-significant) mode"
        );
        assert!(
            saw_dedent,
            "expected a Dedent token in the default (indentation-significant) mode"
        );
    }

    /// With `ignore_indent` set, the same sample yields neither `Indent` nor `Dedent`.
    #[test]
    fn ignore_indent_emits_no_virtual_tokens() {
        let toks = lex_all_with(INDENTED_SRC, LexerOptions { ignore_indent: true });
        assert!(
            toks.iter()
                .all(|tok| !matches!(tok.kind, TokenKind::Indent | TokenKind::Dedent)),
            "expected no Indent/Dedent with ignore_indent, got: {:?}",
            toks.iter().map(|tok| tok.kind).collect::<Vec<_>>()
        );
    }

    /// `env_truthy` accepts only the exact strings `1`, `true` and `yes`.
    #[test]
    fn env_truthy_enables_only_on_recognized_values() {
        /// Feeds `s` to `env_truthy` as if it were the value of a set variable.
        fn truthy(s: &str) -> bool {
            env_truthy(Some(std::ffi::OsString::from(s)))
        }

        // Recognized spellings switch the flag on.
        assert!(truthy("1"));
        assert!(truthy("true"));
        assert!(truthy("yes"));

        // Unset, empty and every other value leave it off.
        assert!(!env_truthy(None));
        assert!(!truthy(""));
        assert!(!truthy("0"));
        assert!(!truthy("false"));
        assert!(!truthy("no"));
        assert!(!truthy("TRUE")); // matching is case-sensitive by design
    }
}