rusty_racer 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -41,14 +41,23 @@ use std::collections::HashMap;
41
41
  use std::ffi::c_void;
42
42
  use std::ptr::null_mut;
43
43
  use std::sync::atomic::{AtomicBool, Ordering};
44
- use std::sync::{Arc, Condvar, Mutex, Once, Weak};
45
- use std::time::{Duration, Instant};
44
+ use std::sync::{Arc, Mutex, Once, Weak};
46
45
 
47
46
  use magnus::block::Proc;
48
47
  use magnus::value::{BoxValue, ReprValue};
49
48
  use magnus::{
50
- function, method, prelude::*, Error, Exception, ExceptionClass, IntoValue, RArray, RHash,
51
- RString, Ruby, TryConvert, Value,
49
+ function, method, prelude::*, Error, Exception, ExceptionClass, RHash, Ruby, TryConvert, Value,
50
+ };
51
+
52
+ mod marshal;
53
+ use marshal::{js_to_jsval, jsval_to_js, jsval_to_ruby, ruby_to_jsval, JsVal};
54
+ mod ops;
55
+ use ops::{run_source, service_request, Compiled, Request, VmReply};
56
+ mod stack;
57
+ use stack::{discover_scan_start_field, set_v8_stack_limit, STACK_DEBUG};
58
+ mod watchdog;
59
+ use watchdog::{
60
+ arm_watchdog, disarm_watchdog, run_js_bracketed, watchdog_loop, WatchdogShared, WATCHDOG_DEBUG,
52
61
  };
53
62
 
54
63
  // A Ruby Proc rooted for as long as the Core holds it. BoxValue registers a
@@ -110,6 +119,7 @@ macro_rules! istate {
110
119
  .expect("IsolateState missing from isolate slot")
111
120
  };
112
121
  }
122
+ pub(crate) use istate;
113
123
 
114
124
  // One attach()'d host fn: the realm it was attached into — so resetting or
115
125
  // disposing that realm can release the GC root — and the rooted proc itself
@@ -166,56 +176,6 @@ fn err_class(ruby: &Ruby, name: &str) -> ExceptionClass {
166
176
  .unwrap_or_else(|_| ruby.exception_runtime_error())
167
177
  }
168
178
 
// ---------------------------------------------------------------------------
// Values crossing threads: plain Rust data. No Ruby allocation off the Ruby
// thread, no V8 handles off the V8 thread, no wire format. Replaces serde.c.
// ---------------------------------------------------------------------------
/// A marshalled value: the neutral representation a JS value or a Ruby value
/// is converted to before it is handed to the other side.
///
/// `PartialEq` is derived so two marshalled graphs can be compared structurally
/// (ids included). Because `Num` and `Date` carry an `f64`, a value containing
/// NaN never equals itself, which is why `Eq` is not derived.
#[derive(Debug, Clone, PartialEq)]
enum JsVal {
    Undefined,
    Null,
    Bool(bool),
    Int(i64),
    Num(f64),
    Str(String),
    // Binary bytes: a JS Uint8Array / ArrayBuffer (view) <-> a Ruby ASCII-8BIT
    // (binary-tagged) String. The encoding tag IS the type declaration, so the
    // round-trip is symmetric and faithful (Uint8Array -> binary String ->
    // Uint8Array), like BigInt/Date/Map/Set — no lossy text coercion. |id| (when
    // Some) registers it in the Ref table so a binary blob aliased in a graph
    // keeps ONE identity instead of being duplicated; None = not identity-tracked
    // (e.g. a to_str result).
    Bytes { id: Option<u32>, bytes: Vec<u8> },
    // Arbitrary-precision integer (JS BigInt <-> Ruby Integer). Carried as V8's
    // word representation: sign + little-endian u64 limbs. Both ends speak this
    // natively (V8 BigInt words; Ruby Integer via a hex string), so no value is
    // truncated — unlike routing a big int through f64.
    BigInt { negative: bool, words: Vec<u64> },
    // JS Date <-> Ruby Time, carried as milliseconds since the Unix epoch
    // (v8::Date::value_of's unit). mini_racer marshals Date to Time.
    Date(f64),
    // Containers carry a serialization id so shared/cyclic graphs survive the
    // round-trip: the first time an object is seen it is emitted with its id,
    // and any later occurrence (a sibling sharing it, or a cycle back to an
    // ancestor) is emitted as Ref(id) instead of being re-expanded.
    Array { id: u32, items: Vec<JsVal> },
    // JS object / Ruby Hash with string keys. Insertion order preserved.
    Obj { id: u32, entries: Vec<(String, JsVal)> },
    // JS Map <-> Ruby Hash. Keys are arbitrary values (not just strings), so
    // this is distinct from Obj. Insertion order preserved.
    Map { id: u32, pairs: Vec<(JsVal, JsVal)> },
    // JS Set <-> Ruby Set (stdlib).
    Set { id: u32, items: Vec<JsVal> },
    // Back-reference to an already-emitted container (preserves identity; makes
    // cycles representable instead of truncating at a depth cap).
    Ref(u32),
}

// Cycles and sharing are handled by the Ref table (see JsVal::Ref), so this is
// purely a native-stack backstop against a pathologically deep (but acyclic)
// graph — set well above any realistic nesting.
const MAX_MARSHAL_DEPTH: u32 = 256;
218
-
219
179
  #[derive(Debug)]
220
180
  enum VmError {
221
181
  Parse(String), // compile-time failure -> RustyRacer::ParseError
@@ -229,148 +189,6 @@ enum VmError {
229
189
  Terminated, // watchdog/stop -> RustyRacer::ScriptTerminatedError
230
190
  }
231
191
 
232
- // One VM operation, built by a magnus method and run inline by Core::run ->
233
- // service_request -> dispatch_one. |context_id| selects which realm the op runs
234
- // in: 0 = the main realm (Context's own globalThis, swappable by reset_realm),
235
- // N >= 1 = an extra realm made by create_context.
236
- enum Request {
237
- Eval {
238
- context_id: i32,
239
- source: String,
240
- filename: String,
241
- timeout_ms: u64,
242
- },
243
- // Resolve a dotted function path on globalThis and invoke it with marshalled
244
- // args (v8::Function::call), preserving the holder as `this`. Distinct from
245
- // Eval so args keep full type/identity fidelity instead of a JSON literal.
246
- Call {
247
- context_id: i32,
248
- name: String,
249
- args: Vec<JsVal>,
250
- // void = don't marshal the return (fire-and-forget): the called fn may
251
- // return a huge/cyclic JS object the caller never reads.
252
- void: bool,
253
- timeout_ms: u64,
254
- },
255
- // Drain the isolate's microtask queue once (no auto event loop).
256
- DrainMicrotasks {
257
- timeout_ms: u64,
258
- },
259
- Attach {
260
- context_id: i32,
261
- name: String,
262
- host_fn_id: usize,
263
- timeout_ms: u64,
264
- },
265
- // Batch attach: install many (name, host_fn_id) host fns in one round-trip
266
- // (a fresh realm needs ~dozens). Same semantics as Attach, applied in order.
267
- AttachMany {
268
- context_id: i32,
269
- entries: Vec<(String, usize)>,
270
- timeout_ms: u64,
271
- },
272
- // reset: swap globalThis for a fresh v8::Context, reusing the same warm
273
- // isolate — csim's per-visit reset. Applies to the named context.
274
- Reset {
275
- context_id: i32,
276
- },
277
- // create_context: build a fresh, persistent v8::Context in the isolate and
278
- // return its id (the multi-realm model). DisposeContext frees one.
279
- CreateContext,
280
- DisposeContext {
281
- context_id: i32,
282
- },
283
- // Thin ES-module primitives (V8's raw compile/instantiate/evaluate). The
284
- // embedder owns the url->Module registry and the resolve policy; the binding
285
- // just exposes the steps. A compiled module is addressed by an id (like a
286
- // realm) since a v8::Local handle can't outlive the op's scope.
287
- CompileModule {
288
- // The context to compile the module in (modules are realm-bound).
289
- context_id: i32,
290
- source: String,
291
- filename: String,
292
- // Bytecode cache to consume (skip reparse); None compiles fresh.
293
- cached_data: Option<Vec<u8>>,
294
- // Produce a fresh bytecode cache to hand back (Module#cached_data).
295
- produce_cache: bool,
296
- // Eager-compile every function up front (CompileOptions::EagerCompile)
297
- // instead of V8's default lazy top-level-only compile. Ignored when
298
- // cached_data is set (V8 forbids ConsumeCodeCache + EagerCompile).
299
- eager: bool,
300
- },
301
- // instantiate: V8 walks imports, calling back to the Ruby resolve block
302
- // (parked in the slot for the op) per edge via resolve_imported.
303
- InstantiateModule {
304
- module_id: i32,
305
- },
306
- EvaluateModule {
307
- module_id: i32,
308
- timeout_ms: u64,
309
- },
310
- ModuleNamespace {
311
- module_id: i32,
312
- },
313
- // The module's v8::Module::Status, as a lowercase name ("uninstantiated",
314
- // "instantiated", ...) the Ruby wrapper symbolizes.
315
- ModuleStatus {
316
- module_id: i32,
317
- },
318
- DisposeModule {
319
- module_id: i32,
320
- },
321
- // Classic <script> primitives (V8 ScriptCompiler::CompileUnboundScript): an
322
- // unbound script, compiled in a context, runnable repeatedly, with the same
323
- // bytecode-cache options as modules. Addressed by id like a module.
324
- CompileScript {
325
- context_id: i32,
326
- source: String,
327
- filename: String,
328
- cached_data: Option<Vec<u8>>,
329
- produce_cache: bool,
330
- eager: bool,
331
- },
332
- // Bind the script to its context and run it; returns the completion value.
333
- RunScript {
334
- script_id: i32,
335
- timeout_ms: u64,
336
- },
337
- DisposeScript {
338
- script_id: i32,
339
- },
340
- // Serialize a bytecode cache from a compiled handle's CURRENT compile state
341
- // (Script#create_code_cache / Module#create_code_cache). Called after run/
342
- // evaluate, it captures the inner functions V8 lazily compiled while running
343
- // — the only way (as of V8-150) to get inner-function bytecode into a cache,
344
- // since create_code_cache at compile time only sees the top level.
345
- ScriptCodeCache {
346
- script_id: i32,
347
- },
348
- ModuleCodeCache {
349
- module_id: i32,
350
- },
351
- }
352
-
353
- // compile_module result: the module's id plus any produced bytecode cache and
354
- // whether a supplied cache was rejected.
355
- struct Compiled {
356
- id: i32,
357
- cached_data: Option<Vec<u8>>,
358
- cache_rejected: bool,
359
- }
360
-
361
- // The terminal reply of an op: service_request returns it straight up to
362
- // Core::run (no channel). Host callbacks and module resolvers don't round-trip
363
- // through here — they run inline (with_gvl).
364
- enum VmReply {
365
- Done(Result<JsVal, VmError>),
366
- // compile_module / compile's richer reply (id + produced cache + rejected).
367
- ModuleCompiled(Result<Compiled, VmError>),
368
- ScriptCompiled(Result<Compiled, VmError>),
369
- // Script#/Module#create_code_cache: the serialized bytes, or None when V8
370
- // can't produce a cache (or the handle's realm is gone).
371
- CodeCache(Result<Option<Vec<u8>>, VmError>),
372
- }
373
-
374
192
  // ---------------------------------------------------------------------------
375
193
  // GVL plumbing — the unsafe boundary of the gem (two trampolines).
376
194
  // ---------------------------------------------------------------------------
@@ -449,644 +267,6 @@ fn error_to_exception(e: &Error) -> Option<Exception> {
449
267
  None
450
268
  }
451
269
 
// ---------------------------------------------------------------------------
// V8 stack limit (in-thread: V8 runs on the calling Ruby thread's stack)
// ---------------------------------------------------------------------------
// rusty_v8 doesn't wrap the runtime `v8::Isolate::SetStackLimit(uintptr_t)`, so
// link the public V8 symbol directly (stable across V8 versions). It sets the
// lowest address V8's stack may reach before it throws RangeError.
unsafe extern "C" {
    #[link_name = "_ZN2v87Isolate13SetStackLimitEm"]
    fn v8__Isolate__SetStackLimit(isolate: *mut c_void, stack_limit: usize);
    // V8's own (exported) accessors down to the conservative-GC-scan Stack
    // object, so we can re-point its stack_start per op when V8 runs on a Ruby
    // Fiber (see set_fiber_scan_start / discover_scan_start_field). Member fns:
    // the first arg is `this`. The public v8::Isolate* IS i::Isolate*.
    #[link_name = "_ZN2v88internal7Isolate4heapEv"]
    fn v8__internal__Isolate__heap(isolate: *mut c_void) -> *mut c_void;
    #[link_name = "_ZN2v88internal4Heap5stackEv"]
    fn v8__internal__Heap__stack(heap: *mut c_void) -> *mut c_void;
    // Sets the scan stack_start to v8::base::Stack::GetStackStart() (the native
    // pthread top) — used only to positively identify the field during discovery.
    #[link_name = "_ZN2v88internal4Heap13SetStackStartEv"]
    fn v8__internal__Heap__SetStackStart(heap: *mut c_void);
    #[link_name = "_ZN2v84base5Stack13GetStackStartEv"]
    fn v8__base__Stack__GetStackStart() -> usize;
}

// Locate V8's conservative-GC-scan stack_start field
// (heap::base::Stack::current_segment_.start) so set_fiber_scan_start can
// re-point it per op. The scanner walks [SP, stack_start); on a Ruby Fiber V8's
// stack_start is still the NATIVE thread top, a different region, so the walk
// runs off the fiber's mapped top into the guard page and SEGVs (the residual
// after the limit fix). We reach the Stack via V8's exported Isolate::heap()/
// Heap::stack(); the field is the first word of Stack (current_segment_ is its
// first member, .start the first field), but we VERIFY rather than trust the
// layout: Heap::SetStackStart() writes that field to base::Stack::GetStackStart(),
// so if poking a sentinel and re-calling SetStackStart restores the value at
// offset 0, that word IS the field. Any mismatch returns 0 (override disabled —
// V8 keeps its native start, i.e. the rare pre-fix crash, NEVER corruption).
// To honour that "never corruption" promise, a probe that V8 did not overwrite
// is undone before returning: the word is put back to the value it held when
// the first check passed.
// Must run with the isolate ENTERED. `real_isolate` is the raw v8::Isolate*.
//
// Returns the address of the verified field (the Stack object's address), or 0
// when discovery failed.
fn discover_scan_start_field(real_isolate: *mut c_void) -> usize {
    const SENTINEL: usize = 0xA5A5_A5A5_A5A5_A5A5;
    // SAFETY: the caller passes a live, entered v8::Isolate*. Every pointer V8
    // hands back is null-checked before use, and the only word written is the
    // first word of the Stack object, after confirming it holds the value
    // SetStackStart is expected to store there.
    unsafe {
        let heap = v8__internal__Isolate__heap(real_isolate);
        if heap.is_null() {
            return 0;
        }
        let stack = v8__internal__Heap__stack(heap);
        if stack.is_null() {
            return 0;
        }
        let nt = v8__base__Stack__GetStackStart();
        if nt == 0 {
            return 0;
        }
        v8__internal__Heap__SetStackStart(heap); // start := nt
        let field = stack as *mut usize; // expected &current_segment_.start
        if field.read() != nt {
            return 0; // offset 0 isn't the field (layout changed) — disable
        }
        field.write(SENTINEL);
        v8__internal__Heap__SetStackStart(heap); // must rewrite the same word
        if field.read() != nt {
            // SetStackStart doesn't own offset 0 — disable. If our probe is
            // still sitting there, undo it: the word held `nt` before we wrote
            // the sentinel, and leaving the sentinel behind would corrupt
            // whatever unrelated field lives at that offset.
            if field.read() == SENTINEL {
                field.write(nt);
            }
            return 0;
        }
        stack as usize
    }
}
518
-
519
- // The native thread's stack bounds are stable per NATIVE thread, but querying
520
- // them (pthread, which reads /proc/self/maps for the main thread on Linux) is
521
- // far too slow per op. Cache (bottom, top) in a native-thread-local — correct
522
- // under M:N (each native thread caches its own stack) and ~free after the first
523
- // op on a thread. (0, 0) if it can't be queried.
524
- thread_local! {
525
- static STACK_BOUNDS: std::cell::Cell<(usize, usize)> =
526
- const { std::cell::Cell::new((0, 0)) };
527
- }
528
-
529
- fn native_stack_bounds_cached() -> (usize, usize) {
530
- STACK_BOUNDS.with(|c| {
531
- let cached = c.get();
532
- if cached.0 != 0 {
533
- return cached;
534
- }
535
- let bounds = native_stack_bounds();
536
- c.set(bounds);
537
- bounds
538
- })
539
- }
540
-
541
- // (bottom, top) of the CURRENT native thread's stack via pthread (uncached —
542
- // callers go through native_stack_bounds_cached). The stack grows DOWN from top
543
- // toward bottom. (0, 0) if it can't be queried. NB: this is the NATIVE thread's
544
- // pthread stack; a Ruby Fiber runs on a separate mmap'd stack invisible here.
545
- #[cfg(target_os = "linux")]
546
- fn native_stack_bounds() -> (usize, usize) {
547
- unsafe {
548
- let mut attr: libc::pthread_attr_t = std::mem::zeroed();
549
- if libc::pthread_getattr_np(libc::pthread_self(), &mut attr) != 0 {
550
- return (0, 0);
551
- }
552
- let mut addr: *mut c_void = null_mut();
553
- let mut size: libc::size_t = 0;
554
- let rc = libc::pthread_attr_getstack(&attr, &mut addr, &mut size);
555
- libc::pthread_attr_destroy(&mut attr);
556
- if rc != 0 {
557
- return (0, 0);
558
- }
559
- (addr as usize, addr as usize + size)
560
- }
561
- }
562
-
563
- #[cfg(target_os = "macos")]
564
- fn native_stack_bounds() -> (usize, usize) {
565
- unsafe {
566
- let top = libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize;
567
- let size = libc::pthread_get_stacksize_np(libc::pthread_self());
568
- (top.saturating_sub(size), top)
569
- }
570
- }
571
-
572
- #[cfg(not(any(target_os = "linux", target_os = "macos")))]
573
- fn native_stack_bounds() -> (usize, usize) {
574
- (0, 0)
575
- }
576
-
577
- // Lower bound (and upper, for caching) of the memory region containing `addr`
578
- // — i.e. the BOTTOM of the stack `addr` is on. Used for a Ruby Fiber, whose
579
- // mmap'd stack pthread can't see: V8's limit must sit ABOVE this bottom or a
580
- // deep fiber recursion overflows the real stack and SEGVs the unmapped guard.
581
- // Cached per native thread keyed by the region (parsing /proc/self/maps is
582
- // slow): reused while successive ops stay on the same fiber. (0, 0) if unknown.
583
- thread_local! {
584
- static FIBER_REGION: std::cell::Cell<(usize, usize)> = const { std::cell::Cell::new((0, 0)) };
585
- }
586
-
587
- fn current_region_bounds_cached(addr: usize) -> (usize, usize) {
588
- FIBER_REGION.with(|c| {
589
- let (lo, hi) = c.get();
590
- if lo != 0 && addr >= lo && addr < hi {
591
- return (lo, hi);
592
- }
593
- let bounds = query_region_bounds(addr);
594
- if bounds.0 != 0 {
595
- c.set(bounds);
596
- }
597
- bounds
598
- })
599
- }
600
-
// Find the /proc/self/maps mapping that contains `addr` and return its
// [start, end). Linux only. The file is read from scratch on every call, which
// is slow, so this is meant to run only on a cache miss (a new fiber). Returns
// (0, 0) when the file can't be read or no mapping contains `addr`.
#[cfg(target_os = "linux")]
fn query_region_bounds(addr: usize) -> (usize, usize) {
    let Ok(maps) = std::fs::read_to_string("/proc/self/maps") else {
        return (0, 0);
    };
    maps.lines()
        .filter_map(|entry| {
            // e.g. "7f6a...000-7f6a...000 rw-p 00000000 00:00 0 ..."
            let (span, _rest) = entry.split_once(' ')?;
            let (start, end) = span.split_once('-')?;
            let start = usize::from_str_radix(start, 16).ok()?;
            let end = usize::from_str_radix(end, 16).ok()?;
            Some((start, end))
        })
        .find(|&(start, end)| start <= addr && addr < end)
        .unwrap_or((0, 0))
}

// No /proc/self/maps off Linux: always "unknown", and the caller falls back.
#[cfg(not(target_os = "linux"))]
fn query_region_bounds(_addr: usize) -> (usize, usize) {
    (0, 0)
}
638
-
639
- // Set from RUSTY_RACER_STACK_DEBUG at init; gates the per-op stack diagnostics.
640
- static STACK_DEBUG: AtomicBool = AtomicBool::new(false);
641
-
642
- // Re-point V8's stack limit at the CURRENT stack each op. In-thread V8 runs
643
- // wherever the Ruby code is: usually the native thread's pthread stack, but also
644
- // a Ruby Fiber's separate mmap'd stack (Capybara::Result is an Enumerator) that
645
- // pthread can't see. The limit MUST sit between the current SP and the real
646
- // bottom of whatever stack we're on:
647
- // * Too high (above SP) and V8 declares a FALSE overflow on entry.
648
- // * Too low (below the real bottom) and a deep recursion grows past the
649
- // mapped stack and SEGVs the unmapped guard page below it.
650
- // So detect the stack by comparing the SP to the cached native bounds: on the
651
- // native stack, anchor to its pthread bottom; on a fiber, find the bottom of the
652
- // /proc/self/maps region holding the SP (the fiber's real bottom — anchoring to
653
- // SP minus a fixed guard punched through the bottom of Avo's small/deep Capybara
654
- // fibers and SEGV'd). Must be called with the isolate ENTERED. `real_isolate` is
655
- // the raw v8::Isolate* read out of iso_ptr.
656
- //
657
- // On a fiber it ALSO re-points V8's conservative-GC-scan stack_start (via
658
- // scan_start_field, discovered once per isolate) to `stack_top`: Enter just set
659
- // it to the native top, but the scanner walks [marker, stack_start), so a native
660
- // start runs the scan off the fiber's mapped stack into unmapped memory and
661
- // SEGVs (Avo's Capybara filter chain). scan_start_field is 0 when discovery
662
- // failed (override disabled).
663
- //
664
- // LIMITATION (worker-thread fibers): the GC and a thrown exception ALSO
665
- // `CHECK(IsOnCentralStack(SP))`, which tests the SP against
666
- // `base::Stack::GetStackStart()` — the pthread top, cached per native thread,
667
- // with no API to retarget — NOT the scan start we re-point above. A fiber mmap'd
668
- // ABOVE that top (the common case on a NON-main native thread, whose stack sits
669
- // below later fiber mmaps) fails the CHECK, so V8 aborts on the next GC or throw.
670
- // We can fix the scan (the SEGV) but not that CHECK. On the main thread the
671
- // process stack is the highest address, so every fiber is below it and both the
672
- // scan and the CHECK are safe — the Capybara/Avo case. See README.
673
- fn set_v8_stack_limit(real_isolate: *mut c_void, scan_start_field: usize, stack_top: usize) {
674
- let sp_marker = 0u8;
675
- let sp = &sp_marker as *const u8 as usize;
676
- let (nbottom, ntop) = native_stack_bounds_cached();
677
- let on_native = nbottom != 0 && sp > nbottom && sp <= ntop;
678
- // Reserve below the limit for V8's own RangeError-throw frames.
679
- const NATIVE_GUARD: usize = 128 * 1024;
680
- // V8 throws when SP descends to the limit, then needs some real stack BELOW
681
- // it to build the RangeError (and V8 itself allows growing a little past the
682
- // limit — its overflow slack). On a fiber that reserve must NOT cross the
683
- // fiber's real bottom (the mapping below it is an unmapped guard -> SEGV), so
684
- // keep it comfortably above V8's slack.
685
- const FIBER_RESERVE: usize = 64 * 1024;
686
- let mut region = (0usize, 0usize);
687
- let limit = if on_native {
688
- nbottom + NATIVE_GUARD
689
- } else {
690
- // Anchor to the FIBER's real bottom (the /proc/self/maps region holding
691
- // the SP), not the SP: SP - fixed_guard can punch through the bottom of a
692
- // small/deep fiber stack and SEGV (Avo's deep Capybara filter chain).
693
- // Reserve FIBER_RESERVE above the bottom for the throw, but keep the
694
- // limit below the SP so we don't false-overflow; on a nearly-full fiber
695
- // that clamps the headroom down (an early but CLEAN RangeError).
696
- region = current_region_bounds_cached(sp);
697
- if region.0 != 0 {
698
- (region.0 + FIBER_RESERVE).min(sp.saturating_sub(8 * 1024))
699
- } else {
700
- sp.saturating_sub(64 * 1024) // region unknown (non-linux) — best effort
701
- }
702
- };
703
- if limit == 0 {
704
- return; // couldn't determine a sane limit — leave V8's default
705
- }
706
- unsafe { v8__Isolate__SetStackLimit(real_isolate, limit) };
707
- // On a fiber, re-point V8's conservative-GC-scan stack_start to `stack_top`
708
- // — a live address captured by the caller ABOVE every V8 frame of this op.
709
- // Enter() set the start to the NATIVE top (a different region); the scanner
710
- // walks [marker, start), so a native start runs it off the fiber's mapped
711
- // top into unmapped memory and SEGVs. Anchoring to stack_top keeps the whole
712
- // scan range between two real stack pointers (marker..stack_top), so it's
713
- // guaranteed mapped, and every V8 root (all below stack_top) is still found.
714
- // (We can't use the /proc/maps region top here: that mapping isn't reliably
715
- // contiguous, so the scan could still hit a hole below it.)
716
- if !on_native && stack_top != 0 && scan_start_field != 0 {
717
- unsafe { (scan_start_field as *mut usize).write(stack_top) };
718
- }
719
- // Opt-in diagnostics (RUSTY_RACER_STACK_DEBUG): the SP vs the native stack
720
- // [nbottom, ntop], the fiber region (if any), the per-op limit, and whether
721
- // the SP is above the limit. A crash with sp_above_limit=false means the
722
- // limit is wrong for the current stack.
723
- if STACK_DEBUG.load(Ordering::Relaxed) {
724
- eprintln!(
725
- "[rusty stack] sp={sp:#x} nbottom={nbottom:#x} ntop={ntop:#x} \
726
- region=[{:#x},{:#x}) limit={limit:#x} fiber={} sp_above_limit={} \
727
- fiber_above_native={}",
728
- region.0,
729
- region.1,
730
- !on_native,
731
- sp > limit,
732
- !on_native && nbottom != 0 && sp > ntop,
733
- );
734
- }
735
- }
736
-
// Render little-endian u64 limbs as a big-endian hex magnitude — no sign and no
// "0x" prefix. This is the common form shared by V8 BigInt words and Ruby's
// Integer(str, 16). The most significant limb is printed without padding; each
// lower limb is zero-padded to its full 16 digits. An empty slice renders "0".
fn words_to_hex(words: &[u64]) -> String {
    let Some((top, lower)) = words.split_last() else {
        return String::from("0");
    };
    let mut hex = format!("{top:x}");
    for limb in lower.iter().rev() {
        hex.push_str(&format!("{limb:016x}"));
    }
    hex
}
753
-
// Big-endian hex magnitude -> little-endian u64 limbs (inverse of words_to_hex).
//
// The input is consumed from its END in 16-digit groups, so the first limb is
// the least significant and a short leading group becomes the top limb. A group
// that is not valid hex yields a 0 limb, and an empty input yields [0].
//
// The grouping works on BYTES rather than on `&str` slices: slicing a `&str` at
// a fixed byte offset panics when the offset falls inside a multi-byte UTF-8
// character, whereas here such a group simply fails to parse and becomes 0.
fn hex_to_words(hex: &str) -> Vec<u64> {
    let mut words: Vec<u64> = hex
        .as_bytes()
        .rchunks(16)
        .map(|limb| {
            std::str::from_utf8(limb)
                .ok()
                .and_then(|digits| u64::from_str_radix(digits, 16).ok())
                .unwrap_or(0)
        })
        .collect();
    if words.is_empty() {
        words.push(0);
    }
    words
}
768
-
769
- // Tracks objects already emitted this marshal so a re-encounter becomes a
770
- // Ref instead of re-expansion. Buckets by V8 identity hash (which can collide),
771
- // disambiguated by Local equality — the same trick the module registry uses.
772
- #[derive(Default)]
773
- struct JsSeen {
774
- next_id: u32,
775
- map: HashMap<i32, Vec<(v8::Global<v8::Object>, u32)>>,
776
- }
777
-
778
- // Decide how to emit a container object: Ok(id) = first sighting, register it
779
- // and recurse; Err(jsval) = emit this directly and stop (a Ref to an already-
780
- // seen object, or a truncated Str at the depth backstop). Centralising this in
781
- // one place keeps the four container arms (array/object/map/set) in lockstep —
782
- // and crucially orders the checks so a depth-truncated object is NEVER assigned
783
- // an id (which would leave a sibling Ref dangling).
784
- fn js_container_id(
785
- scope: &mut v8::PinScope<'_, '_>,
786
- seen: &mut JsSeen,
787
- value: v8::Local<v8::Value>,
788
- obj: v8::Local<v8::Object>,
789
- depth: u32,
790
- ) -> Result<u32, JsVal> {
791
- let hash = obj.get_identity_hash().get();
792
- if let Some(bucket) = seen.map.get(&hash) {
793
- for (g, id) in bucket {
794
- if v8::Local::new(scope, g) == obj {
795
- return Err(JsVal::Ref(*id));
796
- }
797
- }
798
- }
799
- // First sighting but too deep: truncate WITHOUT registering, so no later
800
- // Ref can target a container that was never emitted.
801
- if depth >= MAX_MARSHAL_DEPTH {
802
- return Err(JsVal::Str(value.to_rust_string_lossy(scope)));
803
- }
804
- let id = seen.next_id;
805
- seen.next_id += 1;
806
- let g = v8::Global::new(scope, obj);
807
- seen.map.entry(hash).or_default().push((g, id));
808
- Ok(id)
809
- }
810
-
// Build an owned Vec holding |len| bytes read from a V8 (Shared)ArrayBuffer
// backing pointer. The Vec is allocated once and not zero-filled, since every
// byte is overwritten by the copy. |data| is None only for a zero-length
// buffer; the empty Vec is returned unchanged in that case.
fn copy_buffer_bytes(data: Option<std::ptr::NonNull<c_void>>, len: usize) -> Vec<u8> {
    let mut out: Vec<u8> = Vec::with_capacity(len);
    let Some(src) = data else {
        return out;
    };
    // SAFETY: relies on the caller passing a pointer that is readable for `len`
    // bytes. `out` was just allocated with capacity `len`, so the destination is
    // writable for `len` bytes and cannot overlap the source; the length is set
    // only after all `len` bytes have been written.
    unsafe {
        src.as_ptr()
            .cast::<u8>()
            .copy_to_nonoverlapping(out.as_mut_ptr(), len);
        out.set_len(len);
    }
    out
}
824
-
825
- fn js_to_jsval(scope: &mut v8::PinScope<'_, '_>, value: v8::Local<v8::Value>) -> JsVal {
826
- let mut seen = JsSeen::default();
827
- js_to_jsval_d(scope, value, &mut seen, 0)
828
- }
829
-
830
- fn js_to_jsval_d(
831
- scope: &mut v8::PinScope<'_, '_>,
832
- value: v8::Local<v8::Value>,
833
- seen: &mut JsSeen,
834
- depth: u32,
835
- ) -> JsVal {
836
- if value.is_undefined() {
837
- return JsVal::Undefined;
838
- }
839
- if value.is_null() {
840
- return JsVal::Null;
841
- }
842
- if value.is_boolean() {
843
- return JsVal::Bool(value.boolean_value(scope));
844
- }
845
- if value.is_int32() {
846
- return JsVal::Int(value.integer_value(scope).unwrap_or(0));
847
- }
848
- if value.is_number() {
849
- return JsVal::Num(value.number_value(scope).unwrap_or(f64::NAN));
850
- }
851
- if value.is_big_int() {
852
- if let Ok(bi) = v8::Local::<v8::BigInt>::try_from(value) {
853
- let mut words = vec![0u64; bi.word_count()];
854
- let (negative, _) = bi.to_words_array(&mut words);
855
- return JsVal::BigInt { negative, words };
856
- }
857
- }
858
- // Date before the generic object branch (a Date *is* an object).
859
- if value.is_date() {
860
- if let Ok(date) = v8::Local::<v8::Date>::try_from(value) {
861
- return JsVal::Date(date.value_of());
862
- }
863
- }
864
- // Binary buffers before the generic object branch (they are objects too).
865
- // A TypedArray/DataView copies its VIEWED window; a bare ArrayBuffer or
866
- // SharedArrayBuffer copies the whole buffer. All become a Ruby binary
867
- // String. (Without the SharedArrayBuffer arm a bare SAB would fall through
868
- // to the plain-object branch and marshal as an empty Hash — silent loss.)
869
- if value.is_array_buffer_view() {
870
- if let Ok(view) = v8::Local::<v8::ArrayBufferView>::try_from(value) {
871
- let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
872
- // depth 0: a buffer is a leaf (no recursion into children), so it
873
- // never risks native-stack overflow and must stay faithful bytes
874
- // even when deeply nested — only the identity (Ref) check applies,
875
- // never the depth-truncation-to-lossy-string the generic path uses.
876
- let id = match js_container_id(scope, seen, value, obj, 0) {
877
- Ok(id) => id,
878
- Err(jsval) => return jsval, // a Ref to the same buffer
879
- };
880
- let len = view.byte_length();
881
- let mut buf: Vec<u8> = Vec::with_capacity(len);
882
- // copy_contents_uninit writes into the UNINITIALIZED spare capacity
883
- // (a &mut [MaybeUninit<u8>]) — never forming a &mut [u8] over uninit
884
- // memory the way copy_contents would (that's UB). set_len to exactly
885
- // what it wrote so a detached/short view never exposes uninit bytes.
886
- let n = view.copy_contents_uninit(&mut buf.spare_capacity_mut()[..len]);
887
- unsafe { buf.set_len(n) };
888
- return JsVal::Bytes { id: Some(id), bytes: buf };
889
- }
890
- }
891
- if value.is_array_buffer() {
892
- if let Ok(ab) = v8::Local::<v8::ArrayBuffer>::try_from(value) {
893
- let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
894
- // depth 0 — a buffer is a leaf; see the view arm above.
895
- let id = match js_container_id(scope, seen, value, obj, 0) {
896
- Ok(id) => id,
897
- Err(jsval) => return jsval,
898
- };
899
- return JsVal::Bytes {
900
- id: Some(id),
901
- bytes: copy_buffer_bytes(ab.data(), ab.byte_length()),
902
- };
903
- }
904
- }
905
- if value.is_shared_array_buffer() {
906
- if let Ok(sab) = v8::Local::<v8::SharedArrayBuffer>::try_from(value) {
907
- let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
908
- // depth 0 — a buffer is a leaf; see the view arm above.
909
- let id = match js_container_id(scope, seen, value, obj, 0) {
910
- Ok(id) => id,
911
- Err(jsval) => return jsval,
912
- };
913
- let store = sab.get_backing_store();
914
- return JsVal::Bytes {
915
- id: Some(id),
916
- bytes: copy_buffer_bytes(store.data(), sab.byte_length()),
917
- };
918
- }
919
- }
920
- // Map/Set before the generic object branch (both are objects).
921
- if value.is_map() {
922
- let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
923
- let id = match js_container_id(scope, seen, value, obj, depth) {
924
- Ok(id) => id,
925
- Err(jsval) => return jsval,
926
- };
927
- let map = v8::Local::<v8::Map>::try_from(value).unwrap();
928
- let arr = map.as_array(scope); // [k0, v0, k1, v1, ...]
929
- let mut pairs = Vec::with_capacity((arr.length() / 2) as usize);
930
- let mut i = 0;
931
- while i + 1 < arr.length() {
932
- let k = arr.get_index(scope, i).unwrap_or_else(|| v8::undefined(scope).into());
933
- let v = arr.get_index(scope, i + 1).unwrap_or_else(|| v8::undefined(scope).into());
934
- let kj = js_to_jsval_d(scope, k, seen, depth + 1);
935
- let vj = js_to_jsval_d(scope, v, seen, depth + 1);
936
- pairs.push((kj, vj));
937
- i += 2;
938
- }
939
- return JsVal::Map { id, pairs };
940
- }
941
- if value.is_set() {
942
- let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
943
- let id = match js_container_id(scope, seen, value, obj, depth) {
944
- Ok(id) => id,
945
- Err(jsval) => return jsval,
946
- };
947
- let set = v8::Local::<v8::Set>::try_from(value).unwrap();
948
- let arr = set.as_array(scope);
949
- let mut items = Vec::with_capacity(arr.length() as usize);
950
- for i in 0..arr.length() {
951
- let el = arr.get_index(scope, i).unwrap_or_else(|| v8::undefined(scope).into());
952
- items.push(js_to_jsval_d(scope, el, seen, depth + 1));
953
- }
954
- return JsVal::Set { id, items };
955
- }
956
- if value.is_array() {
957
- let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
958
- let id = match js_container_id(scope, seen, value, obj, depth) {
959
- Ok(id) => id,
960
- Err(jsval) => return jsval,
961
- };
962
- let arr = v8::Local::<v8::Array>::try_from(value).unwrap();
963
- let mut items = Vec::with_capacity(arr.length() as usize);
964
- for i in 0..arr.length() {
965
- let el = arr
966
- .get_index(scope, i)
967
- .unwrap_or_else(|| v8::undefined(scope).into());
968
- items.push(js_to_jsval_d(scope, el, seen, depth + 1));
969
- }
970
- return JsVal::Array { id, items };
971
- }
972
- // Plain object -> string-keyed Obj. Functions/Date/etc. fall through to
973
- // their toString (the spike's primitive escape hatch).
974
- if value.is_object() && !value.is_function() {
975
- let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
976
- let id = match js_container_id(scope, seen, value, obj, depth) {
977
- Ok(id) => id,
978
- Err(jsval) => return jsval,
979
- };
980
- if let Some(names) = obj.get_own_property_names(scope, Default::default()) {
981
- let mut entries = Vec::with_capacity(names.length() as usize);
982
- for i in 0..names.length() {
983
- let Some(key) = names.get_index(scope, i) else {
984
- continue;
985
- };
986
- let key_str = key.to_rust_string_lossy(scope);
987
- let val = obj
988
- .get(scope, key)
989
- .unwrap_or_else(|| v8::undefined(scope).into());
990
- entries.push((key_str, js_to_jsval_d(scope, val, seen, depth + 1)));
991
- }
992
- return JsVal::Obj { id, entries };
993
- }
994
- }
995
- JsVal::Str(value.to_rust_string_lossy(scope))
996
- }
997
-
998
- // Owned-by-value (not &JsVal): a JsVal::Bytes hands its Vec straight to V8's
999
- // backing store with no copy of the payload, so a large binary blob crosses
1000
- // Ruby->JS with zero extra allocation.
1001
- fn jsval_to_js<'s>(scope: &mut v8::PinScope<'s, '_>, val: JsVal) -> v8::Local<'s, v8::Value> {
1002
- let mut built: HashMap<u32, v8::Local<'s, v8::Value>> = HashMap::new();
1003
- jsval_to_js_d(scope, val, &mut built)
1004
- }
1005
-
1006
- fn jsval_to_js_d<'s>(
1007
- scope: &mut v8::PinScope<'s, '_>,
1008
- val: JsVal,
1009
- built: &mut HashMap<u32, v8::Local<'s, v8::Value>>,
1010
- ) -> v8::Local<'s, v8::Value> {
1011
- match val {
1012
- JsVal::Undefined => v8::undefined(scope).into(),
1013
- JsVal::Null => v8::null(scope).into(),
1014
- JsVal::Bool(b) => v8::Boolean::new(scope, b).into(),
1015
- JsVal::Int(i) => v8::Number::new(scope, i as f64).into(),
1016
- JsVal::Num(n) => v8::Number::new(scope, n).into(),
1017
- JsVal::Str(s) => v8::String::new(scope, &s)
1018
- .map(|s| s.into())
1019
- .unwrap_or_else(|| v8::undefined(scope).into()),
1020
- // Bytes -> Uint8Array, moving the Vec into V8's backing store (no copy
1021
- // of the payload). Registered under |id| so an aliased blob resolves to
1022
- // the same Uint8Array via Ref.
1023
- JsVal::Bytes { id, bytes } => {
1024
- let len = bytes.len();
1025
- let store = v8::ArrayBuffer::new_backing_store_from_vec(bytes).make_shared();
1026
- let ab = v8::ArrayBuffer::with_backing_store(scope, &store);
1027
- let arr: v8::Local<v8::Value> = v8::Uint8Array::new(scope, ab, 0, len)
1028
- .map(|a| a.into())
1029
- .unwrap_or_else(|| v8::undefined(scope).into());
1030
- if let Some(id) = id {
1031
- built.insert(id, arr);
1032
- }
1033
- arr
1034
- }
1035
- JsVal::BigInt { negative, words } => v8::BigInt::new_from_words(scope, negative, &words)
1036
- .map(|b| b.into())
1037
- .unwrap_or_else(|| v8::undefined(scope).into()),
1038
- JsVal::Date(ms) => v8::Date::new(scope, ms)
1039
- .map(|d| d.into())
1040
- .unwrap_or_else(|| v8::undefined(scope).into()),
1041
- // Register the container under its id BEFORE filling it, so a Ref from
1042
- // a descendant (a cycle back to here) resolves to this same object.
1043
- JsVal::Array { id, items } => {
1044
- let arr = v8::Array::new(scope, items.len() as i32);
1045
- built.insert(id, arr.into());
1046
- for (i, it) in items.into_iter().enumerate() {
1047
- let v = jsval_to_js_d(scope, it, built);
1048
- arr.set_index(scope, i as u32, v);
1049
- }
1050
- arr.into()
1051
- }
1052
- JsVal::Obj { id, entries } => {
1053
- let obj = v8::Object::new(scope);
1054
- built.insert(id, obj.into());
1055
- for (k, it) in entries {
1056
- let Some(key) = v8::String::new(scope, &k) else {
1057
- continue;
1058
- };
1059
- let v = jsval_to_js_d(scope, it, built);
1060
- obj.set(scope, key.into(), v);
1061
- }
1062
- obj.into()
1063
- }
1064
- JsVal::Map { id, pairs } => {
1065
- let map = v8::Map::new(scope);
1066
- built.insert(id, map.into());
1067
- for (k, v) in pairs {
1068
- let kk = jsval_to_js_d(scope, k, built);
1069
- let vv = jsval_to_js_d(scope, v, built);
1070
- map.set(scope, kk, vv);
1071
- }
1072
- map.into()
1073
- }
1074
- JsVal::Set { id, items } => {
1075
- let set = v8::Set::new(scope);
1076
- built.insert(id, set.into());
1077
- for it in items {
1078
- let v = jsval_to_js_d(scope, it, built);
1079
- set.add(scope, v);
1080
- }
1081
- set.into()
1082
- }
1083
- JsVal::Ref(id) => built
1084
- .get(&id)
1085
- .copied()
1086
- .unwrap_or_else(|| v8::undefined(scope).into()),
1087
- }
1088
- }
1089
-
1090
270
  // JS called a host function. We are on the owner thread with the GVL RELEASED
1091
271
  // (the runner's without_gvl). Reacquire the GVL via with_gvl, run the Ruby proc
1092
272
  // inline, and set the JS return — no channel, no other thread. A VM op the proc
@@ -1242,103 +422,6 @@ fn capture_js_error(
1242
422
  VmError::JsError { message, backtrace }
1243
423
  }
1244
424
 
1245
- fn run_source(scope: &mut v8::PinScope<'_, '_>, source: &str, filename: &str) -> Result<JsVal, VmError> {
1246
- v8::tc_scope!(let tc, scope);
1247
- // Compile and run as distinct phases so a compile failure maps to
1248
- // ParseError and a thrown exception to RuntimeError (csim rescues both).
1249
- let Some(code) = v8::String::new(tc, source) else {
1250
- return Err(VmError::Parse("source too large".into()));
1251
- };
1252
- let origin = script_origin(tc, filename);
1253
- let script = match v8::Script::compile(tc, code, Some(&origin)) {
1254
- Some(script) => script,
1255
- None if tc.has_terminated() => return Err(VmError::Terminated),
1256
- None => {
1257
- let msg = tc
1258
- .exception()
1259
- .map(|e| e.to_rust_string_lossy(tc))
1260
- .unwrap_or_else(|| "parse error".to_string());
1261
- // Append the location V8 recorded; always name the file, add the
1262
- // line when V8 reports one.
1263
- let message = tc.message();
1264
- let res = message
1265
- .and_then(|m| m.get_script_resource_name(tc))
1266
- .filter(|v| v.is_string())
1267
- .map(|v| v.to_rust_string_lossy(tc))
1268
- .unwrap_or_else(|| filename.to_string());
1269
- let loc = match message.and_then(|m| m.get_line_number(tc)) {
1270
- Some(line) => format!(" at {res}:{line}"),
1271
- None => format!(" at {res}"),
1272
- };
1273
- return Err(VmError::Parse(format!("{msg}{loc}")));
1274
- }
1275
- };
1276
- match script.run(tc) {
1277
- Some(value) => Ok(js_to_jsval(tc, value)),
1278
- None if tc.has_terminated() => Err(VmError::Terminated),
1279
- None => {
1280
- let exc = tc.exception();
1281
- let stack = tc.stack_trace();
1282
- Err(capture_js_error(tc, exc, stack))
1283
- }
1284
- }
1285
- }
1286
-
1287
- // Resolve a dotted property path on globalThis to a function and invoke it via
1288
- // v8::Function::call, with the property's holder as `this` (so `a.b.f` gets the
1289
- // right receiver). Args/result marshal through the ref-preserving paths.
1290
- fn call_function(
1291
- scope: &mut v8::PinScope<'_, '_>,
1292
- name: &str,
1293
- args: Vec<JsVal>,
1294
- void: bool,
1295
- ) -> Result<JsVal, VmError> {
1296
- v8::tc_scope!(let tc, scope);
1297
- let context = tc.get_current_context();
1298
- let global = context.global(tc);
1299
- let mut recv: v8::Local<v8::Value> = global.into();
1300
- let mut target: v8::Local<v8::Value> = global.into();
1301
- for part in name.split('.') {
1302
- let Some(obj) = target.to_object(tc) else {
1303
- // The holder of `part` (a preceding segment) was null/undefined, so
1304
- // there's nothing to read `part` from — name the holder, not `part`.
1305
- return Err(VmError::Runtime(format!(
1306
- "`{name}`: cannot read `{part}` (a preceding path segment is not an object)"
1307
- )));
1308
- };
1309
- let Some(key) = v8::String::new(tc, part) else {
1310
- return Err(VmError::Runtime("property name too large".into()));
1311
- };
1312
- let Some(next) = obj.get(tc, key.into()) else {
1313
- if tc.has_terminated() {
1314
- return Err(VmError::Terminated);
1315
- }
1316
- let msg = tc
1317
- .exception()
1318
- .map(|e| e.to_rust_string_lossy(tc))
1319
- .unwrap_or_else(|| format!("cannot read `{part}` of `{name}`"));
1320
- return Err(VmError::Runtime(msg));
1321
- };
1322
- recv = target;
1323
- target = next;
1324
- }
1325
- let Ok(func) = v8::Local::<v8::Function>::try_from(target) else {
1326
- return Err(VmError::Runtime(format!("`{name}` is not a function")));
1327
- };
1328
- let argv: Vec<v8::Local<v8::Value>> = args.into_iter().map(|a| jsval_to_js(tc, a)).collect();
1329
- match func.call(tc, recv, &argv) {
1330
- // void: skip marshalling the return so a huge/cyclic result is never walked.
1331
- Some(_) if void => Ok(JsVal::Undefined),
1332
- Some(value) => Ok(js_to_jsval(tc, value)),
1333
- None if tc.has_terminated() => Err(VmError::Terminated),
1334
- None => {
1335
- let exc = tc.exception();
1336
- let stack = tc.stack_trace();
1337
- Err(capture_js_error(tc, exc, stack))
1338
- }
1339
- }
1340
- }
1341
-
1342
425
  // ---------------------------------------------------------------------------
1343
426
  // ES modules: V8's raw compile/instantiate/evaluate steps, with the embedder
1344
427
  // owning the url->Module registry (MODULES) and the resolve policy.
@@ -1350,30 +433,6 @@ fn module_origin<'s>(scope: &v8::PinScope<'s, '_>, url: &str) -> v8::ScriptOrigi
1350
433
  )
1351
434
  }
1352
435
 
1353
- // The (Source, CompileOptions) pair shared by the module and script compile
1354
- // handlers: consume a supplied bytecode cache (skip reparse), else eager-compile
1355
- // every function up front, else compile lazily (V8's default — only the top
1356
- // level). A supplied cache wins over `eager`: V8's CompileOptionsIsValid forbids
1357
- // ConsumeCodeCache + EagerCompile together, so `eager` is ignored on the consume
1358
- // path. (Source is an owned struct — V8 copies the origin in — so returning it
1359
- // across this fn boundary keeps the same handle-lifetime contract as inlining.)
1360
- fn compile_source<'s>(
1361
- code: v8::Local<'s, v8::String>,
1362
- origin: &v8::ScriptOrigin<'s>,
1363
- cached_data: &Option<Vec<u8>>,
1364
- eager: bool,
1365
- ) -> (v8::script_compiler::Source, v8::script_compiler::CompileOptions) {
1366
- use v8::script_compiler::{CompileOptions, Source};
1367
- match cached_data {
1368
- Some(bytes) => (
1369
- Source::new_with_cached_data(code, Some(origin), v8::script_compiler::CachedData::new(bytes)),
1370
- CompileOptions::ConsumeCodeCache,
1371
- ),
1372
- None if eager => (Source::new(code, Some(origin)), CompileOptions::EagerCompile),
1373
- None => (Source::new(code, Some(origin)), CompileOptions::NoCompileOptions),
1374
- }
1375
- }
1376
-
1377
436
  // Registry for the thin compile_module/instantiate API: each compiled module is
1378
437
  // addressed by an id, with its url kept for the resolve round-trip and a
1379
438
  // hash bucket to map a referrer Local<Module> back to its id.
@@ -1483,15 +542,7 @@ impl IsolateState {
1483
542
  core_ptr: std::ptr::null(),
1484
543
  instantiate_resolve: None,
1485
544
  instantiate_resolve_err: None,
1486
- watchdog: Arc::new(WatchdogShared {
1487
- inner: Mutex::new(WatchdogInner {
1488
- frames: Vec::new(),
1489
- next_generation: 0,
1490
- fired_generation: None,
1491
- shutdown: false,
1492
- }),
1493
- cv: Condvar::new(),
1494
- }),
545
+ watchdog: WatchdogShared::new(),
1495
546
  }
1496
547
  }
1497
548
  }
@@ -1557,32 +608,6 @@ fn auto_drain(scope: &mut v8::PinScope<'_, '_>, outermost: bool) {
1557
608
  }
1558
609
  }
1559
610
 
1560
- // The shared bracket every JS-running request (Eval/Call/Attach/RunScript/
1561
- // EvaluateModule) needs: arm the watchdog, run |body|, then on a watchdog
1562
- // timeout flag the leftover terminate for the outermost sweep and — only if
1563
- // |body| actually ran JS (the bool it returns) — override its outcome to
1564
- // Terminated. |body| owns its ContextScope, JS call, and auto_drain, and
1565
- // returns (ran_js, outcome); the realm-disposed/unknown paths return
1566
- // (false, Err(..)) so a raced watchdog can't poison an error for work that ran
1567
- // no JS. Collapsing the five arms onto this keeps the terminate discipline in
1568
- // ONE place.
1569
- fn run_js_bracketed(
1570
- scope: &mut v8::PinScope<'_, '_, ()>,
1571
- outermost: bool,
1572
- timeout_ms: u64,
1573
- body: impl FnOnce(&mut v8::PinScope<'_, '_, ()>, bool) -> (bool, Result<JsVal, VmError>),
1574
- ) -> Result<JsVal, VmError> {
1575
- let watchdog = arm_watchdog(scope, timeout_ms);
1576
- let (ran_js, mut outcome) = body(scope, outermost);
1577
- if disarm_watchdog(scope, watchdog) {
1578
- istate!(scope).watchdog_fired = true;
1579
- if ran_js {
1580
- outcome = Err(VmError::Terminated);
1581
- }
1582
- }
1583
- outcome
1584
- }
1585
-
1586
611
  // Drop every module AND script compiled in `context_id` (its v8::Context is
1587
612
  // going away — on reset or dispose — so those handles are now dead).
1588
613
  fn drop_context_artifacts(state: &mut IsolateState, context_id: i32) {
@@ -1920,853 +945,6 @@ fn finish_dynamic_import(
1920
945
  }
1921
946
  }
1922
947
 
1923
- // The watchdog runs on ONE persistent thread per isolate rather than a fresh
1924
- // std::thread per request: spawning + joining a thread on every op cost ~16µs
1925
- // (5.5x) when a timeout was set, dwarfing the actual work. The thread sleeps on
1926
- // a condvar until a deadline is armed, terminates execution once the deadline
1927
- // passes, then goes back to sleep.
1928
- struct WatchdogShared {
1929
- inner: Mutex<WatchdogInner>,
1930
- cv: Condvar,
1931
- }
1932
-
1933
// The deadline of a single armed request. `run_js_bracketed` can be entered
// again while an outer call is still running (a host fn called from JS may
// issue a nested op that arms its own timeout), so armed deadlines are kept as
// a stack of frames rather than one slot. A nested arm/disarm must leave the
// outer frame's deadline intact, otherwise the outer op would run unbounded
// once the nested call returns.
#[derive(Clone, Copy)]
struct WatchdogFrame {
    // Token identifying this frame; handed out by arm and matched on disarm.
    generation: u64,
    // Point in time at which this request is to be terminated.
    deadline: Instant,
}
1944
-
1945
- struct WatchdogInner {
1946
- // Every currently-armed op (with timeout_ms > 0), pushed on arm and removed
1947
- // on disarm. The loop honours the EARLIEST deadline across all frames: the
1948
- // most urgent timeout fires first, and since TerminateExecution is
1949
- // isolate-global it tears down whatever is running (escalating outward).
1950
- frames: Vec<WatchdogFrame>,
1951
- // Monotonic; each arm takes the next value as its frame's id.
1952
- next_generation: u64,
1953
- // The generation whose deadline the loop terminated on — consumed (and
1954
- // cleared) by that op's disarm so it can map its outcome to Terminated.
1955
- fired_generation: Option<u64>,
1956
- // Set at isolate teardown to break the loop.
1957
- shutdown: bool,
1958
- }
1959
-
1960
- // The persistent watchdog loop. Runs off a Send IsolateHandle so it never
1961
- // borrows the isolate the V8 thread owns.
1962
- fn watchdog_loop(shared: Arc<WatchdogShared>, handle: v8::IsolateHandle) {
1963
- let mut inner = shared.inner.lock().unwrap();
1964
- loop {
1965
- if inner.shutdown {
1966
- return;
1967
- }
1968
- // The earliest deadline among all armed frames is the one to enforce.
1969
- match inner.frames.iter().min_by_key(|f| f.deadline).copied() {
1970
- // Idle: sleep until a frame is armed (or shutdown).
1971
- None => inner = shared.cv.wait(inner).unwrap(),
1972
- Some(frame) => {
1973
- let now = Instant::now();
1974
- if now >= frame.deadline {
1975
- handle.terminate_execution();
1976
- inner.fired_generation = Some(frame.generation);
1977
- // Drop the fired frame so the loop moves on to the next
1978
- // deadline instead of re-firing this one every wakeup.
1979
- inner.frames.retain(|f| f.generation != frame.generation);
1980
- } else {
1981
- let (next, _) = shared.cv.wait_timeout(inner, frame.deadline - now).unwrap();
1982
- inner = next;
1983
- }
1984
- }
1985
- }
1986
- }
1987
- }
1988
-
1989
- // (The watchdog Arc now lives in IsolateState; arm/disarm reach it via istate!.)
1990
-
1991
- // Arm the watchdog for this request: push a frame with its own deadline and
1992
- // wake the loop. Returns the generation token to hand to `disarm_watchdog`
1993
- // (None when timeout_ms is 0 — no watchdog for this request).
1994
- fn arm_watchdog(scope: &mut v8::PinScope<'_, '_, ()>, timeout_ms: u64) -> Option<u64> {
1995
- if timeout_ms == 0 {
1996
- return None;
1997
- }
1998
- let shared = &istate!(scope).watchdog;
1999
- let mut inner = shared.inner.lock().unwrap();
2000
- inner.next_generation += 1;
2001
- let generation = inner.next_generation;
2002
- inner.frames.push(WatchdogFrame {
2003
- generation,
2004
- deadline: Instant::now() + Duration::from_millis(timeout_ms),
2005
- });
2006
- shared.cv.notify_one();
2007
- Some(generation)
2008
- }
2009
-
2010
- // Disarm: drop THIS request's frame (leaving any outer frame still armed) and
2011
- // report whether its deadline fired. On fire the caller maps the outcome to
2012
- // Terminated and the outermost frame sweeps the leftover terminate via
2013
- // WATCHDOG_FIRED; removing only this frame keeps a late terminate from
2014
- // poisoning the next request without clobbering a still-running outer op.
2015
- fn disarm_watchdog(scope: &mut v8::PinScope<'_, '_, ()>, generation: Option<u64>) -> bool {
2016
- let Some(generation) = generation else {
2017
- return false;
2018
- };
2019
- let shared = &istate!(scope).watchdog;
2020
- let mut inner = shared.inner.lock().unwrap();
2021
- inner.frames.retain(|f| f.generation != generation);
2022
- let fired = inner.fired_generation == Some(generation);
2023
- if fired {
2024
- inner.fired_generation = None;
2025
- }
2026
- shared.cv.notify_one();
2027
- fired
2028
- }
2029
-
2030
- // Service ONE request inline on the owner thread and RETURN its terminal reply.
2031
- // This is the single dispatcher for BOTH a top-level op and a re-entrant one (a
2032
- // host proc / module resolver that issues another op), so EVERY op — not just
2033
- // eval/call — works re-entrantly. `outermost` (depth == 0, computed by Core::run
2034
- // before it bumped the depth) owns the terminate-flag cleanup; a nested op
2035
- // passes false.
2036
- fn service_request(scope: &mut v8::PinScope<'_, '_, ()>, request: Request, outermost: bool) -> VmReply {
2037
- // Clear any terminate left over from BEFORE this request. An
2038
- // Isolate#terminate fired while no JS was running arms the isolate-global
2039
- // flag but no watchdog_fired, so the end-of-request sweep would miss it and
2040
- // the next eval would abort spuriously — and an idle terminate isn't even
2041
- // observable via is_execution_terminating() yet, so cancel unconditionally.
2042
- // Only at the outermost frame: a terminate aimed at a SUSPENDED outer frame
2043
- // must survive a nested request.
2044
- if outermost {
2045
- scope.cancel_terminate_execution();
2046
- }
2047
- // Mark the realm this request runs in active while it is on the stack, so
2048
- // Reset/DisposeContext can refuse to pull a live realm out from under a
2049
- // suspended frame.
2050
- let realm = request_realm(istate!(scope), &request);
2051
- if let Some(id) = realm {
2052
- istate!(scope).active_realms.push(id);
2053
- }
2054
- let reply = dispatch_one(scope, request, outermost);
2055
- if realm.is_some() {
2056
- istate!(scope).active_realms.pop();
2057
- }
2058
- // Sweep a leftover terminate flag once the whole request stack has
2059
- // unwound (see watchdog_fired for why nested frames must not cancel).
2060
- if outermost && istate!(scope).watchdog_fired {
2061
- istate!(scope).watchdog_fired = false;
2062
- scope.cancel_terminate_execution();
2063
- }
2064
- reply
2065
- }
2066
-
2067
- // The realm a request will run in (None for realm-independent ops); feeds
2068
- // ACTIVE_REALMS above.
2069
- fn request_realm(state: &IsolateState, request: &Request) -> Option<i32> {
2070
- match request {
2071
- Request::Eval { context_id, .. }
2072
- | Request::Call { context_id, .. }
2073
- | Request::Attach { context_id, .. }
2074
- | Request::AttachMany { context_id, .. }
2075
- | Request::CompileModule { context_id, .. }
2076
- | Request::CompileScript { context_id, .. } => Some(*context_id),
2077
- Request::DrainMicrotasks { .. } => Some(0),
2078
- Request::InstantiateModule { module_id, .. }
2079
- | Request::EvaluateModule { module_id, .. }
2080
- | Request::ModuleNamespace { module_id, .. } => {
2081
- module_handle(state, *module_id).map(|(_, cid)| cid)
2082
- }
2083
- Request::RunScript { script_id, .. } => script_handle(state, *script_id).map(|(_, cid)| cid),
2084
- Request::Reset { .. }
2085
- | Request::CreateContext
2086
- | Request::DisposeContext { .. }
2087
- | Request::ModuleStatus { .. }
2088
- | Request::DisposeModule { .. }
2089
- | Request::DisposeScript { .. }
2090
- | Request::ScriptCodeCache { .. }
2091
- | Request::ModuleCodeCache { .. } => None,
2092
- }
2093
- }
2094
-
2095
- fn dispatch_one(scope: &mut v8::PinScope<'_, '_, ()>, request: Request, outermost: bool) -> VmReply {
2096
- // A request-scoped handle scope, so handles created while servicing a
2097
- // nested request don't pile up in the suspended callback's scope.
2098
- v8::scope!(let scope, &mut *scope);
2099
- {
2100
- match request {
2101
- Request::Eval {
2102
- context_id,
2103
- source,
2104
- filename,
2105
- timeout_ms,
2106
- } => {
2107
- let outcome = run_js_bracketed(scope, outermost, timeout_ms, |scope, outermost| {
2108
- let realm = context_for(istate!(scope), context_id);
2109
- match realm {
2110
- Some(ctx) => {
2111
- let context = v8::Local::new(scope, &ctx);
2112
- let scope = &mut v8::ContextScope::new(scope, context);
2113
- let out = run_source(scope, &source, &filename);
2114
- auto_drain(scope, outermost);
2115
- (true, out)
2116
- }
2117
- None => (false, Err(VmError::Runtime("realm disposed or unknown".into()))),
2118
- }
2119
- });
2120
- VmReply::Done(outcome)
2121
- }
2122
- Request::Call {
2123
- context_id,
2124
- name,
2125
- args,
2126
- void,
2127
- timeout_ms,
2128
- } => {
2129
- // A host fn invoked by the called function runs inline
2130
- // (host_fn_callback, with_gvl) — no routing setup needed.
2131
- let outcome = run_js_bracketed(scope, outermost, timeout_ms, |scope, outermost| {
2132
- let realm = context_for(istate!(scope), context_id);
2133
- match realm {
2134
- Some(ctx) => {
2135
- let context = v8::Local::new(scope, &ctx);
2136
- let scope = &mut v8::ContextScope::new(scope, context);
2137
- let out = call_function(scope, &name, args, void);
2138
- auto_drain(scope, outermost);
2139
- (true, out)
2140
- }
2141
- None => (false, Err(VmError::Runtime("realm disposed or unknown".into()))),
2142
- }
2143
- });
2144
- VmReply::Done(outcome)
2145
- }
2146
- Request::DrainMicrotasks { timeout_ms } => {
2147
- // A microtask may call an attached host fn (a Promise .then ->
2148
- // ruby), which runs inline via host_fn_callback — no routing
2149
- // setup needed any more.
2150
- let watchdog = arm_watchdog(scope, timeout_ms);
2151
- let main = context_for(istate!(scope), 0);
2152
- if let Some(ctx) = main {
2153
- let context = v8::Local::new(scope, &ctx);
2154
- let scope = &mut v8::ContextScope::new(scope, context);
2155
- checkpoint_draining(scope);
2156
- }
2157
- let fired = disarm_watchdog(scope, watchdog);
2158
- if fired {
2159
- istate!(scope).watchdog_fired = true;
2160
- }
2161
- let outcome = if fired {
2162
- Err(VmError::Terminated)
2163
- } else {
2164
- Ok(JsVal::Undefined)
2165
- };
2166
- VmReply::Done(outcome)
2167
- }
2168
- Request::Attach {
2169
- context_id,
2170
- name,
2171
- host_fn_id,
2172
- timeout_ms,
2173
- } => {
2174
- // attach_at_path writes onto globalThis (and walks a dotted
2175
- // path), which can fire a user-defined accessor or Proxy trap —
2176
- // arbitrary JS. So it goes through the same bracket as Eval: a
2177
- // host fn the trap calls routes back, and a looping trap is
2178
- // time-capped.
2179
- let outcome = run_js_bracketed(scope, outermost, timeout_ms, |scope, outermost| {
2180
- let realm = context_for(istate!(scope), context_id);
2181
- match realm {
2182
- Some(ctx) => {
2183
- let context = v8::Local::new(scope, &ctx);
2184
- let scope = &mut v8::ContextScope::new(scope, context);
2185
- let external = v8::External::new(scope, host_fn_id as *mut c_void);
2186
- let out = match v8::Function::builder(host_fn_callback)
2187
- .data(external.into())
2188
- .build(scope)
2189
- {
2190
- // A dotted name (e.g. "MiniRacer.foo") attaches
2191
- // under a namespace object, creating missing
2192
- // intermediates, so host fns needn't pollute the
2193
- // bare global.
2194
- Some(function) => attach_at_path(scope, context, &name, function),
2195
- None => Err(VmError::Runtime("failed to build function".into())),
2196
- };
2197
- auto_drain(scope, outermost);
2198
- (true, out)
2199
- }
2200
- None => (false, Err(VmError::Runtime("realm disposed or unknown".into()))),
2201
- }
2202
- });
2203
- VmReply::Done(outcome)
2204
- }
2205
- Request::AttachMany {
2206
- context_id,
2207
- entries,
2208
- timeout_ms,
2209
- } => {
2210
- // Same as Attach (arbitrary JS via accessors/Proxy traps), but
2211
- // installs every entry under one bracket/drain. Applied in order;
2212
- // stops at the first failure and reports its (name-tagged) error.
2213
- // NOT transactional: entries before the failure stay attached —
2214
- // the realm is not rolled back (matches single Attach, which also
2215
- // commits its one write or fails it).
2216
- let outcome = run_js_bracketed(scope, outermost, timeout_ms, |scope, outermost| {
2217
- let realm = context_for(istate!(scope), context_id);
2218
- match realm {
2219
- Some(ctx) => {
2220
- let context = v8::Local::new(scope, &ctx);
2221
- let scope = &mut v8::ContextScope::new(scope, context);
2222
- let mut out = Ok(JsVal::Undefined);
2223
- for (name, host_fn_id) in &entries {
2224
- let external = v8::External::new(scope, *host_fn_id as *mut c_void);
2225
- out = match v8::Function::builder(host_fn_callback)
2226
- .data(external.into())
2227
- .build(scope)
2228
- {
2229
- Some(function) => attach_at_path(scope, context, name, function),
2230
- None => Err(VmError::Runtime(format!(
2231
- "failed to build function for `{name}`"
2232
- ))),
2233
- };
2234
- if out.is_err() {
2235
- break;
2236
- }
2237
- }
2238
- auto_drain(scope, outermost);
2239
- (true, out)
2240
- }
2241
- None => (false, Err(VmError::Runtime("realm disposed or unknown".into()))),
2242
- }
2243
- });
2244
- VmReply::Done(outcome)
2245
- }
2246
- Request::Reset { context_id } => {
2247
- let known =
2248
- context_id == 0 || istate!(scope).realms.contexts.contains_key(&context_id);
2249
- if istate!(scope).draining {
2250
- // A microtask from ANY realm may be mid-flight on the stack;
2251
- // swapping a v8::Context out from under it corrupts state.
2252
- VmReply::Done(Err(VmError::Runtime(
2253
- "cannot reset a realm during a microtask checkpoint".into(),
2254
- )))
2255
- } else if !known {
2256
- VmReply::Done(Err(VmError::Runtime(
2257
- "context disposed or unknown".into(),
2258
- )))
2259
- } else if istate!(scope).active_realms.contains(&context_id) {
2260
- // Swapping the v8::Context behind a suspended frame would
2261
- // drop its in-flight modules/scripts and let the realm id
2262
- // refer to a different context than the one on the stack
2263
- // (defeating the cross-context import guards).
2264
- VmReply::Done(Err(VmError::Runtime(
2265
- "cannot reset a realm while a request for it is suspended on the V8 stack"
2266
- .into(),
2267
- )))
2268
- } else {
2269
- let fresh = new_realm(scope, context_id);
2270
- {
2271
- let realms = &mut istate!(scope).realms;
2272
- if context_id == 0 {
2273
- realms.main_context = Some(fresh);
2274
- } else {
2275
- realms.contexts.insert(context_id, fresh);
2276
- }
2277
- }
2278
- // Drop modules bound to this context — their realm just changed.
2279
- drop_context_artifacts(istate!(scope), context_id);
2280
- VmReply::Done(Ok(JsVal::Undefined))
2281
- }
2282
- }
2283
- Request::CreateContext => {
2284
- let id = {
2285
- let realms = &mut istate!(scope).realms;
2286
- let id = realms.next_context_id;
2287
- realms.next_context_id += 1;
2288
- id
2289
- };
2290
- let fresh = new_realm(scope, id);
2291
- istate!(scope).realms.contexts.insert(id, fresh);
2292
- VmReply::Done(Ok(JsVal::Int(id as i64)))
2293
- }
2294
- Request::DisposeContext { context_id } => {
2295
- if istate!(scope).draining {
2296
- // Same hazard as Reset: a microtask from any realm may be live.
2297
- VmReply::Done(Err(VmError::Runtime(
2298
- "cannot dispose a realm during a microtask checkpoint".into(),
2299
- )))
2300
- } else if istate!(scope).active_realms.contains(&context_id) {
2301
- // Same hazard as Reset: a suspended frame still runs in it.
2302
- VmReply::Done(Err(VmError::Runtime(
2303
- "cannot dispose a realm while a request for it is suspended on the V8 stack"
2304
- .into(),
2305
- )))
2306
- } else {
2307
- // Dropping the Global lets V8 collect the context. id 0 is the
2308
- // default context and never disposed independently.
2309
- istate!(scope).realms.contexts.remove(&context_id);
2310
- // Reclaim the modules compiled in it (else they leak until
2311
- // isolate teardown).
2312
- drop_context_artifacts(istate!(scope), context_id);
2313
- VmReply::Done(Ok(JsVal::Undefined))
2314
- }
2315
- }
2316
- Request::CompileModule {
2317
- context_id,
2318
- source,
2319
- filename,
2320
- cached_data,
2321
- produce_cache,
2322
- eager,
2323
- } => {
2324
- let ctx = context_for(istate!(scope), context_id);
2325
- let outcome = match ctx {
2326
- None => Err(VmError::Runtime("context disposed or unknown".into())),
2327
- Some(cx) => {
2328
- let context = v8::Local::new(scope, &cx);
2329
- let scope = &mut v8::ContextScope::new(scope, context);
2330
- v8::tc_scope!(let tc, scope);
2331
- match v8::String::new(tc, &source) {
2332
- None => Err(VmError::Runtime("module source too large".into())),
2333
- Some(code) => {
2334
- let origin = module_origin(tc, &filename);
2335
- // Consume a supplied bytecode cache (skip reparse),
2336
- // eager-compile every function, or compile fresh
2337
- // (lazy). cached_data wins: V8 forbids combining
2338
- // ConsumeCodeCache with EagerCompile.
2339
- let (mut src, opts) = compile_source(code, &origin, &cached_data, eager);
2340
- let compiled = v8::script_compiler::compile_module2(
2341
- tc,
2342
- &mut src,
2343
- opts,
2344
- v8::script_compiler::NoCacheReason::NoReason,
2345
- );
2346
- match compiled {
2347
- Some(module) => {
2348
- // V8 marks a stale/incompatible supplied cache
2349
- // rejected; the embedder recompiles & re-caches.
2350
- let cache_rejected = cached_data.is_some()
2351
- && src.get_cached_data().is_some_and(|c| c.rejected());
2352
- // Produce a fresh cache from the unbound script.
2353
- let produced = if produce_cache {
2354
- module
2355
- .get_unbound_module_script(tc)
2356
- .create_code_cache()
2357
- .map(|c| c.to_vec())
2358
- } else {
2359
- None
2360
- };
2361
- let hash = module.get_identity_hash().get();
2362
- let g = v8::Global::new(tc, module);
2363
- let id = {
2364
- let m = &mut istate!(tc).modules;
2365
- let id = m.next_id;
2366
- m.next_id += 1;
2367
- m.by_id
2368
- .insert(id, (g.clone(), filename.clone(), context_id));
2369
- m.by_hash.entry(hash).or_default().push((g, id));
2370
- id
2371
- };
2372
- Ok(Compiled {
2373
- id,
2374
- cached_data: produced,
2375
- cache_rejected,
2376
- })
2377
- }
2378
- None if tc.has_terminated() => Err(VmError::Terminated),
2379
- // A module compile failure is a parse error
2380
- // (compile-time), not a thrown exception.
2381
- None => {
2382
- let msg = tc
2383
- .exception()
2384
- .map(|e| e.to_rust_string_lossy(tc))
2385
- .unwrap_or_else(|| "module parse error".to_string());
2386
- let message = tc.message();
2387
- let res = message
2388
- .and_then(|m| m.get_script_resource_name(tc))
2389
- .filter(|v| v.is_string())
2390
- .map(|v| v.to_rust_string_lossy(tc))
2391
- .unwrap_or_else(|| filename.clone());
2392
- let loc = match message.and_then(|m| m.get_line_number(tc)) {
2393
- Some(line) => format!(" at {res}:{line}"),
2394
- None => format!(" at {res}"),
2395
- };
2396
- Err(VmError::Parse(format!("{msg}{loc}")))
2397
- }
2398
- }
2399
- }
2400
- }
2401
- }
2402
- };
2403
- VmReply::ModuleCompiled(outcome)
2404
- }
2405
- Request::InstantiateModule { module_id } => {
2406
- // V8's module instantiation is NOT re-entrant: a nested
2407
- // instantiate issued from a resolve block walks the outer,
2408
- // half-built module graph and SEGVs the process. Refuse it
2409
- // cleanly — a resolve block may COMPILE dependencies lazily
2410
- // and return them; the outer instantiate links them.
2411
- if istate!(scope).instantiating {
2412
- VmReply::Done(Err(VmError::Runtime(
2413
- "instantiate is not re-entrant: another module is currently \
2414
- instantiating (compile the dependency and return it; the outer \
2415
- instantiate links it)"
2416
- .into(),
2417
- )))
2418
- } else {
2419
- istate!(scope).instantiating = true;
2420
- let handle = module_handle(istate!(scope), module_id);
2421
- let outcome = match handle {
2422
- None => Err(VmError::Runtime("unknown module".into())),
2423
- Some((g, cid)) => match context_for(istate!(scope), cid) {
2424
- None => Err(VmError::Runtime("module's context is gone".into())),
2425
- Some(cx) => {
2426
- let context = v8::Local::new(scope, &cx);
2427
- let scope = &mut v8::ContextScope::new(scope, context);
2428
- let module = v8::Local::new(scope, &g);
2429
- match module.get_status() {
2430
- // Already linked (or further along): a no-op,
2431
- // not an error — instantiate is idempotent.
2432
- v8::ModuleStatus::Instantiated
2433
- | v8::ModuleStatus::Evaluating
2434
- | v8::ModuleStatus::Evaluated => Ok(JsVal::Undefined),
2435
- // V8 CHECK-aborts on instantiating an errored
2436
- // module; surface its exception instead.
2437
- v8::ModuleStatus::Errored => Err(VmError::JsError {
2438
- message: module
2439
- .get_exception()
2440
- .to_rust_string_lossy(scope),
2441
- backtrace: vec![],
2442
- }),
2443
- _ => {
2444
- v8::tc_scope!(let tc, scope);
2445
- match module.instantiate_module(tc, resolve_imported) {
2446
- Some(true) => Ok(JsVal::Undefined),
2447
- _ if tc.has_terminated() => Err(VmError::Terminated),
2448
- // A resolver that RAISED is re-raised with its
2449
- // real class by instantiate_module (via the
2450
- // stashed exception); this generic link error
2451
- // is only used when no resolver exception was
2452
- // stashed.
2453
- _ => {
2454
- let exc = tc.exception();
2455
- let stack = tc.stack_trace();
2456
- Err(capture_js_error(tc, exc, stack))
2457
- }
2458
- }
2459
- }
2460
- }
2461
- }
2462
- }
2463
- };
2464
- istate!(scope).instantiating = false;
2465
- VmReply::Done(outcome)
2466
- }
2467
- }
2468
- Request::EvaluateModule { module_id, timeout_ms } => {
2469
- // Top-level module code (and, under :auto, the microtasks its
2470
- // TLA continuation drains) can loop, so it runs in the same
2471
- // watchdog bracket as Eval/Call/RunScript.
2472
- let outcome = run_js_bracketed(scope, outermost, timeout_ms, |scope, outermost| {
2473
- let handle = module_handle(istate!(scope), module_id);
2474
- match handle {
2475
- None => (false, Err(VmError::Runtime("unknown module".into()))),
2476
- Some((g, cid)) => match context_for(istate!(scope), cid) {
2477
- None => (false, Err(VmError::Runtime("module's context is gone".into()))),
2478
- Some(cx) => {
2479
- let context = v8::Local::new(scope, &cx);
2480
- let scope = &mut v8::ContextScope::new(scope, context);
2481
- let module = v8::Local::new(scope, &g);
2482
- // A top-level-await module's evaluate() returns a
2483
- // PENDING promise that only settles once the drain
2484
- // runs its continuation — remember it so we can read
2485
- // its post-drain state instead of reporting a stale Ok.
2486
- let mut eval_promise: Option<v8::Global<v8::Promise>> = None;
2487
- // ran_js is true ONLY for the Instantiated arm that
2488
- // actually calls evaluate(); the Errored/Evaluated/
2489
- // non-instantiated arms run no JS, so a raced watchdog
2490
- // must not override their real outcome to Terminated.
2491
- let mut did_eval = false;
2492
- // V8 CHECK-aborts the process if evaluate runs on a
2493
- // module that isn't exactly Instantiated, so guard
2494
- // status explicitly rather than crash.
2495
- let out = match module.get_status() {
2496
- v8::ModuleStatus::Errored => {
2497
- Err(VmError::JsError {
2498
- message: module
2499
- .get_exception()
2500
- .to_rust_string_lossy(scope),
2501
- backtrace: vec![],
2502
- })
2503
- }
2504
- v8::ModuleStatus::Evaluated => Ok(JsVal::Undefined),
2505
- v8::ModuleStatus::Instantiated => {
2506
- did_eval = true;
2507
- v8::tc_scope!(let tc, scope);
2508
- match module.evaluate(tc) {
2509
- // A synchronous top-level throw yields a
2510
- // *rejected* promise (not None); a pending
2511
- // (TLA) or fulfilled one is remembered and
2512
- // re-checked after the drain.
2513
- Some(value) => match v8::Local::<v8::Promise>::try_from(value) {
2514
- Ok(p) if p.state() == v8::PromiseState::Rejected => {
2515
- let reason = p.result(tc);
2516
- Err(VmError::JsError {
2517
- message: reason.to_rust_string_lossy(tc),
2518
- backtrace: vec![],
2519
- })
2520
- }
2521
- Ok(p) => {
2522
- eval_promise = Some(v8::Global::new(tc, p));
2523
- Ok(JsVal::Undefined)
2524
- }
2525
- _ => Ok(JsVal::Undefined),
2526
- },
2527
- None if tc.has_terminated() => Err(VmError::Terminated),
2528
- None => {
2529
- let exc = tc.exception();
2530
- let stack = tc.stack_trace();
2531
- Err(capture_js_error(tc, exc, stack))
2532
- }
2533
- }
2534
- }
2535
- _ => Err(VmError::Runtime(
2536
- "module must be instantiated before evaluate".into(),
2537
- )),
2538
- };
2539
- auto_drain(scope, outermost);
2540
- // The drain may have settled a TLA module's promise to
2541
- // rejected — surface that instead of the provisional Ok.
2542
- let result = if let (true, Some(g)) = (out.is_ok(), eval_promise) {
2543
- let p = v8::Local::new(scope, &g);
2544
- if p.state() == v8::PromiseState::Rejected {
2545
- let reason = p.result(scope);
2546
- Err(VmError::JsError {
2547
- message: reason.to_rust_string_lossy(scope),
2548
- backtrace: vec![],
2549
- })
2550
- } else {
2551
- out
2552
- }
2553
- } else {
2554
- out
2555
- };
2556
- (did_eval, result)
2557
- }
2558
- }
2559
- }
2560
- });
2561
- VmReply::Done(outcome)
2562
- }
2563
- Request::ModuleNamespace { module_id } => {
2564
- let handle = module_handle(istate!(scope), module_id);
2565
- let outcome = match handle {
2566
- None => Err(VmError::Runtime("unknown module".into())),
2567
- Some((g, cid)) => match context_for(istate!(scope), cid) {
2568
- None => Err(VmError::Runtime("module's context is gone".into())),
2569
- Some(cx) => {
2570
- let context = v8::Local::new(scope, &cx);
2571
- let scope = &mut v8::ContextScope::new(scope, context);
2572
- let module = v8::Local::new(scope, &g);
2573
- // get_module_namespace CHECK-aborts unless the module
2574
- // is at least Instantiated.
2575
- match module.get_status() {
2576
- v8::ModuleStatus::Uninstantiated
2577
- | v8::ModuleStatus::Instantiating => Err(VmError::Runtime(
2578
- "module must be instantiated before namespace".into(),
2579
- )),
2580
- _ => {
2581
- let ns = module.get_module_namespace();
2582
- Ok(js_to_jsval(scope, ns))
2583
- }
2584
- }
2585
- }
2586
- }
2587
- };
2588
- VmReply::Done(outcome)
2589
- }
2590
- Request::ModuleStatus { module_id } => {
2591
- let handle = module_handle(istate!(scope), module_id);
2592
- let outcome = match handle {
2593
- None => Err(VmError::Runtime("unknown module".into())),
2594
- Some((g, _cid)) => {
2595
- let module = v8::Local::new(scope, &g);
2596
- let name = match module.get_status() {
2597
- v8::ModuleStatus::Uninstantiated => "uninstantiated",
2598
- v8::ModuleStatus::Instantiating => "instantiating",
2599
- v8::ModuleStatus::Instantiated => "instantiated",
2600
- v8::ModuleStatus::Evaluating => "evaluating",
2601
- v8::ModuleStatus::Evaluated => "evaluated",
2602
- v8::ModuleStatus::Errored => "errored",
2603
- };
2604
- Ok(JsVal::Str(name.into()))
2605
- }
2606
- };
2607
- VmReply::Done(outcome)
2608
- }
2609
- Request::DisposeModule { module_id } => {
2610
- let m = &mut istate!(scope).modules;
2611
- m.by_id.remove(&module_id);
2612
- for bucket in m.by_hash.values_mut() {
2613
- bucket.retain(|(_, id)| *id != module_id);
2614
- }
2615
- VmReply::Done(Ok(JsVal::Undefined))
2616
- }
2617
- Request::CompileScript {
2618
- context_id,
2619
- source,
2620
- filename,
2621
- cached_data,
2622
- produce_cache,
2623
- eager,
2624
- } => {
2625
- let ctx = context_for(istate!(scope), context_id);
2626
- let outcome = match ctx {
2627
- None => Err(VmError::Runtime("context disposed or unknown".into())),
2628
- Some(cx) => {
2629
- let context = v8::Local::new(scope, &cx);
2630
- let scope = &mut v8::ContextScope::new(scope, context);
2631
- v8::tc_scope!(let tc, scope);
2632
- match v8::String::new(tc, &source) {
2633
- None => Err(VmError::Runtime("script source too large".into())),
2634
- Some(code) => {
2635
- let origin = script_origin(tc, &filename);
2636
- let (mut src, opts) = compile_source(code, &origin, &cached_data, eager);
2637
- match v8::script_compiler::compile_unbound_script(
2638
- tc,
2639
- &mut src,
2640
- opts,
2641
- v8::script_compiler::NoCacheReason::NoReason,
2642
- ) {
2643
- Some(unbound) => {
2644
- let cache_rejected = cached_data.is_some()
2645
- && src.get_cached_data().is_some_and(|c| c.rejected());
2646
- let produced = if produce_cache {
2647
- unbound.create_code_cache().map(|c| c.to_vec())
2648
- } else {
2649
- None
2650
- };
2651
- let g = v8::Global::new(tc, unbound);
2652
- let id = {
2653
- let s = &mut istate!(tc).scripts;
2654
- let id = s.next_id;
2655
- s.next_id += 1;
2656
- s.by_id.insert(id, (g, context_id));
2657
- id
2658
- };
2659
- Ok(Compiled {
2660
- id,
2661
- cached_data: produced,
2662
- cache_rejected,
2663
- })
2664
- }
2665
- None if tc.has_terminated() => Err(VmError::Terminated),
2666
- // Compile failure = a parse error (with location).
2667
- None => {
2668
- let msg = tc
2669
- .exception()
2670
- .map(|e| e.to_rust_string_lossy(tc))
2671
- .unwrap_or_else(|| "script parse error".to_string());
2672
- let message = tc.message();
2673
- let res = message
2674
- .and_then(|m| m.get_script_resource_name(tc))
2675
- .filter(|v| v.is_string())
2676
- .map(|v| v.to_rust_string_lossy(tc))
2677
- .unwrap_or_else(|| filename.clone());
2678
- let loc = match message.and_then(|m| m.get_line_number(tc)) {
2679
- Some(line) => format!(" at {res}:{line}"),
2680
- None => format!(" at {res}"),
2681
- };
2682
- Err(VmError::Parse(format!("{msg}{loc}")))
2683
- }
2684
- }
2685
- }
2686
- }
2687
- }
2688
- };
2689
- VmReply::ScriptCompiled(outcome)
2690
- }
2691
- Request::RunScript {
2692
- script_id,
2693
- timeout_ms,
2694
- } => {
2695
- let outcome = run_js_bracketed(scope, outermost, timeout_ms, |scope, outermost| {
2696
- let handle = script_handle(istate!(scope), script_id);
2697
- match handle {
2698
- None => (false, Err(VmError::Runtime("unknown script".into()))),
2699
- Some((g, cid)) => match context_for(istate!(scope), cid) {
2700
- None => (false, Err(VmError::Runtime("script's context is gone".into()))),
2701
- Some(cx) => {
2702
- let context = v8::Local::new(scope, &cx);
2703
- let scope = &mut v8::ContextScope::new(scope, context);
2704
- let unbound = v8::Local::new(scope, &g);
2705
- let script = unbound.bind_to_current_context(scope);
2706
- let out = {
2707
- v8::tc_scope!(let tc, scope);
2708
- match script.run(tc) {
2709
- Some(value) => Ok(js_to_jsval(tc, value)),
2710
- None if tc.has_terminated() => Err(VmError::Terminated),
2711
- None => {
2712
- let exc = tc.exception();
2713
- let stack = tc.stack_trace();
2714
- Err(capture_js_error(tc, exc, stack))
2715
- }
2716
- }
2717
- };
2718
- auto_drain(scope, outermost);
2719
- (true, out)
2720
- }
2721
- }
2722
- }
2723
- });
2724
- VmReply::Done(outcome)
2725
- }
2726
- Request::DisposeScript { script_id } => {
2727
- istate!(scope).scripts.by_id.remove(&script_id);
2728
- VmReply::Done(Ok(JsVal::Undefined))
2729
- }
2730
- // Serialize the script's CURRENT compile state. The stored handle is
2731
- // the UnboundScript, which V8 fills in with inner-function bytecode as
2732
- // run() lazily compiles them — so calling this after run() captures
2733
- // the functions that actually executed (a warm cache). None when V8
2734
- // can't serialize, or when the realm was reset/disposed out from under
2735
- // the script (its handle is gone): produce nil, not an error.
2736
- Request::ScriptCodeCache { script_id } => {
2737
- let handle = script_handle(istate!(scope), script_id);
2738
- let outcome = match handle {
2739
- None => Ok(None),
2740
- Some((g, _cid)) => {
2741
- let unbound = v8::Local::new(scope, &g);
2742
- Ok(unbound.create_code_cache().map(|c| c.to_vec()))
2743
- }
2744
- };
2745
- VmReply::CodeCache(outcome)
2746
- }
2747
- // Same, for a module: get_unbound_module_script gives the shared
2748
- // compiled script, which evaluate() fills with inner-function bytecode.
2749
- // It needs the module's context entered (unlike UnboundScript), so
2750
- // a gone realm yields nil.
2751
- Request::ModuleCodeCache { module_id } => {
2752
- let mh = module_handle(istate!(scope), module_id);
2753
- let handle = mh.and_then(|(g, cid)| context_for(istate!(scope), cid).map(|cx| (g, cx)));
2754
- let outcome = match handle {
2755
- None => Ok(None),
2756
- Some((g, cx)) => {
2757
- let context = v8::Local::new(scope, &cx);
2758
- let scope = &mut v8::ContextScope::new(scope, context);
2759
- let module = v8::Local::new(scope, &g);
2760
- let unbound = module.get_unbound_module_script(scope);
2761
- Ok(unbound.create_code_cache().map(|c| c.to_vec()))
2762
- }
2763
- };
2764
- VmReply::CodeCache(outcome)
2765
- }
2766
- }
2767
- }
2768
- }
2769
-
2770
948
  // The id of |context|, read O(1) from the realm-id stamped in by new_realm.
2771
949
  // None when the context is not a LIVE realm of this isolate — a context reset
2772
950
  // away still carries its old stamp, so confirm the id currently maps back to
@@ -2925,6 +1103,10 @@ fn init_v8() {
2925
1103
  std::env::var_os("RUSTY_RACER_STACK_DEBUG").is_some(),
2926
1104
  Ordering::Relaxed,
2927
1105
  );
1106
+ WATCHDOG_DEBUG.store(
1107
+ std::env::var_os("RUSTY_RACER_WATCHDOG_DEBUG").is_some(),
1108
+ Ordering::Relaxed,
1109
+ );
2928
1110
  let platform = v8::new_default_platform(0, false).make_shared();
2929
1111
  v8::V8::initialize_platform(platform);
2930
1112
  v8::V8::initialize();
@@ -3692,6 +1874,7 @@ impl Core {
3692
1874
  }
3693
1875
 
3694
1876
  // Thin ESM primitives. compile_module returns the new module's id.
1877
+ #[allow(clippy::too_many_arguments)]
3695
1878
  fn compile_module(
3696
1879
  &self,
3697
1880
  ruby: &Ruby,
@@ -3785,6 +1968,7 @@ impl Core {
3785
1968
  }
3786
1969
 
3787
1970
  // Classic script: compile, run, dispose.
1971
+ #[allow(clippy::too_many_arguments)]
3788
1972
  fn compile_script(
3789
1973
  &self,
3790
1974
  ruby: &Ruby,
@@ -3865,11 +2049,7 @@ impl Core {
3865
2049
  fn teardown(&self) {
3866
2050
  // Stop + join the watchdog before we touch the isolate, so its handle
3867
2051
  // can't fire a terminate into an isolate we're mid-disposing.
3868
- {
3869
- let mut inner = self.watchdog.inner.lock().unwrap();
3870
- inner.shutdown = true;
3871
- }
3872
- self.watchdog.cv.notify_one();
2052
+ self.watchdog.request_shutdown();
3873
2053
  if let Some(join) = self.watchdog_join.lock().unwrap().take() {
3874
2054
  let _ = join.join();
3875
2055
  }
@@ -3946,11 +2126,7 @@ impl Drop for Core {
3946
2126
  // explicitly on the owner thread before the last wrapper drops avoids
3947
2127
  // this leak; the counter makes it observable (RustyRacer.leaked_isolate_count).
3948
2128
  LEAKED_ISOLATES.fetch_add(1, Ordering::Relaxed);
3949
- {
3950
- let mut inner = self.watchdog.inner.lock().unwrap();
3951
- inner.shutdown = true;
3952
- }
3953
- self.watchdog.cv.notify_one();
2129
+ self.watchdog.request_shutdown();
3954
2130
  }
3955
2131
  }
3956
2132
  }
@@ -4418,387 +2594,6 @@ fn resolve_module_via_ruby(
4418
2594
  Ok(Some(obj.module_id))
4419
2595
  }
4420
2596
 
4421
- fn jsval_to_ruby(ruby: &Ruby, val: &JsVal) -> Result<Value, Error> {
4422
- let mut built: HashMap<u32, Value> = HashMap::new();
4423
- jsval_to_ruby_d(ruby, val, &mut built)
4424
- }
4425
-
4426
- // `built` is a HashMap<u32, Value> — the same "bare Values in a heap container,
4427
- // hidden from the GC mark phase" shape that's a use-after-free in call_proc. It
4428
- // is safe HERE only because every entry is, at every allocating safepoint, ALSO
4429
- // reachable from a live stack local: each container arm (Array/Obj/Map/Set)
4430
- // keeps its arr/h/set as a live local while its children recurse and grafts each
4431
- // child into it (push/aset), so the child is marked transitively; Bytes inserts
4432
- // then immediately returns its live local `s`. So `built` never holds the sole
4433
- // reference. This invariant is load-bearing: do NOT refactor an arm to stash a
4434
- // value in `built` without keeping it rooted by a live local until it's grafted.
4435
-
4436
- fn jsval_to_ruby_d(
4437
- ruby: &Ruby,
4438
- val: &JsVal,
4439
- built: &mut HashMap<u32, Value>,
4440
- ) -> Result<Value, Error> {
4441
- Ok(match val {
4442
- JsVal::Undefined | JsVal::Null => ruby.qnil().as_value(),
4443
- JsVal::Bool(b) => (*b).into_value_with(ruby),
4444
- JsVal::Int(i) => (*i).into_value_with(ruby),
4445
- JsVal::Num(n) => (*n).into_value_with(ruby),
4446
- JsVal::Str(s) => s.clone().into_value_with(ruby),
4447
- // Bytes -> a binary (ASCII-8BIT) String: str_from_slice uses rb_str_new,
4448
- // which tags the result ASCII-8BIT — so it round-trips back to bytes.
4449
- // Registered under |id| so an aliased blob stays one String via Ref.
4450
- JsVal::Bytes { id, bytes } => {
4451
- let s = ruby.str_from_slice(bytes).as_value();
4452
- if let Some(id) = id {
4453
- built.insert(*id, s);
4454
- }
4455
- s
4456
- }
4457
- // Reconstruct the Ruby Integer from the hex magnitude (arbitrary
4458
- // precision); negate via Ruby so bignums stay exact.
4459
- JsVal::BigInt { negative, words } => {
4460
- let mag: Value = ruby
4461
- .str_new(&words_to_hex(words))
4462
- .funcall("to_i", (16i64,))?;
4463
- if *negative {
4464
- mag.funcall("-@", ())?
4465
- } else {
4466
- mag
4467
- }
4468
- }
4469
- // Time.at takes seconds; carry sub-second precision as the Float. An
4470
- // invalid Date (value_of NaN) raises RangeError, matching csim's
4471
- // des_date — never a silent nil.
4472
- JsVal::Date(ms) => {
4473
- if !ms.is_finite() {
4474
- return Err(Error::new(ruby.exception_range_error(), "invalid Date"));
4475
- }
4476
- ruby.class_object()
4477
- .const_get::<_, magnus::RClass>("Time")?
4478
- .funcall::<_, _, Value>("at", (*ms / 1000.0,))?
4479
- }
4480
- // Register before filling so a Ref from a descendant resolves to the
4481
- // same Ruby object (shared/cyclic graphs keep their identity).
4482
- JsVal::Array { id, items } => {
4483
- let arr = ruby.ary_new();
4484
- built.insert(*id, arr.as_value());
4485
- for it in items {
4486
- let _ = arr.push(jsval_to_ruby_d(ruby, it, built)?);
4487
- }
4488
- arr.as_value()
4489
- }
4490
- // JS objects -> string-keyed Hashes.
4491
- JsVal::Obj { id, entries } => {
4492
- let h = ruby.hash_new();
4493
- built.insert(*id, h.as_value());
4494
- for (k, it) in entries {
4495
- let _ = h.aset(k.as_str(), jsval_to_ruby_d(ruby, it, built)?);
4496
- }
4497
- h.as_value()
4498
- }
4499
- // JS Map -> Ruby Hash (arbitrary marshalled keys, not just strings).
4500
- JsVal::Map { id, pairs } => {
4501
- let h = ruby.hash_new();
4502
- built.insert(*id, h.as_value());
4503
- for (k, v) in pairs {
4504
- let kk = jsval_to_ruby_d(ruby, k, built)?;
4505
- let vv = jsval_to_ruby_d(ruby, v, built)?;
4506
- let _ = h.aset(kk, vv);
4507
- }
4508
- h.as_value()
4509
- }
4510
- // JS Set -> Ruby Set (stdlib); build empty then add so a cyclic Set
4511
- // (a Set containing itself) resolves through the Ref table.
4512
- JsVal::Set { id, items } => {
4513
- let set: Value = ruby
4514
- .class_object()
4515
- .const_get::<_, magnus::RClass>("Set")?
4516
- .funcall("new", ())?;
4517
- built.insert(*id, set);
4518
- for it in items {
4519
- let v = jsval_to_ruby_d(ruby, it, built)?;
4520
- let _: Value = set.funcall("add", (v,))?;
4521
- }
4522
- set
4523
- }
4524
- JsVal::Ref(id) => built
4525
- .get(id)
4526
- .copied()
4527
- .unwrap_or_else(|| ruby.qnil().as_value()),
4528
- })
4529
- }
4530
-
4531
- // A Ruby String marshalled by its encoding TAG (the tag is the type):
4532
- // - ASCII-8BIT (binary) -> JsVal::Bytes (a JS Uint8Array);
4533
- // - any text encoding -> JsVal::Str (UTF-8). Already-UTF-8 text is taken
4534
- // as-is; other text encodings transcode (Ruby raises on unmappable bytes).
4535
- // Either way the bytes must be VALID UTF-8 — invalid bytes RAISE, never
4536
- // silently degrade to U+FFFD (loud failure beats silent corruption). A
4537
- // text String mis-tagged binary surfaces loudly too (it becomes a Uint8Array).
4538
- fn string_to_jsval(ruby: &Ruby, s: RString) -> Result<JsVal, Error> {
4539
- use magnus::encoding::EncodingCapable;
4540
- if s.enc_get() == ruby.ascii8bit_encindex() {
4541
- // Binary: the bytes ARE the value (O(n) copy, no inflation). id: None —
4542
- // the identity-tracked path is the direct-String branch in
4543
- // ruby_to_jsval_d; a to_str result reaching here is transient.
4544
- return Ok(JsVal::Bytes {
4545
- id: None,
4546
- bytes: unsafe { s.as_slice() }.to_vec(),
4547
- });
4548
- }
4549
- // Text. encode('UTF-8') on an already-UTF-8 source is a no-op that does NOT
4550
- // validate, so skip it (one fewer copy) and let the from_utf8 check below
4551
- // catch invalid bytes; other encodings transcode (raising on unmappable).
4552
- let utf8: RString = if s.enc_get() == ruby.utf8_encindex() {
4553
- s
4554
- } else {
4555
- s.funcall("encode", ("UTF-8",))?
4556
- };
4557
- // Build the Rust String with a real UTF-8 check (not lossy): invalid bytes
4558
- // in a text-tagged String are an error, not silent U+FFFD substitution.
4559
- match String::from_utf8(unsafe { utf8.as_slice() }.to_vec()) {
4560
- Ok(s) => Ok(JsVal::Str(s)),
4561
- Err(_) => Err(Error::new(
4562
- ruby
4563
- .class_object()
4564
- .const_get::<_, ExceptionClass>("EncodingError")
4565
- .unwrap_or_else(|_| ruby.exception_runtime_error()),
4566
- "text-tagged String contains invalid UTF-8 bytes",
4567
- )),
4568
- }
4569
- }
4570
-
4571
- // A JS object key must be a string. A Ruby String key crosses by its bytes as
4572
- // UTF-8 — but unlike a binary VALUE (which becomes a Uint8Array), a key has
4573
- // nowhere to put raw bytes, so invalid UTF-8 RAISES rather than silently
4574
- // degrading to U+FFFD. None for a non-String (the caller then tries to_s).
4575
- fn string_key(ruby: &Ruby, val: Value) -> Option<Result<String, Error>> {
4576
- let s = RString::from_value(val)?;
4577
- let bytes = unsafe { s.as_slice() }.to_vec();
4578
- Some(String::from_utf8(bytes).map_err(|_| {
4579
- Error::new(
4580
- ruby.class_object()
4581
- .const_get::<_, ExceptionClass>("EncodingError")
4582
- .unwrap_or_else(|_| ruby.exception_runtime_error()),
4583
- "hash key is not valid UTF-8",
4584
- )
4585
- }))
4586
- }
4587
-
4588
- // A Ruby String's bytes interpreted as UTF-8 (invalid sequences become U+FFFD),
4589
- // regardless of the encoding tag. Used for the depth-truncation to_s fallback,
4590
- // where the value is already being lossily summarised.
4591
- fn lossy_string(val: Value) -> Option<String> {
4592
- let s = RString::from_value(val)?;
4593
- // Copy the bytes out before any further Ruby call can move/free them.
4594
- let bytes = unsafe { s.as_slice() }.to_vec();
4595
- Some(String::from_utf8_lossy(&bytes).into_owned())
4596
- }
4597
-
4598
- // Tracks Ruby containers already emitted this marshal (by object_id, which is
4599
- // exact — no collision handling needed) so shared/cyclic structures become Refs.
4600
- #[derive(Default)]
4601
- struct RbSeen {
4602
- next_id: u32,
4603
- map: HashMap<usize, u32>,
4604
- }
4605
-
4606
- fn ruby_to_jsval(val: Value) -> Result<JsVal, Error> {
4607
- let mut seen = RbSeen::default();
4608
- ruby_to_jsval_d(val, &mut seen, 0)
4609
- }
4610
-
4611
- fn ruby_to_jsval_d(val: Value, seen: &mut RbSeen, depth: u32) -> Result<JsVal, Error> {
4612
- let ruby = Ruby::get().unwrap();
4613
- if val.is_nil() {
4614
- return Ok(JsVal::Null);
4615
- }
4616
- // NB: bool::try_convert is RTEST (truthiness) — it returns Ok(true) for
4617
- // ANY non-false value — so check the actual true/false singletons by
4618
- // identity instead, or every Integer/String/Array would marshal as `true`.
4619
- if val.eql(ruby.qtrue()).unwrap_or(false) {
4620
- return Ok(JsVal::Bool(true));
4621
- }
4622
- if val.eql(ruby.qfalse()).unwrap_or(false) {
4623
- return Ok(JsVal::Bool(false));
4624
- }
4625
- // Ruby Time -> JS Date. Must precede the numeric checks: magnus's
4626
- // i64/f64 TryConvert coerces a Time via to_i/to_f, so it would otherwise
4627
- // marshal as a bare epoch number. Time#to_f is epoch seconds; Date wants ms.
4628
- if let Ok(time_class) = ruby.class_object().const_get::<_, magnus::RClass>("Time") {
4629
- if val.is_kind_of(time_class) {
4630
- let sec = val.funcall::<_, _, f64>("to_f", ())?;
4631
- return Ok(JsVal::Date(sec * 1000.0));
4632
- }
4633
- }
4634
- // Integer. A JS Number is an f64, so only integers exactly representable
4635
- // there (|n| <= 2^53) become Int/Number; anything larger (the rest of the
4636
- // i64 range AND true bignums) becomes a BigInt so no precision is lost.
4637
- // Use a strict Integer type check, NOT magnus::Integer::try_convert, which
4638
- // coerces a Float / to_int object — that would turn e.g. 1e300 into a BigInt
4639
- // instead of a Number.
4640
- if let Ok(int_class) = ruby.class_object().const_get::<_, magnus::RClass>("Integer") {
4641
- if val.is_kind_of(int_class) {
4642
- if let Ok(i) = i64::try_convert(val) {
4643
- if i.unsigned_abs() <= (1u64 << 53) {
4644
- return Ok(JsVal::Int(i));
4645
- }
4646
- }
4647
- let abs: Value = val.funcall("abs", ())?;
4648
- let hex: String = abs.funcall("to_s", (16i64,))?;
4649
- let negative = val.funcall::<_, _, bool>("negative?", ())?;
4650
- return Ok(JsVal::BigInt {
4651
- negative,
4652
- words: hex_to_words(&hex),
4653
- });
4654
- }
4655
- }
4656
- if let Ok(n) = f64::try_convert(val) {
4657
- return Ok(JsVal::Num(n));
4658
- }
4659
- // Bare Symbol -> JS string (one-way: it comes back as a Ruby String). A
4660
- // binary-encoded symbol surfaces the same curated EncodingError as a text
4661
- // String with invalid UTF-8, not magnus's raw "expected utf-8" message.
4662
- if let Some(sym) = magnus::Symbol::from_value(val) {
4663
- let name = sym.name().map_err(|_| {
4664
- Error::new(
4665
- ruby.class_object()
4666
- .const_get::<_, ExceptionClass>("EncodingError")
4667
- .unwrap_or_else(|_| ruby.exception_runtime_error()),
4668
- "symbol name is not valid UTF-8",
4669
- )
4670
- })?;
4671
- return Ok(JsVal::Str(name.into_owned()));
4672
- }
4673
- // Real Strings: the encoding tag is the type declaration. A binary
4674
- // (ASCII-8BIT) String -> bytes (JS Uint8Array), identity-tracked so an
4675
- // aliased blob stays one Uint8Array; any text encoding -> a JS string.
4676
- if let Some(rstr) = RString::from_value(val) {
4677
- use magnus::encoding::EncodingCapable;
4678
- if rstr.enc_get() == ruby.ascii8bit_encindex() {
4679
- // depth 0 — a binary blob is a leaf, so it stays faithful bytes even
4680
- // when deeply nested (never the depth-truncation-to-lossy-string);
4681
- // only the identity (Ref) check applies. Frozen/interned binary
4682
- // Strings share an object_id, so two `-"x".b` literals deliberately
4683
- // collapse to ONE Uint8Array (they ARE the same Ruby object).
4684
- let id = match rb_container_id(seen, val, 0)? {
4685
- RbId::New(id) => id,
4686
- RbId::Reuse(jv) => return Ok(jv),
4687
- };
4688
- return Ok(JsVal::Bytes {
4689
- id: Some(id),
4690
- bytes: unsafe { rstr.as_slice() }.to_vec(),
4691
- });
4692
- }
4693
- return string_to_jsval(&ruby, rstr);
4694
- }
4695
- // A String-like (to_str) gets the same tag-driven treatment, but its result
4696
- // is transient so it is not identity-tracked.
4697
- if val.respond_to("to_str", false).unwrap_or(false) {
4698
- let s: Value = val.funcall("to_str", ())?;
4699
- if let Some(rstr) = RString::from_value(s) {
4700
- return string_to_jsval(&ruby, rstr);
4701
- }
4702
- }
4703
- // Ruby Set -> JS Set. Before the Array/Hash checks (a Set is neither).
4704
- if let Ok(set_class) = ruby.class_object().const_get::<_, magnus::RClass>("Set") {
4705
- if val.is_kind_of(set_class) {
4706
- let id = match rb_container_id(seen, val, depth)? {
4707
- RbId::New(id) => id,
4708
- RbId::Reuse(jv) => return Ok(jv),
4709
- };
4710
- let arr: RArray = val.funcall("to_a", ())?;
4711
- let mut items = Vec::with_capacity(arr.len());
4712
- for i in 0..arr.len() {
4713
- let el: Value = arr.entry::<Value>(i as isize)?;
4714
- items.push(ruby_to_jsval_d(el, seen, depth + 1)?);
4715
- }
4716
- return Ok(JsVal::Set { id, items });
4717
- }
4718
- }
4719
- if let Ok(arr) = RArray::try_convert(val) {
4720
- let id = match rb_container_id(seen, val, depth)? {
4721
- RbId::New(id) => id,
4722
- RbId::Reuse(jv) => return Ok(jv),
4723
- };
4724
- let mut items = Vec::with_capacity(arr.len());
4725
- for i in 0..arr.len() {
4726
- let el: Value = arr.entry::<Value>(i as isize)?;
4727
- items.push(ruby_to_jsval_d(el, seen, depth + 1)?);
4728
- }
4729
- return Ok(JsVal::Array { id, items });
4730
- }
4731
- if let Ok(hash) = RHash::try_convert(val) {
4732
- let id = match rb_container_id(seen, val, depth)? {
4733
- RbId::New(id) => id,
4734
- RbId::Reuse(jv) => return Ok(jv),
4735
- };
4736
- let entries = RefCell::new(Vec::new());
4737
- hash.foreach(|k: Value, v: Value| {
4738
- // String/Symbol keys -> a UTF-8 String; anything else via to_s. A JS
4739
- // object key has nowhere to put raw bytes, so unlike a binary VALUE
4740
- // (-> Uint8Array) a binary KEY with invalid UTF-8 RAISES (string_key),
4741
- // and a to_s returning a non-String is a loud error, not a silent "".
4742
- let key = match string_key(&ruby, k) {
4743
- Some(r) => r?,
4744
- None => {
4745
- // A non-String key (Symbol, Integer, ...) -> to_s, then the
4746
- // same UTF-8 rule.
4747
- let s: Value = k.funcall("to_s", ())?;
4748
- match string_key(&ruby, s) {
4749
- Some(r) => r?,
4750
- None => {
4751
- return Err(Error::new(
4752
- ruby.exception_type_error(),
4753
- "hash key's to_s did not return a String",
4754
- ))
4755
- }
4756
- }
4757
- }
4758
- };
4759
- entries
4760
- .borrow_mut()
4761
- .push((key, ruby_to_jsval_d(v, seen, depth + 1)?));
4762
- Ok(magnus::r_hash::ForEach::Continue)
4763
- })?;
4764
- return Ok(JsVal::Obj {
4765
- id,
4766
- entries: entries.into_inner(),
4767
- });
4768
- }
4769
- Err(Error::new(
4770
- ruby.exception_type_error(),
4771
- "unsupported type crossing into JS",
4772
- ))
4773
- }
4774
-
4775
- enum RbId {
4776
- New(u32),
4777
- Reuse(JsVal),
4778
- }
4779
-
4780
- // Ruby-side mirror of js_container_id: New(id) to register and recurse, or
4781
- // Reuse(jsval) to emit directly (a Ref to an already-seen object, or a
4782
- // depth-truncated Str). Computes object_id once.
4783
- fn rb_container_id(seen: &mut RbSeen, val: Value, depth: u32) -> Result<RbId, Error> {
4784
- let oid = val.funcall::<_, _, usize>("object_id", ())?;
4785
- if let Some(id) = seen.map.get(&oid) {
4786
- return Ok(RbId::Reuse(JsVal::Ref(*id)));
4787
- }
4788
- if depth >= MAX_MARSHAL_DEPTH {
4789
- let ruby = Ruby::get().unwrap();
4790
- let s: Value = val.funcall("to_s", ())?;
4791
- let s = lossy_string(s).ok_or_else(|| {
4792
- Error::new(ruby.exception_type_error(), "to_s did not return a String")
4793
- })?;
4794
- return Ok(RbId::Reuse(JsVal::Str(s)));
4795
- }
4796
- let id = seen.next_id;
4797
- seen.next_id += 1;
4798
- seen.map.insert(oid, id);
4799
- Ok(RbId::New(id))
4800
- }
4801
-
4802
2597
  #[magnus::init]
4803
2598
  fn init(ruby: &Ruby) -> Result<(), Error> {
4804
2599
  let module = ruby.define_module("RustyRacer")?;