rusty_racer 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,292 @@
1
+ // V8 stack limit + conservative-GC-scan retargeting (in-thread: V8 runs on the
2
+ // calling Ruby thread's stack — a native pthread stack, or a Ruby Fiber's
3
+ // separate mmap'd stack). Self-contained: only raw pointers, std, libc, and the
4
+ // exported V8 symbols below — no IsolateState/JsVal/marshalling. The crate uses
5
+ // discover_scan_start_field (once per isolate), set_v8_stack_limit (per op), and
6
+ // STACK_DEBUG (set at init); everything else is private to this module.
7
+
8
+ use std::ffi::c_void;
9
+ use std::ptr::null_mut;
10
+ use std::sync::atomic::{AtomicBool, Ordering};
11
+
12
+ // rusty_v8 doesn't wrap the runtime `v8::Isolate::SetStackLimit(uintptr_t)`, so
13
+ // link the public V8 symbol directly (stable across V8 versions). It sets the
14
+ // lowest address V8's stack may reach before it throws RangeError.
15
// Raw bindings to V8 symbols that rusty_v8 does not wrap, addressed by their
// mangled C++ names. For the member functions the single pointer argument is
// the C++ `this`; GetStackStart is declared without one.
unsafe extern "C" {
    // v8::Isolate::SetStackLimit(uintptr_t): sets the lowest address V8's stack
    // may reach before it throws RangeError.
    #[link_name = "_ZN2v87Isolate13SetStackLimitEm"]
    fn v8__Isolate__SetStackLimit(this: *mut c_void, limit: usize);

    // Accessor chain down to the conservative-GC-scan Stack object:
    // Isolate::heap() then Heap::stack(). The public v8::Isolate* is passed
    // straight through as the internal isolate pointer.
    #[link_name = "_ZN2v88internal7Isolate4heapEv"]
    fn v8__internal__Isolate__heap(this: *mut c_void) -> *mut c_void;
    #[link_name = "_ZN2v88internal4Heap5stackEv"]
    fn v8__internal__Heap__stack(this: *mut c_void) -> *mut c_void;

    // Heap::SetStackStart(): resets the scan start to
    // v8::base::Stack::GetStackStart() (the native pthread top). Used only to
    // positively identify the field during discovery.
    #[link_name = "_ZN2v88internal4Heap13SetStackStartEv"]
    fn v8__internal__Heap__SetStackStart(this: *mut c_void);

    // v8::base::Stack::GetStackStart(): the value SetStackStart writes.
    #[link_name = "_ZN2v84base5Stack13GetStackStartEv"]
    fn v8__base__Stack__GetStackStart() -> usize;
}
33
+
34
+ // Locate V8's conservative-GC-scan stack_start field
35
+ // (heap::base::Stack::current_segment_.start) so set_fiber_scan_start can
36
+ // re-point it per op. The scanner walks [SP, stack_start); on a Ruby Fiber V8's
37
+ // stack_start is still the NATIVE thread top, a different region, so the walk
38
+ // runs off the fiber's mapped top into the guard page and SEGVs (the residual
39
+ // after the limit fix). We reach the Stack via V8's exported Isolate::heap()/
40
+ // Heap::stack(); the field is the first word of Stack (current_segment_ is its
41
+ // first member, .start the first field), but we VERIFY rather than trust the
42
+ // layout: Heap::SetStackStart() writes that field to base::Stack::GetStackStart(),
43
+ // so if poking a sentinel and re-calling SetStackStart restores the value at
44
+ // offset 0, that word IS the field. Any mismatch returns 0 (override disabled —
45
+ // V8 keeps its native start, i.e. the rare pre-fix crash, NEVER corruption).
46
+ // Must run with the isolate ENTERED. `real_isolate` is the raw v8::Isolate*.
47
+ pub(crate) fn discover_scan_start_field(real_isolate: *mut c_void) -> usize {
48
+ const SENTINEL: usize = 0xA5A5_A5A5_A5A5_A5A5;
49
+ unsafe {
50
+ let heap = v8__internal__Isolate__heap(real_isolate);
51
+ if heap.is_null() {
52
+ return 0;
53
+ }
54
+ let stack = v8__internal__Heap__stack(heap);
55
+ if stack.is_null() {
56
+ return 0;
57
+ }
58
+ let nt = v8__base__Stack__GetStackStart();
59
+ if nt == 0 {
60
+ return 0;
61
+ }
62
+ v8__internal__Heap__SetStackStart(heap); // start := nt
63
+ let field = stack as *mut usize; // expected &current_segment_.start
64
+ if field.read() != nt {
65
+ return 0; // offset 0 isn't the field (layout changed) — disable
66
+ }
67
+ field.write(SENTINEL);
68
+ v8__internal__Heap__SetStackStart(heap); // must rewrite the same word
69
+ if field.read() != nt {
70
+ return 0; // SetStackStart doesn't own offset 0 — disable
71
+ }
72
+ stack as usize
73
+ }
74
+ }
75
+
76
+ // The native thread's stack bounds are stable per NATIVE thread, but querying
77
+ // them (pthread, which reads /proc/self/maps for the main thread on Linux) is
78
+ // far too slow per op. Cache (bottom, top) in a native-thread-local — correct
79
+ // under M:N (each native thread caches its own stack) and ~free after the first
80
+ // op on a thread. (0, 0) if it can't be queried.
81
// Per-native-thread memo of the pthread stack's (bottom, top). The initial
// (0, 0) doubles as the "nothing cached" marker.
thread_local! {
    static STACK_BOUNDS: std::cell::Cell<(usize, usize)> = const {
        std::cell::Cell::new((0usize, 0usize))
    };
}
85
+
86
+ fn native_stack_bounds_cached() -> (usize, usize) {
87
+ STACK_BOUNDS.with(|c| {
88
+ let cached = c.get();
89
+ if cached.0 != 0 {
90
+ return cached;
91
+ }
92
+ let bounds = native_stack_bounds();
93
+ c.set(bounds);
94
+ bounds
95
+ })
96
+ }
97
+
98
+ // (bottom, top) of the CURRENT native thread's stack via pthread (uncached —
99
+ // callers go through native_stack_bounds_cached). The stack grows DOWN from top
100
+ // toward bottom. (0, 0) if it can't be queried. NB: this is the NATIVE thread's
101
+ // pthread stack; a Ruby Fiber runs on a separate mmap'd stack invisible here.
102
+ #[cfg(target_os = "linux")]
103
+ fn native_stack_bounds() -> (usize, usize) {
104
+ unsafe {
105
+ let mut attr: libc::pthread_attr_t = std::mem::zeroed();
106
+ if libc::pthread_getattr_np(libc::pthread_self(), &mut attr) != 0 {
107
+ return (0, 0);
108
+ }
109
+ let mut addr: *mut c_void = null_mut();
110
+ let mut size: libc::size_t = 0;
111
+ let rc = libc::pthread_attr_getstack(&attr, &mut addr, &mut size);
112
+ libc::pthread_attr_destroy(&mut attr);
113
+ if rc != 0 {
114
+ return (0, 0);
115
+ }
116
+ (addr as usize, addr as usize + size)
117
+ }
118
+ }
119
+
120
+ #[cfg(target_os = "macos")]
121
+ fn native_stack_bounds() -> (usize, usize) {
122
+ unsafe {
123
+ let top = libc::pthread_get_stackaddr_np(libc::pthread_self()) as usize;
124
+ let size = libc::pthread_get_stacksize_np(libc::pthread_self());
125
+ (top.saturating_sub(size), top)
126
+ }
127
+ }
128
+
129
+ #[cfg(not(any(target_os = "linux", target_os = "macos")))]
130
+ fn native_stack_bounds() -> (usize, usize) {
131
+ (0, 0)
132
+ }
133
+
134
+ // Lower bound (and upper, for caching) of the memory region containing `addr`
135
+ // — i.e. the BOTTOM of the stack `addr` is on. Used for a Ruby Fiber, whose
136
+ // mmap'd stack pthread can't see: V8's limit must sit ABOVE this bottom or a
137
+ // deep fiber recursion overflows the real stack and SEGVs the unmapped guard.
138
+ // Cached per native thread keyed by the region (parsing /proc/self/maps is
139
+ // slow): reused while successive ops stay on the same fiber. (0, 0) if unknown.
140
// Per-native-thread memo of the last [start, end) region looked up for a
// non-native stack. (0, 0) means nothing is cached.
thread_local! {
    static FIBER_REGION: std::cell::Cell<(usize, usize)> = const {
        std::cell::Cell::new((0usize, 0usize))
    };
}
143
+
144
+ fn current_region_bounds_cached(addr: usize) -> (usize, usize) {
145
+ FIBER_REGION.with(|c| {
146
+ let (lo, hi) = c.get();
147
+ if lo != 0 && addr >= lo && addr < hi {
148
+ return (lo, hi);
149
+ }
150
+ let bounds = query_region_bounds(addr);
151
+ if bounds.0 != 0 {
152
+ c.set(bounds);
153
+ }
154
+ bounds
155
+ })
156
+ }
157
+
158
+ // The [start, end) of the /proc/self/maps mapping containing `addr`. Linux only;
159
+ // (0, 0) elsewhere (and the caller falls back). Reads the file fresh — slow, so
160
+ // only called on a cache miss (a new fiber).
161
// Linux: scan /proc/self/maps for the first mapping whose [start, end) range
// contains `addr`. Returns (0, 0) if the file can't be read or nothing matches.
#[cfg(target_os = "linux")]
fn query_region_bounds(addr: usize) -> (usize, usize) {
    use std::io::Read;

    let mut maps = String::new();
    let Ok(mut file) = std::fs::File::open("/proc/self/maps") else {
        return (0, 0);
    };
    if file.read_to_string(&mut maps).is_err() {
        return (0, 0);
    }

    maps.lines()
        .filter_map(|line| {
            // Each line starts "<start>-<end> perms ..." with hex addresses;
            // lines that don't fit that shape are skipped.
            let (range, _) = line.split_once(' ')?;
            let (start, end) = range.split_once('-')?;
            let start = usize::from_str_radix(start, 16).ok()?;
            let end = usize::from_str_radix(end, 16).ok()?;
            Some((start, end))
        })
        .find(|&(start, end)| start <= addr && addr < end)
        .unwrap_or((0, 0))
}

// Other platforms have no /proc/self/maps to consult: always "unknown".
#[cfg(not(target_os = "linux"))]
fn query_region_bounds(_addr: usize) -> (usize, usize) {
    (0, 0)
}
195
+
196
// Switch for the per-op stack diagnostics printed by set_v8_stack_limit.
// Starts off; set at init from RUSTY_RACER_STACK_DEBUG.
pub(crate) static STACK_DEBUG: AtomicBool = AtomicBool::new(false);
198
+
199
+ // Re-point V8's stack limit at the CURRENT stack each op. In-thread V8 runs
200
+ // wherever the Ruby code is: usually the native thread's pthread stack, but also
201
+ // a Ruby Fiber's separate mmap'd stack (Capybara::Result is an Enumerator) that
202
+ // pthread can't see. The limit MUST sit between the current SP and the real
203
+ // bottom of whatever stack we're on:
204
+ // * Too high (above SP) and V8 declares a FALSE overflow on entry.
205
+ // * Too low (below the real bottom) and a deep recursion grows past the
206
+ // mapped stack and SEGVs the unmapped guard page below it.
207
+ // So detect the stack by comparing the SP to the cached native bounds: on the
208
+ // native stack, anchor to its pthread bottom; on a fiber, find the bottom of the
209
+ // /proc/self/maps region holding the SP (the fiber's real bottom — anchoring to
210
+ // SP minus a fixed guard punched through the bottom of Avo's small/deep Capybara
211
+ // fibers and SEGV'd). Must be called with the isolate ENTERED. `real_isolate` is
212
+ // the raw v8::Isolate* read out of iso_ptr.
213
+ //
214
+ // On a fiber it ALSO re-points V8's conservative-GC-scan stack_start (via
215
+ // scan_start_field, discovered once per isolate) to `stack_top`: Enter just set
216
+ // it to the native top, but the scanner walks [marker, stack_start), so a native
217
+ // start runs the scan off the fiber's mapped stack into unmapped memory and
218
+ // SEGVs (Avo's Capybara filter chain). scan_start_field is 0 when discovery
219
+ // failed (override disabled).
220
+ //
221
+ // LIMITATION (worker-thread fibers): the GC and a thrown exception ALSO
222
+ // `CHECK(IsOnCentralStack(SP))`, which tests the SP against
223
+ // `base::Stack::GetStackStart()` — the pthread top, cached per native thread,
224
+ // with no API to retarget — NOT the scan start we re-point above. A fiber mmap'd
225
+ // ABOVE that top (the common case on a NON-main native thread, whose stack sits
226
+ // below later fiber mmaps) fails the CHECK, so V8 aborts on the next GC or throw.
227
+ // We can fix the scan (the SEGV) but not that CHECK. On the main thread the
228
+ // process stack is the highest address, so every fiber is below it and both the
229
+ // scan and the CHECK are safe — the Capybara/Avo case. See README.
230
+ pub(crate) fn set_v8_stack_limit(real_isolate: *mut c_void, scan_start_field: usize, stack_top: usize) {
231
+ let sp_marker = 0u8;
232
+ let sp = &sp_marker as *const u8 as usize;
233
+ let (nbottom, ntop) = native_stack_bounds_cached();
234
+ let on_native = nbottom != 0 && sp > nbottom && sp <= ntop;
235
+ // Reserve below the limit for V8's own RangeError-throw frames.
236
+ const NATIVE_GUARD: usize = 128 * 1024;
237
+ // V8 throws when SP descends to the limit, then needs some real stack BELOW
238
+ // it to build the RangeError (and V8 itself allows growing a little past the
239
+ // limit — its overflow slack). On a fiber that reserve must NOT cross the
240
+ // fiber's real bottom (the mapping below it is an unmapped guard -> SEGV), so
241
+ // keep it comfortably above V8's slack.
242
+ const FIBER_RESERVE: usize = 64 * 1024;
243
+ let mut region = (0usize, 0usize);
244
+ let limit = if on_native {
245
+ nbottom + NATIVE_GUARD
246
+ } else {
247
+ // Anchor to the FIBER's real bottom (the /proc/self/maps region holding
248
+ // the SP), not the SP: SP - fixed_guard can punch through the bottom of a
249
+ // small/deep fiber stack and SEGV (Avo's deep Capybara filter chain).
250
+ // Reserve FIBER_RESERVE above the bottom for the throw, but keep the
251
+ // limit below the SP so we don't false-overflow; on a nearly-full fiber
252
+ // that clamps the headroom down (an early but CLEAN RangeError).
253
+ region = current_region_bounds_cached(sp);
254
+ if region.0 != 0 {
255
+ (region.0 + FIBER_RESERVE).min(sp.saturating_sub(8 * 1024))
256
+ } else {
257
+ sp.saturating_sub(64 * 1024) // region unknown (non-linux) — best effort
258
+ }
259
+ };
260
+ if limit == 0 {
261
+ return; // couldn't determine a sane limit — leave V8's default
262
+ }
263
+ unsafe { v8__Isolate__SetStackLimit(real_isolate, limit) };
264
+ // On a fiber, re-point V8's conservative-GC-scan stack_start to `stack_top`
265
+ // — a live address captured by the caller ABOVE every V8 frame of this op.
266
+ // Enter() set the start to the NATIVE top (a different region); the scanner
267
+ // walks [marker, start), so a native start runs it off the fiber's mapped
268
+ // top into unmapped memory and SEGVs. Anchoring to stack_top keeps the whole
269
+ // scan range between two real stack pointers (marker..stack_top), so it's
270
+ // guaranteed mapped, and every V8 root (all below stack_top) is still found.
271
+ // (We can't use the /proc/maps region top here: that mapping isn't reliably
272
+ // contiguous, so the scan could still hit a hole below it.)
273
+ if !on_native && stack_top != 0 && scan_start_field != 0 {
274
+ unsafe { (scan_start_field as *mut usize).write(stack_top) };
275
+ }
276
+ // Opt-in diagnostics (RUSTY_RACER_STACK_DEBUG): the SP vs the native stack
277
+ // [nbottom, ntop], the fiber region (if any), the per-op limit, and whether
278
+ // the SP is above the limit. A crash with sp_above_limit=false means the
279
+ // limit is wrong for the current stack.
280
+ if STACK_DEBUG.load(Ordering::Relaxed) {
281
+ eprintln!(
282
+ "[rusty stack] sp={sp:#x} nbottom={nbottom:#x} ntop={ntop:#x} \
283
+ region=[{:#x},{:#x}) limit={limit:#x} fiber={} sp_above_limit={} \
284
+ fiber_above_native={}",
285
+ region.0,
286
+ region.1,
287
+ !on_native,
288
+ sp > limit,
289
+ !on_native && nbottom != 0 && sp > ntop,
290
+ );
291
+ }
292
+ }
@@ -0,0 +1,226 @@
1
+ // The execution watchdog: a persistent per-isolate thread that fires
2
+ // TerminateExecution when an armed deadline passes, plus the request bracket
3
+ // (run_js_bracketed) that arms/disarms it around every JS-running op and maps a
4
+ // fired deadline to VmError::Terminated. Extracted from lib.rs verbatim.
5
+ //
6
+ // Only WatchdogShared is pub(crate) (IsolateState holds the Arc<WatchdogShared>
7
+ // and watchdog_loop takes it); its fields and the whole WatchdogInner/
8
+ // WatchdogFrame state stay PRIVATE — lib.rs touches the watchdog through just
9
+ // two methods, WatchdogShared::new() (initial state) and request_shutdown()
10
+ // (teardown: set the flag + wake the loop). run_js_bracketed, arm_watchdog,
11
+ // disarm_watchdog, watchdog_loop and WATCHDOG_DEBUG are pub(crate) because the
12
+ // op handlers and isolate setup (still in lib.rs) call them;
13
+ // report_watchdog_anomaly is private to this module.
14
+
15
+ use std::sync::atomic::{AtomicBool, Ordering};
16
+ use std::sync::{Arc, Condvar, Mutex};
17
+ use std::time::{Duration, Instant};
18
+
19
+ use crate::istate;
20
+ use crate::{IsolateState, JsVal, VmError};
21
+
22
+ // The watchdog runs on ONE persistent thread per isolate rather than a fresh
23
+ // std::thread per request: spawning + joining a thread on every op cost ~16µs
24
+ // (5.5x) when a timeout was set, dwarfing the actual work. The thread sleeps on
25
+ // a condvar until a deadline is armed, terminates execution once the deadline
26
+ // passes, then goes back to sleep.
27
// State shared between the V8-running thread and the persistent watchdog
// thread: the bookkeeping behind a mutex, plus the condvar the loop sleeps on.
pub(crate) struct WatchdogShared {
    inner: Mutex<WatchdogInner>,
    cv: Condvar,
}

impl WatchdogShared {
    // Builds the idle state: no armed frames, generation counter at 0, nothing
    // fired, not shut down.
    pub(crate) fn new() -> Arc<Self> {
        let idle = WatchdogInner {
            frames: Vec::new(),
            next_generation: 0,
            fired_generation: None,
            shutdown: false,
        };
        Arc::new(WatchdogShared {
            inner: Mutex::new(idle),
            cv: Condvar::new(),
        })
    }

    // Sets the shutdown flag and wakes the loop so it can exit.
    //
    // A poisoned mutex (some thread panicked while holding it) is tolerated
    // here: teardown must still be able to set the flag, and a plain bool
    // cannot be left in a torn state by a panic.
    pub(crate) fn request_shutdown(&self) {
        let mut inner = self
            .inner
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner());
        inner.shutdown = true;
        // Release the lock before notifying, as the original one-liner did.
        drop(inner);
        self.cv.notify_one();
    }
}

// One armed request's deadline. Requests can nest (a nested op arms again while
// the outer op is still running), so each arm gets its own frame identified by
// a generation number rather than sharing a single slot.
#[derive(Clone, Copy)]
struct WatchdogFrame {
    // Token returned by arm_watchdog and matched again on disarm.
    generation: u64,
    // Instant at or after which the loop terminates execution.
    deadline: Instant,
}

// Everything guarded by WatchdogShared::inner.
struct WatchdogInner {
    // Currently armed frames; the loop enforces the earliest deadline.
    frames: Vec<WatchdogFrame>,
    // Monotonic counter; each arm takes the next value as its frame's id.
    next_generation: u64,
    // Generation whose deadline the loop last terminated on; cleared by the
    // matching disarm.
    fired_generation: Option<u64>,
    // Set by request_shutdown to make the loop return.
    shutdown: bool,
}
81
+
82
+ // The persistent watchdog loop. Runs off a Send IsolateHandle so it never
83
+ // borrows the isolate the V8 thread owns.
84
+ pub(crate) fn watchdog_loop(shared: Arc<WatchdogShared>, handle: v8::IsolateHandle) {
85
+ let mut inner = shared.inner.lock().unwrap();
86
+ loop {
87
+ if inner.shutdown {
88
+ return;
89
+ }
90
+ // The earliest deadline among all armed frames is the one to enforce.
91
+ match inner.frames.iter().min_by_key(|f| f.deadline).copied() {
92
+ // Idle: sleep until a frame is armed (or shutdown).
93
+ None => inner = shared.cv.wait(inner).unwrap(),
94
+ Some(frame) => {
95
+ let now = Instant::now();
96
+ if now >= frame.deadline {
97
+ handle.terminate_execution();
98
+ inner.fired_generation = Some(frame.generation);
99
+ // Drop the fired frame so the loop moves on to the next
100
+ // deadline instead of re-firing this one every wakeup.
101
+ inner.frames.retain(|f| f.generation != frame.generation);
102
+ } else {
103
+ let (next, _) = shared.cv.wait_timeout(inner, frame.deadline - now).unwrap();
104
+ inner = next;
105
+ }
106
+ }
107
+ }
108
+ }
109
+ }
110
+
111
+ // (The watchdog Arc now lives in IsolateState; arm/disarm reach it via istate!.)
112
+
113
+ // Arm the watchdog for this request: push a frame with its own deadline and
114
+ // wake the loop. Returns the generation token to hand to `disarm_watchdog`
115
+ // (None when timeout_ms is 0 — no watchdog for this request).
116
+ pub(crate) fn arm_watchdog(scope: &mut v8::PinScope<'_, '_, ()>, timeout_ms: u64) -> Option<u64> {
117
+ if timeout_ms == 0 {
118
+ return None;
119
+ }
120
+ let shared = &istate!(scope).watchdog;
121
+ let mut inner = shared.inner.lock().unwrap();
122
+ inner.next_generation += 1;
123
+ let generation = inner.next_generation;
124
+ inner.frames.push(WatchdogFrame {
125
+ generation,
126
+ deadline: Instant::now() + Duration::from_millis(timeout_ms),
127
+ });
128
+ shared.cv.notify_one();
129
+ Some(generation)
130
+ }
131
+
132
+ // Disarm: drop THIS request's frame (leaving any outer frame still armed) and
133
+ // report whether its deadline fired. On fire the caller maps the outcome to
134
+ // Terminated and the outermost frame sweeps the leftover terminate via
135
+ // WATCHDOG_FIRED; removing only this frame keeps a late terminate from
136
+ // poisoning the next request without clobbering a still-running outer op.
137
+ pub(crate) fn disarm_watchdog(scope: &mut v8::PinScope<'_, '_, ()>, generation: Option<u64>) -> bool {
138
+ let Some(generation) = generation else {
139
+ return false;
140
+ };
141
+ let shared = &istate!(scope).watchdog;
142
+ let mut inner = shared.inner.lock().unwrap();
143
+ inner.frames.retain(|f| f.generation != generation);
144
+ let fired = inner.fired_generation == Some(generation);
145
+ if fired {
146
+ inner.fired_generation = None;
147
+ }
148
+ shared.cv.notify_one();
149
+ fired
150
+ }
151
+
152
// Switch for the leaked-terminate canary in run_js_bracketed. Starts off; set
// at init from RUSTY_RACER_WATCHDOG_DEBUG. Intended for CI rather than
// production, because a legitimate Isolate#terminate would also trip it.
pub(crate) static WATCHDOG_DEBUG: AtomicBool = AtomicBool::new(false);
157
+
158
+ // The shared bracket every JS-running request (Eval/Call/Attach/RunScript/
159
+ // EvaluateModule) needs: arm the watchdog, run |body|, then on a watchdog
160
+ // timeout flag the leftover terminate for the outermost sweep and — only if
161
+ // |body| actually ran JS (the bool it returns) — override its outcome to
162
+ // Terminated. |body| owns its ContextScope, JS call, and auto_drain, and
163
+ // returns (ran_js, outcome); the realm-disposed/unknown paths return
164
+ // (false, Err(..)) so a raced watchdog can't poison an error for work that ran
165
+ // no JS. Collapsing the five arms onto this keeps the terminate discipline in
166
+ // ONE place.
167
+ pub(crate) fn run_js_bracketed(
168
+ scope: &mut v8::PinScope<'_, '_, ()>,
169
+ outermost: bool,
170
+ timeout_ms: u64,
171
+ label: &'static str,
172
+ body: impl FnOnce(&mut v8::PinScope<'_, '_, ()>, bool) -> (bool, Result<JsVal, VmError>),
173
+ ) -> Result<JsVal, VmError> {
174
+ let started = Instant::now();
175
+ let watchdog = arm_watchdog(scope, timeout_ms);
176
+ let (ran_js, mut outcome) = body(scope, outermost);
177
+ let fired = disarm_watchdog(scope, watchdog);
178
+ // CANARY (RUSTY_RACER_WATCHDOG_DEBUG): the op's JS was terminated but THIS
179
+ // op's OWN watchdog frame did NOT fire — so a terminate LEAKED in from
180
+ // elsewhere (a prior op's timeout surviving both the end- and start-sweep
181
+ // cancels, or a user Isolate#terminate). The rare CI "next op spuriously
182
+ // terminated" bug lands here; dump the watchdog state + timing so a
183
+ // recurrence is diagnosable instead of an unreproducible mystery.
184
+ if WATCHDOG_DEBUG.load(Ordering::Relaxed)
185
+ && ran_js
186
+ && !fired
187
+ && matches!(outcome, Err(VmError::Terminated))
188
+ {
189
+ report_watchdog_anomaly(scope, label, watchdog, timeout_ms, started.elapsed());
190
+ }
191
+ if fired {
192
+ istate!(scope).watchdog_fired = true;
193
+ if ran_js {
194
+ outcome = Err(VmError::Terminated);
195
+ }
196
+ }
197
+ outcome
198
+ }
199
+
200
+ // Dump watchdog/terminate state on the leaked-terminate anomaly (see the CANARY
201
+ // in run_js_bracketed). Only reached on that rare path, and only with the debug
202
+ // flag on. elapsed_ms << timeout_ms with a clean inner = a V8-level stale
203
+ // terminate (not the Rust bookkeeping); a non-empty inner.frames /
204
+ // fired_generation would instead point at a frame-lifecycle bug.
205
+ fn report_watchdog_anomaly(
206
+ scope: &mut v8::PinScope<'_, '_, ()>,
207
+ label: &str,
208
+ this_gen: Option<u64>,
209
+ timeout_ms: u64,
210
+ elapsed: Duration,
211
+ ) {
212
+ let terminating = scope.is_execution_terminating();
213
+ let st = istate!(scope);
214
+ let watchdog_fired_flag = st.watchdog_fired;
215
+ let inner = st.watchdog.inner.lock().unwrap();
216
+ let frames: Vec<u64> = inner.frames.iter().map(|f| f.generation).collect();
217
+ eprintln!(
218
+ "[rusty watchdog ANOMALY] op={label} terminated but its OWN watchdog frame \
219
+ did NOT fire (leaked terminate). this_gen={this_gen:?} timeout_ms={timeout_ms} \
220
+ elapsed_ms={:.2} is_terminating={terminating} watchdog_fired_flag={watchdog_fired_flag} \
221
+ inner.frames={frames:?} inner.fired_generation={:?} inner.next_generation={}",
222
+ elapsed.as_secs_f64() * 1000.0,
223
+ inner.fired_generation,
224
+ inner.next_generation,
225
+ );
226
+ }
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ # An ExecJS runtime backed by rusty_racer, so any ExecJS consumer (asset
4
+ # pipelines, CoffeeScript/Babel/Uglify wrappers, …) can run on V8-in-Ruby with
5
+ # no code change: `require "rusty_racer/execjs"` then
6
+ #
7
+ # ExecJS.runtime = RustyRacer::ExecJSRuntime.new
8
+ #
9
+ # This file is OPTIONAL — rusty_racer never loads it itself, so execjs stays a
10
+ # non-dependency. Requiring it declares you want the integration (and so have
11
+ # execjs installed).
12
+ #
13
+ # Values cross the boundary with ExecJS's JSON semantics, not rusty_racer's
14
+ # richer native marshalling: every result is taken through `JSON.stringify` on
15
+ # the V8 side and parsed back. That is exactly the contract ExecJS's external
16
+ # runtimes (Node, …) provide — functions and `undefined` drop out
17
+ # (`[1, function(){}]` => `[1, nil]`, `{a:1, f(){}}` => `{"a"=>1}`), Dates become
18
+ # ISO strings — so a consumer sees identical results whatever runtime it picked.
19
+
20
+ require 'json'
21
+ require 'execjs'
22
+ require 'rusty_racer'
23
+
24
module RustyRacer
  # ExecJS runtime adapter: lets ExecJS consumers run their JavaScript on a
  # rusty_racer isolate.
  class ExecJSRuntime < ExecJS::Runtime
    # One ExecJS context, backed by its own RustyRacer::Isolate.
    class Context < ExecJS::Runtime::Context
      # Filename passed with every evaluation so JS stack frames read
      # "(execjs):line:col".
      LOCATION = '(execjs)'

      # Creates the isolate, removes the default `console` global, then
      # evaluates `source` (if it is not blank) as the context's preamble.
      def initialize(_runtime, source = '', _options = {})
        @isolate = RustyRacer::Isolate.new
        @context = @isolate.context
        @context.eval('delete globalThis.console')

        source = encode(source)
        return unless /\S/.match?(source)

        translate { @context.eval(source, filename: LOCATION) }
      end

      # Runs `source` as the body of an immediately-invoked function and
      # returns whatever it `return`s; blank source gives nil. The newline
      # before `}` terminates any trailing `//` comment in the source.
      def exec(source, _options = {})
        source = encode(source)
        return unless /\S/.match?(source)

        eval("(function(){#{source}\n})()")
      end

      # Evaluates `source` as an expression and returns its value after a
      # JSON.stringify / JSON.parse round trip; blank source gives nil. The
      # parentheses make a leading `{` an object literal, and the newline keeps
      # a trailing `//` comment from swallowing the closing parens.
      def eval(source, _options = {})
        source = encode(source)
        return unless /\S/.match?(source)

        script = "JSON.stringify((#{source}\n))"
        json = translate { @context.eval(script, filename: LOCATION) }
        # A nil result (JSON.stringify produced `undefined`) stays nil.
        JSON.parse(json) unless json.nil?
      end

      # Evaluates `identifier` (any JS expression yielding a function) and
      # applies it with `this` and the JSON-encoded `args`.
      def call(identifier, *args)
        callee = encode(identifier)
        eval("(#{callee}\n).apply(this, #{JSON.generate(args)})")
      end

      private

      # Transcodes `source` to UTF-8; input that cannot be converted raises
      # from String#encode.
      def encode(source)
        source.encode(Encoding::UTF_8)
      end

      # Runs the block, re-raising rusty_racer errors as ExecJS ones:
      # ParseError becomes ExecJS::RuntimeError with a synthetic "(execjs):1"
      # backtrace; EvalError becomes ExecJS::ProgramError keeping its own
      # backtrace, or the synthetic frame when it has none.
      def translate
        yield
      rescue RustyRacer::ParseError => e
        raise wrap(ExecJS::RuntimeError, e.message, ["#{LOCATION}:1"])
      rescue RustyRacer::EvalError => e
        frames = Array(e.backtrace)
        frames = ["#{LOCATION}:1"] if frames.empty?
        raise wrap(ExecJS::ProgramError, e.message, frames)
      end

      # Builds a `klass` exception carrying `message` and `backtrace`.
      def wrap(klass, message, backtrace)
        klass.new(message).tap { |error| error.set_backtrace(backtrace) }
      end
    end

    # Human-readable runtime name.
    def name
      'RustyRacer (V8)'
    end

    # True when the rusty_racer library can be required.
    def available?
      require 'rusty_racer'
      true
    rescue LoadError
      false
    end
  end
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module RustyRacer
4
- VERSION = "0.1.1"
4
+ VERSION = "0.1.2"
5
5
  end