rusty_racer 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,798 @@
1
+ // Value marshalling: the conversion layer between V8 handles, the thread-crossing
2
+ // plain-data JsVal, and Ruby objects. Extracted from lib.rs verbatim. JsVal and
3
+ // the four conversion entry points (js_to_jsval, jsval_to_js, jsval_to_ruby,
4
+ // ruby_to_jsval) are pub(crate); the depth-recursion helpers, the seen-tables,
5
+ // and the hex<->words BigInt codec stay private to this module.
6
+
7
+ use std::cell::RefCell;
8
+ use std::collections::HashMap;
9
+ use std::ffi::c_void;
10
+
11
+ use magnus::value::ReprValue;
12
+ use magnus::{
13
+ prelude::*, Error, ExceptionClass, IntoValue, RArray, RHash, RString, Ruby, TryConvert, Value,
14
+ };
15
+
16
+ // ---------------------------------------------------------------------------
17
+ // Values crossing threads: plain Rust data. No Ruby allocation off the Ruby
18
+ // thread, no V8 handles off the V8 thread, no wire format. Replaces serde.c.
19
+ // ---------------------------------------------------------------------------
20
/// Plain-data value tree exchanged between the V8 side and the Ruby side.
///
/// It holds no V8 handles and no Ruby objects, only owned Rust data, which is
/// what allows it to be handed from one thread to the other.
#[derive(Debug, Clone)]
pub(crate) enum JsVal {
    /// JS `undefined`.
    Undefined,
    /// JS `null` / Ruby `nil`.
    Null,
    /// A boolean.
    Bool(bool),
    /// An integer that is carried as a plain JS Number.
    Int(i64),
    /// A floating-point number.
    Num(f64),
    /// Text, held as a Rust (UTF-8) `String`.
    Str(String),
    /// Raw bytes: a JS Uint8Array / ArrayBuffer (or view) on one side and a
    /// Ruby ASCII-8BIT (binary-tagged) String on the other. The encoding tag
    /// acts as the type declaration, so the round trip is symmetric
    /// (Uint8Array -> binary String -> Uint8Array) with no lossy text
    /// coercion. When `id` is `Some`, the blob is entered in the Ref table so
    /// an aliased blob keeps a single identity; `None` means it is not
    /// identity-tracked (for example a `to_str` result).
    Bytes { id: Option<u32>, bytes: Vec<u8> },
    /// Arbitrary-precision integer (JS BigInt <-> Ruby Integer), stored the
    /// way V8 represents it: a sign plus little-endian `u64` limbs. Ruby
    /// reaches the same value through a hex string, so nothing is truncated
    /// the way it would be by going through `f64`.
    BigInt { negative: bool, words: Vec<u64> },
    /// JS Date <-> Ruby Time, as milliseconds since the Unix epoch (the unit
    /// of `v8::Date::value_of`).
    Date(f64),
    /// An array. Like every container it carries a serialization id: the
    /// first sighting of an object is emitted with its id, and any later
    /// sighting (a sibling sharing it, or a cycle back to an ancestor) is
    /// emitted as `Ref(id)` rather than being expanded again.
    Array { id: u32, items: Vec<JsVal> },
    /// JS object / Ruby Hash with string keys, in insertion order.
    Obj { id: u32, entries: Vec<(String, JsVal)> },
    /// JS Map <-> Ruby Hash. Keys may be any value, not only strings, which
    /// is why this is separate from `Obj`. Insertion order is kept.
    Map { id: u32, pairs: Vec<(JsVal, JsVal)> },
    /// JS Set <-> Ruby Set (stdlib).
    Set { id: u32, items: Vec<JsVal> },
    /// Back-reference to a container that was already emitted. Preserves
    /// identity and lets cycles be represented instead of being cut off at a
    /// depth cap.
    Ref(u32),
}
60
+
61
/// Nesting depth at which a not-yet-seen container is no longer expanded.
///
/// Cycles and shared sub-graphs are already dealt with by the Ref table (see
/// `JsVal::Ref`), so this limit exists only to protect the native stack from
/// a pathologically deep but acyclic graph. It is chosen well above any
/// realistic nesting.
const MAX_MARSHAL_DEPTH: u32 = 256;
65
+
66
/// Renders little-endian `u64` limbs as a big-endian hexadecimal magnitude.
///
/// The result has no sign and no `0x` prefix; it is the common currency
/// between V8 BigInt words and Ruby's base-16 integer parsing. The most
/// significant limb is written without padding, every following limb as
/// exactly 16 hex digits. An empty slice yields `"0"`.
fn words_to_hex(words: &[u64]) -> String {
    // Walk from the most significant limb down to the least significant one.
    let mut limbs = words.iter().rev();

    let Some(top) = limbs.next() else {
        return String::from("0");
    };

    // Top limb: natural width (at least one digit, so the string is never
    // empty from here on).
    let mut hex = format!("{top:x}");
    // Remaining limbs: zero-padded to the full 64-bit width.
    for limb in limbs {
        hex.push_str(&format!("{limb:016x}"));
    }
    hex
}
82
+
83
/// Parses a big-endian hexadecimal magnitude into little-endian `u64` limbs
/// (the inverse of `words_to_hex`).
///
/// The input is consumed from its end in groups of 16 hex digits; the
/// leading group may be shorter. An empty input yields a single zero limb.
///
/// A group that is not valid hexadecimal contributes a zero limb (best
/// effort, as before). The input is walked as bytes rather than sliced as a
/// `str`, so text containing multi-byte characters can no longer panic on a
/// non-character-boundary slice; such a group simply counts as invalid.
fn hex_to_words(hex: &str) -> Vec<u64> {
    let mut words: Vec<u64> = hex
        .as_bytes()
        // 16 hex digits == one 64-bit limb; `rchunks` starts at the least
        // significant end, which is exactly little-endian limb order.
        .rchunks(16)
        .map(|group| {
            std::str::from_utf8(group)
                .ok()
                .and_then(|digits| u64::from_str_radix(digits, 16).ok())
                .unwrap_or(0)
        })
        .collect();
    if words.is_empty() {
        words.push(0);
    }
    words
}
97
+
98
+ // Tracks objects already emitted this marshal so a re-encounter becomes a
99
+ // Ref instead of re-expansion. Buckets by V8 identity hash (which can collide),
100
+ // disambiguated by Local equality — the same trick the module registry uses.
101
+ #[derive(Default)]
102
+ struct JsSeen {
103
+ next_id: u32,
104
+ map: HashMap<i32, Vec<(v8::Global<v8::Object>, u32)>>,
105
+ }
106
+
107
+ // Decide how to emit a container object: Ok(id) = first sighting, register it
108
+ // and recurse; Err(jsval) = emit this directly and stop (a Ref to an already-
109
+ // seen object, or a truncated Str at the depth backstop). Centralising this in
110
+ // one place keeps the four container arms (array/object/map/set) in lockstep —
111
+ // and crucially orders the checks so a depth-truncated object is NEVER assigned
112
+ // an id (which would leave a sibling Ref dangling).
113
+ fn js_container_id(
114
+ scope: &mut v8::PinScope<'_, '_>,
115
+ seen: &mut JsSeen,
116
+ value: v8::Local<v8::Value>,
117
+ obj: v8::Local<v8::Object>,
118
+ depth: u32,
119
+ ) -> Result<u32, JsVal> {
120
+ let hash = obj.get_identity_hash().get();
121
+ if let Some(bucket) = seen.map.get(&hash) {
122
+ for (g, id) in bucket {
123
+ if v8::Local::new(scope, g) == obj {
124
+ return Err(JsVal::Ref(*id));
125
+ }
126
+ }
127
+ }
128
+ // First sighting but too deep: truncate WITHOUT registering, so no later
129
+ // Ref can target a container that was never emitted.
130
+ if depth >= MAX_MARSHAL_DEPTH {
131
+ return Err(JsVal::Str(value.to_rust_string_lossy(scope)));
132
+ }
133
+ let id = seen.next_id;
134
+ seen.next_id += 1;
135
+ let g = v8::Global::new(scope, obj);
136
+ seen.map.entry(hash).or_default().push((g, id));
137
+ Ok(id)
138
+ }
139
+
140
/// Copies `len` bytes out of a V8 (Shared)ArrayBuffer backing pointer into
/// an owned `Vec<u8>`, using a single allocation.
///
/// `data` is expected to be `None` only for a zero-length buffer (per the
/// callers' contract); in that case an empty vector is returned.
fn copy_buffer_bytes(data: Option<std::ptr::NonNull<c_void>>, len: usize) -> Vec<u8> {
    match data {
        Some(ptr) => {
            // SAFETY: relies on the caller passing a `len` that does not
            // exceed the readable extent behind `ptr` (callers pass the
            // buffer's own byte length). `u8` has no alignment requirement.
            let source = unsafe { std::slice::from_raw_parts(ptr.as_ptr() as *const u8, len) };
            source.to_vec()
        }
        None => Vec::with_capacity(len),
    }
}
153
+
154
+ pub(crate) fn js_to_jsval(scope: &mut v8::PinScope<'_, '_>, value: v8::Local<v8::Value>) -> JsVal {
155
+ let mut seen = JsSeen::default();
156
+ js_to_jsval_d(scope, value, &mut seen, 0)
157
+ }
158
+
159
+ fn js_to_jsval_d(
160
+ scope: &mut v8::PinScope<'_, '_>,
161
+ value: v8::Local<v8::Value>,
162
+ seen: &mut JsSeen,
163
+ depth: u32,
164
+ ) -> JsVal {
165
+ if value.is_undefined() {
166
+ return JsVal::Undefined;
167
+ }
168
+ if value.is_null() {
169
+ return JsVal::Null;
170
+ }
171
+ if value.is_boolean() {
172
+ return JsVal::Bool(value.boolean_value(scope));
173
+ }
174
+ if value.is_int32() {
175
+ return JsVal::Int(value.integer_value(scope).unwrap_or(0));
176
+ }
177
+ if value.is_number() {
178
+ return JsVal::Num(value.number_value(scope).unwrap_or(f64::NAN));
179
+ }
180
+ if value.is_big_int() {
181
+ if let Ok(bi) = v8::Local::<v8::BigInt>::try_from(value) {
182
+ let mut words = vec![0u64; bi.word_count()];
183
+ let (negative, _) = bi.to_words_array(&mut words);
184
+ return JsVal::BigInt { negative, words };
185
+ }
186
+ }
187
+ // Date before the generic object branch (a Date *is* an object).
188
+ if value.is_date() {
189
+ if let Ok(date) = v8::Local::<v8::Date>::try_from(value) {
190
+ return JsVal::Date(date.value_of());
191
+ }
192
+ }
193
+ // Binary buffers before the generic object branch (they are objects too).
194
+ // A TypedArray/DataView copies its VIEWED window; a bare ArrayBuffer or
195
+ // SharedArrayBuffer copies the whole buffer. All become a Ruby binary
196
+ // String. (Without the SharedArrayBuffer arm a bare SAB would fall through
197
+ // to the plain-object branch and marshal as an empty Hash — silent loss.)
198
+ if value.is_array_buffer_view() {
199
+ if let Ok(view) = v8::Local::<v8::ArrayBufferView>::try_from(value) {
200
+ let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
201
+ // depth 0: a buffer is a leaf (no recursion into children), so it
202
+ // never risks native-stack overflow and must stay faithful bytes
203
+ // even when deeply nested — only the identity (Ref) check applies,
204
+ // never the depth-truncation-to-lossy-string the generic path uses.
205
+ let id = match js_container_id(scope, seen, value, obj, 0) {
206
+ Ok(id) => id,
207
+ Err(jsval) => return jsval, // a Ref to the same buffer
208
+ };
209
+ let len = view.byte_length();
210
+ let mut buf: Vec<u8> = Vec::with_capacity(len);
211
+ // copy_contents_uninit writes into the UNINITIALIZED spare capacity
212
+ // (a &mut [MaybeUninit<u8>]) — never forming a &mut [u8] over uninit
213
+ // memory the way copy_contents would (that's UB). set_len to exactly
214
+ // what it wrote so a detached/short view never exposes uninit bytes.
215
+ let n = view.copy_contents_uninit(&mut buf.spare_capacity_mut()[..len]);
216
+ unsafe { buf.set_len(n) };
217
+ return JsVal::Bytes { id: Some(id), bytes: buf };
218
+ }
219
+ }
220
+ if value.is_array_buffer() {
221
+ if let Ok(ab) = v8::Local::<v8::ArrayBuffer>::try_from(value) {
222
+ let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
223
+ // depth 0 — a buffer is a leaf; see the view arm above.
224
+ let id = match js_container_id(scope, seen, value, obj, 0) {
225
+ Ok(id) => id,
226
+ Err(jsval) => return jsval,
227
+ };
228
+ return JsVal::Bytes {
229
+ id: Some(id),
230
+ bytes: copy_buffer_bytes(ab.data(), ab.byte_length()),
231
+ };
232
+ }
233
+ }
234
+ if value.is_shared_array_buffer() {
235
+ if let Ok(sab) = v8::Local::<v8::SharedArrayBuffer>::try_from(value) {
236
+ let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
237
+ // depth 0 — a buffer is a leaf; see the view arm above.
238
+ let id = match js_container_id(scope, seen, value, obj, 0) {
239
+ Ok(id) => id,
240
+ Err(jsval) => return jsval,
241
+ };
242
+ let store = sab.get_backing_store();
243
+ return JsVal::Bytes {
244
+ id: Some(id),
245
+ bytes: copy_buffer_bytes(store.data(), sab.byte_length()),
246
+ };
247
+ }
248
+ }
249
+ // Map/Set before the generic object branch (both are objects).
250
+ if value.is_map() {
251
+ let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
252
+ let id = match js_container_id(scope, seen, value, obj, depth) {
253
+ Ok(id) => id,
254
+ Err(jsval) => return jsval,
255
+ };
256
+ let map = v8::Local::<v8::Map>::try_from(value).unwrap();
257
+ let arr = map.as_array(scope); // [k0, v0, k1, v1, ...]
258
+ let mut pairs = Vec::with_capacity((arr.length() / 2) as usize);
259
+ let mut i = 0;
260
+ while i + 1 < arr.length() {
261
+ let k = arr.get_index(scope, i).unwrap_or_else(|| v8::undefined(scope).into());
262
+ let v = arr.get_index(scope, i + 1).unwrap_or_else(|| v8::undefined(scope).into());
263
+ let kj = js_to_jsval_d(scope, k, seen, depth + 1);
264
+ let vj = js_to_jsval_d(scope, v, seen, depth + 1);
265
+ pairs.push((kj, vj));
266
+ i += 2;
267
+ }
268
+ return JsVal::Map { id, pairs };
269
+ }
270
+ if value.is_set() {
271
+ let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
272
+ let id = match js_container_id(scope, seen, value, obj, depth) {
273
+ Ok(id) => id,
274
+ Err(jsval) => return jsval,
275
+ };
276
+ let set = v8::Local::<v8::Set>::try_from(value).unwrap();
277
+ let arr = set.as_array(scope);
278
+ let mut items = Vec::with_capacity(arr.length() as usize);
279
+ for i in 0..arr.length() {
280
+ let el = arr.get_index(scope, i).unwrap_or_else(|| v8::undefined(scope).into());
281
+ items.push(js_to_jsval_d(scope, el, seen, depth + 1));
282
+ }
283
+ return JsVal::Set { id, items };
284
+ }
285
+ if value.is_array() {
286
+ let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
287
+ let id = match js_container_id(scope, seen, value, obj, depth) {
288
+ Ok(id) => id,
289
+ Err(jsval) => return jsval,
290
+ };
291
+ let arr = v8::Local::<v8::Array>::try_from(value).unwrap();
292
+ let mut items = Vec::with_capacity(arr.length() as usize);
293
+ for i in 0..arr.length() {
294
+ let el = arr
295
+ .get_index(scope, i)
296
+ .unwrap_or_else(|| v8::undefined(scope).into());
297
+ items.push(js_to_jsval_d(scope, el, seen, depth + 1));
298
+ }
299
+ return JsVal::Array { id, items };
300
+ }
301
+ // Plain object -> string-keyed Obj. Functions/Date/etc. fall through to
302
+ // their toString (the spike's primitive escape hatch).
303
+ if value.is_object() && !value.is_function() {
304
+ let obj = v8::Local::<v8::Object>::try_from(value).unwrap();
305
+ let id = match js_container_id(scope, seen, value, obj, depth) {
306
+ Ok(id) => id,
307
+ Err(jsval) => return jsval,
308
+ };
309
+ if let Some(names) = obj.get_own_property_names(scope, Default::default()) {
310
+ let mut entries = Vec::with_capacity(names.length() as usize);
311
+ for i in 0..names.length() {
312
+ let Some(key) = names.get_index(scope, i) else {
313
+ continue;
314
+ };
315
+ let key_str = key.to_rust_string_lossy(scope);
316
+ let val = obj
317
+ .get(scope, key)
318
+ .unwrap_or_else(|| v8::undefined(scope).into());
319
+ entries.push((key_str, js_to_jsval_d(scope, val, seen, depth + 1)));
320
+ }
321
+ return JsVal::Obj { id, entries };
322
+ }
323
+ }
324
+ JsVal::Str(value.to_rust_string_lossy(scope))
325
+ }
326
+
327
+ // Owned-by-value (not &JsVal): a JsVal::Bytes hands its Vec straight to V8's
328
+ // backing store with no copy of the payload, so a large binary blob crosses
329
+ // Ruby->JS with zero extra allocation.
330
+ pub(crate) fn jsval_to_js<'s>(scope: &mut v8::PinScope<'s, '_>, val: JsVal) -> v8::Local<'s, v8::Value> {
331
+ let mut built: HashMap<u32, v8::Local<'s, v8::Value>> = HashMap::new();
332
+ jsval_to_js_d(scope, val, &mut built)
333
+ }
334
+
335
+ fn jsval_to_js_d<'s>(
336
+ scope: &mut v8::PinScope<'s, '_>,
337
+ val: JsVal,
338
+ built: &mut HashMap<u32, v8::Local<'s, v8::Value>>,
339
+ ) -> v8::Local<'s, v8::Value> {
340
+ match val {
341
+ JsVal::Undefined => v8::undefined(scope).into(),
342
+ JsVal::Null => v8::null(scope).into(),
343
+ JsVal::Bool(b) => v8::Boolean::new(scope, b).into(),
344
+ JsVal::Int(i) => v8::Number::new(scope, i as f64).into(),
345
+ JsVal::Num(n) => v8::Number::new(scope, n).into(),
346
+ JsVal::Str(s) => v8::String::new(scope, &s)
347
+ .map(|s| s.into())
348
+ .unwrap_or_else(|| v8::undefined(scope).into()),
349
+ // Bytes -> Uint8Array, moving the Vec into V8's backing store (no copy
350
+ // of the payload). Registered under |id| so an aliased blob resolves to
351
+ // the same Uint8Array via Ref.
352
+ JsVal::Bytes { id, bytes } => {
353
+ let len = bytes.len();
354
+ let store = v8::ArrayBuffer::new_backing_store_from_vec(bytes).make_shared();
355
+ let ab = v8::ArrayBuffer::with_backing_store(scope, &store);
356
+ let arr: v8::Local<v8::Value> = v8::Uint8Array::new(scope, ab, 0, len)
357
+ .map(|a| a.into())
358
+ .unwrap_or_else(|| v8::undefined(scope).into());
359
+ if let Some(id) = id {
360
+ built.insert(id, arr);
361
+ }
362
+ arr
363
+ }
364
+ JsVal::BigInt { negative, words } => v8::BigInt::new_from_words(scope, negative, &words)
365
+ .map(|b| b.into())
366
+ .unwrap_or_else(|| v8::undefined(scope).into()),
367
+ JsVal::Date(ms) => v8::Date::new(scope, ms)
368
+ .map(|d| d.into())
369
+ .unwrap_or_else(|| v8::undefined(scope).into()),
370
+ // Register the container under its id BEFORE filling it, so a Ref from
371
+ // a descendant (a cycle back to here) resolves to this same object.
372
+ JsVal::Array { id, items } => {
373
+ let arr = v8::Array::new(scope, items.len() as i32);
374
+ built.insert(id, arr.into());
375
+ for (i, it) in items.into_iter().enumerate() {
376
+ let v = jsval_to_js_d(scope, it, built);
377
+ arr.set_index(scope, i as u32, v);
378
+ }
379
+ arr.into()
380
+ }
381
+ JsVal::Obj { id, entries } => {
382
+ let obj = v8::Object::new(scope);
383
+ built.insert(id, obj.into());
384
+ for (k, it) in entries {
385
+ let Some(key) = v8::String::new(scope, &k) else {
386
+ continue;
387
+ };
388
+ let v = jsval_to_js_d(scope, it, built);
389
+ obj.set(scope, key.into(), v);
390
+ }
391
+ obj.into()
392
+ }
393
+ JsVal::Map { id, pairs } => {
394
+ let map = v8::Map::new(scope);
395
+ built.insert(id, map.into());
396
+ for (k, v) in pairs {
397
+ let kk = jsval_to_js_d(scope, k, built);
398
+ let vv = jsval_to_js_d(scope, v, built);
399
+ map.set(scope, kk, vv);
400
+ }
401
+ map.into()
402
+ }
403
+ JsVal::Set { id, items } => {
404
+ let set = v8::Set::new(scope);
405
+ built.insert(id, set.into());
406
+ for it in items {
407
+ let v = jsval_to_js_d(scope, it, built);
408
+ set.add(scope, v);
409
+ }
410
+ set.into()
411
+ }
412
+ JsVal::Ref(id) => built
413
+ .get(&id)
414
+ .copied()
415
+ .unwrap_or_else(|| v8::undefined(scope).into()),
416
+ }
417
+ }
418
+
419
+ pub(crate) fn jsval_to_ruby(ruby: &Ruby, val: &JsVal) -> Result<Value, Error> {
420
+ let mut built: HashMap<u32, Value> = HashMap::new();
421
+ jsval_to_ruby_d(ruby, val, &mut built)
422
+ }
423
+
424
+ // `built` is a HashMap<u32, Value> — the same "bare Values in a heap container,
425
+ // hidden from the GC mark phase" shape that's a use-after-free in call_proc. It
426
+ // is safe HERE only because every entry is, at every allocating safepoint, ALSO
427
+ // reachable from a live stack local: each container arm (Array/Obj/Map/Set)
428
+ // keeps its arr/h/set as a live local while its children recurse and grafts each
429
+ // child into it (push/aset), so the child is marked transitively; Bytes inserts
430
+ // then immediately returns its live local `s`. So `built` never holds the sole
431
+ // reference. This invariant is load-bearing: do NOT refactor an arm to stash a
432
+ // value in `built` without keeping it rooted by a live local until it's grafted.
433
+
434
+ fn jsval_to_ruby_d(
435
+ ruby: &Ruby,
436
+ val: &JsVal,
437
+ built: &mut HashMap<u32, Value>,
438
+ ) -> Result<Value, Error> {
439
+ Ok(match val {
440
+ JsVal::Undefined | JsVal::Null => ruby.qnil().as_value(),
441
+ JsVal::Bool(b) => (*b).into_value_with(ruby),
442
+ JsVal::Int(i) => (*i).into_value_with(ruby),
443
+ JsVal::Num(n) => (*n).into_value_with(ruby),
444
+ JsVal::Str(s) => s.clone().into_value_with(ruby),
445
+ // Bytes -> a binary (ASCII-8BIT) String: str_from_slice uses rb_str_new,
446
+ // which tags the result ASCII-8BIT — so it round-trips back to bytes.
447
+ // Registered under |id| so an aliased blob stays one String via Ref.
448
+ JsVal::Bytes { id, bytes } => {
449
+ let s = ruby.str_from_slice(bytes).as_value();
450
+ if let Some(id) = id {
451
+ built.insert(*id, s);
452
+ }
453
+ s
454
+ }
455
+ // Reconstruct the Ruby Integer from the hex magnitude (arbitrary
456
+ // precision); negate via Ruby so bignums stay exact.
457
+ JsVal::BigInt { negative, words } => {
458
+ let mag: Value = ruby
459
+ .str_new(&words_to_hex(words))
460
+ .funcall("to_i", (16i64,))?;
461
+ if *negative {
462
+ mag.funcall("-@", ())?
463
+ } else {
464
+ mag
465
+ }
466
+ }
467
+ // Time.at takes seconds; carry sub-second precision as the Float. An
468
+ // invalid Date (value_of NaN) raises RangeError, matching csim's
469
+ // des_date — never a silent nil.
470
+ JsVal::Date(ms) => {
471
+ if !ms.is_finite() {
472
+ return Err(Error::new(ruby.exception_range_error(), "invalid Date"));
473
+ }
474
+ ruby.class_object()
475
+ .const_get::<_, magnus::RClass>("Time")?
476
+ .funcall::<_, _, Value>("at", (*ms / 1000.0,))?
477
+ }
478
+ // Register before filling so a Ref from a descendant resolves to the
479
+ // same Ruby object (shared/cyclic graphs keep their identity).
480
+ JsVal::Array { id, items } => {
481
+ let arr = ruby.ary_new();
482
+ built.insert(*id, arr.as_value());
483
+ for it in items {
484
+ let _ = arr.push(jsval_to_ruby_d(ruby, it, built)?);
485
+ }
486
+ arr.as_value()
487
+ }
488
+ // JS objects -> string-keyed Hashes.
489
+ JsVal::Obj { id, entries } => {
490
+ let h = ruby.hash_new();
491
+ built.insert(*id, h.as_value());
492
+ for (k, it) in entries {
493
+ let _ = h.aset(k.as_str(), jsval_to_ruby_d(ruby, it, built)?);
494
+ }
495
+ h.as_value()
496
+ }
497
+ // JS Map -> Ruby Hash (arbitrary marshalled keys, not just strings).
498
+ JsVal::Map { id, pairs } => {
499
+ let h = ruby.hash_new();
500
+ built.insert(*id, h.as_value());
501
+ for (k, v) in pairs {
502
+ let kk = jsval_to_ruby_d(ruby, k, built)?;
503
+ let vv = jsval_to_ruby_d(ruby, v, built)?;
504
+ let _ = h.aset(kk, vv);
505
+ }
506
+ h.as_value()
507
+ }
508
+ // JS Set -> Ruby Set (stdlib); build empty then add so a cyclic Set
509
+ // (a Set containing itself) resolves through the Ref table.
510
+ JsVal::Set { id, items } => {
511
+ let set: Value = ruby
512
+ .class_object()
513
+ .const_get::<_, magnus::RClass>("Set")?
514
+ .funcall("new", ())?;
515
+ built.insert(*id, set);
516
+ for it in items {
517
+ let v = jsval_to_ruby_d(ruby, it, built)?;
518
+ let _: Value = set.funcall("add", (v,))?;
519
+ }
520
+ set
521
+ }
522
+ JsVal::Ref(id) => built
523
+ .get(id)
524
+ .copied()
525
+ .unwrap_or_else(|| ruby.qnil().as_value()),
526
+ })
527
+ }
528
+
529
+ // A Ruby String marshalled by its encoding TAG (the tag is the type):
530
+ // - ASCII-8BIT (binary) -> JsVal::Bytes (a JS Uint8Array);
531
+ // - any text encoding -> JsVal::Str (UTF-8). Already-UTF-8 text is taken
532
+ // as-is; other text encodings transcode (Ruby raises on unmappable bytes).
533
+ // Either way the bytes must be VALID UTF-8 — invalid bytes RAISE, never
534
+ // silently degrade to U+FFFD (loud failure beats silent corruption). A
535
+ // text String mis-tagged binary surfaces loudly too (it becomes a Uint8Array).
536
+ fn string_to_jsval(ruby: &Ruby, s: RString) -> Result<JsVal, Error> {
537
+ use magnus::encoding::EncodingCapable;
538
+ if s.enc_get() == ruby.ascii8bit_encindex() {
539
+ // Binary: the bytes ARE the value (O(n) copy, no inflation). id: None —
540
+ // the identity-tracked path is the direct-String branch in
541
+ // ruby_to_jsval_d; a to_str result reaching here is transient.
542
+ return Ok(JsVal::Bytes {
543
+ id: None,
544
+ bytes: unsafe { s.as_slice() }.to_vec(),
545
+ });
546
+ }
547
+ // Text. encode('UTF-8') on an already-UTF-8 source is a no-op that does NOT
548
+ // validate, so skip it (one fewer copy) and let the from_utf8 check below
549
+ // catch invalid bytes; other encodings transcode (raising on unmappable).
550
+ let utf8: RString = if s.enc_get() == ruby.utf8_encindex() {
551
+ s
552
+ } else {
553
+ s.funcall("encode", ("UTF-8",))?
554
+ };
555
+ // Build the Rust String with a real UTF-8 check (not lossy): invalid bytes
556
+ // in a text-tagged String are an error, not silent U+FFFD substitution.
557
+ match String::from_utf8(unsafe { utf8.as_slice() }.to_vec()) {
558
+ Ok(s) => Ok(JsVal::Str(s)),
559
+ Err(_) => Err(Error::new(
560
+ ruby
561
+ .class_object()
562
+ .const_get::<_, ExceptionClass>("EncodingError")
563
+ .unwrap_or_else(|_| ruby.exception_runtime_error()),
564
+ "text-tagged String contains invalid UTF-8 bytes",
565
+ )),
566
+ }
567
+ }
568
+
569
+ // A JS object key must be a string. A Ruby String key crosses by its bytes as
570
+ // UTF-8 — but unlike a binary VALUE (which becomes a Uint8Array), a key has
571
+ // nowhere to put raw bytes, so invalid UTF-8 RAISES rather than silently
572
+ // degrading to U+FFFD. None for a non-String (the caller then tries to_s).
573
+ fn string_key(ruby: &Ruby, val: Value) -> Option<Result<String, Error>> {
574
+ let s = RString::from_value(val)?;
575
+ let bytes = unsafe { s.as_slice() }.to_vec();
576
+ Some(String::from_utf8(bytes).map_err(|_| {
577
+ Error::new(
578
+ ruby.class_object()
579
+ .const_get::<_, ExceptionClass>("EncodingError")
580
+ .unwrap_or_else(|_| ruby.exception_runtime_error()),
581
+ "hash key is not valid UTF-8",
582
+ )
583
+ }))
584
+ }
585
+
586
+ // A Ruby String's bytes interpreted as UTF-8 (invalid sequences become U+FFFD),
587
+ // regardless of the encoding tag. Used for the depth-truncation to_s fallback,
588
+ // where the value is already being lossily summarised.
589
+ fn lossy_string(val: Value) -> Option<String> {
590
+ let s = RString::from_value(val)?;
591
+ // Copy the bytes out before any further Ruby call can move/free them.
592
+ let bytes = unsafe { s.as_slice() }.to_vec();
593
+ Some(String::from_utf8_lossy(&bytes).into_owned())
594
+ }
595
+
596
/// Bookkeeping for one Ruby -> `JsVal` marshal: which Ruby objects have
/// already been emitted, so shared or cyclic structures turn into `Ref`s.
#[derive(Default)]
struct RbSeen {
    /// Id that will be handed to the next newly registered object.
    next_id: u32,
    /// Ruby `object_id` -> serialization id. The object id is treated as
    /// exact, so no collision handling is done here.
    map: HashMap<usize, u32>,
}
603
+
604
+ pub(crate) fn ruby_to_jsval(val: Value) -> Result<JsVal, Error> {
605
+ let mut seen = RbSeen::default();
606
+ ruby_to_jsval_d(val, &mut seen, 0)
607
+ }
608
+
609
+ fn ruby_to_jsval_d(val: Value, seen: &mut RbSeen, depth: u32) -> Result<JsVal, Error> {
610
+ let ruby = Ruby::get().unwrap();
611
+ if val.is_nil() {
612
+ return Ok(JsVal::Null);
613
+ }
614
+ // NB: bool::try_convert is RTEST (truthiness) — it returns Ok(true) for
615
+ // ANY non-false value — so check the actual true/false singletons by
616
+ // identity instead, or every Integer/String/Array would marshal as `true`.
617
+ if val.eql(ruby.qtrue()).unwrap_or(false) {
618
+ return Ok(JsVal::Bool(true));
619
+ }
620
+ if val.eql(ruby.qfalse()).unwrap_or(false) {
621
+ return Ok(JsVal::Bool(false));
622
+ }
623
+ // Ruby Time -> JS Date. Must precede the numeric checks: magnus's
624
+ // i64/f64 TryConvert coerces a Time via to_i/to_f, so it would otherwise
625
+ // marshal as a bare epoch number. Time#to_f is epoch seconds; Date wants ms.
626
+ if let Ok(time_class) = ruby.class_object().const_get::<_, magnus::RClass>("Time") {
627
+ if val.is_kind_of(time_class) {
628
+ let sec = val.funcall::<_, _, f64>("to_f", ())?;
629
+ return Ok(JsVal::Date(sec * 1000.0));
630
+ }
631
+ }
632
+ // Integer. A JS Number is an f64, so only integers exactly representable
633
+ // there (|n| <= 2^53) become Int/Number; anything larger (the rest of the
634
+ // i64 range AND true bignums) becomes a BigInt so no precision is lost.
635
+ // Use a strict Integer type check, NOT magnus::Integer::try_convert, which
636
+ // coerces a Float / to_int object — that would turn e.g. 1e300 into a BigInt
637
+ // instead of a Number.
638
+ if let Ok(int_class) = ruby.class_object().const_get::<_, magnus::RClass>("Integer") {
639
+ if val.is_kind_of(int_class) {
640
+ if let Ok(i) = i64::try_convert(val) {
641
+ if i.unsigned_abs() <= (1u64 << 53) {
642
+ return Ok(JsVal::Int(i));
643
+ }
644
+ }
645
+ let abs: Value = val.funcall("abs", ())?;
646
+ let hex: String = abs.funcall("to_s", (16i64,))?;
647
+ let negative = val.funcall::<_, _, bool>("negative?", ())?;
648
+ return Ok(JsVal::BigInt {
649
+ negative,
650
+ words: hex_to_words(&hex),
651
+ });
652
+ }
653
+ }
654
+ if let Ok(n) = f64::try_convert(val) {
655
+ return Ok(JsVal::Num(n));
656
+ }
657
+ // Bare Symbol -> JS string (one-way: it comes back as a Ruby String). A
658
+ // binary-encoded symbol surfaces the same curated EncodingError as a text
659
+ // String with invalid UTF-8, not magnus's raw "expected utf-8" message.
660
+ if let Some(sym) = magnus::Symbol::from_value(val) {
661
+ let name = sym.name().map_err(|_| {
662
+ Error::new(
663
+ ruby.class_object()
664
+ .const_get::<_, ExceptionClass>("EncodingError")
665
+ .unwrap_or_else(|_| ruby.exception_runtime_error()),
666
+ "symbol name is not valid UTF-8",
667
+ )
668
+ })?;
669
+ return Ok(JsVal::Str(name.into_owned()));
670
+ }
671
+ // Real Strings: the encoding tag is the type declaration. A binary
672
+ // (ASCII-8BIT) String -> bytes (JS Uint8Array), identity-tracked so an
673
+ // aliased blob stays one Uint8Array; any text encoding -> a JS string.
674
+ if let Some(rstr) = RString::from_value(val) {
675
+ use magnus::encoding::EncodingCapable;
676
+ if rstr.enc_get() == ruby.ascii8bit_encindex() {
677
+ // depth 0 — a binary blob is a leaf, so it stays faithful bytes even
678
+ // when deeply nested (never the depth-truncation-to-lossy-string);
679
+ // only the identity (Ref) check applies. Frozen/interned binary
680
+ // Strings share an object_id, so two `-"x".b` literals deliberately
681
+ // collapse to ONE Uint8Array (they ARE the same Ruby object).
682
+ let id = match rb_container_id(seen, val, 0)? {
683
+ RbId::New(id) => id,
684
+ RbId::Reuse(jv) => return Ok(jv),
685
+ };
686
+ return Ok(JsVal::Bytes {
687
+ id: Some(id),
688
+ bytes: unsafe { rstr.as_slice() }.to_vec(),
689
+ });
690
+ }
691
+ return string_to_jsval(&ruby, rstr);
692
+ }
693
+ // A String-like (to_str) gets the same tag-driven treatment, but its result
694
+ // is transient so it is not identity-tracked.
695
+ if val.respond_to("to_str", false).unwrap_or(false) {
696
+ let s: Value = val.funcall("to_str", ())?;
697
+ if let Some(rstr) = RString::from_value(s) {
698
+ return string_to_jsval(&ruby, rstr);
699
+ }
700
+ }
701
+ // Ruby Set -> JS Set. Before the Array/Hash checks (a Set is neither).
702
+ if let Ok(set_class) = ruby.class_object().const_get::<_, magnus::RClass>("Set") {
703
+ if val.is_kind_of(set_class) {
704
+ let id = match rb_container_id(seen, val, depth)? {
705
+ RbId::New(id) => id,
706
+ RbId::Reuse(jv) => return Ok(jv),
707
+ };
708
+ let arr: RArray = val.funcall("to_a", ())?;
709
+ let mut items = Vec::with_capacity(arr.len());
710
+ for i in 0..arr.len() {
711
+ let el: Value = arr.entry::<Value>(i as isize)?;
712
+ items.push(ruby_to_jsval_d(el, seen, depth + 1)?);
713
+ }
714
+ return Ok(JsVal::Set { id, items });
715
+ }
716
+ }
717
+ if let Ok(arr) = RArray::try_convert(val) {
718
+ let id = match rb_container_id(seen, val, depth)? {
719
+ RbId::New(id) => id,
720
+ RbId::Reuse(jv) => return Ok(jv),
721
+ };
722
+ let mut items = Vec::with_capacity(arr.len());
723
+ for i in 0..arr.len() {
724
+ let el: Value = arr.entry::<Value>(i as isize)?;
725
+ items.push(ruby_to_jsval_d(el, seen, depth + 1)?);
726
+ }
727
+ return Ok(JsVal::Array { id, items });
728
+ }
729
+ if let Ok(hash) = RHash::try_convert(val) {
730
+ let id = match rb_container_id(seen, val, depth)? {
731
+ RbId::New(id) => id,
732
+ RbId::Reuse(jv) => return Ok(jv),
733
+ };
734
+ let entries = RefCell::new(Vec::new());
735
+ hash.foreach(|k: Value, v: Value| {
736
+ // String/Symbol keys -> a UTF-8 String; anything else via to_s. A JS
737
+ // object key has nowhere to put raw bytes, so unlike a binary VALUE
738
+ // (-> Uint8Array) a binary KEY with invalid UTF-8 RAISES (string_key),
739
+ // and a to_s returning a non-String is a loud error, not a silent "".
740
+ let key = match string_key(&ruby, k) {
741
+ Some(r) => r?,
742
+ None => {
743
+ // A non-String key (Symbol, Integer, ...) -> to_s, then the
744
+ // same UTF-8 rule.
745
+ let s: Value = k.funcall("to_s", ())?;
746
+ match string_key(&ruby, s) {
747
+ Some(r) => r?,
748
+ None => {
749
+ return Err(Error::new(
750
+ ruby.exception_type_error(),
751
+ "hash key's to_s did not return a String",
752
+ ))
753
+ }
754
+ }
755
+ }
756
+ };
757
+ entries
758
+ .borrow_mut()
759
+ .push((key, ruby_to_jsval_d(v, seen, depth + 1)?));
760
+ Ok(magnus::r_hash::ForEach::Continue)
761
+ })?;
762
+ return Ok(JsVal::Obj {
763
+ id,
764
+ entries: entries.into_inner(),
765
+ });
766
+ }
767
+ Err(Error::new(
768
+ ruby.exception_type_error(),
769
+ "unsupported type crossing into JS",
770
+ ))
771
+ }
772
+
773
+ enum RbId {
774
+ New(u32),
775
+ Reuse(JsVal),
776
+ }
777
+
778
+ // Ruby-side mirror of js_container_id: New(id) to register and recurse, or
779
+ // Reuse(jsval) to emit directly (a Ref to an already-seen object, or a
780
+ // depth-truncated Str). Computes object_id once.
781
+ fn rb_container_id(seen: &mut RbSeen, val: Value, depth: u32) -> Result<RbId, Error> {
782
+ let oid = val.funcall::<_, _, usize>("object_id", ())?;
783
+ if let Some(id) = seen.map.get(&oid) {
784
+ return Ok(RbId::Reuse(JsVal::Ref(*id)));
785
+ }
786
+ if depth >= MAX_MARSHAL_DEPTH {
787
+ let ruby = Ruby::get().unwrap();
788
+ let s: Value = val.funcall("to_s", ())?;
789
+ let s = lossy_string(s).ok_or_else(|| {
790
+ Error::new(ruby.exception_type_error(), "to_s did not return a String")
791
+ })?;
792
+ return Ok(RbId::Reuse(JsVal::Str(s)));
793
+ }
794
+ let id = seen.next_id;
795
+ seen.next_id += 1;
796
+ seen.map.insert(oid, id);
797
+ Ok(RbId::New(id))
798
+ }