yrby 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,389 @@
1
+ use magnus::{
2
+ function, method, prelude::*, Error, ExceptionClass, RString, Ruby, TryConvert, Value,
3
+ };
4
+ use yrs::sync::{Message, SyncMessage};
5
+ use yrs::updates::decoder::Decode;
6
+ use yrs::updates::encoder::Encode;
7
+ use yrs::{Doc, GetString, ReadTxn, Transact};
8
+
9
+ mod protocol;
10
+ mod read;
11
+ use protocol::{classify_message, merged_doc_update, update_advances_doc, update_is_ready};
12
+
13
+ /// Wrapper around yrs Doc.
14
+ ///
15
+ /// Thread safety: `yrs::Doc` is `Send + Sync`. Its `transact()`/`transact_mut()`
16
+ /// acquire an internal RwLock with blocking semantics, so concurrent access from
17
+ /// multiple Ruby threads serializes safely instead of panicking. There's no
18
+ /// interior-mutability wrapper (RefCell and friends): every method opens and
19
+ /// closes its transaction within a single call.
20
+ #[magnus::wrap(class = "Y::Doc", free_immediately, size)]
21
+ struct RbDoc(Doc);
22
+
23
+ /// Compile-time proof that the wrapped Doc is thread-safe. If a future yrs
24
+ /// upgrade makes Doc lose Send/Sync, this fails the build instead of silently
25
+ /// shipping a thread-unsafe gem.
26
+ #[allow(dead_code)]
27
+ fn assert_thread_safe() {
28
+ fn is_send_sync<T: Send + Sync>() {}
29
+ is_send_sync::<Doc>();
30
+ }
31
+
32
+ /// Run `f` with the GVL (Global VM Lock) released, so other Ruby threads,
33
+ /// including ones calling into this extension, can run in parallel.
34
+ ///
35
+ /// Safety rules for the closure:
36
+ /// - It must not touch any Ruby object or call any Ruby API. Inputs are copied
37
+ /// out of Ruby strings before entering, and results are converted to Ruby
38
+ /// objects after returning.
39
+ /// - It must be `Send` (it runs while other threads own the GVL). `&Doc` is
40
+ /// fine: it's `Sync` (asserted above).
41
+ /// - Lock discipline: any native lock it takes (the doc's internal RwLock) must
42
+ /// be acquired and released inside this closure, with the GVL already dropped.
43
+ /// Never lock with the GVL held (e.g. before calling `nogvl`), or a thread
44
+ /// waiting on the lock while holding the GVL can deadlock against the GVL
45
+ /// reacquire. Same reason we never hold a lock across the GVL boundary.
46
+ ///
47
+ /// The closure runs with no unblock function, so it is not interruptible: a
48
+ /// Thread#kill, timeout, or signal can't preempt it mid-run. That's fine for the
49
+ /// bounded CRDT work it does; never call anything blocking or unbounded inside it.
50
+ ///
51
+ /// Panics inside the closure are caught and re-raised (resumed) after the GVL
52
+ /// is reacquired, where magnus converts them to Ruby exceptions.
53
+ fn nogvl<F, R>(f: F) -> R
54
+ where
55
+ F: FnOnce() -> R + Send,
56
+ R: Send,
57
+ {
58
+ use std::ffi::c_void;
59
+ use std::panic::{catch_unwind, resume_unwind, AssertUnwindSafe};
60
+
61
+ struct Ctx<F, R> {
62
+ func: Option<F>,
63
+ result: Option<std::thread::Result<R>>,
64
+ }
65
+
66
+ unsafe extern "C" fn callback<F, R>(arg: *mut c_void) -> *mut c_void
67
+ where
68
+ F: FnOnce() -> R,
69
+ {
70
+ let ctx = &mut *(arg as *mut Ctx<F, R>);
71
+ let func = ctx.func.take().expect("nogvl callback invoked twice");
72
+ ctx.result = Some(catch_unwind(AssertUnwindSafe(func)));
73
+ std::ptr::null_mut()
74
+ }
75
+
76
+ let mut ctx: Ctx<F, R> = Ctx {
77
+ func: Some(f),
78
+ result: None,
79
+ };
80
+ unsafe {
81
+ rb_sys::rb_thread_call_without_gvl(
82
+ Some(callback::<F, R>),
83
+ &mut ctx as *mut Ctx<F, R> as *mut c_void,
84
+ None,
85
+ std::ptr::null_mut(),
86
+ );
87
+ }
88
+ match ctx.result.expect("nogvl callback did not run") {
89
+ Ok(result) => result,
90
+ Err(panic) => resume_unwind(panic),
91
+ }
92
+ }
93
+
94
+ /// Helper to create a binary Ruby string from bytes. Called only with the GVL
95
+ /// held (after the native work finishes), so `Ruby::get` always succeeds.
96
+ fn binary_string(bytes: &[u8]) -> RString {
97
+ let ruby = Ruby::get().unwrap();
98
+ let s = ruby.str_from_slice(bytes);
99
+ let _ = s.enc_associate(ruby.ascii8bit_encindex());
100
+ s
101
+ }
102
+
103
+ /// Copy a Ruby string's bytes so they can be used without the GVL.
104
+ fn copy_bytes(s: RString) -> Vec<u8> {
105
+ unsafe { s.as_slice() }.to_vec()
106
+ }
107
+
108
+ /// Build a `Y::Error` (the gem's own error class, defined in `init`) so
109
+ /// native decode/apply failures surface as a project-specific error rather than
110
+ /// a generic RuntimeError. Falls back to RuntimeError only if the class somehow
111
+ /// can't be resolved.
112
+ fn yrb_error(msg: String) -> Error {
113
+ let ruby = Ruby::get().unwrap();
114
+ let class = ruby
115
+ .eval::<ExceptionClass>("Y::Error")
116
+ .unwrap_or_else(|_| ruby.exception_runtime_error());
117
+ Error::new(class, msg)
118
+ }
119
+
120
+ // ============================================================================
121
+ // Doc Implementation
122
+ // ============================================================================
123
+
124
+ impl RbDoc {
125
+ /// Create a new Doc with an optional client_id
126
+ fn new(args: &[Value]) -> Result<Self, Error> {
127
+ let doc = if args.is_empty() {
128
+ Doc::new()
129
+ } else {
130
+ let client_id: u64 = TryConvert::try_convert(args[0])?;
131
+ Doc::with_client_id(client_id)
132
+ };
133
+ Ok(RbDoc(doc))
134
+ }
135
+
136
+ fn encode_state_vector(&self) -> RString {
137
+ let doc = &self.0;
138
+ let sv = nogvl(move || {
139
+ let txn = doc.transact();
140
+ txn.state_vector().encode_v1()
141
+ });
142
+ binary_string(&sv)
143
+ }
144
+
145
+ /// Names of the document's root types, so a content reader can find the one
146
+ /// holding text without knowing it up front.
147
+ fn root_names(&self) -> Vec<String> {
148
+ let doc = &self.0;
149
+ nogvl(move || {
150
+ doc.transact()
151
+ .root_refs()
152
+ .map(|(name, _)| name.to_string())
153
+ .collect()
154
+ })
155
+ }
156
+
157
+ fn read_text(&self, name: String) -> Option<String> {
158
+ let doc = &self.0;
159
+ nogvl(move || {
160
+ doc.transact()
161
+ .get_text(name.as_str())
162
+ .map(|t| t.get_string(&doc.transact()))
163
+ })
164
+ }
165
+
166
+ /// Text of an XML-shaped root, one top-level block per line. The walk +
167
+ /// block-join logic lives in `read::xml_blocks_text` (pure, Rust-tested);
168
+ /// this just opens the transaction and resolves the root.
169
+ fn read_xml(&self, name: String) -> Option<String> {
170
+ let doc = &self.0;
171
+ nogvl(move || {
172
+ let txn = doc.transact();
173
+ let fragment = txn.get_xml_fragment(name.as_str())?;
174
+ Some(read::xml_blocks_text(&txn, &fragment))
175
+ })
176
+ }
177
+
178
+ /// A `Y.Map` root serialized to a JSON object string (keys sorted; values
179
+ /// recursive). Complements read_text/read_xml for structured shared state.
180
+ /// Callers parse the JSON (e.g. `JSON.parse(doc.read_map("state"))`). The
181
+ /// serialization lives in `read::map_json` (pure, Rust-tested).
182
+ fn read_map(&self, name: String) -> Option<String> {
183
+ let doc = &self.0;
184
+ nogvl(move || {
185
+ let txn = doc.transact();
186
+ let map = txn.get_map(name.as_str())?;
187
+ Some(read::map_json(&txn, &map))
188
+ })
189
+ }
190
+
191
+ /// Encode state as update (optionally diffed against a state vector)
192
+ fn encode_state_as_update(&self, args: &[Value]) -> Result<RString, Error> {
193
+ let sv_bytes: Option<Vec<u8>> = if args.is_empty() {
194
+ None
195
+ } else {
196
+ let sv_string: RString = TryConvert::try_convert(args[0])?;
197
+ Some(copy_bytes(sv_string))
198
+ };
199
+ let doc = &self.0;
200
+ let update = nogvl(move || -> Result<Vec<u8>, String> {
201
+ let sv = match &sv_bytes {
202
+ None => yrs::StateVector::default(),
203
+ Some(bytes) => yrs::StateVector::decode_v1(bytes).map_err(|e| e.to_string())?,
204
+ };
205
+ let txn = doc.transact();
206
+ Ok(txn.encode_state_as_update_v1(&sv))
207
+ })
208
+ .map_err(yrb_error)?;
209
+ Ok(binary_string(&update))
210
+ }
211
+
212
+ fn apply_update(&self, update: RString) -> Result<(), Error> {
213
+ let update_bytes = copy_bytes(update);
214
+ let doc = &self.0;
215
+ nogvl(move || -> Result<(), String> {
216
+ let update = yrs::Update::decode_v1(&update_bytes).map_err(|e| e.to_string())?;
217
+ let mut txn = doc.transact_mut();
218
+ txn.apply_update(update).map_err(|e| e.to_string())
219
+ })
220
+ .map_err(yrb_error)
221
+ }
222
+
223
+ /// True if applying `update` would integrate cleanly (its dependencies are
224
+ /// all present). False means it would leave a pending struct, i.e. an earlier
225
+ /// update is missing. Pure read; does not mutate.
226
+ fn update_ready(&self, update: RString) -> Result<bool, Error> {
227
+ let update_bytes = copy_bytes(update);
228
+ let doc = &self.0;
229
+ nogvl(move || update_is_ready(doc, &update_bytes)).map_err(yrb_error)
230
+ }
231
+
232
+ /// True if applying `update` would change the document (it carries new
233
+ /// content), false if the doc already contains it (an already-applied
234
+ /// retry). See `update_advances_doc`. Pure read; does not mutate.
235
+ fn update_advances(&self, update: RString) -> Result<bool, Error> {
236
+ let update_bytes = copy_bytes(update);
237
+ let doc = &self.0;
238
+ nogvl(move || update_advances_doc(doc, &update_bytes)).map_err(yrb_error)
239
+ }
240
+
241
+ /// Sync step 1: Create a sync message with our state vector
242
+ fn sync_step1(&self) -> RString {
243
+ let doc = &self.0;
244
+ let encoded = nogvl(move || {
245
+ let txn = doc.transact();
246
+ let sv = txn.state_vector();
247
+ Message::Sync(SyncMessage::SyncStep1(sv)).encode_v1()
248
+ });
249
+ binary_string(&encoded)
250
+ }
251
+
252
+ /// Handle a Sync or Awareness message, returning
253
+ /// [message_type, sync_type, response_bytes]. Only Sync (step1/step2/update)
254
+ /// and Awareness are handled; any other frame type is rejected.
255
+ fn handle_sync_message(&self, data: RString) -> Result<(u8, u8, RString), Error> {
256
+ let data_bytes = copy_bytes(data);
257
+ let doc = &self.0;
258
+
259
+ let (msg_type, sync_type, response) =
260
+ nogvl(move || -> Result<(u8, u8, Vec<u8>), String> {
261
+ let msg = Message::decode_v1(&data_bytes).map_err(|e| e.to_string())?;
262
+
263
+ match msg {
264
+ Message::Sync(sync_msg) => match sync_msg {
265
+ SyncMessage::SyncStep1(sv) => {
266
+ // Respond with SyncStep2
267
+ let txn = doc.transact();
268
+ let update = txn.encode_state_as_update_v1(&sv);
269
+ let response = Message::Sync(SyncMessage::SyncStep2(update));
270
+ Ok((0, 0, response.encode_v1()))
271
+ }
272
+ SyncMessage::SyncStep2(update_bytes) => {
273
+ // Apply the update
274
+ let update =
275
+ yrs::Update::decode_v1(&update_bytes).map_err(|e| e.to_string())?;
276
+ let mut txn = doc.transact_mut();
277
+ txn.apply_update(update).map_err(|e| e.to_string())?;
278
+ Ok((0, 1, Vec::new()))
279
+ }
280
+ SyncMessage::Update(update_bytes) => {
281
+ // Apply the update
282
+ let update =
283
+ yrs::Update::decode_v1(&update_bytes).map_err(|e| e.to_string())?;
284
+ let mut txn = doc.transact_mut();
285
+ txn.apply_update(update).map_err(|e| e.to_string())?;
286
+ Ok((0, 2, Vec::new()))
287
+ }
288
+ },
289
+ Message::Awareness(_) => Ok((1, 0, Vec::new())),
290
+ // Auth, awareness-query, and custom frames aren't part of this
291
+ // protocol; reject rather than pretend to handle them.
292
+ _ => Err("unsupported message type".to_string()),
293
+ }
294
+ })
295
+ .map_err(yrb_error)?;
296
+
297
+ Ok((msg_type, sync_type, binary_string(&response)))
298
+ }
299
+ }
300
+
301
+ // ============================================================================
302
+ // Protocol codec (stateless), exposed as `Y` module functions
303
+ // ============================================================================
304
+ //
305
+ // The server never holds presence or document state to classify a frame; these
306
+ // are pure functions of their bytes. (Presence lives in the browser clients; the
307
+ // server only relays awareness frames opaquely.)
308
+
309
+ /// Wrap a raw document update in a sync Update message frame, ready to relay.
310
+ fn wrap_update(update: RString) -> RString {
311
+ let update_bytes = copy_bytes(update);
312
+ let msg = Message::Sync(SyncMessage::Update(update_bytes));
313
+ binary_string(&msg.encode_v1())
314
+ }
315
+
316
+ /// Classify a frame for safe routing and relay. Returns a code only when the
317
+ /// frame is exactly one well-formed message that consumes the whole buffer, so
318
+ /// a malformed, truncated, multi-message, or trailing-garbage frame (which a
319
+ /// malicious client could craft to disrupt others if relayed) is rejected up
320
+ /// front:
321
+ /// 0 = drop (malformed, multiple, unknown, or empty)
322
+ /// 1 = sync step1 (a request: respond, do not relay)
323
+ /// 2 = sync step2/update (a document change: record/apply/relay)
324
+ /// 3 = awareness (presence: relay)
325
+ /// 4 = awareness query (a request: respond, do not relay)
326
+ fn message_kind(data: RString) -> u8 {
327
+ let data_bytes = copy_bytes(data);
328
+ nogvl(move || classify_message(&data_bytes))
329
+ }
330
+
331
+ /// Extract the document-update delta carried by a protocol message: the payloads
332
+ /// of any Update or SyncStep2 sub-messages, merged into a single update. Returns
333
+ /// nil if the message carries no document change (a SyncStep1 request or an
334
+ /// awareness update). The store-backed path records this exact delta before relay.
335
+ fn update_from_message(data: RString) -> Result<Option<RString>, Error> {
336
+ let data_bytes = copy_bytes(data);
337
+ let merged = nogvl(move || merged_doc_update(&data_bytes)).map_err(yrb_error)?;
338
+ Ok(merged.map(|b| binary_string(&b)))
339
+ }
340
+
341
+ // ============================================================================
342
+ // Module Initialization
343
+ // ============================================================================
344
+
345
+ #[magnus::init]
346
+ fn init(ruby: &Ruby) -> Result<(), Error> {
347
+ let module = ruby.define_module("Y")?;
348
+
349
+ // Define error class
350
+ let standard_error: magnus::RClass = ruby.eval("StandardError")?;
351
+ let _error_class = module.define_class("Error", standard_error)?;
352
+
353
+ // Define Doc class
354
+ let doc_class = module.define_class("Doc", ruby.class_object())?;
355
+ doc_class.define_singleton_method("new", function!(RbDoc::new, -1))?;
356
+ doc_class.define_method(
357
+ "encode_state_vector",
358
+ method!(RbDoc::encode_state_vector, 0),
359
+ )?;
360
+ doc_class.define_method(
361
+ "encode_state_as_update",
362
+ method!(RbDoc::encode_state_as_update, -1),
363
+ )?;
364
+ doc_class.define_method("apply_update", method!(RbDoc::apply_update, 1))?;
365
+ doc_class.define_method("root_names", method!(RbDoc::root_names, 0))?;
366
+ doc_class.define_method("read_text", method!(RbDoc::read_text, 1))?;
367
+ doc_class.define_method("read_xml", method!(RbDoc::read_xml, 1))?;
368
+ doc_class.define_method("read_map", method!(RbDoc::read_map, 1))?;
369
+ doc_class.define_method("update_ready?", method!(RbDoc::update_ready, 1))?;
370
+ doc_class.define_method("update_advances?", method!(RbDoc::update_advances, 1))?;
371
+ doc_class.define_method("sync_step1", method!(RbDoc::sync_step1, 0))?;
372
+ doc_class.define_method(
373
+ "handle_sync_message",
374
+ method!(RbDoc::handle_sync_message, 1),
375
+ )?;
376
+ // Stateless protocol codec, as Y module functions.
377
+ module.define_module_function("wrap_update", function!(wrap_update, 1))?;
378
+ module.define_module_function("message_kind", function!(message_kind, 1))?;
379
+ module.define_module_function("update_from_message", function!(update_from_message, 1))?;
380
+
381
+ // Define message type constants
382
+ module.const_set("MSG_SYNC", 0u8)?;
383
+ module.const_set("MSG_AWARENESS", 1u8)?;
384
+ module.const_set("MSG_SYNC_STEP1", 0u8)?;
385
+ module.const_set("MSG_SYNC_STEP2", 1u8)?;
386
+ module.const_set("MSG_SYNC_UPDATE", 2u8)?;
387
+
388
+ Ok(())
389
+ }