yrby 0.2.3 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a5928987b16a5d3a3933b95875dff276e4e18364b46ed0e33baa8e48b5b2e847
4
- data.tar.gz: 2676689ddee9403765353c4612055fb120ce9e1c6dcdbeb956c482eed313374d
3
+ metadata.gz: 2dcb5650668e6cc3f1e0ef1349426ba875d5580a945e90a62216e61a24bf9cef
4
+ data.tar.gz: eead1e7159caeacf517febe05149d0c55491dcceaf6fbbbe2f3c40fdf7620978
5
5
  SHA512:
6
- metadata.gz: 7e13256e4881804c3330bf69ce86203bd2aea4bda9313db042f7d368221130f6624285cbddea1a9b61b967c3831635ace45b1291496100872ad8a93c97be2e10
7
- data.tar.gz: fbf1c8c9ec0f0b23190578d562f75fb14a14ec3d562b54b86437c5abda5bd48a97f2da216927f66fde46b5d52fa12d38b4f39b8073b8c6222d656fd09fd2c24f
6
+ metadata.gz: 68948615ae023becf04deea8c767cdd075ccd7097bbcecefb52868a7c2257fe4bd39d20bbac83b1f6a47d78a2889d76e590a83b7df0d9f6e1ccea1a8fdfa115d
7
+ data.tar.gz: 70716f2948661046b71843e66dcec62e5cf3e2b5c1509e9ea76c49db6039909bc02f510a90e3b9f44054b20ea3c2297d10878834b3c751abd1a28c72d0d2cabc
data/CHANGELOG.md CHANGED
@@ -6,6 +6,34 @@ to follow [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
6
6
 
7
7
  ## [Unreleased]
8
8
 
9
+ ## [0.3.0] - 2026-07-01
10
+
11
+ ### Fixed
12
+
13
+ - **Sync no longer serves un-integrable pending structs.** When a doc holds a
14
+ *pending* struct (a gappy update whose causally-prior update is missing — e.g.
15
+ legacy data recorded before the `update_ready?` gate existed), its integrated
16
+ state vector is empty but `encode_state_as_update` merges the pending bytes back
17
+ in. Answering a peer's `SyncStep1` with that state handed the peer content it
18
+ couldn't integrate, so it parked the same pending forever and the empty-SV /
19
+ non-empty-content mismatch drove endless resync traffic (observed as a browser
20
+ re-sending frames several times a second). `handle_sync_message` now answers
21
+ `SyncStep1` with **integrated-only** state, so a server never serves a struct it
22
+ can't integrate itself. This neutralizes existing poisoned server state on deploy —
23
+ no migration needed. The server's own pending is untouched and still heals if
24
+ the missing dependency later arrives (only then does the content become
25
+ visible in sync). Live delta relay (`Update` frames) is unchanged.
26
+
27
+ ### Added
28
+
29
+ - `Doc#pending?` — true if the doc holds un-integrable pending structs or a
30
+ pending delete set (content waiting on a missing causally-prior update).
31
+ - `Doc#compacted_state_update` — like `encode_state_as_update` (full state) but
32
+ **gap-free**: excludes pending structs/delete set. Use it when persisting or
33
+ serving state other peers will apply. Non-destructive — the doc keeps its
34
+ pending (so it can still heal), and `encode_state_as_update` stays lossless for
35
+ raw-update recovery.
36
+
9
37
  ## [0.2.3] - 2026-07-01
10
38
 
11
39
  ### Fixed
data/README.md CHANGED
@@ -142,18 +142,39 @@ doc = Y::Doc.new(12345) # specific client ID (used for CRDT identity)
142
142
 
143
143
  # Encoding
144
144
  doc.encode_state_vector # => current state vector
145
- doc.encode_state_as_update # => full update
145
+ doc.encode_state_as_update # => full update (lossless: keeps pending)
146
146
  doc.encode_state_as_update(sv) # => update diff against state vector
147
+ doc.compacted_state_update # => full update, gap-free (excludes pending)
147
148
 
148
149
  # Applying updates
149
150
  doc.apply_update(update_bytes) # apply raw V1 update
151
+ doc.pending? # => true if holding un-integrable pending structs
150
152
 
151
153
  # Sync protocol
152
154
  doc.sync_step1 # => SyncStep1 message (this doc's state vector)
153
155
  doc.handle_sync_message(data) # => [msg_type, sync_type, response]; answers a
154
- # peer's SyncStep1 with a SyncStep2
156
+ # peer's SyncStep1 with an integrated-only
157
+ # SyncStep2 (never serves pending structs)
155
158
  ```
156
159
 
160
+ ### Pending structs and gap-free state
161
+
162
+ If a doc applies an update whose causally-prior update is missing (a "gappy"
163
+ update), yrs parks it as a **pending** struct: the integrated state vector does
164
+ not advance, but the pending block is held as a recovery buffer and heals if the
165
+ missing dependency later arrives. `Doc#pending?` reports this.
166
+
167
+ Pending structs are *not* document state, so they must not cross the sync
168
+ boundary — a peer that receives one can't integrate it and gets stuck. Two
169
+ guarantees keep serving safe:
170
+
171
+ - `handle_sync_message` answers `SyncStep1` with **integrated-only** state, so a
172
+ server never serves a struct it can't integrate itself (this is automatic).
173
+ - `Doc#compacted_state_update` gives you the same gap-free full-state update for
174
+ when you persist or hand off state yourself. It's non-destructive (the doc
175
+ keeps its pending), while `encode_state_as_update` stays lossless so you can
176
+ still preserve the raw pending bytes for recovery.
177
+
157
178
  ### Protocol codec (module functions)
158
179
 
159
180
  Classifying and unwrapping wire frames is stateless, so it's exposed as
data/ext/yrby/src/lib.rs CHANGED
@@ -8,7 +8,10 @@ use yrs::{Doc, GetString, ReadTxn, Transact};
8
8
 
9
9
  mod protocol;
10
10
  mod read;
11
- use protocol::{classify_message, merged_doc_update, update_advances_doc, update_is_ready};
11
+ use protocol::{
12
+ classify_message, has_pending, integrated_update, merged_doc_update, update_advances_doc,
13
+ update_is_ready,
14
+ };
12
15
 
13
16
  /// Wrapper around yrs Doc.
14
17
  ///
@@ -188,6 +191,28 @@ impl RbDoc {
188
191
  })
189
192
  }
190
193
 
194
+ /// True if the doc holds un-integrable pending structs or a pending delete
195
+ /// set — content that couldn't integrate because a causally-prior update is
196
+ /// missing. Such content is a recovery buffer, not document state; it heals if
197
+ /// the missing dependency later arrives. A pure read; bound to Ruby as `pending?`.
198
+ fn pending(&self) -> bool {
199
+ let doc = &self.0;
200
+ nogvl(move || has_pending(doc)) // delegates to the shared `has_pending` check
201
+ }
202
+
203
+ /// Like `encode_state_as_update` (full state), but **gap-free**: it excludes
204
+ /// any pending (un-integrable) structs and any pending delete set. Use this when
205
+ /// persisting or serving state that other peers will apply — serving pending
206
+ /// content poisons their sync. Non-destructive: this doc keeps its pending, so
207
+ /// a genuine gap still heals if its dependency arrives. (`encode_state_as_update`
208
+ /// stays lossless for raw-update recovery.) Failures are mapped via `yrb_error`.
209
+ fn compacted_state_update(&self) -> Result<RString, Error> {
210
+ let doc = &self.0;
211
+ let update = nogvl(move || integrated_update(doc, &yrs::StateVector::default())) // diff against a default state vector
212
+ .map_err(yrb_error)?;
213
+ Ok(binary_string(&update))
214
+ }
215
+
191
216
  /// Encode state as update (optionally diffed against a state vector)
192
217
  fn encode_state_as_update(&self, args: &[Value]) -> Result<RString, Error> {
193
218
  let sv_bytes: Option<Vec<u8>> = if args.is_empty() {
@@ -263,9 +288,13 @@ impl RbDoc {
263
288
  match msg {
264
289
  Message::Sync(sync_msg) => match sync_msg {
265
290
  SyncMessage::SyncStep1(sv) => {
266
- // Respond with SyncStep2
267
- let txn = doc.transact();
268
- let update = txn.encode_state_as_update_v1(&sv);
291
+ // Respond with SyncStep2 carrying only *integrated*
292
+ // state. Never hand a peer un-integrable pending
293
+ // structs: the peer would park the same pending
294
+ // forever and the state-vector/content mismatch drives
295
+ // endless resync traffic. (integrated_update is a plain
296
+ // direct encode when nothing is pending.)
297
+ let update = integrated_update(doc, &sv)?;
269
298
  let response = Message::Sync(SyncMessage::SyncStep2(update));
270
299
  Ok((0, 0, response.encode_v1()))
271
300
  }
@@ -366,6 +395,11 @@ fn init(ruby: &Ruby) -> Result<(), Error> {
366
395
  doc_class.define_method("read_text", method!(RbDoc::read_text, 1))?;
367
396
  doc_class.define_method("read_xml", method!(RbDoc::read_xml, 1))?;
368
397
  doc_class.define_method("read_map", method!(RbDoc::read_map, 1))?;
398
+ doc_class.define_method("pending?", method!(RbDoc::pending, 0))?;
399
+ doc_class.define_method(
400
+ "compacted_state_update",
401
+ method!(RbDoc::compacted_state_update, 0),
402
+ )?;
369
403
  doc_class.define_method("update_ready?", method!(RbDoc::update_ready, 1))?;
370
404
  doc_class.define_method("update_advances?", method!(RbDoc::update_advances, 1))?;
371
405
  doc_class.define_method("sync_step1", method!(RbDoc::sync_step1, 0))?;
@@ -8,7 +8,7 @@ use yrs::encoding::read::{Cursor, Read};
8
8
  use yrs::sync::protocol::MessageReader;
9
9
  use yrs::sync::{Message, SyncMessage};
10
10
  use yrs::updates::decoder::{Decode, DecoderV1};
11
- use yrs::{Doc, ReadTxn, Transact};
11
+ use yrs::{Doc, ReadTxn, StateVector, Transact, Update, WriteTxn};
12
12
 
13
13
  /// Classify a frame: a non-zero code only for exactly one well-formed message
14
14
  /// that consumes the whole buffer (the codes are the match arms below).
@@ -143,21 +143,52 @@ pub(crate) fn update_advances_doc(doc: &Doc, update_bytes: &[u8]) -> Result<bool
143
143
  }
144
144
  }
145
145
 
146
- /// True if the doc holds pending structs or a pending delete set: blocks that
147
- /// couldn't integrate because a dependency is missing. Test-only: asserts the
148
- /// causal-chain parking behavior in the unit tests below.
149
- #[cfg(test)]
150
- pub(crate) fn doc_has_pending(doc: &Doc) -> bool {
146
+ /// True if the doc holds un-integrable pending structs or a pending delete set:
147
+ /// blocks that couldn't integrate because a causally-prior update is missing. A
148
+ /// pure read; does not mutate.
149
+ pub(crate) fn has_pending(doc: &Doc) -> bool {
151
150
  let txn = doc.transact();
152
151
  txn.store().pending_update().is_some() || txn.store().pending_ds().is_some()
153
152
  }
154
153
 
154
+ /// Encode the doc's **integrated** state as a v1 update diffed against `sv`,
155
+ /// excluding any pending (un-integrable) structs and any pending delete set.
156
+ ///
157
+ /// Pending blocks are a recovery buffer, not document state. Serving them across
158
+ /// the sync boundary hands a peer content it can't integrate, so the peer parks
159
+ /// the same pending forever and the state-vector/content mismatch drives endless
160
+ /// resync traffic. `encode_state_as_update_v1` merges pending back in (see yrs
161
+ /// `merge_pending_v1`), so to get a gap-free encode we rebuild the state into a
162
+ /// throwaway doc and `prune_pending` there before re-encoding against `sv`.
163
+ ///
164
+ /// Non-destructive: the prune happens only on the throwaway copy; `doc` keeps its
165
+ /// pending, so a genuine gap still heals if its missing dependency later arrives.
166
+ pub(crate) fn integrated_update(doc: &Doc, sv: &StateVector) -> Result<Vec<u8>, String> {
167
+ // Fast path: with nothing pending the direct encode is already gap-free, so
168
+ // the clean common case skips the rebuild through a throwaway doc.
169
+ if !has_pending(doc) {
170
+ return Ok(doc.transact().encode_state_as_update_v1(sv));
171
+ }
172
+ let full = doc
173
+ .transact()
174
+ .encode_state_as_update_v1(&StateVector::default());
175
+ let clean = Doc::new();
176
+ {
177
+ let mut txn = clean.transact_mut();
178
+ txn.apply_update(Update::decode_v1(&full).map_err(|e| e.to_string())?)
179
+ .map_err(|e| e.to_string())?; // decode/apply failures surface as strings
180
+ txn.prune_pending(); // drops pending on the copy only; `doc` is never mutated
181
+ }
182
+ let out = clean.transact().encode_state_as_update_v1(sv);
183
+ Ok(out)
184
+ }
185
+
155
186
  #[cfg(test)]
156
187
  mod tests {
157
188
  use super::*;
158
189
  use yrs::sync::Awareness;
159
190
  use yrs::updates::encoder::Encode;
160
- use yrs::Text;
191
+ use yrs::{GetString, Text};
161
192
 
162
193
  fn text_update(content: &str) -> Vec<u8> {
163
194
  let doc = Doc::new();
@@ -418,24 +449,213 @@ mod tests {
418
449
  !update_is_ready(&doc, u3).unwrap(),
419
450
  "u3 depends on the missing u2"
420
451
  );
421
- assert!(
422
- !doc_has_pending(&doc),
423
- "nothing pending until u3 is applied"
424
- );
452
+ assert!(!has_pending(&doc), "nothing pending until u3 is applied");
425
453
 
426
454
  // Applying u3 anyway parks it as a pending struct.
427
455
  doc.transact_mut()
428
456
  .apply_update(yrs::Update::decode_v1(u3).unwrap())
429
457
  .unwrap();
430
- assert!(
431
- doc_has_pending(&doc),
432
- "u3 is pending: its parent u2 is missing"
433
- );
458
+ assert!(has_pending(&doc), "u3 is pending: its parent u2 is missing");
434
459
 
435
460
  // Once u2 arrives (via resync), u3 integrates and pending clears.
436
461
  doc.transact_mut()
437
462
  .apply_update(yrs::Update::decode_v1(u2).unwrap())
438
463
  .unwrap();
439
- assert!(!doc_has_pending(&doc), "u2 arrived; u3 integrated");
464
+ assert!(!has_pending(&doc), "u2 arrived; u3 integrated");
465
+ }
466
+
467
+ // Build a causal gap: `first` inserts "a", `dependent` inserts "b" after it,
468
+ // so `dependent` alone parks as pending on a doc that lacks `first`.
469
+ fn gap_pair() -> (Vec<u8>, Vec<u8>) {
470
+ let src = Doc::new();
471
+ let txt = src.get_or_insert_text("notepad");
472
+ txt.insert(&mut src.transact_mut(), 0, "a");
473
+ let first = src
474
+ .transact()
475
+ .encode_state_as_update_v1(&yrs::StateVector::default());
476
+ let sv = src.transact().state_vector();
477
+ txt.insert(&mut src.transact_mut(), 1, "b");
478
+ let dependent = src.transact().encode_state_as_update_v1(&sv);
479
+ (first, dependent)
480
+ }
481
+
482
+ #[test]
483
+ fn integrated_update_strips_pending_and_is_non_destructive() {
484
+ let (_first, dependent) = gap_pair();
485
+ let doc = Doc::new();
486
+ doc.transact_mut()
487
+ .apply_update(yrs::Update::decode_v1(&dependent).unwrap())
488
+ .unwrap();
489
+ assert!(has_pending(&doc), "the gappy update parked as pending");
490
+
491
+ // encode_state_as_update carries the pending; integrated_update does not.
492
+ let full = doc
493
+ .transact()
494
+ .encode_state_as_update_v1(&yrs::StateVector::default());
495
+ let gap_free = integrated_update(&doc, &yrs::StateVector::default()).unwrap();
496
+ assert_ne!(full, gap_free, "integrated_update drops the pending bytes");
497
+
498
+ // Applying the gap-free encode to a fresh peer must NOT poison it.
499
+ let peer = Doc::new();
500
+ peer.transact_mut()
501
+ .apply_update(yrs::Update::decode_v1(&gap_free).unwrap())
502
+ .unwrap();
503
+ assert!(
504
+ !has_pending(&peer),
505
+ "peer got no pending from the gap-free state"
506
+ );
507
+
508
+ // Non-destructive: the source doc keeps its pending (so it can still heal).
509
+ assert!(
510
+ has_pending(&doc),
511
+ "integrated_update did not mutate the source"
512
+ );
513
+ }
514
+
515
+ #[test]
516
+ fn integrated_update_fast_path_matches_direct_encode_when_clean() {
517
+ // No pending -> byte-identical to encode_state_as_update (fast path).
518
+ let (first, _dependent) = gap_pair();
519
+ let doc = Doc::new();
520
+ doc.transact_mut()
521
+ .apply_update(yrs::Update::decode_v1(&first).unwrap())
522
+ .unwrap();
523
+ assert!(!has_pending(&doc));
524
+ let direct = doc
525
+ .transact()
526
+ .encode_state_as_update_v1(&yrs::StateVector::default());
527
+ let via = integrated_update(&doc, &yrs::StateVector::default()).unwrap();
528
+ assert_eq!(direct, via);
529
+ }
530
+
531
+ #[test]
532
+ fn a_healed_gap_serves_its_content() {
533
+ // After the missing dependency arrives, the (formerly pending) content is
534
+ // integrated and integrated_update includes it.
535
+ let (first, dependent) = gap_pair();
536
+ let doc = Doc::new();
537
+ doc.transact_mut()
538
+ .apply_update(yrs::Update::decode_v1(&dependent).unwrap())
539
+ .unwrap();
540
+ doc.transact_mut()
541
+ .apply_update(yrs::Update::decode_v1(&first).unwrap())
542
+ .unwrap();
543
+ assert!(!has_pending(&doc), "gap healed once first arrived");
544
+ let gap_free = integrated_update(&doc, &yrs::StateVector::default()).unwrap();
545
+ let peer = Doc::new();
546
+ peer.transact_mut()
547
+ .apply_update(yrs::Update::decode_v1(&gap_free).unwrap())
548
+ .unwrap();
549
+ assert_eq!(
550
+ peer.get_or_insert_text("notepad")
551
+ .get_string(&peer.transact()),
552
+ "ab"
553
+ );
554
+ }
555
+
556
+ // A gappy insert from its own independent client: inserts two chars and
557
+ // returns only the second delta, which depends on the (missing) first.
558
+ fn independent_gappy_insert() -> Vec<u8> {
559
+ let src = Doc::new();
560
+ let txt = src.get_or_insert_text("notepad");
561
+ txt.insert(&mut src.transact_mut(), 0, "x");
562
+ let sv = src.transact().state_vector();
563
+ txt.insert(&mut src.transact_mut(), 1, "y");
564
+ let txn = src.transact();
565
+ txn.encode_state_as_update_v1(&sv)
566
+ }
567
+
568
+ #[test]
569
+ fn integrated_update_keeps_content_and_drops_pending_when_mixed() {
570
+ // The realistic case: a doc with real integrated content AND a pending
571
+ // struct. Pruning must keep the content and drop only the pending.
572
+ let (first, _dep) = gap_pair();
573
+ let doc = Doc::new();
574
+ doc.transact_mut()
575
+ .apply_update(yrs::Update::decode_v1(&first).unwrap())
576
+ .unwrap(); // integrated "a"
577
+ doc.transact_mut()
578
+ .apply_update(yrs::Update::decode_v1(&independent_gappy_insert()).unwrap())
579
+ .unwrap(); // + a pending struct from another client
580
+ assert!(has_pending(&doc));
581
+
582
+ let gap_free = integrated_update(&doc, &yrs::StateVector::default()).unwrap();
583
+ let peer = Doc::new();
584
+ peer.transact_mut()
585
+ .apply_update(yrs::Update::decode_v1(&gap_free).unwrap())
586
+ .unwrap();
587
+ assert_eq!(
588
+ peer.get_or_insert_text("notepad")
589
+ .get_string(&peer.transact()),
590
+ "a",
591
+ "kept the integrated content"
592
+ );
593
+ assert!(!has_pending(&peer), "dropped the pending");
594
+ }
595
+
596
+ #[test]
597
+ fn integrated_update_diffs_against_a_peer_sv_and_excludes_pending() {
598
+ // The production signature: `handle_sync_message` calls
599
+ // integrated_update(doc, peer_sv). A peer already holding the integrated
600
+ // content should get a diff carrying no new content and no pending.
601
+ let (first, _dep) = gap_pair();
602
+ let server = Doc::new();
603
+ server
604
+ .transact_mut()
605
+ .apply_update(yrs::Update::decode_v1(&first).unwrap())
606
+ .unwrap();
607
+ server
608
+ .transact_mut()
609
+ .apply_update(yrs::Update::decode_v1(&independent_gappy_insert()).unwrap())
610
+ .unwrap();
611
+
612
+ let peer = Doc::new();
613
+ peer.transact_mut()
614
+ .apply_update(yrs::Update::decode_v1(&first).unwrap())
615
+ .unwrap();
616
+ let peer_sv = peer.transact().state_vector();
617
+
618
+ let diff = integrated_update(&server, &peer_sv).unwrap();
619
+ peer.transact_mut()
620
+ .apply_update(yrs::Update::decode_v1(&diff).unwrap())
621
+ .unwrap();
622
+ assert_eq!(
623
+ peer.get_or_insert_text("notepad")
624
+ .get_string(&peer.transact()),
625
+ "a"
626
+ );
627
+ assert!(!has_pending(&peer), "the diff carried no pending");
628
+ }
629
+
630
+ #[test]
631
+ fn integrated_update_strips_a_pending_delete_set() {
632
+ // A deletion whose target struct is absent parks as a pending *delete
633
+ // set* -- the delete-side counterpart to a pending struct.
634
+ let src = Doc::new();
635
+ let txt = src.get_or_insert_text("notepad");
636
+ txt.insert(&mut src.transact_mut(), 0, "z");
637
+ let sv = src.transact().state_vector();
638
+ txt.remove_range(&mut src.transact_mut(), 0, 1);
639
+ let deletion = src.transact().encode_state_as_update_v1(&sv); // delete-only
640
+
641
+ let doc = Doc::new();
642
+ doc.transact_mut()
643
+ .apply_update(yrs::Update::decode_v1(&deletion).unwrap())
644
+ .unwrap();
645
+ assert!(
646
+ has_pending(&doc),
647
+ "the orphan deletion parked as a pending delete set"
648
+ );
649
+
650
+ let gap_free = integrated_update(&doc, &yrs::StateVector::default()).unwrap();
651
+ let peer = Doc::new();
652
+ peer.transact_mut()
653
+ .apply_update(yrs::Update::decode_v1(&gap_free).unwrap())
654
+ .unwrap();
655
+ assert!(!has_pending(&peer), "the pending delete set was not served");
656
+ assert!(
657
+ has_pending(&doc),
658
+ "non-destructive: source keeps its pending"
659
+ );
440
660
  }
441
661
  }
data/lib/y/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Y
4
- VERSION = "0.2.3"
4
+ VERSION = "0.3.0"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yrby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.3
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - JP Camara